[Midnightbsd-cvs] src: contrib/opensolaris: Enter ZFS (userland)

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Wed Oct 15 14:56:17 EDT 2008


Log Message:
-----------
Enter ZFS (userland)

Added Files:
-----------
    src/cddl/contrib/opensolaris:
        OPENSOLARIS.LICENSE (r1.1)
    src/cddl/contrib/opensolaris/cmd/zdb:
        zdb.8 (r1.1)
        zdb.c (r1.1)
        zdb_il.c (r1.1)
    src/cddl/contrib/opensolaris/cmd/zfs:
        zfs.8 (r1.1)
        zfs_iter.c (r1.1)
        zfs_iter.h (r1.1)
        zfs_main.c (r1.1)
        zfs_util.h (r1.1)
    src/cddl/contrib/opensolaris/cmd/zpool:
        zpool.8 (r1.1)
        zpool_iter.c (r1.1)
        zpool_main.c (r1.1)
        zpool_util.c (r1.1)
        zpool_util.h (r1.1)
        zpool_vdev.c (r1.1)
    src/cddl/contrib/opensolaris/cmd/ztest:
        ztest.c (r1.1)
    src/cddl/contrib/opensolaris/head:
        assert.h (r1.1)
        atomic.h (r1.1)
        libintl.h (r1.1)
        stdio_ext.h (r1.1)
        synch.h (r1.1)
        thread.h (r1.1)
    src/cddl/contrib/opensolaris/lib/libnvpair:
        libnvpair.c (r1.1)
        libnvpair.h (r1.1)
        nvpair_alloc_system.c (r1.1)
    src/cddl/contrib/opensolaris/lib/libuutil/common:
        libuutil.h (r1.1)
        libuutil_common.h (r1.1)
        libuutil_impl.h (r1.1)
        uu_alloc.c (r1.1)
        uu_avl.c (r1.1)
        uu_dprintf.c (r1.1)
        uu_ident.c (r1.1)
        uu_list.c (r1.1)
        uu_misc.c (r1.1)
        uu_open.c (r1.1)
        uu_pname.c (r1.1)
        uu_strtoint.c (r1.1)
    src/cddl/contrib/opensolaris/lib/libzfs/common:
        libzfs.h (r1.1)
        libzfs_changelist.c (r1.1)
        libzfs_config.c (r1.1)
        libzfs_dataset.c (r1.1)
        libzfs_graph.c (r1.1)
        libzfs_impl.h (r1.1)
        libzfs_import.c (r1.1)
        libzfs_mount.c (r1.1)
        libzfs_pool.c (r1.1)
        libzfs_status.c (r1.1)
        libzfs_util.c (r1.1)
    src/cddl/contrib/opensolaris/lib/libzpool/common:
        kernel.c (r1.1)
        taskq.c (r1.1)
        util.c (r1.1)
    src/cddl/contrib/opensolaris/lib/libzpool/common/sys:
        zfs_context.h (r1.1)

-------------- next part --------------
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
@@ -0,0 +1,79 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <errno.h>
+#include <libgen.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include "zpool_util.h"
+
+/*
+ * Utility function to guarantee malloc() success.
+ */
+void *
+safe_malloc(size_t size)
+{
+	void *data;
+
+	if ((data = calloc(1, size)) == NULL) {
+		(void) fprintf(stderr, "internal error: out of memory\n");
+		exit(1);
+	}
+
+	return (data);
+}
+
+/*
+ * Same as above, but for strdup()
+ */
+char *
+safe_strdup(const char *str)
+{
+	char *ret;
+
+	if ((ret = strdup(str)) == NULL) {
+		(void) fprintf(stderr, "internal error: out of memory\n");
+		exit(1);
+	}
+
+	return (ret);
+}
+
+/*
+ * Display an out of memory error message and abort the current program.
+ */
+void
+zpool_no_memory(void)
+{
+	assert(errno == ENOMEM);
+	(void) fprintf(stderr,
+	    gettext("internal error: out of memory\n"));
+	exit(1);
+}
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
@@ -0,0 +1,3602 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <solaris.h>
+#include <assert.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <libintl.h>
+#include <libuutil.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <priv.h>
+#include <sys/time.h>
+#include <sys/fs/zfs.h>
+
+#include <sys/stat.h>
+
+#include <libzfs.h>
+
+#include "zpool_util.h"
+
+static int zpool_do_create(int, char **);
+static int zpool_do_destroy(int, char **);
+
+static int zpool_do_add(int, char **);
+static int zpool_do_remove(int, char **);
+
+static int zpool_do_list(int, char **);
+static int zpool_do_iostat(int, char **);
+static int zpool_do_status(int, char **);
+
+static int zpool_do_online(int, char **);
+static int zpool_do_offline(int, char **);
+static int zpool_do_clear(int, char **);
+
+static int zpool_do_attach(int, char **);
+static int zpool_do_detach(int, char **);
+static int zpool_do_replace(int, char **);
+
+static int zpool_do_scrub(int, char **);
+
+static int zpool_do_import(int, char **);
+static int zpool_do_export(int, char **);
+
+static int zpool_do_upgrade(int, char **);
+
+static int zpool_do_history(int, char **);
+
+static int zpool_do_get(int, char **);
+static int zpool_do_set(int, char **);
+
+/*
+ * These libumem hooks provide a reasonable set of defaults for the allocator's
+ * debugging facilities.
+ */
+const char *
+_umem_debug_init(void)
+{
+	return ("default,verbose"); /* $UMEM_DEBUG setting */
+}
+
+const char *
+_umem_logging_init(void)
+{
+	return ("fail,contents"); /* $UMEM_LOGGING setting */
+}
+
+typedef enum {
+	HELP_ADD,
+	HELP_ATTACH,
+	HELP_CLEAR,
+	HELP_CREATE,
+	HELP_DESTROY,
+	HELP_DETACH,
+	HELP_EXPORT,
+	HELP_HISTORY,
+	HELP_IMPORT,
+	HELP_IOSTAT,
+	HELP_LIST,
+	HELP_OFFLINE,
+	HELP_ONLINE,
+	HELP_REPLACE,
+	HELP_REMOVE,
+	HELP_SCRUB,
+	HELP_STATUS,
+	HELP_UPGRADE,
+	HELP_GET,
+	HELP_SET
+} zpool_help_t;
+
+
+typedef struct zpool_command {
+	const char	*name;
+	int		(*func)(int, char **);
+	zpool_help_t	usage;
+} zpool_command_t;
+
+/*
+ * Master command table.  Each ZFS command has a name, associated function, and
+ * usage message.  The usage messages need to be internationalized, so we have
+ * to have a function to return the usage message based on a command index.
+ *
+ * These commands are organized according to how they are displayed in the usage
+ * message.  An empty command (one with a NULL name) indicates an empty line in
+ * the generic usage message.
+ */
+static zpool_command_t command_table[] = {
+	{ "create",	zpool_do_create,	HELP_CREATE		},
+	{ "destroy",	zpool_do_destroy,	HELP_DESTROY		},
+	{ NULL },
+	{ "add",	zpool_do_add,		HELP_ADD		},
+	{ "remove",	zpool_do_remove,	HELP_REMOVE		},
+	{ NULL },
+	{ "list",	zpool_do_list,		HELP_LIST		},
+	{ "iostat",	zpool_do_iostat,	HELP_IOSTAT		},
+	{ "status",	zpool_do_status,	HELP_STATUS		},
+	{ NULL },
+	{ "online",	zpool_do_online,	HELP_ONLINE		},
+	{ "offline",	zpool_do_offline,	HELP_OFFLINE		},
+	{ "clear",	zpool_do_clear,		HELP_CLEAR		},
+	{ NULL },
+	{ "attach",	zpool_do_attach,	HELP_ATTACH		},
+	{ "detach",	zpool_do_detach,	HELP_DETACH		},
+	{ "replace",	zpool_do_replace,	HELP_REPLACE		},
+	{ NULL },
+	{ "scrub",	zpool_do_scrub,		HELP_SCRUB		},
+	{ NULL },
+	{ "import",	zpool_do_import,	HELP_IMPORT		},
+	{ "export",	zpool_do_export,	HELP_EXPORT		},
+	{ "upgrade",	zpool_do_upgrade,	HELP_UPGRADE		},
+	{ NULL },
+	{ "history",	zpool_do_history,	HELP_HISTORY		},
+	{ "get",	zpool_do_get,		HELP_GET		},
+	{ "set",	zpool_do_set,		HELP_SET		},
+};
+
+#define	NCOMMAND	(sizeof (command_table) / sizeof (command_table[0]))
+
+zpool_command_t *current_command;
+
+static const char *
+get_usage(zpool_help_t idx) {
+	switch (idx) {
+	case HELP_ADD:
+		return (gettext("\tadd [-fn] <pool> <vdev> ...\n"));
+	case HELP_ATTACH:
+		return (gettext("\tattach [-f] <pool> <device> "
+		    "<new_device>\n"));
+	case HELP_CLEAR:
+		return (gettext("\tclear <pool> [device]\n"));
+	case HELP_CREATE:
+		return (gettext("\tcreate  [-fn] [-R root] [-m mountpoint] "
+		    "<pool> <vdev> ...\n"));
+	case HELP_DESTROY:
+		return (gettext("\tdestroy [-f] <pool>\n"));
+	case HELP_DETACH:
+		return (gettext("\tdetach <pool> <device>\n"));
+	case HELP_EXPORT:
+		return (gettext("\texport [-f] <pool> ...\n"));
+	case HELP_HISTORY:
+		return (gettext("\thistory [<pool>]\n"));
+	case HELP_IMPORT:
+		return (gettext("\timport [-d dir] [-D]\n"
+		    "\timport [-d dir] [-D] [-f] [-o opts] [-R root] -a\n"
+		    "\timport [-d dir] [-D] [-f] [-o opts] [-R root ]"
+		    " <pool | id> [newpool]\n"));
+	case HELP_IOSTAT:
+		return (gettext("\tiostat [-v] [pool] ... [interval "
+		    "[count]]\n"));
+	case HELP_LIST:
+		return (gettext("\tlist [-H] [-o field[,field]*] "
+		    "[pool] ...\n"));
+	case HELP_OFFLINE:
+		return (gettext("\toffline [-t] <pool> <device> ...\n"));
+	case HELP_ONLINE:
+		return (gettext("\tonline <pool> <device> ...\n"));
+	case HELP_REPLACE:
+		return (gettext("\treplace [-f] <pool> <device> "
+		    "[new_device]\n"));
+	case HELP_REMOVE:
+		return (gettext("\tremove <pool> <device>\n"));
+	case HELP_SCRUB:
+		return (gettext("\tscrub [-s] <pool> ...\n"));
+	case HELP_STATUS:
+		return (gettext("\tstatus [-vx] [pool] ...\n"));
+	case HELP_UPGRADE:
+		return (gettext("\tupgrade\n"
+		    "\tupgrade -v\n"
+		    "\tupgrade <-a | pool>\n"));
+	case HELP_GET:
+		return (gettext("\tget <all | property[,property]...> "
+		    "<pool> ...\n"));
+	case HELP_SET:
+		return (gettext("\tset <property=value> <pool> \n"));
+	}
+
+	abort();
+	/* NOTREACHED */
+}
+
+/*
+ * Fields available for 'zpool list'.
+ */
+typedef enum {
+	ZPOOL_FIELD_NAME,
+	ZPOOL_FIELD_SIZE,
+	ZPOOL_FIELD_USED,
+	ZPOOL_FIELD_AVAILABLE,
+	ZPOOL_FIELD_CAPACITY,
+	ZPOOL_FIELD_HEALTH,
+	ZPOOL_FIELD_ROOT
+} zpool_field_t;
+
+#define	MAX_FIELDS	10
+
+typedef struct column_def {
+	const char	*cd_title;
+	size_t		cd_width;
+	enum {
+		left_justify,
+		right_justify
+	}		cd_justify;
+} column_def_t;
+
+static column_def_t column_table[] = {
+	{ "NAME",	20,	left_justify	},
+	{ "SIZE",	6,	right_justify	},
+	{ "USED",	6,	right_justify	},
+	{ "AVAIL",	6,	right_justify	},
+	{ "CAP",	5,	right_justify	},
+	{ "HEALTH",	9,	left_justify	},
+	{ "ALTROOT",	15,	left_justify	}
+};
+
+static char *column_subopts[] = {
+	"name",
+	"size",
+	"used",
+	"available",
+	"capacity",
+	"health",
+	"root",
+	NULL
+};
+
+/*
+ * Callback routine that will print out a pool property value.
+ */
+static zpool_prop_t
+print_prop_cb(zpool_prop_t prop, void *cb)
+{
+	FILE *fp = cb;
+
+	(void) fprintf(fp, "\t%-13s  ", zpool_prop_to_name(prop));
+
+	if (zpool_prop_values(prop) == NULL)
+		(void) fprintf(fp, "-\n");
+	else
+		(void) fprintf(fp, "%s\n", zpool_prop_values(prop));
+
+	return (ZFS_PROP_CONT);
+}
+
+/*
+ * Display usage message.  If we're inside a command, display only the usage for
+ * that command.  Otherwise, iterate over the entire command table and display
+ * a complete usage message.
+ */
+void
+usage(boolean_t requested)
+{
+	int i;
+	FILE *fp = requested ? stdout : stderr;
+
+	if (current_command == NULL) {
+		int i;
+
+		(void) fprintf(fp, gettext("usage: zpool command args ...\n"));
+		(void) fprintf(fp,
+		    gettext("where 'command' is one of the following:\n\n"));
+
+		for (i = 0; i < NCOMMAND; i++) {
+			if (command_table[i].name == NULL)
+				(void) fprintf(fp, "\n");
+			else
+				(void) fprintf(fp, "%s",
+				    get_usage(command_table[i].usage));
+		}
+	} else {
+		(void) fprintf(fp, gettext("usage:\n"));
+		(void) fprintf(fp, "%s", get_usage(current_command->usage));
+
+		if (strcmp(current_command->name, "list") == 0) {
+			(void) fprintf(fp, gettext("\nwhere 'field' is one "
+			    "of the following:\n\n"));
+
+			for (i = 0; column_subopts[i] != NULL; i++)
+				(void) fprintf(fp, "\t%s\n", column_subopts[i]);
+		}
+	}
+
+	if (current_command != NULL &&
+	    ((strcmp(current_command->name, "set") == 0) ||
+	    (strcmp(current_command->name, "get") == 0))) {
+
+		(void) fprintf(fp,
+		    gettext("\nthe following properties are supported:\n"));
+
+		(void) fprintf(fp, "\n\t%-13s  %s\n\n",
+		    "PROPERTY", "VALUES");
+
+		/* Iterate over all properties */
+		(void) zpool_prop_iter(print_prop_cb, fp, B_FALSE);
+	}
+
+	/*
+	 * See comments at end of main().
+	 */
+	if (getenv("ZFS_ABORT") != NULL) {
+		(void) printf("dumping core by request\n");
+		abort();
+	}
+
+	exit(requested ? 0 : 2);
+}
+
+const char *
+state_to_health(int vs_state)
+{
+	switch (vs_state) {
+	case VDEV_STATE_CLOSED:
+	case VDEV_STATE_CANT_OPEN:
+	case VDEV_STATE_OFFLINE:
+		return (dgettext(TEXT_DOMAIN, "FAULTED"));
+	case VDEV_STATE_DEGRADED:
+		return (dgettext(TEXT_DOMAIN, "DEGRADED"));
+	case VDEV_STATE_HEALTHY:
+		return (dgettext(TEXT_DOMAIN, "ONLINE"));
+	}
+
+	return (dgettext(TEXT_DOMAIN, "UNKNOWN"));
+}
+
+const char *
+state_to_name(vdev_stat_t *vs)
+{
+	switch (vs->vs_state) {
+	case VDEV_STATE_CLOSED:
+	case VDEV_STATE_CANT_OPEN:
+		if (vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
+			return (gettext("FAULTED"));
+		else
+			return (gettext("UNAVAIL"));
+	case VDEV_STATE_OFFLINE:
+		return (gettext("OFFLINE"));
+	case VDEV_STATE_DEGRADED:
+		return (gettext("DEGRADED"));
+	case VDEV_STATE_HEALTHY:
+		return (gettext("ONLINE"));
+	}
+
+	return (gettext("UNKNOWN"));
+}
+
+void
+print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	char *vname;
+
+	if (name != NULL)
+		(void) printf("\t%*s%s\n", indent, "", name);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0)
+		return;
+
+	for (c = 0; c < children; c++) {
+		vname = zpool_vdev_name(g_zfs, zhp, child[c]);
+		print_vdev_tree(zhp, vname, child[c], indent + 2);
+		free(vname);
+	}
+}
+
+/*
+ * zpool add [-fn] <pool> <vdev> ...
+ *
+ *	-f	Force addition of devices, even if they appear in use
+ *	-n	Do not add the devices, but display the resulting layout if
+ *		they were to be added.
+ *
+ * Adds the given vdevs to 'pool'.  As with create, the bulk of this work is
+ * handled by get_vdev_spec(), which constructs the nvlist needed to pass to
+ * libzfs.
+ */
+int
+zpool_do_add(int argc, char **argv)
+{
+	boolean_t force = B_FALSE;
+	boolean_t dryrun = B_FALSE;
+	int c;
+	nvlist_t *nvroot;
+	char *poolname;
+	int ret;
+	zpool_handle_t *zhp;
+	nvlist_t *config;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "fn")) != -1) {
+		switch (c) {
+		case 'f':
+			force = B_TRUE;
+			break;
+		case 'n':
+			dryrun = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* get pool name and check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing vdev specification\n"));
+		usage(B_FALSE);
+	}
+
+	poolname = argv[0];
+
+	argc--;
+	argv++;
+
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
+		return (1);
+
+	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
+		(void) fprintf(stderr, gettext("pool '%s' is unavailable\n"),
+		    poolname);
+		zpool_close(zhp);
+		return (1);
+	}
+
+	/* pass off to get_vdev_spec for processing */
+	nvroot = make_root_vdev(config, force, !force, B_FALSE, argc, argv);
+	if (nvroot == NULL) {
+		zpool_close(zhp);
+		return (1);
+	}
+
+	if (dryrun) {
+		nvlist_t *poolnvroot;
+
+		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+		    &poolnvroot) == 0);
+
+		(void) printf(gettext("would update '%s' to the following "
+		    "configuration:\n"), zpool_get_name(zhp));
+
+		print_vdev_tree(zhp, poolname, poolnvroot, 0);
+		print_vdev_tree(zhp, NULL, nvroot, 0);
+
+		ret = 0;
+	} else {
+		ret = (zpool_add(zhp, nvroot) != 0);
+		if (!ret) {
+			zpool_log_history(g_zfs, argc + 1 + optind,
+			    argv - 1 - optind, poolname, B_TRUE, B_FALSE);
+		}
+	}
+
+	nvlist_free(nvroot);
+	zpool_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * zpool remove <pool> <vdev>
+ *
+ * Removes the given vdev from the pool.  Currently, this only supports removing
+ * spares from the pool.  Eventually, we'll want to support removing leaf vdevs
+ * (as an alias for 'detach') as well as toplevel vdevs.
+ */
+int
+zpool_do_remove(int argc, char **argv)
+{
+	char *poolname;
+	int ret;
+	zpool_handle_t *zhp;
+
+	argc--;
+	argv++;
+
+	/* get pool name and check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing device\n"));
+		usage(B_FALSE);
+	}
+
+	poolname = argv[0];
+
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
+		return (1);
+
+	ret = (zpool_vdev_remove(zhp, argv[1]) != 0);
+	if (!ret) {
+		zpool_log_history(g_zfs, ++argc, --argv, poolname, B_TRUE,
+		    B_FALSE);
+	}
+
+	return (ret);
+}
+
+/*
+ * zpool create [-fn] [-R root] [-m mountpoint] <pool> <dev> ...
+ *
+ *	-f	Force creation, even if devices appear in use
+ *	-n	Do not create the pool, but display the resulting layout if it
+ *		were to be created.
+ *      -R	Create a pool under an alternate root
+ *      -m	Set default mountpoint for the root dataset.  By default it's
+ *      	'/<pool>'
+ *
+ * Creates the named pool according to the given vdev specification.  The
+ * bulk of the vdev processing is done in get_vdev_spec() in zpool_vdev.c.  Once
+ * we get the nvlist back from get_vdev_spec(), we either print out the contents
+ * (if '-n' was specified), or pass it to libzfs to do the creation.
+ */
+int
+zpool_do_create(int argc, char **argv)
+{
+	boolean_t force = B_FALSE;
+	boolean_t dryrun = B_FALSE;
+	int c;
+	nvlist_t *nvroot;
+	char *poolname;
+	int ret;
+	char *altroot = NULL;
+	char *mountpoint = NULL;
+	nvlist_t **child;
+	uint_t children;
+
+	/* check options */
+	while ((c = getopt(argc, argv, ":fnR:m:")) != -1) {
+		switch (c) {
+		case 'f':
+			force = B_TRUE;
+			break;
+		case 'n':
+			dryrun = B_TRUE;
+			break;
+		case 'R':
+			altroot = optarg;
+			break;
+		case 'm':
+			mountpoint = optarg;
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* get pool name and check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing vdev specification\n"));
+		usage(B_FALSE);
+	}
+
+	poolname = argv[0];
+
+	/*
+	 * As a special case, check for use of '/' in the name, and direct the
+	 * user to use 'zfs create' instead.
+	 */
+	if (strchr(poolname, '/') != NULL) {
+		(void) fprintf(stderr, gettext("cannot create '%s': invalid "
+		    "character '/' in pool name\n"), poolname);
+		(void) fprintf(stderr, gettext("use 'zfs create' to "
+		    "create a dataset\n"));
+		return (1);
+	}
+
+	/* pass off to get_vdev_spec for bulk processing */
+	nvroot = make_root_vdev(NULL, force, !force, B_FALSE, argc - 1,
+	    argv + 1);
+	if (nvroot == NULL)
+		return (1);
+
+	/* make_root_vdev() allows 0 toplevel children if there are spares */
+	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0);
+	if (children == 0) {
+		(void) fprintf(stderr, gettext("invalid vdev "
+		    "specification: at least one toplevel vdev must be "
+		    "specified\n"));
+		return (1);
+	}
+
+
+	if (altroot != NULL && altroot[0] != '/') {
+		(void) fprintf(stderr, gettext("invalid alternate root '%s': "
+		    "must be an absolute path\n"), altroot);
+		nvlist_free(nvroot);
+		return (1);
+	}
+
+	/*
+	 * Check the validity of the mountpoint and direct the user to use the
+	 * '-m' mountpoint option if it looks like it's in use.
+	 */
+	if (mountpoint == NULL ||
+	    (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 &&
+	    strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) {
+		char buf[MAXPATHLEN];
+		struct stat64 statbuf;
+
+		if (mountpoint && mountpoint[0] != '/') {
+			(void) fprintf(stderr, gettext("invalid mountpoint "
+			    "'%s': must be an absolute path, 'legacy', or "
+			    "'none'\n"), mountpoint);
+			nvlist_free(nvroot);
+			return (1);
+		}
+
+		if (mountpoint == NULL) {
+			if (altroot != NULL)
+				(void) snprintf(buf, sizeof (buf), "%s/%s",
+				    altroot, poolname);
+			else
+				(void) snprintf(buf, sizeof (buf), "/%s",
+				    poolname);
+		} else {
+			if (altroot != NULL)
+				(void) snprintf(buf, sizeof (buf), "%s%s",
+				    altroot, mountpoint);
+			else
+				(void) snprintf(buf, sizeof (buf), "%s",
+				    mountpoint);
+		}
+
+		if (stat64(buf, &statbuf) == 0 &&
+		    statbuf.st_nlink != 2) {
+			if (mountpoint == NULL)
+				(void) fprintf(stderr, gettext("default "
+				    "mountpoint '%s' exists and is not "
+				    "empty\n"), buf);
+			else
+				(void) fprintf(stderr, gettext("mountpoint "
+				    "'%s' exists and is not empty\n"), buf);
+			(void) fprintf(stderr, gettext("use '-m' "
+			    "option to provide a different default\n"));
+			nvlist_free(nvroot);
+			return (1);
+		}
+	}
+
+
+	if (dryrun) {
+		/*
+		 * For a dry run invocation, print out a basic message and run
+		 * through all the vdevs in the list, printing them out in an
+		 * appropriate hierarchy.
+		 */
+		(void) printf(gettext("would create '%s' with the "
+		    "following layout:\n\n"), poolname);
+
+		print_vdev_tree(NULL, poolname, nvroot, 0);
+
+		ret = 0;
+	} else {
+		ret = 1;
+		/*
+		 * Hand off to libzfs.
+		 */
+		if (zpool_create(g_zfs, poolname, nvroot, altroot) == 0) {
+			zfs_handle_t *pool = zfs_open(g_zfs, poolname,
+			    ZFS_TYPE_FILESYSTEM);
+			if (pool != NULL) {
+				if (mountpoint != NULL)
+					verify(zfs_prop_set(pool,
+					    zfs_prop_to_name(
+					    ZFS_PROP_MOUNTPOINT),
+					    mountpoint) == 0);
+				if (zfs_mount(pool, NULL, 0) == 0)
+					ret = zfs_share_nfs(pool);
+				zfs_close(pool);
+			}
+			zpool_log_history(g_zfs, argc + optind, argv - optind,
+			    poolname, B_TRUE, B_TRUE);
+		} else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) {
+			(void) fprintf(stderr, gettext("pool name may have "
+			    "been omitted\n"));
+		}
+	}
+
+	nvlist_free(nvroot);
+
+	return (ret);
+}
+
+/*
+ * zpool destroy <pool>
+ *
+ * 	-f	Forcefully unmount any datasets
+ *
+ * Destroy the given pool.  Automatically unmounts any datasets in the pool.
+ */
+int
+zpool_do_destroy(int argc, char **argv)
+{
+	boolean_t force = B_FALSE;
+	int c;
+	char *pool;
+	zpool_handle_t *zhp;
+	int ret;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "f")) != -1) {
+		switch (c) {
+		case 'f':
+			force = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	pool = argv[0];
+
+	if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) {
+		/*
+		 * As a special case, check for use of '/' in the name, and
+		 * direct the user to use 'zfs destroy' instead.
+		 */
+		if (strchr(pool, '/') != NULL)
+			(void) fprintf(stderr, gettext("use 'zfs destroy' to "
+			    "destroy a dataset\n"));
+		return (1);
+	}
+
+	if (zpool_disable_datasets(zhp, force) != 0) {
+		(void) fprintf(stderr, gettext("could not destroy '%s': "
+		    "could not unmount datasets\n"), zpool_get_name(zhp));
+		return (1);
+	}
+
+	zpool_log_history(g_zfs, argc + optind, argv - optind, pool, B_TRUE,
+	    B_FALSE);
+
+	ret = (zpool_destroy(zhp) != 0);
+
+	zpool_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * zpool export [-f] <pool> ...
+ *
+ *	-f	Forcefully unmount datasets
+ *
+ * Export the given pools.  By default, the command will attempt to cleanly
+ * unmount any active datasets within the pool.  If the '-f' flag is specified,
+ * then the datasets will be forcefully unmounted.
+ */
+int
+zpool_do_export(int argc, char **argv)
+{
+	boolean_t force = B_FALSE;
+	int c;
+	zpool_handle_t *zhp;
+	int ret;
+	int i;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "f")) != -1) {
+		switch (c) {
+		case 'f':
+			force = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool argument\n"));
+		usage(B_FALSE);
+	}
+
+	ret = 0;
+	for (i = 0; i < argc; i++) {
+		if ((zhp = zpool_open_canfail(g_zfs, argv[i])) == NULL) {
+			ret = 1;
+			continue;
+		}
+
+		if (zpool_disable_datasets(zhp, force) != 0) {
+			ret = 1;
+			zpool_close(zhp);
+			continue;
+		}
+
+		zpool_log_history(g_zfs, argc + optind, argv - optind, argv[i],
+		    B_TRUE, B_FALSE);
+
+		if (zpool_export(zhp) != 0)
+			ret = 1;
+
+		zpool_close(zhp);
+	}
+
+	return (ret);
+}
+
+/*
+ * Given a vdev configuration, determine the maximum width needed for the device
+ * name column.
+ */
+static int
+max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
+{
+	char *name = zpool_vdev_name(g_zfs, zhp, nv);
+	nvlist_t **child;
+	uint_t c, children;
+	int ret;
+
+	if (strlen(name) + depth > max)
+		max = strlen(name) + depth;
+
+	free(name);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			if ((ret = max_width(zhp, child[c], depth + 2,
+			    max)) > max)
+				max = ret;
+	}
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			if ((ret = max_width(zhp, child[c], depth + 2,
+			    max)) > max)
+				max = ret;
+	}
+
+
+	return (max);
+}
+
+
+/*
+ * Print the configuration of an exported pool.  Iterate over all vdevs in the
+ * pool, printing out the name and status for each one.
+ */
+void
+print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	vdev_stat_t *vs;
+	char *type, *vname;
+
+	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
+	if (strcmp(type, VDEV_TYPE_MISSING) == 0)
+		return;
+
+	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&vs, &c) == 0);
+
+	(void) printf("\t%*s%-*s", depth, "", namewidth - depth, name);
+
+	if (vs->vs_aux != 0) {
+		(void) printf("  %-8s  ", state_to_name(vs));
+
+		switch (vs->vs_aux) {
+		case VDEV_AUX_OPEN_FAILED:
+			(void) printf(gettext("cannot open"));
+			break;
+
+		case VDEV_AUX_BAD_GUID_SUM:
+			(void) printf(gettext("missing device"));
+			break;
+
+		case VDEV_AUX_NO_REPLICAS:
+			(void) printf(gettext("insufficient replicas"));
+			break;
+
+		case VDEV_AUX_VERSION_NEWER:
+			(void) printf(gettext("newer version"));
+			break;
+
+		default:
+			(void) printf(gettext("corrupted data"));
+			break;
+		}
+	} else {
+		(void) printf("  %s", state_to_name(vs));
+	}
+	(void) printf("\n");
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0)
+		return;
+
+	for (c = 0; c < children; c++) {
+		vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+		print_import_config(vname, child[c],
+		    namewidth, depth + 2);
+		free(vname);
+	}
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &child, &children) != 0)
+		return;
+
+	(void) printf(gettext("\tspares\n"));
+	for (c = 0; c < children; c++) {
+		vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+		(void) printf("\t  %s\n", vname);
+		free(vname);
+	}
+}
+
+/*
+ * Display the status for the given pool.
+ */
+static void
+show_import(nvlist_t *config)
+{
+	uint64_t pool_state;
+	vdev_stat_t *vs;
+	char *name;
+	uint64_t guid;
+	char *msgid;
+	nvlist_t *nvroot;
+	int reason;
+	const char *health;
+	uint_t vsc;
+	int namewidth;
+
+	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+	    &name) == 0);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+	    &guid) == 0);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    &pool_state) == 0);
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&vs, &vsc) == 0);
+	health = state_to_health(vs->vs_state);
+
+	reason = zpool_import_status(config, &msgid);
+
+	(void) printf(gettext("  pool: %s\n"), name);
+	(void) printf(gettext("    id: %llu\n"), (u_longlong_t)guid);
+	(void) printf(gettext(" state: %s"), health);
+	if (pool_state == POOL_STATE_DESTROYED)
+		(void) printf(gettext(" (DESTROYED)"));
+	(void) printf("\n");
+
+	switch (reason) {
+	case ZPOOL_STATUS_MISSING_DEV_R:
+	case ZPOOL_STATUS_MISSING_DEV_NR:
+	case ZPOOL_STATUS_BAD_GUID_SUM:
+		(void) printf(gettext("status: One or more devices are missing "
+		    "from the system.\n"));
+		break;
+
+	case ZPOOL_STATUS_CORRUPT_LABEL_R:
+	case ZPOOL_STATUS_CORRUPT_LABEL_NR:
+		(void) printf(gettext("status: One or more devices contain "
+		    "corrupted data.\n"));
+		break;
+
+	case ZPOOL_STATUS_CORRUPT_DATA:
+		(void) printf(gettext("status: The pool data is corrupted.\n"));
+		break;
+
+	case ZPOOL_STATUS_OFFLINE_DEV:
+		(void) printf(gettext("status: One or more devices "
+		    "are offlined.\n"));
+		break;
+
+	case ZPOOL_STATUS_CORRUPT_POOL:
+		(void) printf(gettext("status: The pool metadata is "
+		    "corrupted.\n"));
+		break;
+
+	case ZPOOL_STATUS_VERSION_OLDER:
+		(void) printf(gettext("status: The pool is formatted using an "
+		    "older on-disk version.\n"));
+		break;
+
+	case ZPOOL_STATUS_VERSION_NEWER:
+		(void) printf(gettext("status: The pool is formatted using an "
+		    "incompatible version.\n"));
+		break;
+
+	case ZPOOL_STATUS_HOSTID_MISMATCH:
+		(void) printf(gettext("status: The pool was last accessed by "
+		    "another system.\n"));
+		break;
+	default:
+		/*
+		 * No other status can be seen when importing pools.
+		 */
+		assert(reason == ZPOOL_STATUS_OK);
+	}
+
+	/*
+	 * Print out an action according to the overall state of the pool.
+	 */
+	if (vs->vs_state == VDEV_STATE_HEALTHY) {
+		if (reason == ZPOOL_STATUS_VERSION_OLDER)
+			(void) printf(gettext("action: The pool can be "
+			    "imported using its name or numeric identifier, "
+			    "though\n\tsome features will not be available "
+			    "without an explicit 'zpool upgrade'.\n"));
+		else if (reason == ZPOOL_STATUS_HOSTID_MISMATCH)
+			(void) printf(gettext("action: The pool can be "
+			    "imported using its name or numeric "
+			    "identifier and\n\tthe '-f' flag.\n"));
+		else
+			(void) printf(gettext("action: The pool can be "
+			    "imported using its name or numeric "
+			    "identifier.\n"));
+	} else if (vs->vs_state == VDEV_STATE_DEGRADED) {
+		(void) printf(gettext("action: The pool can be imported "
+		    "despite missing or damaged devices.  The\n\tfault "
+		    "tolerance of the pool may be compromised if imported.\n"));
+	} else {
+		switch (reason) {
+		case ZPOOL_STATUS_VERSION_NEWER:
+			(void) printf(gettext("action: The pool cannot be "
+			    "imported.  Access the pool on a system running "
+			    "newer\n\tsoftware, or recreate the pool from "
+			    "backup.\n"));
+			break;
+		case ZPOOL_STATUS_MISSING_DEV_R:
+		case ZPOOL_STATUS_MISSING_DEV_NR:
+		case ZPOOL_STATUS_BAD_GUID_SUM:
+			(void) printf(gettext("action: The pool cannot be "
+			    "imported. Attach the missing\n\tdevices and try "
+			    "again.\n"));
+			break;
+		default:
+			(void) printf(gettext("action: The pool cannot be "
+			    "imported due to damaged devices or data.\n"));
+		}
+	}
+
+	/*
+	 * If the state is "closed" or "can't open", and the aux state
+	 * is "corrupt data":
+	 */
+	if (((vs->vs_state == VDEV_STATE_CLOSED) ||
+	    (vs->vs_state == VDEV_STATE_CANT_OPEN)) &&
+	    (vs->vs_aux == VDEV_AUX_CORRUPT_DATA)) {
+		if (pool_state == POOL_STATE_DESTROYED)
+			(void) printf(gettext("\tThe pool was destroyed, "
+			    "but can be imported using the '-Df' flags.\n"));
+		else if (pool_state != POOL_STATE_EXPORTED)
+			(void) printf(gettext("\tThe pool may be active on "
+			    "another system, but can be imported using\n\t"
+			    "the '-f' flag.\n"));
+	}
+
+	if (msgid != NULL)
+		(void) printf(gettext("   see: http://www.sun.com/msg/%s\n"),
+		    msgid);
+
+	(void) printf(gettext("config:\n\n"));
+
+	namewidth = max_width(NULL, nvroot, 0, 0);
+	if (namewidth < 10)
+		namewidth = 10;
+	print_import_config(name, nvroot, namewidth, 0);
+
+	if (reason == ZPOOL_STATUS_BAD_GUID_SUM) {
+		(void) printf(gettext("\n\tAdditional devices are known to "
+		    "be part of this pool, though their\n\texact "
+		    "configuration cannot be determined.\n"));
+	}
+}
+
+/*
+ * Perform the import for the given configuration.  This passes the heavy
+ * lifting off to zpool_import(), and then mounts the datasets contained within
+ * the pool.
+ */
+static int
+do_import(nvlist_t *config, const char *newname, const char *mntopts,
+    const char *altroot, int force, int argc, char **argv)
+{
+	zpool_handle_t *zhp;
+	char *name;
+	uint64_t state;
+	uint64_t version;
+
+	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+	    &name) == 0);
+
+	verify(nvlist_lookup_uint64(config,
+	    ZPOOL_CONFIG_POOL_STATE, &state) == 0);
+	verify(nvlist_lookup_uint64(config,
+	    ZPOOL_CONFIG_VERSION, &version) == 0);
+	if (version > ZFS_VERSION) {
+		(void) fprintf(stderr, gettext("cannot import '%s': pool "
+		    "is formatted using a newer ZFS version\n"), name);
+		return (1);
+	} else if (state != POOL_STATE_EXPORTED && !force) {
+		uint64_t hostid;
+
+		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID,
+		    &hostid) == 0) {
+			if ((unsigned long)hostid != gethostid()) {
+				char *hostname;
+				uint64_t timestamp;
+				time_t t;
+
+				verify(nvlist_lookup_string(config,
+				    ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
+				verify(nvlist_lookup_uint64(config,
+				    ZPOOL_CONFIG_TIMESTAMP, &timestamp) == 0);
+				t = timestamp;
+				(void) fprintf(stderr, gettext("cannot import "
+				    "'%s': pool may be in use from another "
+				    "system, it was last accessed by %s "
+				    "(hostid: 0x%lx) on %s"), name, hostname,
+				    (unsigned long)hostid,
+				    asctime(localtime(&t)));
+				(void) fprintf(stderr, gettext("use '-f' to "
+				    "import anyway\n"));
+				return (1);
+			}
+		} else {
+			(void) fprintf(stderr, gettext("cannot import '%s': "
+			    "pool may be in use from another system\n"), name);
+			(void) fprintf(stderr, gettext("use '-f' to import "
+			    "anyway\n"));
+			return (1);
+		}
+	}
+
+	if (zpool_import(g_zfs, config, newname, altroot) != 0)
+		return (1);
+
+	if (newname != NULL)
+		name = (char *)newname;
+
+	zpool_log_history(g_zfs, argc, argv, name, B_TRUE, B_FALSE);
+
+	verify((zhp = zpool_open(g_zfs, name)) != NULL);
+
+	if (zpool_enable_datasets(zhp, mntopts, 0) != 0) {
+		zpool_close(zhp);
+		return (1);
+	}
+
+	zpool_close(zhp);
+	return (0);
+}
+
+/*
+ * zpool import [-d dir] [-D]
+ *       import [-R root] [-D] [-d dir] [-f] -a
+ *       import [-R root] [-D] [-d dir] [-f] <pool | id> [newpool]
+ *
+ *       -d	Scan in a specific directory, other than /dev.  More than
+ *		one directory can be specified using multiple '-d' options.
+ *
+ *       -D     Scan for previously destroyed pools, or import all or only
+ *              the specified destroyed pools.
+ *
+ *       -R	Temporarily import the pool, with all mountpoints relative to
+ *		the given root.  The pool will remain exported when the machine
+ *		is rebooted.
+ *
+ *       -f	Force import, even if it appears that the pool is active.
+ *
+ *       -a	Import all pools found.
+ *
+ * The import command scans for pools to import, and imports pools based on pool
+ * name and GUID.  The pool can also be renamed as part of the import process.
+ */
+int
+zpool_do_import(int argc, char **argv)
+{
+	char **searchdirs = NULL;
+	int nsearch = 0;
+	int c;
+	int err;
+	nvlist_t *pools;
+	boolean_t do_all = B_FALSE;
+	boolean_t do_destroyed = B_FALSE;
+	char *altroot = NULL;
+	char *mntopts = NULL;
+	boolean_t do_force = B_FALSE;
+	nvpair_t *elem;
+	nvlist_t *config;
+	uint64_t searchguid;
+	char *searchname;
+	nvlist_t *found_config;
+	boolean_t first;
+	uint64_t pool_state;
+
+	/* check options */
+	while ((c = getopt(argc, argv, ":Dfd:R:ao:")) != -1) {
+		switch (c) {
+		case 'a':
+			do_all = B_TRUE;
+			break;
+		case 'd':
+			if (searchdirs == NULL) {
+				searchdirs = safe_malloc(sizeof (char *));
+			} else {
+				char **tmp = safe_malloc((nsearch + 1) *
+				    sizeof (char *));
+				bcopy(searchdirs, tmp, nsearch *
+				    sizeof (char *));
+				free(searchdirs);
+				searchdirs = tmp;
+			}
+			searchdirs[nsearch++] = optarg;
+			break;
+		case 'D':
+			do_destroyed = B_TRUE;
+			break;
+		case 'f':
+			do_force = B_TRUE;
+			break;
+		case 'o':
+			mntopts = optarg;
+			break;
+		case 'R':
+			altroot = optarg;
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (searchdirs == NULL) {
+		searchdirs = safe_malloc(sizeof (char *));
+		searchdirs[0] = "/dev";
+		nsearch = 1;
+	}
+
+	/* check argument count */
+	if (do_all) {
+		if (argc != 0) {
+			(void) fprintf(stderr, gettext("too many arguments\n"));
+			usage(B_FALSE);
+		}
+	} else {
+		if (argc > 2) {
+			(void) fprintf(stderr, gettext("too many arguments\n"));
+			usage(B_FALSE);
+		}
+
+		/*
+		 * Check for the SYS_CONFIG privilege.  We do this explicitly
+		 * here because otherwise any attempt to discover pools will
+		 * silently fail.
+		 */
+		if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) {
+			(void) fprintf(stderr, gettext("cannot "
+			    "discover pools: permission denied\n"));
+			free(searchdirs);
+			return (1);
+		}
+	}
+
+	if ((pools = zpool_find_import(g_zfs, nsearch, searchdirs)) == NULL) {
+		free(searchdirs);
+		return (1);
+	}
+
+	/*
+	 * We now have a list of all available pools in the given directories.
+	 * Depending on the arguments given, we do one of the following:
+	 *
+	 *	<none>	Iterate through all pools and display information about
+	 *		each one.
+	 *
+	 *	-a	Iterate through all pools and try to import each one.
+	 *
+	 *	<id>	Find the pool that corresponds to the given GUID/pool
+	 *		name and import that one.
+	 *
+	 *	-D	The above options apply only to destroyed pools.
+	 */
+	if (argc != 0) {
+		char *endptr;
+
+		errno = 0;
+		searchguid = strtoull(argv[0], &endptr, 10);
+		if (errno != 0 || *endptr != '\0')
+			searchname = argv[0];
+		else
+			searchname = NULL;
+		found_config = NULL;
+	}
+
+	err = 0;
+	elem = NULL;
+	first = B_TRUE;
+	while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
+
+		verify(nvpair_value_nvlist(elem, &config) == 0);
+
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+		    &pool_state) == 0);
+		if (!do_destroyed && pool_state == POOL_STATE_DESTROYED)
+			continue;
+		if (do_destroyed && pool_state != POOL_STATE_DESTROYED)
+			continue;
+
+		if (argc == 0) {
+			if (first)
+				first = B_FALSE;
+			else if (!do_all)
+				(void) printf("\n");
+
+			if (do_all)
+				err |= do_import(config, NULL, mntopts,
+				    altroot, do_force, argc + optind,
+				    argv - optind);
+			else
+				show_import(config);
+		} else if (searchname != NULL) {
+			char *name;
+
+			/*
+			 * We are searching for a pool based on name.
+			 */
+			verify(nvlist_lookup_string(config,
+			    ZPOOL_CONFIG_POOL_NAME, &name) == 0);
+
+			if (strcmp(name, searchname) == 0) {
+				if (found_config != NULL) {
+					(void) fprintf(stderr, gettext(
+					    "cannot import '%s': more than "
+					    "one matching pool\n"), searchname);
+					(void) fprintf(stderr, gettext(
+					    "import by numeric ID instead\n"));
+					err = B_TRUE;
+				}
+				found_config = config;
+			}
+		} else {
+			uint64_t guid;
+
+			/*
+			 * Search for a pool by guid.
+			 */
+			verify(nvlist_lookup_uint64(config,
+			    ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
+
+			if (guid == searchguid)
+				found_config = config;
+		}
+	}
+
+	/*
+	 * If we were searching for a specific pool, verify that we found a
+	 * pool, and then do the import.
+	 */
+	if (argc != 0 && err == 0) {
+		if (found_config == NULL) {
+			(void) fprintf(stderr, gettext("cannot import '%s': "
+			    "no such pool available\n"), argv[0]);
+			err = B_TRUE;
+		} else {
+			err |= do_import(found_config, argc == 1 ? NULL :
+			    argv[1], mntopts, altroot, do_force, argc + optind,
+			    argv - optind);
+		}
+	}
+
+	/*
+	 * If we were just looking for pools, report an error if none were
+	 * found.
+	 */
+	if (argc == 0 && first)
+		(void) fprintf(stderr,
+		    gettext("no pools available to import\n"));
+
+	nvlist_free(pools);
+	free(searchdirs);
+
+	return (err ? 1 : 0);
+}
+
+typedef struct iostat_cbdata {
+	zpool_list_t *cb_list;
+	int cb_verbose;
+	int cb_iteration;
+	int cb_namewidth;
+} iostat_cbdata_t;
+
+static void
+print_iostat_separator(iostat_cbdata_t *cb)
+{
+	int i = 0;
+
+	for (i = 0; i < cb->cb_namewidth; i++)
+		(void) printf("-");
+	(void) printf("  -----  -----  -----  -----  -----  -----\n");
+}
+
+static void
+print_iostat_header(iostat_cbdata_t *cb)
+{
+	(void) printf("%*s     capacity     operations    bandwidth\n",
+	    cb->cb_namewidth, "");
+	(void) printf("%-*s   used  avail   read  write   read  write\n",
+	    cb->cb_namewidth, "pool");
+	print_iostat_separator(cb);
+}
+
+/*
+ * Display a single statistic.
+ */
+void
+print_one_stat(uint64_t value)
+{
+	char buf[64];
+
+	zfs_nicenum(value, buf, sizeof (buf));
+	(void) printf("  %5s", buf);
+}
+
+/*
+ * Print out all the statistics for the given vdev.  This can either be the
+ * toplevel configuration, or called recursively.  If 'name' is NULL, then this
+ * is a verbose output, and we don't want to display the toplevel pool stats.
+ */
+void
+print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
+    nvlist_t *newnv, iostat_cbdata_t *cb, int depth)
+{
+	nvlist_t **oldchild, **newchild;
+	uint_t c, children;
+	vdev_stat_t *oldvs, *newvs;
+	vdev_stat_t zerovs = { 0 };
+	uint64_t tdelta;
+	double scale;
+	char *vname;
+
+	if (oldnv != NULL) {
+		verify(nvlist_lookup_uint64_array(oldnv, ZPOOL_CONFIG_STATS,
+		    (uint64_t **)&oldvs, &c) == 0);
+	} else {
+		oldvs = &zerovs;
+	}
+
+	verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&newvs, &c) == 0);
+
+	if (strlen(name) + depth > cb->cb_namewidth)
+		(void) printf("%*s%s", depth, "", name);
+	else
+		(void) printf("%*s%s%*s", depth, "", name,
+		    (int)(cb->cb_namewidth - strlen(name) - depth), "");
+
+	tdelta = newvs->vs_timestamp - oldvs->vs_timestamp;
+
+	if (tdelta == 0)
+		scale = 1.0;
+	else
+		scale = (double)NANOSEC / tdelta;
+
+	/* only toplevel vdevs have capacity stats */
+	if (newvs->vs_space == 0) {
+		(void) printf("      -      -");
+	} else {
+		print_one_stat(newvs->vs_alloc);
+		print_one_stat(newvs->vs_space - newvs->vs_alloc);
+	}
+
+	print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_READ] -
+	    oldvs->vs_ops[ZIO_TYPE_READ])));
+
+	print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_WRITE] -
+	    oldvs->vs_ops[ZIO_TYPE_WRITE])));
+
+	print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_READ] -
+	    oldvs->vs_bytes[ZIO_TYPE_READ])));
+
+	print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_WRITE] -
+	    oldvs->vs_bytes[ZIO_TYPE_WRITE])));
+
+	(void) printf("\n");
+
+	if (!cb->cb_verbose)
+		return;
+
+	if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_CHILDREN,
+	    &newchild, &children) != 0)
+		return;
+
+	if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_CHILDREN,
+	    &oldchild, &c) != 0)
+		return;
+
+	for (c = 0; c < children; c++) {
+		vname = zpool_vdev_name(g_zfs, zhp, newchild[c]);
+		print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
+		    newchild[c], cb, depth + 2);
+		free(vname);
+	}
+}
+
+static int
+refresh_iostat(zpool_handle_t *zhp, void *data)
+{
+	iostat_cbdata_t *cb = data;
+	boolean_t missing;
+
+	/*
+	 * If the pool has disappeared, remove it from the list and continue.
+	 */
+	if (zpool_refresh_stats(zhp, &missing) != 0)
+		return (-1);
+
+	if (missing)
+		pool_list_remove(cb->cb_list, zhp);
+
+	return (0);
+}
+
+/*
+ * Callback to print out the iostats for the given pool.
+ */
+int
+print_iostat(zpool_handle_t *zhp, void *data)
+{
+	iostat_cbdata_t *cb = data;
+	nvlist_t *oldconfig, *newconfig;
+	nvlist_t *oldnvroot, *newnvroot;
+
+	newconfig = zpool_get_config(zhp, &oldconfig);
+
+	if (cb->cb_iteration == 1)
+		oldconfig = NULL;
+
+	verify(nvlist_lookup_nvlist(newconfig, ZPOOL_CONFIG_VDEV_TREE,
+	    &newnvroot) == 0);
+
+	if (oldconfig == NULL)
+		oldnvroot = NULL;
+	else
+		verify(nvlist_lookup_nvlist(oldconfig, ZPOOL_CONFIG_VDEV_TREE,
+		    &oldnvroot) == 0);
+
+	/*
+	 * Print out the statistics for the pool.
+	 */
+	print_vdev_stats(zhp, zpool_get_name(zhp), oldnvroot, newnvroot, cb, 0);
+
+	if (cb->cb_verbose)
+		print_iostat_separator(cb);
+
+	return (0);
+}
+
+int
+get_namewidth(zpool_handle_t *zhp, void *data)
+{
+	iostat_cbdata_t *cb = data;
+	nvlist_t *config, *nvroot;
+
+	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
+		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+		    &nvroot) == 0);
+		if (!cb->cb_verbose)
+			cb->cb_namewidth = strlen(zpool_get_name(zhp));
+		else
+			cb->cb_namewidth = max_width(zhp, nvroot, 0, 0);
+	}
+
+	/*
+	 * The width must fall into the range [10,38].  The upper limit is the
+	 * maximum we can have and still fit in 80 columns.
+	 */
+	if (cb->cb_namewidth < 10)
+		cb->cb_namewidth = 10;
+	if (cb->cb_namewidth > 38)
+		cb->cb_namewidth = 38;
+
+	return (0);
+}
+
+/*
+ * zpool iostat [-v] [pool] ... [interval [count]]
+ *
+ *	-v	Display statistics for individual vdevs
+ *
+ * This command can be tricky because we want to be able to deal with pool
+ * creation/destruction as well as vdev configuration changes.  The bulk of this
+ * processing is handled by the pool_list_* routines in zpool_iter.c.  We rely
+ * on pool_list_update() to detect the addition of new pools.  Configuration
+ * changes are all handled within libzfs.
+ */
+int
+zpool_do_iostat(int argc, char **argv)
+{
+	int c;
+	int ret;
+	int npools;
+	unsigned long interval = 0, count = 0;
+	zpool_list_t *list;
+	boolean_t verbose = B_FALSE;
+	iostat_cbdata_t cb;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "v")) != -1) {
+		switch (c) {
+		case 'v':
+			verbose = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/*
+	 * Determine if the last argument is an integer or a pool name
+	 */
+	if (argc > 0 && isdigit(argv[argc - 1][0])) {
+		char *end;
+
+		errno = 0;
+		interval = strtoul(argv[argc - 1], &end, 10);
+
+		if (*end == '\0' && errno == 0) {
+			if (interval == 0) {
+				(void) fprintf(stderr, gettext("interval "
+				    "cannot be zero\n"));
+				usage(B_FALSE);
+			}
+
+			/*
+			 * Ignore the last parameter
+			 */
+			argc--;
+		} else {
+			/*
+			 * If this is not a valid number, just plow on.  The
+			 * user will get a more informative error message later
+			 * on.
+			 */
+			interval = 0;
+		}
+	}
+
+	/*
+	 * If the last argument is also an integer, then we have both an
+	 * interval and a count.
+	 */
+	if (argc > 0 && isdigit(argv[argc - 1][0])) {
+		char *end;
+
+		errno = 0;
+		count = interval;
+		interval = strtoul(argv[argc - 1], &end, 10);
+
+		if (*end == '\0' && errno == 0) {
+			if (interval == 0) {
+				(void) fprintf(stderr, gettext("interval "
+				    "cannot be zero\n"));
+				usage(B_FALSE);
+			}
+
+			/*
+			 * Ignore the last parameter
+			 */
+			argc--;
+		} else {
+			interval = 0;
+		}
+	}
+
+	/*
+	 * Construct the list of all interesting pools.
+	 */
+	ret = 0;
+	if ((list = pool_list_get(argc, argv, NULL, &ret)) == NULL)
+		return (1);
+
+	if (pool_list_count(list) == 0 && argc != 0) {
+		pool_list_free(list);
+		return (1);
+	}
+
+	if (pool_list_count(list) == 0 && interval == 0) {
+		pool_list_free(list);
+		(void) fprintf(stderr, gettext("no pools available\n"));
+		return (1);
+	}
+
+	/*
+	 * Enter the main iostat loop.
+	 */
+	cb.cb_list = list;
+	cb.cb_verbose = verbose;
+	cb.cb_iteration = 0;
+	cb.cb_namewidth = 0;
+
+	for (;;) {
+		pool_list_update(list);
+
+		if ((npools = pool_list_count(list)) == 0)
+			break;
+
+		/*
+		 * Refresh all statistics.  This is done as an explicit step
+		 * before calculating the maximum name width, so that any
+		 * configuration changes are properly accounted for.
+		 */
+		(void) pool_list_iter(list, B_FALSE, refresh_iostat, &cb);
+
+		/*
+		 * Iterate over all pools to determine the maximum width
+		 * for the pool / device name column across all pools.
+		 */
+		cb.cb_namewidth = 0;
+		(void) pool_list_iter(list, B_FALSE, get_namewidth, &cb);
+
+		/*
+		 * If it's the first time, or verbose mode, print the header.
+		 */
+		if (++cb.cb_iteration == 1 || verbose)
+			print_iostat_header(&cb);
+
+		(void) pool_list_iter(list, B_FALSE, print_iostat, &cb);
+
+		/*
+		 * If there's more than one pool, and we're not in verbose mode
+		 * (which prints a separator for us), then print a separator.
+		 */
+		if (npools > 1 && !verbose)
+			print_iostat_separator(&cb);
+
+		if (verbose)
+			(void) printf("\n");
+
+		/*
+		 * Flush the output so that redirection to a file isn't buffered
+		 * indefinitely.
+		 */
+		(void) fflush(stdout);
+
+		if (interval == 0)
+			break;
+
+		if (count != 0 && --count == 0)
+			break;
+
+		(void) sleep(interval);
+	}
+
+	pool_list_free(list);
+
+	return (ret);
+}
+
+typedef struct list_cbdata {
+	boolean_t	cb_scripted;
+	boolean_t	cb_first;
+	int		cb_fields[MAX_FIELDS];
+	int		cb_fieldcount;
+} list_cbdata_t;
+
+/*
+ * Given a list of columns to display, output appropriate headers for each one.
+ */
+void
+print_header(int *fields, size_t count)
+{
+	int i;
+	column_def_t *col;
+	const char *fmt;
+
+	for (i = 0; i < count; i++) {
+		col = &column_table[fields[i]];
+		if (i != 0)
+			(void) printf("  ");
+		if (col->cd_justify == left_justify)
+			fmt = "%-*s";
+		else
+			fmt = "%*s";
+
+		(void) printf(fmt, i == count - 1 ? strlen(col->cd_title) :
+		    col->cd_width, col->cd_title);
+	}
+
+	(void) printf("\n");
+}
+
+int
+list_callback(zpool_handle_t *zhp, void *data)
+{
+	list_cbdata_t *cbp = data;
+	nvlist_t *config;
+	int i;
+	char buf[ZPOOL_MAXNAMELEN];
+	uint64_t total;
+	uint64_t used;
+	const char *fmt;
+	column_def_t *col;
+
+	if (cbp->cb_first) {
+		if (!cbp->cb_scripted)
+			print_header(cbp->cb_fields, cbp->cb_fieldcount);
+		cbp->cb_first = B_FALSE;
+	}
+
+	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
+		config = NULL;
+	} else {
+		config = zpool_get_config(zhp, NULL);
+		total = zpool_get_space_total(zhp);
+		used = zpool_get_space_used(zhp);
+	}
+
+	for (i = 0; i < cbp->cb_fieldcount; i++) {
+		if (i != 0) {
+			if (cbp->cb_scripted)
+				(void) printf("\t");
+			else
+				(void) printf("  ");
+		}
+
+		col = &column_table[cbp->cb_fields[i]];
+
+		switch (cbp->cb_fields[i]) {
+		case ZPOOL_FIELD_NAME:
+			(void) strlcpy(buf, zpool_get_name(zhp), sizeof (buf));
+			break;
+
+		case ZPOOL_FIELD_SIZE:
+			if (config == NULL)
+				(void) strlcpy(buf, "-", sizeof (buf));
+			else
+				zfs_nicenum(total, buf, sizeof (buf));
+			break;
+
+		case ZPOOL_FIELD_USED:
+			if (config == NULL)
+				(void) strlcpy(buf, "-", sizeof (buf));
+			else
+				zfs_nicenum(used, buf, sizeof (buf));
+			break;
+
+		case ZPOOL_FIELD_AVAILABLE:
+			if (config == NULL)
+				(void) strlcpy(buf, "-", sizeof (buf));
+			else
+				zfs_nicenum(total - used, buf, sizeof (buf));
+			break;
+
+		case ZPOOL_FIELD_CAPACITY:
+			if (config == NULL) {
+				(void) strlcpy(buf, "-", sizeof (buf));
+			} else {
+				uint64_t capacity = (total == 0 ? 0 :
+				    (used * 100 / total));
+				(void) snprintf(buf, sizeof (buf), "%llu%%",
+				    (u_longlong_t)capacity);
+			}
+			break;
+
+		case ZPOOL_FIELD_HEALTH:
+			if (config == NULL) {
+				(void) strlcpy(buf, "FAULTED", sizeof (buf));
+			} else {
+				nvlist_t *nvroot;
+				vdev_stat_t *vs;
+				uint_t vsc;
+
+				verify(nvlist_lookup_nvlist(config,
+				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+				verify(nvlist_lookup_uint64_array(nvroot,
+				    ZPOOL_CONFIG_STATS, (uint64_t **)&vs,
+				    &vsc) == 0);
+				(void) strlcpy(buf, state_to_name(vs),
+				    sizeof (buf));
+			}
+			break;
+
+		case ZPOOL_FIELD_ROOT:
+			if (config == NULL)
+				(void) strlcpy(buf, "-", sizeof (buf));
+			else if (zpool_get_root(zhp, buf, sizeof (buf)) != 0)
+				(void) strlcpy(buf, "-", sizeof (buf));
+			break;
+		}
+
+		if (cbp->cb_scripted)
+			(void) printf("%s", buf);
+		else {
+			if (col->cd_justify == left_justify)
+				fmt = "%-*s";
+			else
+				fmt = "%*s";
+
+			(void) printf(fmt, i == cbp->cb_fieldcount - 1 ?
+			    strlen(buf) : col->cd_width, buf);
+		}
+	}
+
+	(void) printf("\n");
+
+	return (0);
+}
+
+/*
+ * zpool list [-H] [-o field[,field]*] [pool] ...
+ *
+ *	-H	Scripted mode.  Don't display headers, and separate fields by
+ *		a single tab.
+ *	-o	List of fields to display.  Defaults to all fields, or
+ *		"name,size,used,available,capacity,health,root"
+ *
+ * List all pools in the system, whether or not they're healthy.  Output space
+ * statistics for each one, as well as a health status summary.
+ */
+int
+zpool_do_list(int argc, char **argv)
+{
+	int c;
+	int ret;
+	list_cbdata_t cb = { 0 };
+	static char default_fields[] =
+	    "name,size,used,available,capacity,health,root";
+	char *fields = default_fields;
+	char *value;
+
+	/* check options */
+	while ((c = getopt(argc, argv, ":Ho:")) != -1) {
+		switch (c) {
+		case 'H':
+			cb.cb_scripted = B_TRUE;
+			break;
+		case 'o':
+			fields = optarg;
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	while (*fields != '\0') {
+		if (cb.cb_fieldcount == MAX_FIELDS) {
+			(void) fprintf(stderr, gettext("too many "
+			    "properties given to -o option\n"));
+			usage(B_FALSE);
+		}
+
+		if ((cb.cb_fields[cb.cb_fieldcount] = getsubopt(&fields,
+		    column_subopts, &value)) == -1) {
+			(void) fprintf(stderr, gettext("invalid property "
+			    "'%s'\n"), value);
+			usage(B_FALSE);
+		}
+
+		cb.cb_fieldcount++;
+	}
+
+
+	cb.cb_first = B_TRUE;
+
+	ret = for_each_pool(argc, argv, B_TRUE, NULL, list_callback, &cb);
+
+	if (argc == 0 && cb.cb_first) {
+		(void) printf(gettext("no pools available\n"));
+		return (0);
+	}
+
+	return (ret);
+}
+
+static nvlist_t *
+zpool_get_vdev_by_name(nvlist_t *nv, char *name)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	nvlist_t *match;
+	char *path;
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0) {
+		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
+		if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV)-1) == 0)
+			name += sizeof(_PATH_DEV)-1;
+		if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV)-1) == 0)
+			path += sizeof(_PATH_DEV)-1;
+		if (strcmp(name, path) == 0)
+			return (nv);
+		return (NULL);
+	}
+
+	for (c = 0; c < children; c++)
+		if ((match = zpool_get_vdev_by_name(child[c], name)) != NULL)
+			return (match);
+
+	return (NULL);
+}
+
+static int
+zpool_do_attach_or_replace(int argc, char **argv, int replacing)
+{
+	boolean_t force = B_FALSE;
+	int c;
+	nvlist_t *nvroot;
+	char *poolname, *old_disk, *new_disk;
+	zpool_handle_t *zhp;
+	nvlist_t *config;
+	int ret;
+	int log_argc;
+	char **log_argv;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "f")) != -1) {
+		switch (c) {
+		case 'f':
+			force = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	log_argc = argc;
+	log_argv = argv;
+	argc -= optind;
+	argv += optind;
+
+	/* get pool name and check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name argument\n"));
+		usage(B_FALSE);
+	}
+
+	poolname = argv[0];
+
+	if (argc < 2) {
+		(void) fprintf(stderr,
+		    gettext("missing <device> specification\n"));
+		usage(B_FALSE);
+	}
+
+	old_disk = argv[1];
+
+	if (argc < 3) {
+		if (!replacing) {
+			(void) fprintf(stderr,
+			    gettext("missing <new_device> specification\n"));
+			usage(B_FALSE);
+		}
+		new_disk = old_disk;
+		argc -= 1;
+		argv += 1;
+	} else {
+		new_disk = argv[2];
+		argc -= 2;
+		argv += 2;
+	}
+
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
+		return (1);
+
+	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
+		(void) fprintf(stderr, gettext("pool '%s' is unavailable\n"),
+		    poolname);
+		zpool_close(zhp);
+		return (1);
+	}
+
+	nvroot = make_root_vdev(config, force, B_FALSE, replacing, argc, argv);
+	if (nvroot == NULL) {
+		zpool_close(zhp);
+		return (1);
+	}
+
+	ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing);
+
+	if (!ret) {
+		zpool_log_history(g_zfs, log_argc, log_argv, poolname, B_TRUE,
+		    B_FALSE);
+	}
+
+	nvlist_free(nvroot);
+	zpool_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * zpool replace [-f] <pool> <device> <new_device>
+ *
+ *	-f	Force attach, even if <new_device> appears to be in use.
+ *
+ * Replace <device> with <new_device>.
+ */
+/* ARGSUSED */
+int
+zpool_do_replace(int argc, char **argv)
+{
+	return (zpool_do_attach_or_replace(argc, argv, B_TRUE));
+}
+
+/*
+ * zpool attach [-f] <pool> <device> <new_device>
+ *
+ *	-f	Force attach, even if <new_device> appears to be in use.
+ *
+ * Attach <new_device> to the mirror containing <device>.  If <device> is not
+ * part of a mirror, then <device> will be transformed into a mirror of
+ * <device> and <new_device>.  In either case, <new_device> will begin life
+ * with a DTL of [0, now], and will immediately begin to resilver itself.
+ */
+int
+zpool_do_attach(int argc, char **argv)
+{
+	return (zpool_do_attach_or_replace(argc, argv, B_FALSE));
+}
+
+/*
+ * zpool detach [-f] <pool> <device>
+ *
+ *	-f	Force detach of <device>, even if DTLs argue against it
+ *		(not supported yet)
+ *
+ * Detach a device from a mirror.  The operation will be refused if <device>
+ * is the last device in the mirror, or if the DTLs indicate that this device
+ * has the only valid copy of some data.
+ */
+/* ARGSUSED */
+int
+zpool_do_detach(int argc, char **argv)
+{
+	int c;
+	char *poolname, *path;
+	zpool_handle_t *zhp;
+	int ret;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "f")) != -1) {
+		switch (c) {
+		case 'f':
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* get pool name and check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name argument\n"));
+		usage(B_FALSE);
+	}
+
+	if (argc < 2) {
+		(void) fprintf(stderr,
+		    gettext("missing <device> specification\n"));
+		usage(B_FALSE);
+	}
+
+	poolname = argv[0];
+	path = argv[1];
+
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
+		return (1);
+
+	ret = zpool_vdev_detach(zhp, path);
+
+	if (!ret) {
+		zpool_log_history(g_zfs, argc + optind, argv - optind, poolname,
+		    B_TRUE, B_FALSE);
+	}
+	zpool_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * zpool online <pool> <device> ...
+ */
+/* ARGSUSED */
+int
+zpool_do_online(int argc, char **argv)
+{
+	int c, i;
+	char *poolname;
+	zpool_handle_t *zhp;
+	int ret = 0;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "t")) != -1) {
+		switch (c) {
+		case 't':
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* get pool name and check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name\n"));
+		usage(B_FALSE);
+	}
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing device name\n"));
+		usage(B_FALSE);
+	}
+
+	poolname = argv[0];
+
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
+		return (1);
+
+	for (i = 1; i < argc; i++)
+		if (zpool_vdev_online(zhp, argv[i]) == 0)
+			(void) printf(gettext("Bringing device %s online\n"),
+			    argv[i]);
+		else
+			ret = 1;
+
+	if (!ret) {
+		zpool_log_history(g_zfs, argc + optind, argv - optind, poolname,
+		    B_TRUE, B_FALSE);
+	}
+	zpool_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * zpool offline [-ft] <pool> <device> ...
+ *
+ *	-f	Force the device into the offline state, even if doing
+ *		so would appear to compromise pool availability.
+ *		(not supported yet)
+ *
+ *	-t	Only take the device off-line temporarily.  The offline
+ *		state will not be persistent across reboots.
+ */
+/* ARGSUSED */
+int
+zpool_do_offline(int argc, char **argv)
+{
+	int c, i;
+	char *poolname;
+	zpool_handle_t *zhp;
+	int ret = 0;
+	boolean_t istmp = B_FALSE;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "ft")) != -1) {
+		switch (c) {
+		case 't':
+			istmp = B_TRUE;
+			break;
+		case 'f':
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* get pool name and check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name\n"));
+		usage(B_FALSE);
+	}
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing device name\n"));
+		usage(B_FALSE);
+	}
+
+	poolname = argv[0];
+
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
+		return (1);
+
+	for (i = 1; i < argc; i++)
+		if (zpool_vdev_offline(zhp, argv[i], istmp) == 0)
+			(void) printf(gettext("Bringing device %s offline\n"),
+			    argv[i]);
+		else
+			ret = 1;
+
+	if (!ret) {
+		zpool_log_history(g_zfs, argc + optind, argv - optind, poolname,
+		    B_TRUE, B_FALSE);
+	}
+	zpool_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * zpool clear <pool> [device]
+ *
+ * Clear all errors associated with a pool or a particular device.
+ */
+int
+zpool_do_clear(int argc, char **argv)
+{
+	int ret = 0;
+	zpool_handle_t *zhp;
+	char *pool, *device;
+
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing pool name\n"));
+		usage(B_FALSE);
+	}
+
+	if (argc > 3) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	pool = argv[1];
+	device = argc == 3 ? argv[2] : NULL;
+
+	if ((zhp = zpool_open(g_zfs, pool)) == NULL)
+		return (1);
+
+	if (zpool_clear(zhp, device) != 0)
+		ret = 1;
+
+	if (!ret)
+		zpool_log_history(g_zfs, argc, argv, pool, B_TRUE, B_FALSE);
+	zpool_close(zhp);
+
+	return (ret);
+}
+
+typedef struct scrub_cbdata {
+	int	cb_type;
+	int	cb_argc;
+	char	**cb_argv;
+} scrub_cbdata_t;
+
+int
+scrub_callback(zpool_handle_t *zhp, void *data)
+{
+	scrub_cbdata_t *cb = data;
+	int err;
+
+	/*
+	 * Ignore faulted pools.
+	 */
+	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
+		(void) fprintf(stderr, gettext("cannot scrub '%s': pool is "
+		    "currently unavailable\n"), zpool_get_name(zhp));
+		return (1);
+	}
+
+	err = zpool_scrub(zhp, cb->cb_type);
+
+	if (!err) {
+		zpool_log_history(g_zfs, cb->cb_argc, cb->cb_argv,
+		    zpool_get_name(zhp), B_TRUE, B_FALSE);
+	}
+
+	return (err != 0);
+}
+
+/*
+ * zpool scrub [-s] <pool> ...
+ *
+ *	-s	Stop.  Stops any in-progress scrub.
+ */
+int
+zpool_do_scrub(int argc, char **argv)
+{
+	int c;
+	scrub_cbdata_t cb;
+
+	cb.cb_type = POOL_SCRUB_EVERYTHING;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "s")) != -1) {
+		switch (c) {
+		case 's':
+			cb.cb_type = POOL_SCRUB_NONE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	cb.cb_argc = argc;
+	cb.cb_argv = argv;
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name argument\n"));
+		usage(B_FALSE);
+	}
+
+	return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb));
+}
+
+typedef struct status_cbdata {
+	int		cb_count;
+	boolean_t	cb_allpools;
+	boolean_t	cb_verbose;
+	boolean_t	cb_explain;
+	boolean_t	cb_first;
+} status_cbdata_t;
+
+/*
+ * Print out detailed scrub status.
+ */
+void
+print_scrub_status(nvlist_t *nvroot)
+{
+	vdev_stat_t *vs;
+	uint_t vsc;
+	time_t start, end, now;
+	double fraction_done;
+	uint64_t examined, total, minutes_left;
+	char *scrub_type;
+
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&vs, &vsc) == 0);
+
+	/*
+	 * If there's never been a scrub, there's not much to say.
+	 */
+	if (vs->vs_scrub_end == 0 && vs->vs_scrub_type == POOL_SCRUB_NONE) {
+		(void) printf(gettext("none requested\n"));
+		return;
+	}
+
+	scrub_type = (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
+	    "resilver" : "scrub";
+
+	start = vs->vs_scrub_start;
+	end = vs->vs_scrub_end;
+	now = time(NULL);
+	examined = vs->vs_scrub_examined;
+	total = vs->vs_alloc;
+
+	if (end != 0) {
+		(void) printf(gettext("%s %s with %llu errors on %s"),
+		    scrub_type, vs->vs_scrub_complete ? "completed" : "stopped",
+		    (u_longlong_t)vs->vs_scrub_errors, ctime(&end));
+		return;
+	}
+
+	if (examined == 0)
+		examined = 1;
+	if (examined > total)
+		total = examined;
+
+	fraction_done = (double)examined / total;
+	minutes_left = (uint64_t)((now - start) *
+	    (1 - fraction_done) / fraction_done / 60);
+
+	(void) printf(gettext("%s in progress, %.2f%% done, %lluh%um to go\n"),
+	    scrub_type, 100 * fraction_done,
+	    (u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60));
+}
+
+typedef struct spare_cbdata {
+	uint64_t	cb_guid;
+	zpool_handle_t	*cb_zhp;
+} spare_cbdata_t;
+
+static boolean_t
+find_vdev(nvlist_t *nv, uint64_t search)
+{
+	uint64_t guid;
+	nvlist_t **child;
+	uint_t c, children;
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
+	    search == guid)
+		return (B_TRUE);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			if (find_vdev(child[c], search))
+				return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+static int
+find_spare(zpool_handle_t *zhp, void *data)
+{
+	spare_cbdata_t *cbp = data;
+	nvlist_t *config, *nvroot;
+
+	config = zpool_get_config(zhp, NULL);
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+
+	if (find_vdev(nvroot, cbp->cb_guid)) {
+		cbp->cb_zhp = zhp;
+		return (1);
+	}
+
+	zpool_close(zhp);
+	return (0);
+}
+
+/*
+ * Print out configuration state as requested by status_callback.
+ */
+void
+print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
+    int namewidth, int depth, boolean_t isspare)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	vdev_stat_t *vs;
+	char rbuf[6], wbuf[6], cbuf[6], repaired[7];
+	char *vname;
+	uint64_t notpresent;
+	spare_cbdata_t cb;
+	const char *state;
+
+	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&vs, &c) == 0);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0)
+		children = 0;
+
+	state = state_to_name(vs);
+	if (isspare) {
+		/*
+		 * For hot spares, we use the terms 'INUSE' and 'AVAIL' for
+		 * online drives.
+		 */
+		if (vs->vs_aux == VDEV_AUX_SPARED)
+			state = "INUSE";
+		else if (vs->vs_state == VDEV_STATE_HEALTHY)
+			state = "AVAIL";
+	}
+
+	(void) printf("\t%*s%-*s  %-8s", depth, "", namewidth - depth,
+	    name, state);
+
+	if (!isspare) {
+		zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
+		zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
+		zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
+		(void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
+	}
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
+	    &notpresent) == 0) {
+		char *path;
+		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
+		(void) printf("  was %s", path);
+	} else if (vs->vs_aux != 0) {
+		(void) printf("  ");
+
+		switch (vs->vs_aux) {
+		case VDEV_AUX_OPEN_FAILED:
+			(void) printf(gettext("cannot open"));
+			break;
+
+		case VDEV_AUX_BAD_GUID_SUM:
+			(void) printf(gettext("missing device"));
+			break;
+
+		case VDEV_AUX_NO_REPLICAS:
+			(void) printf(gettext("insufficient replicas"));
+			break;
+
+		case VDEV_AUX_VERSION_NEWER:
+			(void) printf(gettext("newer version"));
+			break;
+
+		case VDEV_AUX_SPARED:
+			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+			    &cb.cb_guid) == 0);
+			if (zpool_iter(g_zfs, find_spare, &cb) == 1) {
+				if (strcmp(zpool_get_name(cb.cb_zhp),
+				    zpool_get_name(zhp)) == 0)
+					(void) printf(gettext("currently in "
+					    "use"));
+				else
+					(void) printf(gettext("in use by "
+					    "pool '%s'"),
+					    zpool_get_name(cb.cb_zhp));
+				zpool_close(cb.cb_zhp);
+			} else {
+				(void) printf(gettext("currently in use"));
+			}
+			break;
+
+		default:
+			(void) printf(gettext("corrupted data"));
+			break;
+		}
+	} else if (vs->vs_scrub_repaired != 0 && children == 0) {
+		/*
+		 * Report bytes resilvered/repaired on leaf devices.
+		 */
+		zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
+		(void) printf(gettext("  %s %s"), repaired,
+		    (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
+		    "resilvered" : "repaired");
+	}
+
+	(void) printf("\n");
+
+	for (c = 0; c < children; c++) {
+		vname = zpool_vdev_name(g_zfs, zhp, child[c]);
+		print_status_config(zhp, vname, child[c],
+		    namewidth, depth + 2, isspare);
+		free(vname);
+	}
+}
+
+static void
+print_error_log(zpool_handle_t *zhp)
+{
+	nvlist_t *nverrlist;
+	nvpair_t *elem;
+	char *pathname;
+	size_t len = MAXPATHLEN * 2;
+
+	if (zpool_get_errlog(zhp, &nverrlist) != 0) {
+		(void) printf("errors: List of errors unavailable "
+		    "(insufficient privileges)\n");
+		return;
+	}
+
+	(void) printf("errors: Permanent errors have been "
+	    "detected in the following files:\n\n");
+
+	pathname = safe_malloc(len);
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) {
+		nvlist_t *nv;
+		uint64_t dsobj, obj;
+
+		verify(nvpair_value_nvlist(elem, &nv) == 0);
+		verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_DATASET,
+		    &dsobj) == 0);
+		verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_OBJECT,
+		    &obj) == 0);
+		zpool_obj_to_path(zhp, dsobj, obj, pathname, len);
+		(void) printf("%7s %s\n", "", pathname);
+	}
+	free(pathname);
+	nvlist_free(nverrlist);
+}
+
+static void
+print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares,
+    int namewidth)
+{
+	uint_t i;
+	char *name;
+
+	if (nspares == 0)
+		return;
+
+	(void) printf(gettext("\tspares\n"));
+
+	for (i = 0; i < nspares; i++) {
+		name = zpool_vdev_name(g_zfs, zhp, spares[i]);
+		print_status_config(zhp, name, spares[i],
+		    namewidth, 2, B_TRUE);
+		free(name);
+	}
+}
+
+/*
+ * Display a summary of pool status, such as:
+ *
+ *        pool: tank
+ *	status: DEGRADED
+ *	reason: One or more devices ...
+ *         see: http://www.sun.com/msg/ZFS-xxxx-01
+ *	config:
+ *		mirror		DEGRADED
+ *                c1t0d0	OK
+ *                c2t0d0	UNAVAIL
+ *
+ * When given the '-v' option, we print out the complete list of data errors.
+ * If the '-x' option is specified, only pools with problems are displayed.
+ */
+int
+status_callback(zpool_handle_t *zhp, void *data)
+{
+	status_cbdata_t *cbp = data;
+	nvlist_t *config, *nvroot;
+	char *msgid;
+	int reason;
+	const char *health;
+	uint_t c;
+	vdev_stat_t *vs;
+
+	config = zpool_get_config(zhp, NULL);
+	reason = zpool_get_status(zhp, &msgid);
+
+	cbp->cb_count++;
+
+	/*
+	 * If we were given 'zpool status -x', only report those pools with
+	 * problems.
+	 */
+	if (reason == ZPOOL_STATUS_OK && cbp->cb_explain) {
+		if (!cbp->cb_allpools) {
+			(void) printf(gettext("pool '%s' is healthy\n"),
+			    zpool_get_name(zhp));
+			if (cbp->cb_first)
+				cbp->cb_first = B_FALSE;
+		}
+		return (0);
+	}
+
+	if (cbp->cb_first)
+		cbp->cb_first = B_FALSE;
+	else
+		(void) printf("\n");
+
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&vs, &c) == 0);
+	health = state_to_name(vs);
+
+	(void) printf(gettext("  pool: %s\n"), zpool_get_name(zhp));
+	(void) printf(gettext(" state: %s\n"), health);
+
+	switch (reason) {
+	case ZPOOL_STATUS_MISSING_DEV_R:
+		(void) printf(gettext("status: One or more devices could not "
+		    "be opened.  Sufficient replicas exist for\n\tthe pool to "
+		    "continue functioning in a degraded state.\n"));
+		(void) printf(gettext("action: Attach the missing device and "
+		    "online it using 'zpool online'.\n"));
+		break;
+
+	case ZPOOL_STATUS_MISSING_DEV_NR:
+		(void) printf(gettext("status: One or more devices could not "
+		    "be opened.  There are insufficient\n\treplicas for the "
+		    "pool to continue functioning.\n"));
+		(void) printf(gettext("action: Attach the missing device and "
+		    "online it using 'zpool online'.\n"));
+		break;
+
+	case ZPOOL_STATUS_CORRUPT_LABEL_R:
+		(void) printf(gettext("status: One or more devices could not "
+		    "be used because the label is missing or\n\tinvalid.  "
+		    "Sufficient replicas exist for the pool to continue\n\t"
+		    "functioning in a degraded state.\n"));
+		(void) printf(gettext("action: Replace the device using "
+		    "'zpool replace'.\n"));
+		break;
+
+	case ZPOOL_STATUS_CORRUPT_LABEL_NR:
+		(void) printf(gettext("status: One or more devices could not "
+		    "be used because the label is missing \n\tor invalid.  "
+		    "There are insufficient replicas for the pool to "
+		    "continue\n\tfunctioning.\n"));
+		(void) printf(gettext("action: Destroy and re-create the pool "
+		    "from a backup source.\n"));
+		break;
+
+	case ZPOOL_STATUS_FAILING_DEV:
+		(void) printf(gettext("status: One or more devices has "
+		    "experienced an unrecoverable error.  An\n\tattempt was "
+		    "made to correct the error.  Applications are "
+		    "unaffected.\n"));
+		(void) printf(gettext("action: Determine if the device needs "
+		    "to be replaced, and clear the errors\n\tusing "
+		    "'zpool clear' or replace the device with 'zpool "
+		    "replace'.\n"));
+		break;
+
+	case ZPOOL_STATUS_OFFLINE_DEV:
+		(void) printf(gettext("status: One or more devices has "
+		    "been taken offline by the administrator.\n\tSufficient "
+		    "replicas exist for the pool to continue functioning in "
+		    "a\n\tdegraded state.\n"));
+		(void) printf(gettext("action: Online the device using "
+		    "'zpool online' or replace the device with\n\t'zpool "
+		    "replace'.\n"));
+		break;
+
+	case ZPOOL_STATUS_RESILVERING:
+		(void) printf(gettext("status: One or more devices is "
+		    "currently being resilvered.  The pool will\n\tcontinue "
+		    "to function, possibly in a degraded state.\n"));
+		(void) printf(gettext("action: Wait for the resilver to "
+		    "complete.\n"));
+		break;
+
+	case ZPOOL_STATUS_CORRUPT_DATA:
+		(void) printf(gettext("status: One or more devices has "
+		    "experienced an error resulting in data\n\tcorruption.  "
+		    "Applications may be affected.\n"));
+		(void) printf(gettext("action: Restore the file in question "
+		    "if possible.  Otherwise restore the\n\tentire pool from "
+		    "backup.\n"));
+		break;
+
+	case ZPOOL_STATUS_CORRUPT_POOL:
+		(void) printf(gettext("status: The pool metadata is corrupted "
+		    "and the pool cannot be opened.\n"));
+		(void) printf(gettext("action: Destroy and re-create the pool "
+		    "from a backup source.\n"));
+		break;
+
+	case ZPOOL_STATUS_VERSION_OLDER:
+		(void) printf(gettext("status: The pool is formatted using an "
+		    "older on-disk format.  The pool can\n\tstill be used, but "
+		    "some features are unavailable.\n"));
+		(void) printf(gettext("action: Upgrade the pool using 'zpool "
+		    "upgrade'.  Once this is done, the\n\tpool will no longer "
+		    "be accessible on older software versions.\n"));
+		break;
+
+	case ZPOOL_STATUS_VERSION_NEWER:
+		(void) printf(gettext("status: The pool has been upgraded to a "
+		    "newer, incompatible on-disk version.\n\tThe pool cannot "
+		    "be accessed on this system.\n"));
+		(void) printf(gettext("action: Access the pool from a system "
+		    "running more recent software, or\n\trestore the pool from "
+		    "backup.\n"));
+		break;
+
+	default:
+		/*
+		 * The remaining errors can't actually be generated, yet.
+		 */
+		assert(reason == ZPOOL_STATUS_OK);
+	}
+
+	if (msgid != NULL)
+		(void) printf(gettext("   see: http://www.sun.com/msg/%s\n"),
+		    msgid);
+
+	if (config != NULL) {
+		int namewidth;
+		uint64_t nerr;
+		nvlist_t **spares;
+		uint_t nspares;
+
+		(void) printf(gettext(" scrub: "));
+		print_scrub_status(nvroot);
+
+		namewidth = max_width(zhp, nvroot, 0, 0);
+		if (namewidth < 10)
+			namewidth = 10;
+
+		(void) printf(gettext("config:\n\n"));
+		(void) printf(gettext("\t%-*s  %-8s %5s %5s %5s\n"), namewidth,
+		    "NAME", "STATE", "READ", "WRITE", "CKSUM");
+		print_status_config(zhp, zpool_get_name(zhp), nvroot,
+		    namewidth, 0, B_FALSE);
+
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    &spares, &nspares) == 0)
+			print_spares(zhp, spares, nspares, namewidth);
+
+		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
+		    &nerr) == 0) {
+			nvlist_t *nverrlist = NULL;
+
+			/*
+			 * If the approximate error count is small, get a
+			 * precise count by fetching the entire log and
+			 * uniquifying the results.
+			 */
+			if (nerr < 100 && !cbp->cb_verbose &&
+			    zpool_get_errlog(zhp, &nverrlist) == 0) {
+				nvpair_t *elem;
+
+				elem = NULL;
+				nerr = 0;
+				while ((elem = nvlist_next_nvpair(nverrlist,
+				    elem)) != NULL) {
+					nerr++;
+				}
+			}
+			nvlist_free(nverrlist);
+
+			(void) printf("\n");
+
+			if (nerr == 0)
+				(void) printf(gettext("errors: No known data "
+				    "errors\n"));
+			else if (!cbp->cb_verbose)
+				(void) printf(gettext("errors: %llu data "
+				    "errors, use '-v' for a list\n"),
+				    (u_longlong_t)nerr);
+			else
+				print_error_log(zhp);
+		}
+	} else {
+		(void) printf(gettext("config: The configuration cannot be "
+		    "determined.\n"));
+	}
+
+	return (0);
+}
+
+/*
+ * zpool status [-vx] [pool] ...
+ *
+ *	-v	Display complete error logs
+ *	-x	Display only pools with potential problems
+ *
+ * Describes the health status of all pools or some subset.
+ */
+int
+zpool_do_status(int argc, char **argv)
+{
+	int c;
+	int ret;
+	status_cbdata_t cb = { 0 };
+
+	/* check options */
+	while ((c = getopt(argc, argv, "vx")) != -1) {
+		switch (c) {
+		case 'v':
+			cb.cb_verbose = B_TRUE;
+			break;
+		case 'x':
+			cb.cb_explain = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	cb.cb_first = B_TRUE;
+
+	if (argc == 0)
+		cb.cb_allpools = B_TRUE;
+
+	ret = for_each_pool(argc, argv, B_TRUE, NULL, status_callback, &cb);
+
+	if (argc == 0 && cb.cb_count == 0)
+		(void) printf(gettext("no pools available\n"));
+	else if (cb.cb_explain && cb.cb_first && cb.cb_allpools)
+		(void) printf(gettext("all pools are healthy\n"));
+
+	return (ret);
+}
+
+typedef struct upgrade_cbdata {
+	int	cb_all;
+	int	cb_first;
+	int	cb_newer;
+	int	cb_argc;
+	char	**cb_argv;
+} upgrade_cbdata_t;
+
+static int
+upgrade_cb(zpool_handle_t *zhp, void *arg)
+{
+	upgrade_cbdata_t *cbp = arg;
+	nvlist_t *config;
+	uint64_t version;
+	int ret = 0;
+
+	config = zpool_get_config(zhp, NULL);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
+	    &version) == 0);
+
+	if (!cbp->cb_newer && version < ZFS_VERSION) {
+		if (!cbp->cb_all) {
+			if (cbp->cb_first) {
+				(void) printf(gettext("The following pools are "
+				    "out of date, and can be upgraded.  After "
+				    "being\nupgraded, these pools will no "
+				    "longer be accessible by older software "
+				    "versions.\n\n"));
+				(void) printf(gettext("VER  POOL\n"));
+				(void) printf(gettext("---  ------------\n"));
+				cbp->cb_first = B_FALSE;
+			}
+
+			(void) printf("%2llu   %s\n", (u_longlong_t)version,
+			    zpool_get_name(zhp));
+		} else {
+			cbp->cb_first = B_FALSE;
+			ret = zpool_upgrade(zhp);
+			if (!ret) {
+				zpool_log_history(g_zfs, cbp->cb_argc,
+				    cbp->cb_argv, zpool_get_name(zhp), B_TRUE,
+				    B_FALSE);
+				(void) printf(gettext("Successfully upgraded "
+				    "'%s'\n"), zpool_get_name(zhp));
+			}
+		}
+	} else if (cbp->cb_newer && version > ZFS_VERSION) {
+		assert(!cbp->cb_all);
+
+		if (cbp->cb_first) {
+			(void) printf(gettext("The following pools are "
+			    "formatted using a newer software version and\n"
+			    "cannot be accessed on the current system.\n\n"));
+			(void) printf(gettext("VER  POOL\n"));
+			(void) printf(gettext("---  ------------\n"));
+			cbp->cb_first = B_FALSE;
+		}
+
+		(void) printf("%2llu   %s\n", (u_longlong_t)version,
+		    zpool_get_name(zhp));
+	}
+
+	zpool_close(zhp);
+	return (ret);
+}
+
+/* ARGSUSED */
+static int
+upgrade_one(zpool_handle_t *zhp, void *data)
+{
+	nvlist_t *config;
+	uint64_t version;
+	int ret;
+	upgrade_cbdata_t *cbp = data;
+
+	config = zpool_get_config(zhp, NULL);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
+	    &version) == 0);
+
+	if (version == ZFS_VERSION) {
+		(void) printf(gettext("Pool '%s' is already formatted "
+		    "using the current version.\n"), zpool_get_name(zhp));
+		return (0);
+	}
+
+	ret = zpool_upgrade(zhp);
+
+	if (!ret) {
+		zpool_log_history(g_zfs, cbp->cb_argc, cbp->cb_argv,
+		    zpool_get_name(zhp), B_TRUE, B_FALSE);
+		(void) printf(gettext("Successfully upgraded '%s' "
+		    "from version %llu to version %llu\n"), zpool_get_name(zhp),
+		    (u_longlong_t)version, (u_longlong_t)ZFS_VERSION);
+	}
+
+	return (ret != 0);
+}
+
+/*
+ * zpool upgrade
+ * zpool upgrade -v
+ * zpool upgrade <-a | pool>
+ *
+ * With no arguments, display downrev'd ZFS pools available for upgrade.
+ * Individual pools can be upgraded by specifying the pool, and '-a' will
+ * upgrade all pools.
+ */
+int
+zpool_do_upgrade(int argc, char **argv)
+{
+	int c;
+	upgrade_cbdata_t cb = { 0 };
+	int ret = 0;
+	boolean_t showversions = B_FALSE;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "av")) != -1) {
+		switch (c) {
+		case 'a':
+			cb.cb_all = B_TRUE;
+			break;
+		case 'v':
+			showversions = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	cb.cb_argc = argc;
+	cb.cb_argv = argv;
+	argc -= optind;
+	argv += optind;
+
+	if (showversions) {
+		if (cb.cb_all || argc != 0) {
+			(void) fprintf(stderr, gettext("-v option is "
+			    "incompatible with other arguments\n"));
+			usage(B_FALSE);
+		}
+	} else if (cb.cb_all) {
+		if (argc != 0) {
+			(void) fprintf(stderr, gettext("-a option is "
+			    "incompatible with other arguments\n"));
+			usage(B_FALSE);
+		}
+	}
+
+	(void) printf(gettext("This system is currently running ZFS version "
+	    "%llu.\n\n"), ZFS_VERSION);
+	cb.cb_first = B_TRUE;
+	if (showversions) {
+		(void) printf(gettext("The following versions are "
+		    "supported:\n\n"));
+		(void) printf(gettext("VER  DESCRIPTION\n"));
+		(void) printf("---  -----------------------------------------"
+		    "---------------\n");
+		(void) printf(gettext(" 1   Initial ZFS version\n"));
+		(void) printf(gettext(" 2   Ditto blocks "
+		    "(replicated metadata)\n"));
+		(void) printf(gettext(" 3   Hot spares and double parity "
+		    "RAID-Z\n"));
+		(void) printf(gettext(" 4   zpool history\n"));
+		(void) printf(gettext(" 5   Compression using the gzip "
+		    "algorithm\n"));
+		(void) printf(gettext(" 6   bootfs pool property "));
+		(void) printf(gettext("\nFor more information on a particular "
+		    "version, including supported releases, see:\n\n"));
+		(void) printf("http://www.opensolaris.org/os/community/zfs/"
+		    "version/N\n\n");
+		(void) printf(gettext("Where 'N' is the version number.\n"));
+	} else if (argc == 0) {
+		int notfound;
+
+		ret = zpool_iter(g_zfs, upgrade_cb, &cb);
+		notfound = cb.cb_first;
+
+		if (!cb.cb_all && ret == 0) {
+			if (!cb.cb_first)
+				(void) printf("\n");
+			cb.cb_first = B_TRUE;
+			cb.cb_newer = B_TRUE;
+			ret = zpool_iter(g_zfs, upgrade_cb, &cb);
+			if (!cb.cb_first) {
+				notfound = B_FALSE;
+				(void) printf("\n");
+			}
+		}
+
+		if (ret == 0) {
+			if (notfound)
+				(void) printf(gettext("All pools are formatted "
+				    "using this version.\n"));
+			else if (!cb.cb_all)
+				(void) printf(gettext("Use 'zpool upgrade -v' "
+				    "for a list of available versions and "
+				    "their associated\nfeatures.\n"));
+		}
+	} else {
+		ret = for_each_pool(argc, argv, B_FALSE, NULL,
+		    upgrade_one, &cb);
+	}
+
+	return (ret);
+}
+
+/*
+ * Print out the command history for a specific pool.
+ */
+static int
+get_history_one(zpool_handle_t *zhp, void *data)
+{
+	nvlist_t *nvhis;
+	nvlist_t **records;
+	uint_t numrecords;
+	char *cmdstr;
+	uint64_t dst_time;
+	time_t tsec;
+	struct tm t;
+	char tbuf[30];
+	int ret, i;
+
+	*(boolean_t *)data = B_FALSE;
+
+	(void) printf(gettext("History for '%s':\n"), zpool_get_name(zhp));
+
+	if ((ret = zpool_get_history(zhp, &nvhis)) != 0)
+		return (ret);
+
+	verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD,
+	    &records, &numrecords) == 0);
+	for (i = 0; i < numrecords; i++) {
+		if (nvlist_lookup_uint64(records[i], ZPOOL_HIST_TIME,
+		    &dst_time) == 0) {
+			verify(nvlist_lookup_string(records[i], ZPOOL_HIST_CMD,
+			    &cmdstr) == 0);
+			tsec = dst_time;
+			(void) localtime_r(&tsec, &t);
+			(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
+			(void) printf("%s %s\n", tbuf, cmdstr);
+		}
+	}
+	(void) printf("\n");
+	nvlist_free(nvhis);
+
+	return (ret);
+}
+
+/*
+ * zpool history <pool>
+ *
+ * Displays the history of commands that modified pools.
+ */
+int
+zpool_do_history(int argc, char **argv)
+{
+	boolean_t first = B_TRUE;
+	int ret;
+
+	argc -= optind;
+	argv += optind;
+
+	ret = for_each_pool(argc, argv, B_FALSE,  NULL, get_history_one,
+	    &first);
+
+	if (argc == 0 && first == B_TRUE) {
+		(void) printf(gettext("no pools available\n"));
+		return (0);
+	}
+
+	return (ret);
+}
+
+static int
+get_callback(zpool_handle_t *zhp, void *data)
+{
+	libzfs_get_cbdata_t *cbp = (libzfs_get_cbdata_t *)data;
+	char value[MAXNAMELEN];
+	zfs_source_t srctype;
+	zpool_proplist_t *pl;
+
+	for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) {
+
+		/*
+		 * Skip the special fake placeholder.
+		 */
+		if (pl->pl_prop == ZFS_PROP_NAME &&
+		    pl == cbp->cb_proplist)
+			continue;
+
+		if (zpool_get_prop(zhp, pl->pl_prop,
+		    value, sizeof (value), &srctype) != 0)
+			continue;
+
+		libzfs_print_one_property(zpool_get_name(zhp), cbp,
+		    zpool_prop_to_name(pl->pl_prop), value, srctype, NULL);
+	}
+	return (0);
+}
+
+int
+zpool_do_get(int argc, char **argv)
+{
+	libzfs_get_cbdata_t cb = { 0 };
+	zpool_proplist_t fake_name = { 0 };
+	int ret;
+
+	if (argc < 3)
+		usage(B_FALSE);
+
+	cb.cb_first = B_TRUE;
+	cb.cb_sources = ZFS_SRC_ALL;
+	cb.cb_columns[0] = GET_COL_NAME;
+	cb.cb_columns[1] = GET_COL_PROPERTY;
+	cb.cb_columns[2] = GET_COL_VALUE;
+	cb.cb_columns[3] = GET_COL_SOURCE;
+
+	if (zpool_get_proplist(g_zfs, argv[1],  &cb.cb_proplist) != 0)
+		usage(B_FALSE);
+
+	if (cb.cb_proplist != NULL) {
+		fake_name.pl_prop = ZFS_PROP_NAME;
+		fake_name.pl_width = strlen(gettext("NAME"));
+		fake_name.pl_next = cb.cb_proplist;
+		cb.cb_proplist = &fake_name;
+	}
+
+	ret = for_each_pool(argc - 2, argv + 2, B_TRUE, &cb.cb_proplist,
+	    get_callback, &cb);
+
+	if (cb.cb_proplist == &fake_name)
+		zfs_free_proplist(fake_name.pl_next);
+	else
+		zfs_free_proplist(cb.cb_proplist);
+
+	return (ret);
+}
+
+typedef struct set_cbdata {
+	char *cb_propname;
+	char *cb_value;
+	boolean_t cb_any_successful;
+} set_cbdata_t;
+
+int
+set_callback(zpool_handle_t *zhp, void *data)
+{
+	int error;
+	set_cbdata_t *cb = (set_cbdata_t *)data;
+
+	error = zpool_set_prop(zhp, cb->cb_propname, cb->cb_value);
+
+	if (!error)
+		cb->cb_any_successful = B_TRUE;
+
+	return (error);
+}
+
+int
+zpool_do_set(int argc, char **argv)
+{
+	set_cbdata_t cb = { 0 };
+	int error;
+
+	if (argc > 1 && argv[1][0] == '-') {
+		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+		    argv[1][1]);
+		usage(B_FALSE);
+	}
+
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing property=value "
+		    "argument\n"));
+		usage(B_FALSE);
+	}
+
+	if (argc < 3) {
+		(void) fprintf(stderr, gettext("missing pool name\n"));
+		usage(B_FALSE);
+	}
+
+	if (argc > 3) {
+		(void) fprintf(stderr, gettext("too many pool names\n"));
+		usage(B_FALSE);
+	}
+
+	cb.cb_propname = argv[1];
+	cb.cb_value = strchr(cb.cb_propname, '=');
+	if (cb.cb_value == NULL) {
+		(void) fprintf(stderr, gettext("missing value in "
+		    "property=value argument\n"));
+		usage(B_FALSE);
+	}
+
+	*(cb.cb_value) = '\0';
+	cb.cb_value++;
+
+	error = for_each_pool(argc - 2, argv + 2, B_TRUE, NULL,
+	    set_callback, &cb);
+
+	if (cb.cb_any_successful) {
+		*(cb.cb_value - 1) = '=';
+		zpool_log_history(g_zfs, argc, argv, argv[2], B_FALSE, B_FALSE);
+	}
+
+	return (error);
+}
+
+static int
+find_command_idx(char *command, int *idx)
+{
+	int i;
+
+	for (i = 0; i < NCOMMAND; i++) {
+		if (command_table[i].name == NULL)
+			continue;
+
+		if (strcmp(command, command_table[i].name) == 0) {
+			*idx = i;
+			return (0);
+		}
+	}
+	return (1);
+}
+
+int
+main(int argc, char **argv)
+{
+	int ret;
+	int i;
+	char *cmdname;
+	int found = 0;
+
+	(void) setlocale(LC_ALL, "");
+	(void) textdomain(TEXT_DOMAIN);
+
+	if ((g_zfs = libzfs_init()) == NULL) {
+		(void) fprintf(stderr, gettext("internal error: failed to "
+		    "initialize ZFS library\n"));
+		return (1);
+	}
+
+	libzfs_print_on_error(g_zfs, B_TRUE);
+
+	opterr = 0;
+
+	/*
+	 * Make sure the user has specified some command.
+	 */
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing command\n"));
+		usage(B_FALSE);
+	}
+
+	cmdname = argv[1];
+
+	/*
+	 * Special case '-?'
+	 */
+	if (strcmp(cmdname, "-?") == 0)
+		usage(B_TRUE);
+
+	/*
+	 * Run the appropriate command.
+	 */
+	if (find_command_idx(cmdname, &i) == 0) {
+		current_command = &command_table[i];
+		ret = command_table[i].func(argc - 1, argv + 1);
+		found++;
+	}
+
+	/*
+	 * 'freeze' is a vile debugging abomination, so we treat it as such.
+	 */
+	if (strcmp(cmdname, "freeze") == 0 && argc == 3) {
+		char buf[16384];
+		int fd = open(ZFS_DEV, O_RDWR);
+		(void) strcpy((void *)buf, argv[2]);
+		return (!!ioctl(fd, ZFS_IOC_POOL_FREEZE, buf));
+	}
+
+	if (!found) {
+		(void) fprintf(stderr, gettext("unrecognized "
+		    "command '%s'\n"), cmdname);
+		usage(B_FALSE);
+	}
+
+	libzfs_fini(g_zfs);
+
+	/*
+	 * The 'ZFS_ABORT' environment variable causes us to dump core on exit
+	 * for the purposes of running ::findleaks.
+	 */
+	if (getenv("ZFS_ABORT") != NULL) {
+		(void) printf("dumping core by request\n");
+		abort();
+	}
+
+	return (ret);
+}
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zpool/zpool.8
@@ -0,0 +1,1140 @@
+'\" te
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").  
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved.
+.TH zpool 1M "14 Nov 2006" "SunOS 5.11" "System Administration Commands"
+.SH NAME
+zpool \- configures ZFS storage pools
+.SH SYNOPSIS
+.LP
+.nf
+\fBzpool\fR [\fB-?\fR]
+.fi
+
+.LP
+.nf
+\fBzpool create\fR [\fB-fn\fR] [\fB-R\fR \fIroot\fR] [\fB-m\fR \fImountpoint\fR] \fIpool\fR \fIvdev ...\fR
+.fi
+
+.LP
+.nf
+\fBzpool destroy\fR [\fB-f\fR] \fIpool\fR
+.fi
+
+.LP
+.nf
+\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR
+.fi
+
+.LP
+.nf
+\fBzpool remove\fR \fIpool\fR \fIvdev\fR
+.fi
+
+.LP
+.nf
+\fBzpool \fR \fBlist\fR [\fB-H\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]*] [\fIpool\fR] ...
+.fi
+
+.LP
+.nf
+\fBzpool iostat\fR [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR [\fIcount\fR]]
+.fi
+
+.LP
+.nf
+\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...
+.fi
+
+.LP
+.nf
+\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...
+.fi
+
+.LP
+.nf
+\fBzpool online\fR \fIpool\fR \fIdevice\fR ...
+.fi
+
+.LP
+.nf
+\fBzpool clear\fR \fIpool\fR [\fIdevice\fR] ...
+.fi
+
+.LP
+.nf
+\fBzpool attach\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR \fInew_device\fR
+.fi
+
+.LP
+.nf
+\fBzpool detach\fR \fIpool\fR \fIdevice\fR
+.fi
+
+.LP
+.nf
+\fBzpool replace\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR [\fInew_device\fR]
+.fi
+
+.LP
+.nf
+\fBzpool scrub\fR [\fB-s\fR] \fIpool\fR ...
+.fi
+
+.LP
+.nf
+\fBzpool export\fR [\fB-f\fR] \fIpool\fR
+.fi
+
+.LP
+.nf
+\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR]
+.fi
+
+.LP
+.nf
+\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR] [\fB-f\fR] [\fB-o \fIopts\fR\fR] [\fB-R \fR\fIroot\fR] \fIpool\fR | \fIid\fR 
+    [\fInewpool\fR]
+.fi
+
+.LP
+.nf
+\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR] [\fB-f\fR] [\fB-a\fR]
+.fi
+
+.LP
+.nf
+\fBzpool upgrade\fR 
+.fi
+
+.LP
+.nf
+\fBzpool upgrade\fR \fB-v\fR
+.fi
+
+.LP
+.nf
+\fBzpool upgrade\fR [\fB-a\fR | \fIpool\fR]
+.fi
+
+.LP
+.nf
+\fBzpool history\fR [\fIpool\fR] ...
+.fi
+
+.SH DESCRIPTION
+.LP
+The \fBzpool\fR command configures \fBZFS\fR storage pools. A storage pool is a collection of devices that provides physical storage and data replication for \fBZFS\fR datasets.
+.LP
+All datasets within a storage pool share the same space. See \fBzfs\fR(1M) for information on managing datasets. 
+.SS "Virtual Devices (vdevs)"
+.LP
+A "virtual device" describes a single device or a collection of devices organized according to certain performance and fault characteristics. The following virtual devices are supported:
+.sp
+.ne 2
+.mk
+.na
+\fBdisk\fR
+.ad
+.RS 10n
+.rt  
+A block device, typically located under "/dev/dsk". \fBZFS\fR can use individual slices or partitions, though the recommended mode of operation is to use whole disks. A disk can be specified by a full path, or it can be a shorthand name (the relative portion
+of the path under "/dev/dsk"). A whole disk can be specified by omitting the slice or partition designation. For example, "c0t0d0" is equivalent to "/dev/dsk/c0t0d0s2". When given a whole disk, \fBZFS\fR automatically labels the disk, if necessary.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBfile\fR
+.ad
+.RS 10n
+.rt  
+A regular file. The use of files as a backing store is strongly discouraged. It is designed primarily for experimental purposes, as the fault tolerance of a file is only as good as the file system of which it is a part. A file must be specified by a full path.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBmirror\fR
+.ad
+.RS 10n
+.rt  
+A mirror of two or more devices. Data is replicated in an identical fashion across all components of a mirror. A mirror with \fIN\fR disks of size \fIX\fR can hold \fIX\fR bytes and can withstand (\fIN-1\fR)
+devices failing before data integrity is compromised.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBraidz\fR
+.ad
+.br
+.na
+\fBraidz1\fR
+.ad
+.br
+.na
+\fBraidz2\fR
+.ad
+.RS 10n
+.rt  
+A variation on \fBRAID-5\fR that allows for better distribution of parity and eliminates the "\fBRAID-5\fR write hole" (in which data and parity become inconsistent after a power loss). Data and parity are striped across all disks within a \fBraidz\fR group.
+.sp
+A \fBraidz\fR group can have either single- or double-parity, meaning that the \fBraidz\fR group can sustain one or two failures respectively without losing any data. The \fBraidz1\fR \fBvdev\fR type specifies a single-parity \fBraidz\fR group
+and the \fBraidz2\fR \fBvdev\fR type specifies a double-parity \fBraidz\fR group. The \fBraidz\fR \fBvdev\fR type is an alias for \fBraidz1\fR.
+.sp
+A \fBraidz\fR group with \fIN\fR disks of size \fIX\fR with \fIP\fR parity disks can hold approximately (\fIN-P\fR)*\fIX\fR bytes and can withstand \fIP\fR device(s) failing before
+data integrity is compromised. The minimum number of devices in a \fBraidz\fR group is one more than the number of parity disks. The recommended number is between 3 and 9.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBspare\fR
+.ad
+.RS 10n
+.rt  
+A special pseudo-\fBvdev\fR which keeps track of available hot spares for a pool. For more information, see the "Hot Spares" section.
+.RE
+
+.LP
+Virtual devices cannot be nested, so a mirror or \fBraidz\fR virtual device can only contain files or disks. Mirrors of mirrors (or other combinations) are not allowed.
+.LP
+A pool can have any number of virtual devices at the top of the configuration (known as "root vdevs"). Data is dynamically distributed across all top-level devices to balance data among devices. As new virtual devices are added, \fBZFS\fR automatically places data
+on the newly available devices.
+.LP
+Virtual devices are specified one at a time on the command line, separated by whitespace. The keywords "mirror" and "raidz" are used to distinguish where a group ends and another begins. For example, the following creates two root vdevs, each a mirror of two disks:
+.sp
+.in +2
+.nf
+\fB# zpool create mypool mirror c0t0d0 c0t1d0 mirror c1t0d0 c1t1d0\fR
+.fi
+.in -2
+.sp
+
+.SS "Device Failure and Recovery"
+.LP
+\fBZFS\fR supports a rich set of mechanisms for handling device failure and data corruption. All metadata and data is checksummed, and \fBZFS\fR automatically repairs bad data from a good copy when corruption is detected.
+.LP
+In order to take advantage of these features, a pool must make use of some form of redundancy, using either mirrored or \fBraidz\fR groups. While \fBZFS\fR supports running in a non-redundant configuration, where each root vdev is simply a disk or file, this is
+strongly discouraged. A single case of bit corruption can render some or all of your data unavailable.
+.LP
+A pool's health status is described by one of three states: online, degraded, or faulted. An online pool has all devices operating normally. A degraded pool is one in which one or more devices have failed, but the data is still available due to a redundant configuration. A faulted pool has
+one or more failed devices, and there is insufficient redundancy to replicate the missing data.
+.SS "Hot Spares"
+.LP
+\fBZFS\fR allows devices to be associated with pools as "hot spares". These devices are not actively used in the pool, but when an active device fails, it is automatically replaced by a hot spare. To create a pool with hot spares, specify a "spare" \fBvdev\fR with any number of devices. For example, 
+.sp
+.in +2
+.nf
+# zpool create pool mirror c0d0 c1d0 spare c2d0 c3d0
+.fi
+.in -2
+.sp
+
+.LP
+Spares can be shared across multiple pools, and can be added with the "zpool add" command and removed with the "zpool remove" command. Once a spare replacement is initiated, a new "spare" \fBvdev\fR is created within the configuration that
+will remain there until the original device is replaced. At this point, the hot spare becomes available again if another device fails.
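+.LP
+For example, a shared spare might be added to or removed from an existing pool with commands such as the following (the device name "c4d0" is only a placeholder):
+.sp
+.in +2
+.nf
+# zpool add pool spare c4d0
+# zpool remove pool c4d0
+.fi
+.in -2
+.sp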
+.LP
+An in-progress spare replacement can be cancelled by detaching the hot spare. If the original faulted device is detached, then the hot spare assumes its place in the configuration, and is removed from the spare list of all active pools.
+.SS "Alternate Root Pools"
+.LP
+The "zpool create -R" and "zpool import -R" commands allow users to create and import a pool with a different root path. By default, whenever a pool is created or imported on a system, it is permanently added so that it is available whenever the system boots. For
+removable media, or when in recovery situations, this may not always be desirable. An alternate root pool does not persist on the system. Instead, it exists only until exported or the system is rebooted, at which point it will have to be imported again.
+.LP
+In addition, all mount points in the pool are prefixed with the given root, so a pool can be constrained to a particular area of the file system. This is most useful when importing unknown pools from removable media, as the mount points of any file systems cannot be trusted.
+.LP
+When creating an alternate root pool, the default mount point is "/", rather than the normal default "/\fIpool\fR".
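+.LP
+For example, a pool on removable media might be created under an alternate root with a command such as the following (the pool name, device name, and root path are only placeholders):
+.sp
+.in +2
+.nf
+# zpool create -R /mnt pool c0t0d0
+.fi
+.in -2
+.sp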
+.SS "Subcommands"
+.LP
+All subcommands that modify state are logged persistently to the pool in their original form.
+.LP
+The \fBzpool\fR command provides subcommands to create and destroy storage pools, add capacity to storage pools, and provide information about the storage pools. The following subcommands are supported:
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool\fR \fB-?\fR\fR
+.ad
+.sp .6
+.RS 4n
+Displays a help message.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool create\fR [\fB-fn\fR] [\fB-R\fR \fIroot\fR] [\fB-m\fR \fImountpoint\fR] \fIpool\fR \fIvdev ...\fR\fR
+.ad
+.sp .6
+.RS 4n
+Creates a new storage pool containing the virtual devices specified on the command line. The pool name must begin with a letter, and can only contain alphanumeric characters as well as underscore ("_"), dash ("-"), and period ("."). The pool
+names "mirror", "raidz", and "spare" are reserved, as are names beginning with the pattern "c[0-9]". The \fBvdev\fR specification is described in the "Virtual Devices" section.
+.sp
+The command verifies that each device specified is accessible and not currently in use by another subsystem. There are some uses, such as being currently mounted, or specified as the dedicated dump device, that prevents a device from ever being used by \fBZFS\fR. Other uses,
+such as having a preexisting \fBUFS\fR file system, can be overridden with the \fB-f\fR option.
+.sp
+The command also checks that the replication strategy for the pool is consistent. An attempt to combine redundant and non-redundant storage in a single pool, or to mix disks and files, results in an error unless \fB-f\fR is specified. The use of differently sized devices within
+a single \fBraidz\fR or mirror group is also flagged as an error unless \fB-f\fR is specified.
+.sp
+Unless the \fB-R\fR option is specified, the default mount point is "/\fIpool\fR". The mount point must not exist or must be empty, or else the root dataset cannot be mounted. This can be overridden with the \fB-m\fR option.
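+.sp
+For example, a pool with a single \fBraidz\fR root \fBvdev\fR might be created with a command such as the following (the pool and device names are only placeholders):
+.sp
+.in +2
+.nf
+# zpool create tank raidz c0t0d0 c0t1d0 c0t2d0
+.fi
+.in -2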
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 17n
+.rt  
+Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting replication level. Not all devices can be overridden in this manner.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-n\fR\fR
+.ad
+.RS 17n
+.rt  
+Displays the configuration that would be used without actually creating the pool. The actual pool creation can still fail due to insufficient privileges or device sharing.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-R\fR \fIroot\fR\fR
+.ad
+.RS 17n
+.rt  
+Creates the pool with an alternate \fIroot\fR. See the "Alternate Root Pools" section. The root dataset has its mount point set to "/" as part of this operation.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-m\fR \fImountpoint\fR\fR
+.ad
+.RS 17n
+.rt  
+Sets the mount point for the root dataset. The default mount point is "/\fIpool\fR". The mount point must be an absolute path, "\fBlegacy\fR", or "\fBnone\fR". For more information on dataset mount
+points, see \fBzfs\fR(1M).
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool destroy\fR [\fB-f\fR] \fIpool\fR\fR
+.ad
+.sp .6
+.RS 4n
+Destroys the given pool, freeing up any devices for other use. This command tries to unmount any active datasets before destroying the pool.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 6n
+.rt  
+Forces any active datasets contained within the pool to be unmounted.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev ...\fR\fR
+.ad
+.sp .6
+.RS 4n
+Adds the specified virtual devices to the given pool. The \fIvdev\fR specification is described in the "Virtual Devices" section. The behavior of the \fB-f\fR option, and the device checks performed are described in the "zpool create"
+subcommand.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 6n
+.rt  
+Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting replication level. Not all devices can be overridden in this manner.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-n\fR\fR
+.ad
+.RS 6n
+.rt  
+Displays the configuration that would be used without actually adding the \fBvdev\fRs. The actual pool creation can still fail due to insufficient privileges or device sharing.
+.RE
+
+Do not add a disk that is currently configured as a quorum device to a zpool. Once a disk is in a zpool, that disk can then be configured as a quorum device.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool remove\fR \fIpool\fR \fIvdev\fR\fR
+.ad
+.sp .6
+.RS 4n
+Removes the given \fBvdev\fR from the pool. This command currently only supports removing hot spares. Devices which are part of a mirror can be removed using the "zpool detach" command. \fBRaidz\fR and top-level \fBvdevs\fR cannot
+be removed from a pool.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]*] [\fIpool\fR] ...\fR
+.ad
+.sp .6
+.RS 4n
+Lists the given pools along with a health status and space usage. When given no arguments, all pools in the system are listed.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-H\fR\fR
+.ad
+.RS 12n
+.rt  
+Scripted mode. Do not display headers, and separate fields by a single tab instead of arbitrary space.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fIfield\fR\fR
+.ad
+.RS 12n
+.rt  
+Comma-separated list of fields to display. Each field must be one of:
+.sp
+.in +2
+.nf
+name            Pool name
+size            Total size
+used            Amount of space used
+available       Amount of space available
+capacity        Percentage of pool space used
+health          Health status
+.fi
+.in -2
+.sp
+
+The default is all fields.
+.RE
+
+This command reports actual physical space available to the storage pool. The physical space can be different from the total amount of space that any contained datasets can actually use. The amount of space used in a \fBraidz\fR configuration depends on the characteristics of
+the data being written. In addition, \fBZFS\fR reserves some space for internal accounting that the \fBzfs\fR(1M) command takes into account, but the \fBzpool\fR command does not. For non-full pools of a reasonable size, these effects should be invisible. For small pools, or pools that are close to being completely full, these discrepancies may become more noticeable.
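+.sp
+For example, a script might request specific fields in scripted mode with a command such as the following (the pool name is only a placeholder):
+.sp
+.in +2
+.nf
+# zpool list -H -o name,size,capacity tank
+.fi
+.in -2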
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool iostat\fR [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR [\fIcount\fR]]\fR
+.ad
+.sp .6
+.RS 4n
+Displays \fBI/O\fR statistics for the given pools. When given an interval, the statistics are printed every \fIinterval\fR seconds until \fBCtrl-C\fR is pressed. If no \fIpools\fR are specified, statistics for
+every pool in the system are shown. If \fIcount\fR is specified, the command exits after \fIcount\fR reports are printed.
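+.sp
+For example, per-\fBvdev\fR statistics might be printed every 5 seconds with a command such as the following (the pool name is only a placeholder):
+.sp
+.in +2
+.nf
+# zpool iostat -v tank 5
+.fi
+.in -2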
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-v\fR\fR
+.ad
+.RS 6n
+.rt  
+Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within the pool, in addition to the pool-wide statistics.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...\fR
+.ad
+.sp .6
+.RS 4n
+Displays the detailed health status for the given pools. If no \fIpool\fR is specified, then the status of each pool in the system is displayed.
+.sp
+If a scrub or resilver is in progress, this command reports the percentage done and the estimated time to completion. Both of these are only approximate, because the amount of data in the pool and the other workloads on the system can change.
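+.sp
+For example, the following commands report only pools with problems, or print verbose error information for a single pool (the pool name is only a placeholder):
+.sp
+.in +2
+.nf
+# zpool status -x
+# zpool status -v tank
+.fi
+.in -2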
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-x\fR\fR
+.ad
+.RS 6n
+.rt  
+Only display status for pools that are exhibiting errors or are otherwise unavailable.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-v\fR\fR
+.ad
+.RS 6n
+.rt  
+Displays verbose data error information, printing out a complete list of all data errors since the last complete pool scrub.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...\fR
+.ad
+.sp .6
+.RS 4n
+Takes the specified physical device offline. While the \fIdevice\fR is offline, no attempt is made to read or write to the device.
+.sp
+This command is not applicable to spares.
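+.sp
+For example, a device might be taken offline temporarily with a command such as the following (the pool and device names are only placeholders):
+.sp
+.in +2
+.nf
+# zpool offline -t tank c0t0d0
+.fi
+.in -2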
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-t\fR\fR
+.ad
+.RS 6n
+.rt  
+Temporary. Upon reboot, the specified physical device reverts to its previous state.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool online\fR \fIpool\fR \fIdevice\fR ...\fR
+.ad
+.sp .6
+.RS 4n
+Brings the specified physical device online.
+.sp
+This command is not applicable to spares.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool clear\fR \fIpool\fR [\fIdevice\fR] ...\fR
+.ad
+.sp .6
+.RS 4n
+Clears device errors in a pool. If no arguments are specified, all device errors within the pool are cleared. If one or more devices is specified, only those errors associated with the specified device or devices are cleared.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool attach\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR \fInew_device\fR\fR
+.ad
+.sp .6
+.RS 4n
+Attaches \fInew_device\fR to an existing \fBzpool\fR device. The existing device cannot be part of a \fBraidz\fR configuration. If \fIdevice\fR is not currently part of a mirrored configuration, \fIdevice\fR automatically
+transforms into a two-way mirror of \fIdevice\fR and \fInew_device\fR. If \fIdevice\fR is part of a two-way mirror, attaching \fInew_device\fR creates a three-way mirror, and so on. In either case, \fInew_device\fR begins to resilver immediately.
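+.sp
+For example, a device might be attached to an existing device, creating or extending a mirror, with a command such as the following (the pool and device names are only placeholders):
+.sp
+.in +2
+.nf
+# zpool attach tank c0t0d0 c1t0d0
+.fi
+.in -2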
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 6n
+.rt  
+Forces use of \fInew_device\fR, even if it appears to be in use. Not all devices can be overridden in this manner.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool detach\fR \fIpool\fR \fIdevice\fR\fR
+.ad
+.sp .6
+.RS 4n
+Detaches \fIdevice\fR from a mirror. The operation is refused if there are no other valid replicas of the data.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool replace\fR [\fB-f\fR] \fIpool\fR \fIold_device\fR [\fInew_device\fR]\fR
+.ad
+.sp .6
+.RS 4n
+Replaces \fIold_device\fR with \fInew_device\fR. This is equivalent to attaching \fInew_device\fR, waiting for it to resilver, and then detaching \fIold_device\fR.
+.sp
+The size of \fInew_device\fR must be greater than or equal to the minimum size of all the devices in a mirror or \fBraidz\fR configuration.
+.sp
+If \fInew_device\fR is not specified, it defaults to \fIold_device\fR. This form of replacement is useful after an existing disk has failed and has been physically replaced. In this case, the new disk may have the same \fB/dev/dsk\fR path
+as the old device, even though it is actually a different disk. \fBZFS\fR recognizes this.
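+.sp
+For example, a failed disk might be replaced in place, or with a different device, using commands such as the following (the pool and device names are only placeholders):
+.sp
+.in +2
+.nf
+# zpool replace tank c0t0d0
+# zpool replace tank c0t0d0 c0t1d0
+.fi
+.in -2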
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 6n
+.rt  
+Forces use of \fInew_device\fR, even if it appears to be in use. Not all devices can be overridden in this manner.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool scrub\fR [\fB-s\fR] \fIpool\fR ...\fR
+.ad
+.sp .6
+.RS 4n
+Begins a scrub. The scrub examines all data in the specified pools to verify that it checksums correctly. For replicated (mirror or \fBraidz\fR) devices, \fBZFS\fR automatically repairs any damage discovered during the scrub. The "\fBzpool
+status\fR" command reports the progress of the scrub and summarizes the results of the scrub upon completion.
+.sp
+Scrubbing and resilvering are very similar operations. The difference is that resilvering only examines data that \fBZFS\fR knows to be out of date (for example, when attaching a new device to a mirror or replacing an existing device), whereas scrubbing examines all data to
+discover silent errors due to hardware faults or disk failure.
+.sp
+Because scrubbing and resilvering are \fBI/O\fR-intensive operations, \fBZFS\fR only allows one at a time. If a scrub is already in progress, the "\fBzpool scrub\fR" command terminates it and starts a new scrub. If a resilver is in progress, \fBZFS\fR does not allow a scrub to be started until the resilver completes.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-s\fR\fR
+.ad
+.RS 6n
+.rt  
+Stop scrubbing.
+.RE
+
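+.sp
+For example, the following commands (the pool name is illustrative only) start and then stop a scrub:
+.sp
+.in +2
+.nf
+\fB# zpool scrub tank\fR
+\fB# zpool scrub -s tank\fR
+.fi
+.in -2
+.sp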
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...\fR
+.ad
+.sp .6
+.RS 4n
+Exports the given pools from the system. All devices are marked as exported, but are still considered in use by other subsystems. The devices can be moved between systems (even those of different endianness) and imported as long as a sufficient number of devices are present.
+.sp
+Before exporting the pool, all datasets within the pool are unmounted.
+.sp
+For pools to be portable, you must give the \fBzpool\fR command whole disks, not just slices, so that \fBZFS\fR can label the disks with portable \fBEFI\fR labels. Otherwise, disk drivers on platforms of different endianness will not recognize the disks.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 6n
+.rt  
+Forcefully unmount all datasets, using the "\fBunmount -f\fR" command.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR]\fR
+.ad
+.sp .6
+.RS 4n
+Lists pools available to import. If the \fB-d\fR option is not specified, this command searches for devices in "/dev/dsk". The \fB-d\fR option can be specified multiple times, and all directories are searched. If the device appears to be part of
+an exported pool, this command displays a summary of the pool with the name of the pool, a numeric identifier, as well as the \fIvdev\fR layout and current health of the device for each device or file. Destroyed pools (pools that were previously destroyed with the "\fBzpool destroy\fR" command) are not listed unless the \fB-D\fR option is specified.
+.sp
+The numeric identifier is unique, and can be used instead of the pool name when multiple exported pools of the same name are available.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-d\fR \fIdir\fR\fR
+.ad
+.RS 10n
+.rt  
+Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified multiple times.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-D\fR\fR
+.ad
+.RS 10n
+.rt  
+Lists destroyed pools only.
+.RE
+
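+.sp
+For example, the following command (the directory name is illustrative only) lists pools whose devices or files reside in an alternate directory:
+.sp
+.in +2
+.nf
+\fB# zpool import -d /var/zfs\fR
+.fi
+.in -2
+.sp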
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR] [\fB-f\fR] [\fB-o\fR \fIopts\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR | \fIid\fR [\fInewpool\fR]\fR
+.ad
+.sp .6
+.RS 4n
+Imports a specific pool. A pool can be identified by its name or the numeric identifier. If \fInewpool\fR is specified, the pool is imported using the name \fInewpool\fR. Otherwise, it is imported with the same name as its exported name.
+.sp
+If a device is removed from a system without running "\fBzpool export\fR" first, the device appears as potentially active. It cannot be determined if this was a failed export, or whether the device is really in use from another host. To import a pool in this state,
+the \fB-f\fR option is required.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-d\fR \fIdir\fR\fR
+.ad
+.RS 11n
+.rt  
+Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified multiple times.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-D\fR\fR
+.ad
+.RS 11n
+.rt  
+Imports a destroyed pool. The \fB-f\fR option is also required.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 11n
+.rt  
+Forces import, even if the pool appears to be potentially active.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fIopts\fR\fR
+.ad
+.RS 11n
+.rt  
+Comma-separated list of mount options to use when mounting datasets within the pool. See \fBzfs\fR(1M) for a description of dataset properties and mount
+options.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-R\fR \fIroot\fR\fR
+.ad
+.RS 11n
+.rt  
+Imports pool(s) with an alternate \fIroot\fR. See the "Alternate Root Pools" section.
+.RE
+
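+.sp
+For example, the following command (the pool name and alternate root are illustrative only) imports a pool under an alternate root:
+.sp
+.in +2
+.nf
+\fB# zpool import -R /mnt tank\fR
+.fi
+.in -2
+.sp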
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR] [\fB-f\fR] [\fB-a\fR]\fR
+.ad
+.sp .6
+.RS 4n
+Imports all pools found in the search directories. Identical to the previous command, except that all pools with a sufficient number of devices available are imported. Destroyed pools (pools that were previously destroyed with the "\fBzpool destroy\fR" command)
+will not be imported unless the \fB-D\fR option is specified.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-d\fR \fIdir\fR\fR
+.ad
+.RS 10n
+.rt  
+Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified multiple times.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-D\fR\fR
+.ad
+.RS 10n
+.rt  
+Imports destroyed pools only. The \fB-f\fR option is also required.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 10n
+.rt  
+Forces import, even if the pool appears to be potentially active.
+.RE
+
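+.sp
+For example, the following command (the directory name is illustrative only) imports every exported pool whose devices can be found in the given directory:
+.sp
+.in +2
+.nf
+\fB# zpool import -d /var/zfs -a\fR
+.fi
+.in -2
+.sp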
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool upgrade\fR\fR
+.ad
+.sp .6
+.RS 4n
+Displays all pools formatted using a different \fBZFS\fR on-disk version. Older versions can continue to be used, but some features may not be available. These pools can be upgraded using "\fBzpool upgrade -a\fR". Pools that are formatted with
+a more recent version are also displayed, although these pools will be inaccessible on the system.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool upgrade\fR \fB-v\fR\fR
+.ad
+.sp .6
+.RS 4n
+Displays \fBZFS\fR versions supported by the current software. The current \fBZFS\fR versions and all previous supported versions are displayed, along with an explanation of the features provided with each version.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool upgrade\fR [\fB-a\fR | \fIpool\fR]\fR
+.ad
+.sp .6
+.RS 4n
+Upgrades the given pool to the latest on-disk version. Once this is done, the pool will no longer be accessible on systems running older versions of the software.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-a\fR\fR
+.ad
+.RS 6n
+.rt  
+Upgrades all pools.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool history\fR [\fIpool\fR] ...\fR
+.ad
+.sp .6
+.RS 4n
+Displays the command history of the specified pools (or all pools if no pool is specified).
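+.sp
+For example, the following command (the pool name is illustrative only) displays the command history of a single pool:
+.sp
+.in +2
+.nf
+\fB# zpool history tank\fR
+.fi
+.in -2
+.sp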
+.RE
+
+.SH EXAMPLES
+.LP
+\fBExample 1 \fRCreating a RAID-Z Storage Pool
+.LP
+The following command creates a pool with a single \fBraidz\fR root \fIvdev\fR that consists of six disks.
+
+.sp
+.in +2
+.nf
+\fB# zpool create tank raidz c0t0d0 c0t1d0 c0t2d0 c0t3d0 c0t4d0 c0t5d0\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 2 \fRCreating a Mirrored Storage Pool
+.LP
+The following command creates a pool with two mirrors, where each mirror contains two disks.
+
+.sp
+.in +2
+.nf
+\fB# zpool create tank mirror c0t0d0 c0t1d0 mirror c0t2d0 c0t3d0\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 3 \fRCreating a ZFS Storage Pool by Using Slices
+.LP
+The following command creates an unmirrored pool using two disk slices.
+
+.sp
+.in +2
+.nf
+\fB# zpool create tank /dev/dsk/c0t0d0s1 c0t1d0s4\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 4 \fRCreating a ZFS Storage Pool by Using Files
+.LP
+The following command creates an unmirrored pool using files. While not recommended, a pool based on files can be useful for experimental purposes.
+
+.sp
+.in +2
+.nf
+\fB# zpool create tank /path/to/file/a /path/to/file/b\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 5 \fRAdding a Mirror to a ZFS Storage Pool
+.LP
+The following command adds two mirrored disks to the pool "\fItank\fR", assuming the pool is already made up of two-way mirrors. The additional space is immediately available to any datasets within the pool.
+
+.sp
+.in +2
+.nf
+\fB# zpool add tank mirror c1t0d0 c1t1d0\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 6 \fRListing Available ZFS Storage Pools
+.LP
+The following command lists all available pools on the system. In this case, the pool \fIzion\fR is faulted due to a missing device.
+
+.LP
+The results from this command are similar to the following:
+
+.sp
+.in +2
+.nf
+\fB# zpool list\fR
+    NAME              SIZE    USED   AVAIL    CAP  HEALTH     ALTROOT
+    pool             67.5G   2.92M   67.5G     0%  ONLINE     -
+    tank             67.5G   2.92M   67.5G     0%  ONLINE     -
+    zion                 -       -       -     0%  FAULTED    -
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 7 \fRDestroying a ZFS Storage Pool
+.LP
+The following command destroys the pool "\fItank\fR" and any datasets contained within.
+
+.sp
+.in +2
+.nf
+\fB# zpool destroy -f tank\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 8 \fRExporting a ZFS Storage Pool
+.LP
+The following command exports the devices in pool \fItank\fR so that they can be relocated or later imported.
+
+.sp
+.in +2
+.nf
+\fB# zpool export tank\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 9 \fRImporting a ZFS Storage Pool
+.LP
+The following command displays available pools, and then imports the pool "tank" for use on the system.
+
+.LP
+The results from this command are similar to the following:
+
+.sp
+.in +2
+.nf
+\fB# zpool import\fR
+ pool: tank
+   id: 15451357997522795478
+state: ONLINE
+action: The pool can be imported using its name or numeric identifier.
+config:
+
+       tank        ONLINE
+         mirror    ONLINE
+           c1t2d0  ONLINE
+           c1t3d0  ONLINE
+
+\fB# zpool import tank\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 10 \fRUpgrading All ZFS Storage Pools to the Current Version
+.LP
+The following command upgrades all ZFS Storage pools to the current version of the software.
+
+.sp
+.in +2
+.nf
+\fB# zpool upgrade -a\fR
+This system is currently running ZFS version 2.
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 11 \fRManaging Hot Spares
+.LP
+The following command creates a new pool with an available hot spare:
+
+.sp
+.in +2
+.nf
+\fB# zpool create tank mirror c0t0d0 c0t1d0 spare c0t2d0\fR
+.fi
+.in -2
+.sp
+
+.LP
+If one of the disks were to fail, the pool would be reduced to the degraded state. The failed device can be replaced using the following command:
+
+.sp
+.in +2
+.nf
+\fB# zpool replace tank c0t0d0 c0t3d0\fR
+.fi
+.in -2
+.sp
+
+.LP
+Once the data has been resilvered, the spare is automatically removed and is made available should another device fail.  The hot spare can be permanently removed from the pool using the following command:
+
+.sp
+.in +2
+.nf
+\fB# zpool remove tank c0t2d0\fR
+.fi
+.in -2
+.sp
+
+.SH EXIT STATUS
+.LP
+The following exit values are returned:
+.sp
+.ne 2
+.mk
+.na
+\fB\fB0\fR\fR
+.ad
+.RS 5n
+.rt  
+Successful completion. 
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB1\fR\fR
+.ad
+.RS 5n
+.rt  
+An error occurred.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB2\fR\fR
+.ad
+.RS 5n
+.rt  
+Invalid command line options were specified.
+.RE
+
+.SH ATTRIBUTES
+.LP
+See \fBattributes\fR(5) for descriptions of the following attributes:
+.sp
+
+.sp
+.TS
+tab(:) box;
+cw(2.75i) |cw(2.75i)
+lw(2.75i) |lw(2.75i)
+.
+ATTRIBUTE TYPE:ATTRIBUTE VALUE
+_
+Availability:SUNWzfsu
+_
+Interface Stability:Evolving
+.TE
+
+.SH SEE ALSO
+.LP
+\fBzfs\fR(1M), \fBattributes\fR(5)
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c
@@ -0,0 +1,245 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <solaris.h>
+#include <libintl.h>
+#include <libuutil.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include <libzfs.h>
+
+#include "zpool_util.h"
+
+/*
+ * Private interface for iterating over pools specified on the command line.
+ * Most consumers will call for_each_pool, but in order to support iostat, we
+ * allow fine-grained control through the zpool_list_t interface.
+ */
+
+typedef struct zpool_node {
+	zpool_handle_t	*zn_handle;
+	uu_avl_node_t	zn_avlnode;
+	int		zn_mark;
+} zpool_node_t;
+
+struct zpool_list {
+	boolean_t	zl_findall;
+	uu_avl_t	*zl_avl;
+	uu_avl_pool_t	*zl_pool;
+};
+
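+/*
+ * AVL comparison callback: order pool handles by pool name.
+ */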
+/* ARGSUSED */
+static int
+zpool_compare(const void *larg, const void *rarg, void *unused)
+{
+	zpool_handle_t *l = ((zpool_node_t *)larg)->zn_handle;
+	zpool_handle_t *r = ((zpool_node_t *)rarg)->zn_handle;
+	const char *lname = zpool_get_name(l);
+	const char *rname = zpool_get_name(r);
+
+	return (strcmp(lname, rname));
+}
+
+/*
+ * Callback function for pool_list_get().  Adds the given pool to the AVL tree
+ * of known pools.
+ */
+static int
+add_pool(zpool_handle_t *zhp, void *data)
+{
+	zpool_list_t *zlp = data;
+	zpool_node_t *node = safe_malloc(sizeof (zpool_node_t));
+	uu_avl_index_t idx;
+
+	node->zn_handle = zhp;
+	uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool);
+	if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) {
+		uu_avl_insert(zlp->zl_avl, node, idx);
+	} else {
+		zpool_close(zhp);
+		free(node);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Create a list of pools based on the given arguments.  If we're given no
+ * arguments, then iterate over all pools in the system and add them to the AVL
+ * tree.  Otherwise, add only those pools explicitly specified on the command
+ * line.
+ */
+zpool_list_t *
+pool_list_get(int argc, char **argv, zpool_proplist_t **proplist, int *err)
+{
+	zpool_list_t *zlp;
+
+	zlp = safe_malloc(sizeof (zpool_list_t));
+
+	zlp->zl_pool = uu_avl_pool_create("zfs_pool", sizeof (zpool_node_t),
+	    offsetof(zpool_node_t, zn_avlnode), zpool_compare, UU_DEFAULT);
+
+	if (zlp->zl_pool == NULL)
+		zpool_no_memory();
+
+	if ((zlp->zl_avl = uu_avl_create(zlp->zl_pool, NULL,
+	    UU_DEFAULT)) == NULL)
+		zpool_no_memory();
+
+	if (argc == 0) {
+		(void) zpool_iter(g_zfs, add_pool, zlp);
+		zlp->zl_findall = B_TRUE;
+	} else {
+		int i;
+
+		for (i = 0; i < argc; i++) {
+			zpool_handle_t *zhp;
+
+			if ((zhp = zpool_open_canfail(g_zfs,
+			    argv[i])) != NULL && add_pool(zhp, zlp) == 0) {
+				if (proplist &&
+				    zpool_expand_proplist(zhp, proplist) != 0)
+					*err = B_TRUE;
+			} else
+				*err = B_TRUE;
+		}
+	}
+
+	return (zlp);
+}
+
+/*
+ * Search for any new pools, adding them to the list.  We only add pools when no
+ * options were given on the command line.  Otherwise, we keep the list fixed as
+ * those that were explicitly specified.
+ */
+void
+pool_list_update(zpool_list_t *zlp)
+{
+	if (zlp->zl_findall)
+		(void) zpool_iter(g_zfs, add_pool, zlp);
+}
+
+/*
+ * Iterate over all pools in the list, executing the callback for each
+ */
+int
+pool_list_iter(zpool_list_t *zlp, int unavail, zpool_iter_f func,
+    void *data)
+{
+	zpool_node_t *node, *next_node;
+	int ret = 0;
+
+	for (node = uu_avl_first(zlp->zl_avl); node != NULL; node = next_node) {
+		next_node = uu_avl_next(zlp->zl_avl, node);
+		if (zpool_get_state(node->zn_handle) != POOL_STATE_UNAVAIL ||
+		    unavail)
+			ret |= func(node->zn_handle, data);
+	}
+
+	return (ret);
+}
+
+/*
+ * Remove the given pool from the list.  When running iostat, we want to remove
+ * those pools that no longer exist.
+ */
+void
+pool_list_remove(zpool_list_t *zlp, zpool_handle_t *zhp)
+{
+	zpool_node_t search, *node;
+
+	search.zn_handle = zhp;
+	if ((node = uu_avl_find(zlp->zl_avl, &search, NULL, NULL)) != NULL) {
+		uu_avl_remove(zlp->zl_avl, node);
+		zpool_close(node->zn_handle);
+		free(node);
+	}
+}
+
+/*
+ * Free all the handles associated with this list.
+ */
+void
+pool_list_free(zpool_list_t *zlp)
+{
+	uu_avl_walk_t *walk;
+	zpool_node_t *node;
+
+	if ((walk = uu_avl_walk_start(zlp->zl_avl, UU_WALK_ROBUST)) == NULL) {
+		(void) fprintf(stderr,
+		    gettext("internal error: out of memory"));
+		exit(1);
+	}
+
+	while ((node = uu_avl_walk_next(walk)) != NULL) {
+		uu_avl_remove(zlp->zl_avl, node);
+		zpool_close(node->zn_handle);
+		free(node);
+	}
+
+	uu_avl_walk_end(walk);
+	uu_avl_destroy(zlp->zl_avl);
+	uu_avl_pool_destroy(zlp->zl_pool);
+
+	free(zlp);
+}
+
+/*
+ * Returns the number of elements in the pool list.
+ */
+int
+pool_list_count(zpool_list_t *zlp)
+{
+	return (uu_avl_numnodes(zlp->zl_avl));
+}
+
+/*
+ * High level function which iterates over all pools given on the command line,
+ * using the pool_list_* interfaces.
+ */
+int
+for_each_pool(int argc, char **argv, boolean_t unavail,
+    zpool_proplist_t **proplist, zpool_iter_f func, void *data)
+{
+	zpool_list_t *list;
+	int ret = 0;
+
+	if ((list = pool_list_get(argc, argv, proplist, &ret)) == NULL)
+		return (1);
+
+	if (pool_list_iter(list, unavail, func, data) != 0)
+		ret = 1;
+
+	pool_list_free(list);
+
+	return (ret);
+}
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
@@ -0,0 +1,883 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Functions to convert between a list of vdevs and an nvlist representing the
+ * configuration.  Each entry in the list can be one of:
+ *
+ * 	Device vdevs
+ * 		disk=(path=..., devid=...)
+ * 		file=(path=...)
+ *
+ * 	Group vdevs
+ * 		raidz[1|2]=(...)
+ * 		mirror=(...)
+ *
+ * 	Hot spares
+ *
+ * While the underlying implementation supports it, group vdevs cannot contain
+ * other group vdevs.  All userland verification of devices is contained within
+ * this file.  If successful, the nvlist returned can be passed directly to the
+ * kernel; we've done as much verification as possible in userland.
+ *
+ * Hot spares are a special case, and passed down as an array of disk vdevs, at
+ * the same level as the root of the vdev tree.
+ *
+ * The only function exported by this file is 'make_root_vdev'.  The function
+ * performs several passes:
+ *
+ * 	1. Construct the vdev specification.  Performs syntax validation and
+ *         makes sure each device is valid.
+ * 	2. Check for devices in use.  Using libgeom, make sure that no
+ *         devices are already in use.  Some can be overridden using the 'force'
+ *         flag, others cannot.
+ * 	3. Check for replication errors if the 'force' flag is not specified.
+ *         This validates that the replication level is consistent across the
+ *         entire pool.
+ */
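+
+/*
+ * As an illustrative example (the device names are hypothetical), the vdev
+ * specification "mirror da0 da1 spare da2" would be converted into a
+ * configuration of the form:
+ *
+ * 	root
+ * 	    mirror
+ * 	        disk=(path=/dev/da0, devid=...)
+ * 	        disk=(path=/dev/da1, devid=...)
+ * 	    spares
+ * 	        disk=(path=/dev/da2)
+ */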
+
+#include <assert.h>
+#include <devid.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <libnvpair.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <paths.h>
+#include <sys/stat.h>
+#include <sys/disk.h>
+#include <sys/mntent.h>
+#include <libgeom.h>
+
+#include <libzfs.h>
+
+#include "zpool_util.h"
+
+/*
+ * For any given vdev specification, we can have multiple errors.  The
+ * vdev_error() function keeps track of whether we have seen an error yet, and
+ * prints out a header if it's the first error we've seen.
+ */
+boolean_t error_seen;
+boolean_t is_force;
+
+/*PRINTFLIKE1*/
+static void
+vdev_error(const char *fmt, ...)
+{
+	va_list ap;
+
+	if (!error_seen) {
+		(void) fprintf(stderr, gettext("invalid vdev specification\n"));
+		if (!is_force)
+			(void) fprintf(stderr, gettext("use '-f' to override "
+			    "the following errors:\n"));
+		else
+			(void) fprintf(stderr, gettext("the following errors "
+			    "must be manually repaired:\n"));
+		error_seen = B_TRUE;
+	}
+
+	va_start(ap, fmt);
+	(void) vfprintf(stderr, fmt, ap);
+	va_end(ap);
+}
+
+/*
+ * Validate a GEOM provider.
+ */
+static int
+check_provider(const char *name, boolean_t force, boolean_t isspare)
+{
+	struct gmesh mesh;
+	struct gclass *mp;
+	struct ggeom *gp;
+	struct gprovider *pp;
+	int rv;
+
+	/* XXX: What to do with isspare? */
+
+	if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+		name += sizeof(_PATH_DEV) - 1;
+
+	rv = geom_gettree(&mesh);
+	assert(rv == 0);
+
+	pp = NULL;
+	LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
+		LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
+			LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
+				if (strcmp(pp->lg_name, name) == 0)
+					goto out;
+			}
+		}
+	}
+out:
+	rv = -1;
+	if (pp == NULL)
+		vdev_error("no such provider %s\n", name);
+	else {
+		int acr, acw, ace;
+
+		VERIFY(sscanf(pp->lg_mode, "r%dw%de%d", &acr, &acw, &ace) == 3);
+		if (acw == 0 && ace == 0)
+			rv = 0;
+		else
+			vdev_error("%s is in use (%s)\n", name, pp->lg_mode);
+	}
+	geom_deletetree(&mesh);
+	return (rv);
+}
+
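+/*
+ * Determine whether the given name refers to a usable GEOM provider by
+ * attempting to open it.
+ */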
+static boolean_t
+is_provider(const char *name)
+{
+	int fd;
+
+	fd = g_open(name, 0);
+	if (fd >= 0) {
+		g_close(fd);
+		return (B_TRUE);
+	}
+	return (B_FALSE);
+
+}
+/*
+ * Create a leaf vdev.  Determine if this is a GEOM provider.
+ * Valid forms for a leaf vdev are:
+ *
+ * 	/dev/xxx	Complete path to a GEOM provider
+ * 	xxx		Shorthand for /dev/xxx
+ */
+nvlist_t *
+make_leaf_vdev(const char *arg)
+{
+	char ident[DISK_IDENT_SIZE], path[MAXPATHLEN];
+	struct stat64 statbuf;
+	nvlist_t *vdev = NULL;
+	char *type = NULL;
+	boolean_t wholedisk = B_FALSE;
+
+	if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+		strlcpy(path, arg, sizeof (path));
+	else
+		snprintf(path, sizeof (path), "%s%s", _PATH_DEV, arg);
+
+	if (is_provider(path))
+		type = VDEV_TYPE_DISK;
+	else {
+		(void) fprintf(stderr, gettext("cannot use '%s': must be a "
+		    "GEOM provider\n"), path);
+		return (NULL);
+	}
+
+	/*
+	 * Finally, we have the complete device or file, and we know that it is
+	 * acceptable to use.  Construct the nvlist to describe this vdev.  All
+	 * vdevs have a 'path' element, and devices also have a 'devid' element.
+	 */
+	verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
+	verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
+	verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
+	if (strcmp(type, VDEV_TYPE_DISK) == 0)
+		verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
+		    (uint64_t)B_FALSE) == 0);
+
+	/*
+	 * For a whole disk, defer getting its devid until after labeling it.
+	 */
+	if (1 || (S_ISBLK(statbuf.st_mode) && !wholedisk)) {
+		/*
+		 * Get the devid for the device.
+		 */
+		int fd;
+		ddi_devid_t devid;
+		char *minor = NULL, *devid_str = NULL;
+
+		if ((fd = open(path, O_RDONLY)) < 0) {
+			(void) fprintf(stderr, gettext("cannot open '%s': "
+			    "%s\n"), path, strerror(errno));
+			nvlist_free(vdev);
+			return (NULL);
+		}
+
+		if (devid_get(fd, &devid) == 0) {
+			if (devid_get_minor_name(fd, &minor) == 0 &&
+			    (devid_str = devid_str_encode(devid, minor)) !=
+			    NULL) {
+				verify(nvlist_add_string(vdev,
+				    ZPOOL_CONFIG_DEVID, devid_str) == 0);
+			}
+			if (devid_str != NULL)
+				devid_str_free(devid_str);
+			if (minor != NULL)
+				devid_str_free(minor);
+			devid_free(devid);
+		}
+
+		(void) close(fd);
+	}
+
+	return (vdev);
+}
+
+/*
+ * Go through and verify the replication level of the pool is consistent.
+ * Performs the following checks:
+ *
+ * 	For the new spec, verifies that devices in mirrors and raidz are the
+ * 	same size.
+ *
+ * 	If the current configuration already has inconsistent replication
+ * 	levels, ignore any other potential problems in the new spec.
+ *
+ * 	Otherwise, make sure that the current spec (if there is one) and the new
+ * 	spec have consistent replication levels.
+ */
+typedef struct replication_level {
+	char *zprl_type;
+	uint64_t zprl_children;
+	uint64_t zprl_parity;
+} replication_level_t;
+
+/*
+ * Given a list of toplevel vdevs, return the current replication level.  If
+ * the config is inconsistent, then NULL is returned.  If 'fatal' is set, then
+ * an error message will be displayed for each self-inconsistent vdev.
+ */
+replication_level_t *
+get_replication(nvlist_t *nvroot, boolean_t fatal)
+{
+	nvlist_t **top;
+	uint_t t, toplevels;
+	nvlist_t **child;
+	uint_t c, children;
+	nvlist_t *nv;
+	char *type;
+	replication_level_t lastrep, rep, *ret;
+	boolean_t dontreport;
+
+	ret = safe_malloc(sizeof (replication_level_t));
+
+	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &top, &toplevels) == 0);
+
+	lastrep.zprl_type = NULL;
+	for (t = 0; t < toplevels; t++) {
+		nv = top[t];
+
+		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
+
+		if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+		    &child, &children) != 0) {
+			/*
+			 * This is a 'file' or 'disk' vdev.
+			 */
+			rep.zprl_type = type;
+			rep.zprl_children = 1;
+			rep.zprl_parity = 0;
+		} else {
+			uint64_t vdev_size;
+
+			/*
+			 * This is a mirror or RAID-Z vdev.  Go through and make
+			 * sure the contents are all the same (files vs. disks),
+			 * keeping track of the number of elements in the
+			 * process.
+			 *
+			 * We also check that the size of each vdev (if it can
+			 * be determined) is the same.
+			 */
+			rep.zprl_type = type;
+			rep.zprl_children = 0;
+
+			if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
+				verify(nvlist_lookup_uint64(nv,
+				    ZPOOL_CONFIG_NPARITY,
+				    &rep.zprl_parity) == 0);
+				assert(rep.zprl_parity != 0);
+			} else {
+				rep.zprl_parity = 0;
+			}
+
+			/*
+			 * The 'dontreport' variable indicates that we've
+			 * already reported an error for this spec, so don't
+			 * bother doing it again.
+			 */
+			type = NULL;
+			dontreport = 0;
+			vdev_size = -1ULL;
+			for (c = 0; c < children; c++) {
+				nvlist_t *cnv = child[c];
+				char *path;
+				struct stat64 statbuf;
+				uint64_t size = -1ULL;
+				char *childtype;
+				int fd, err;
+
+				rep.zprl_children++;
+
+				verify(nvlist_lookup_string(cnv,
+				    ZPOOL_CONFIG_TYPE, &childtype) == 0);
+
+				/*
+				 * If this is a replacing or spare vdev, then
+				 * get the real first child of the vdev.
+				 */
+				if (strcmp(childtype,
+				    VDEV_TYPE_REPLACING) == 0 ||
+				    strcmp(childtype, VDEV_TYPE_SPARE) == 0) {
+					nvlist_t **rchild;
+					uint_t rchildren;
+
+					verify(nvlist_lookup_nvlist_array(cnv,
+					    ZPOOL_CONFIG_CHILDREN, &rchild,
+					    &rchildren) == 0);
+					assert(rchildren == 2);
+					cnv = rchild[0];
+
+					verify(nvlist_lookup_string(cnv,
+					    ZPOOL_CONFIG_TYPE,
+					    &childtype) == 0);
+				}
+
+				verify(nvlist_lookup_string(cnv,
+				    ZPOOL_CONFIG_PATH, &path) == 0);
+
+				/*
+				 * If we have a raidz/mirror that combines disks
+				 * with files, report it as an error.
+				 */
+				if (!dontreport && type != NULL &&
+				    strcmp(type, childtype) != 0) {
+					if (ret != NULL)
+						free(ret);
+					ret = NULL;
+					if (fatal)
+						vdev_error(gettext(
+						    "mismatched replication "
+						    "level: %s contains both "
+						    "files and devices\n"),
+						    rep.zprl_type);
+					else
+						return (NULL);
+					dontreport = B_TRUE;
+				}
+
+				/*
+				 * According to stat(2), the value of 'st_size'
+				 * is undefined for block devices and character
+				 * devices.  But there is no effective way to
+				 * determine the real size in userland.
+				 *
+				 * Instead, we'll take advantage of an
+				 * implementation detail of spec_size().  If the
+				 * device is currently open, then we (should)
+				 * return a valid size.
+				 *
+				 * If we still don't get a valid size (indicated
+				 * by a size of 0 or MAXOFFSET_T), then ignore
+				 * this device altogether.
+				 */
+				if ((fd = open(path, O_RDONLY)) >= 0) {
+					err = fstat64(fd, &statbuf);
+					(void) close(fd);
+				} else {
+					err = stat64(path, &statbuf);
+				}
+
+				if (err != 0 || statbuf.st_size == 0)
+					continue;
+
+				size = statbuf.st_size;
+
+				/*
+				 * Also check the size of each device.  If they
+				 * differ, then report an error.
+				 */
+				if (!dontreport && vdev_size != -1ULL &&
+				    size != vdev_size) {
+					if (ret != NULL)
+						free(ret);
+					ret = NULL;
+					if (fatal)
+						vdev_error(gettext(
+						    "%s contains devices of "
+						    "different sizes\n"),
+						    rep.zprl_type);
+					else
+						return (NULL);
+					dontreport = B_TRUE;
+				}
+
+				type = childtype;
+				vdev_size = size;
+			}
+		}
+
+		/*
+		 * At this point, we have the replication of the last toplevel
+		 * vdev in 'rep'.  Compare it to 'lastrep' to see if it's
+		 * different.
+		 */
+		if (lastrep.zprl_type != NULL) {
+			if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) {
+				if (ret != NULL)
+					free(ret);
+				ret = NULL;
+				if (fatal)
+					vdev_error(gettext(
+					    "mismatched replication level: "
+					    "both %s and %s vdevs are "
+					    "present\n"),
+					    lastrep.zprl_type, rep.zprl_type);
+				else
+					return (NULL);
+			} else if (lastrep.zprl_parity != rep.zprl_parity) {
+				if (ret)
+					free(ret);
+				ret = NULL;
+				if (fatal)
+					vdev_error(gettext(
+					    "mismatched replication level: "
+					    "both %llu and %llu device parity "
+					    "%s vdevs are present\n"),
+					    lastrep.zprl_parity,
+					    rep.zprl_parity,
+					    rep.zprl_type);
+				else
+					return (NULL);
+			} else if (lastrep.zprl_children != rep.zprl_children) {
+				if (ret)
+					free(ret);
+				ret = NULL;
+				if (fatal)
+					vdev_error(gettext(
+					    "mismatched replication level: "
+					    "both %llu-way and %llu-way %s "
+					    "vdevs are present\n"),
+					    lastrep.zprl_children,
+					    rep.zprl_children,
+					    rep.zprl_type);
+				else
+					return (NULL);
+			}
+		}
+		lastrep = rep;
+	}
+
+	if (ret != NULL)
+		*ret = rep;
+
+	return (ret);
+}
+
+/*
+ * Check the replication level of the vdev spec against the current pool.  Calls
+ * get_replication() to make sure the new spec is self-consistent.  If the pool
+ * has a consistent replication level, then we ignore any errors.  Otherwise,
+ * report any difference between the two.
+ */
+int
+check_replication(nvlist_t *config, nvlist_t *newroot)
+{
+	replication_level_t *current = NULL, *new;
+	int ret;
+
+	/*
+	 * If we have a current pool configuration, check to see if it's
+	 * self-consistent.  If not, simply return success.
+	 */
+	if (config != NULL) {
+		nvlist_t *nvroot;
+
+		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+		    &nvroot) == 0);
+		if ((current = get_replication(nvroot, B_FALSE)) == NULL)
+			return (0);
+	}
+
+	/*
+	 * Get the replication level of the new vdev spec, reporting any
+	 * inconsistencies found.
+	 */
+	if ((new = get_replication(newroot, B_TRUE)) == NULL) {
+		free(current);
+		return (-1);
+	}
+
+	/*
+	 * Check to see if the new vdev spec matches the replication level of
+	 * the current pool.
+	 */
+	ret = 0;
+	if (current != NULL) {
+		if (strcmp(current->zprl_type, new->zprl_type) != 0) {
+			vdev_error(gettext(
+			    "mismatched replication level: pool uses %s "
+			    "and new vdev is %s\n"),
+			    current->zprl_type, new->zprl_type);
+			ret = -1;
+		} else if (current->zprl_parity != new->zprl_parity) {
+			vdev_error(gettext(
+			    "mismatched replication level: pool uses %llu "
+			    "device parity and new vdev uses %llu\n"),
+			    current->zprl_parity, new->zprl_parity);
+			ret = -1;
+		} else if (current->zprl_children != new->zprl_children) {
+			vdev_error(gettext(
+			    "mismatched replication level: pool uses %llu-way "
+			    "%s and new vdev uses %llu-way %s\n"),
+			    current->zprl_children, current->zprl_type,
+			    new->zprl_children, new->zprl_type);
+			ret = -1;
+		}
+	}
+
+	free(new);
+	if (current != NULL)
+		free(current);
+
+	return (ret);
+}
+
+/*
+ * Determine if the given path is a hot spare within the given configuration.
+ */
+static boolean_t
+is_spare(nvlist_t *config, const char *path)
+{
+	int fd;
+	pool_state_t state;
+	char *name = NULL;
+	nvlist_t *label;
+	uint64_t guid, spareguid;
+	nvlist_t *nvroot;
+	nvlist_t **spares;
+	uint_t i, nspares;
+	boolean_t inuse;
+
+	if ((fd = open(path, O_RDONLY)) < 0)
+		return (B_FALSE);
+
+	if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
+	    !inuse ||
+	    state != POOL_STATE_SPARE ||
+	    zpool_read_label(fd, &label) != 0) {
+		free(name);
+		(void) close(fd);
+		return (B_FALSE);
+	}
+	free(name);
+
+	(void) close(fd);
+	verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
+	nvlist_free(label);
+
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) == 0) {
+		for (i = 0; i < nspares; i++) {
+			verify(nvlist_lookup_uint64(spares[i],
+			    ZPOOL_CONFIG_GUID, &spareguid) == 0);
+			if (spareguid == guid)
+				return (B_TRUE);
+		}
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Go through and find any devices that are in use.  We rely on libgeom for
+ * the majority of this task.
+ */
+int
+check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
+    int isspare)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	char *type, *path;
+	int ret = 0;
+	char buf[MAXPATHLEN];
+	uint64_t wholedisk;
+
+	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0) {
+
+		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
+
+		/*
+		 * As a generic check, we look to see if this is a replace of a
+		 * hot spare within the same pool.  If so, we allow it
+		 * regardless of what libgeom or zpool_in_use() says.
+		 */
+		if (isreplacing) {
+			(void) strlcpy(buf, path, sizeof (buf));
+			if (is_spare(config, buf))
+				return (0);
+		}
+
+		if (strcmp(type, VDEV_TYPE_DISK) == 0)
+			ret = check_provider(path, force, isspare);
+
+		return (ret);
+	}
+
+	for (c = 0; c < children; c++)
+		if ((ret = check_in_use(config, child[c], force,
+		    isreplacing, B_FALSE)) != 0)
+			return (ret);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &child, &children) == 0)
+		for (c = 0; c < children; c++)
+			if ((ret = check_in_use(config, child[c], force,
+			    isreplacing, B_TRUE)) != 0)
+				return (ret);
+
+	return (0);
+}
+
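+/*
+ * Map a grouping keyword ("mirror", "raidz", "raidz1", "raidz2" or "spare")
+ * to its vdev type and, through 'mindev', the minimum number of devices it
+ * requires.  Returns NULL if the argument is not a grouping keyword.
+ */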
+const char *
+is_grouping(const char *type, int *mindev)
+{
+	if (strcmp(type, "raidz") == 0 || strcmp(type, "raidz1") == 0) {
+		if (mindev != NULL)
+			*mindev = 2;
+		return (VDEV_TYPE_RAIDZ);
+	}
+
+	if (strcmp(type, "raidz2") == 0) {
+		if (mindev != NULL)
+			*mindev = 3;
+		return (VDEV_TYPE_RAIDZ);
+	}
+
+	if (strcmp(type, "mirror") == 0) {
+		if (mindev != NULL)
+			*mindev = 2;
+		return (VDEV_TYPE_MIRROR);
+	}
+
+	if (strcmp(type, "spare") == 0) {
+		if (mindev != NULL)
+			*mindev = 1;
+		return (VDEV_TYPE_SPARE);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Construct a syntactically valid vdev specification,
+ * and ensure that all devices and files exist and can be opened.
+ * Note: we don't bother freeing anything in the error paths
+ * because the program is just going to exit anyway.
+ */
+nvlist_t *
+construct_spec(int argc, char **argv)
+{
+	nvlist_t *nvroot, *nv, **top, **spares;
+	int t, toplevels, mindev, nspares;
+	const char *type;
+
+	top = NULL;
+	toplevels = 0;
+	spares = NULL;
+	nspares = 0;
+
+	while (argc > 0) {
+		nv = NULL;
+
+		/*
+		 * If it's a mirror or raidz, the subsequent arguments are
+		 * its leaves -- until we encounter the next mirror or raidz.
+		 */
+		if ((type = is_grouping(argv[0], &mindev)) != NULL) {
+			nvlist_t **child = NULL;
+			int c, children = 0;
+
+			if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
+			    spares != NULL) {
+				(void) fprintf(stderr, gettext("invalid vdev "
+				    "specification: 'spare' can be "
+				    "specified only once\n"));
+				return (NULL);
+			}
+
+			for (c = 1; c < argc; c++) {
+				if (is_grouping(argv[c], NULL) != NULL)
+					break;
+				children++;
+				child = realloc(child,
+				    children * sizeof (nvlist_t *));
+				if (child == NULL)
+					zpool_no_memory();
+				if ((nv = make_leaf_vdev(argv[c])) == NULL)
+					return (NULL);
+				child[children - 1] = nv;
+			}
+
+			if (children < mindev) {
+				(void) fprintf(stderr, gettext("invalid vdev "
+				    "specification: %s requires at least %d "
+				    "devices\n"), argv[0], mindev);
+				return (NULL);
+			}
+
+			argc -= c;
+			argv += c;
+
+			if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
+				spares = child;
+				nspares = children;
+				continue;
+			} else {
+				verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
+				    0) == 0);
+				verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
+				    type) == 0);
+				if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
+					verify(nvlist_add_uint64(nv,
+					    ZPOOL_CONFIG_NPARITY,
+					    mindev - 1) == 0);
+				}
+				verify(nvlist_add_nvlist_array(nv,
+				    ZPOOL_CONFIG_CHILDREN, child,
+				    children) == 0);
+
+				for (c = 0; c < children; c++)
+					nvlist_free(child[c]);
+				free(child);
+			}
+		} else {
+			/*
+			 * We have a device.  Pass off to make_leaf_vdev() to
+			 * construct the appropriate nvlist describing the vdev.
+			 */
+			if ((nv = make_leaf_vdev(argv[0])) == NULL)
+				return (NULL);
+			argc--;
+			argv++;
+		}
+
+		toplevels++;
+		top = realloc(top, toplevels * sizeof (nvlist_t *));
+		if (top == NULL)
+			zpool_no_memory();
+		top[toplevels - 1] = nv;
+	}
+
+	if (toplevels == 0 && nspares == 0) {
+		(void) fprintf(stderr, gettext("invalid vdev "
+		    "specification: at least one toplevel vdev must be "
+		    "specified\n"));
+		return (NULL);
+	}
+
+	/*
+	 * Finally, create nvroot and add all top-level vdevs to it.
+	 */
+	verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
+	verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
+	    VDEV_TYPE_ROOT) == 0);
+	verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    top, toplevels) == 0);
+	if (nspares != 0)
+		verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    spares, nspares) == 0);
+
+	for (t = 0; t < toplevels; t++)
+		nvlist_free(top[t]);
+	for (t = 0; t < nspares; t++)
+		nvlist_free(spares[t]);
+	if (spares)
+		free(spares);
+	free(top);
+
+	return (nvroot);
+}
+
+/*
+ * Get and validate the contents of the given vdev specification.  This ensures
+ * that the nvlist returned is well-formed, that all the devices exist, and that
+ * they are not currently in use by any other known consumer.  The 'poolconfig'
+ * parameter is the current configuration of the pool when adding devices
+ * existing pool, and is used to perform additional checks, such as changing the
+ * replication level of the pool.  It can be 'NULL' to indicate that this is a
+ * new pool.  The 'force' flag controls whether devices should be forcefully
+ * added, even if they appear in use.
+ */
+nvlist_t *
+make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
+    boolean_t isreplacing, int argc, char **argv)
+{
+	nvlist_t *newroot;
+
+	is_force = force;
+
+	/*
+	 * Construct the vdev specification.  If this is successful, we know
+	 * that we have a valid specification, and that all devices can be
+	 * opened.
+	 */
+	if ((newroot = construct_spec(argc, argv)) == NULL)
+		return (NULL);
+
+	/*
+	 * Validate each device to make sure that it's not shared with another
+	 * subsystem.  We do this even if 'force' is set, because there are some
+	 * uses (such as a dedicated dump device) that even '-f' cannot
+	 * override.
+	 */
+	if (check_in_use(poolconfig, newroot, force, isreplacing,
+	    B_FALSE) != 0) {
+		nvlist_free(newroot);
+		return (NULL);
+	}
+
+	/*
+	 * Check the replication level of the given vdevs and report any errors
+	 * found.  We include the existing pool spec, if any, as we need to
+	 * catch changes against the existing replication level.
+	 */
+	if (check_rep && check_replication(poolconfig, newroot) != 0) {
+		nvlist_free(newroot);
+		return (NULL);
+	}
+
+	return (newroot);
+}
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
@@ -0,0 +1,72 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	ZPOOL_UTIL_H
+#define	ZPOOL_UTIL_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <libnvpair.h>
+#include <libzfs.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Basic utility functions
+ */
+void *safe_malloc(size_t);
+char *safe_strdup(const char *);
+void zpool_no_memory(void);
+
+/*
+ * Virtual device functions
+ */
+nvlist_t *make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
+    boolean_t isreplace, int argc, char **argv);
+
+/*
+ * Pool list functions
+ */
+int for_each_pool(int, char **, boolean_t unavail, zpool_proplist_t **,
+    zpool_iter_f, void *);
+
+typedef struct zpool_list zpool_list_t;
+
+zpool_list_t *pool_list_get(int, char **, zpool_proplist_t **, int *);
+void pool_list_update(zpool_list_t *);
+int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *);
+void pool_list_free(zpool_list_t *);
+int pool_list_count(zpool_list_t *);
+void pool_list_remove(zpool_list_t *, zpool_handle_t *);
+
+libzfs_handle_t *g_zfs;
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* ZPOOL_UTIL_H */
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
@@ -0,0 +1,3855 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <libintl.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <zone.h>
+#include <fcntl.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+
+#include <sys/spa.h>
+#include <sys/zio.h>
+#include <sys/zap.h>
+#include <libzfs.h>
+
+#include "zfs_namecheck.h"
+#include "zfs_prop.h"
+#include "libzfs_impl.h"
+
+static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
+
+/*
+ * Given a single type (not a mask of types), return the type in a human
+ * readable form.
+ */
+const char *
+zfs_type_to_name(zfs_type_t type)
+{
+	switch (type) {
+	case ZFS_TYPE_FILESYSTEM:
+		return (dgettext(TEXT_DOMAIN, "filesystem"));
+	case ZFS_TYPE_SNAPSHOT:
+		return (dgettext(TEXT_DOMAIN, "snapshot"));
+	case ZFS_TYPE_VOLUME:
+		return (dgettext(TEXT_DOMAIN, "volume"));
+	}
+
+	return (NULL);
+}
+
+/*
+ * Given a path and mask of ZFS types, return a string describing this dataset.
+ * This is used when we fail to open a dataset and we cannot get an exact type.
+ * We guess what the type would have been based on the path and the mask of
+ * acceptable types.
+ */
+static const char *
+path_to_str(const char *path, int types)
+{
+	/*
+	 * When given a single type, always report the exact type.
+	 */
+	if (types == ZFS_TYPE_SNAPSHOT)
+		return (dgettext(TEXT_DOMAIN, "snapshot"));
+	if (types == ZFS_TYPE_FILESYSTEM)
+		return (dgettext(TEXT_DOMAIN, "filesystem"));
+	if (types == ZFS_TYPE_VOLUME)
+		return (dgettext(TEXT_DOMAIN, "volume"));
+
+	/*
+	 * The user is requesting more than one type of dataset.  If this is the
+	 * case, consult the path itself.  If we're looking for a snapshot, and
+	 * a '@' is found, then report it as "snapshot".  Otherwise, remove the
+	 * snapshot attribute and try again.
+	 */
+	if (types & ZFS_TYPE_SNAPSHOT) {
+		if (strchr(path, '@') != NULL)
+			return (dgettext(TEXT_DOMAIN, "snapshot"));
+		return (path_to_str(path, types & ~ZFS_TYPE_SNAPSHOT));
+	}
+
+
+	/*
+	 * The user has requested either filesystems or volumes.
+	 * We have no way of knowing a priori what type this would be, so always
+	 * report it as "filesystem" or "volume", our two primitive types.
+	 */
+	if (types & ZFS_TYPE_FILESYSTEM)
+		return (dgettext(TEXT_DOMAIN, "filesystem"));
+
+	assert(types & ZFS_TYPE_VOLUME);
+	return (dgettext(TEXT_DOMAIN, "volume"));
+}
+
+/*
+ * Validate a ZFS path.  This is used even before trying to open the dataset, to
+ * provide a more meaningful error message.  We call zfs_error_aux() to attach
+ * an extended error message detailing exactly why the name is not valid.
+ */
+static int
+zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type)
+{
+	namecheck_err_t why;
+	char what;
+
+	if (dataset_namecheck(path, &why, &what) != 0) {
+		if (hdl != NULL) {
+			switch (why) {
+			case NAME_ERR_TOOLONG:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "name is too long"));
+				break;
+
+			case NAME_ERR_LEADING_SLASH:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "leading slash in name"));
+				break;
+
+			case NAME_ERR_EMPTY_COMPONENT:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "empty component in name"));
+				break;
+
+			case NAME_ERR_TRAILING_SLASH:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "trailing slash in name"));
+				break;
+
+			case NAME_ERR_INVALCHAR:
+				zfs_error_aux(hdl,
+				    dgettext(TEXT_DOMAIN, "invalid character "
+				    "'%c' in name"), what);
+				break;
+
+			case NAME_ERR_MULTIPLE_AT:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "multiple '@' delimiters in name"));
+				break;
+
+			case NAME_ERR_NOLETTER:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "pool doesn't begin with a letter"));
+				break;
+
+			case NAME_ERR_RESERVED:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "name is reserved"));
+				break;
+
+			case NAME_ERR_DISKLIKE:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "reserved disk name"));
+				break;
+			}
+		}
+
+		return (0);
+	}
+
+	if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) {
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "snapshot delimiter '@' in filesystem name"));
+		return (0);
+	}
+
+	if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) {
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "missing '@' delimiter in snapshot name"));
+		return (0);
+	}
+
+	return (-1);
+}
+
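+/*
+ * Returns nonzero if the given name is a valid dataset name of the given
+ * type.
+ */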
+int
+zfs_name_valid(const char *name, zfs_type_t type)
+{
+	return (zfs_validate_name(NULL, name, type));
+}
+
+/*
+ * This function takes the raw DSL properties, and filters out the user-defined
+ * properties into a separate nvlist.
+ */
+static int
+process_user_props(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	nvpair_t *elem;
+	nvlist_t *propval;
+
+	nvlist_free(zhp->zfs_user_props);
+
+	if (nvlist_alloc(&zhp->zfs_user_props, NV_UNIQUE_NAME, 0) != 0)
+		return (no_memory(hdl));
+
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
+		if (!zfs_prop_user(nvpair_name(elem)))
+			continue;
+
+		verify(nvpair_value_nvlist(elem, &propval) == 0);
+		if (nvlist_add_nvlist(zhp->zfs_user_props,
+		    nvpair_name(elem), propval) != 0)
+			return (no_memory(hdl));
+	}
+
+	return (0);
+}
+
+/*
+ * Utility function to gather stats (objset and zpl) for the given object.
+ */
+static int
+get_stats(zfs_handle_t *zhp)
+{
+	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
+		return (-1);
+
+	while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
+		if (errno == ENOMEM) {
+			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+				zcmd_free_nvlists(&zc);
+				return (-1);
+			}
+		} else {
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+	}
+
+	zhp->zfs_dmustats = zc.zc_objset_stats; /* structure assignment */
+
+	(void) strlcpy(zhp->zfs_root, zc.zc_value, sizeof (zhp->zfs_root));
+
+	if (zhp->zfs_props) {
+		nvlist_free(zhp->zfs_props);
+		zhp->zfs_props = NULL;
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zfs_props) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (-1);
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	if (process_user_props(zhp) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Refresh the properties currently stored in the handle.
+ */
+void
+zfs_refresh_properties(zfs_handle_t *zhp)
+{
+	(void) get_stats(zhp);
+}
+
+/*
+ * Makes a handle from the given dataset name.  Used by zfs_open() and
+ * zfs_iter_* to create child handles on the fly.
+ */
+zfs_handle_t *
+make_dataset_handle(libzfs_handle_t *hdl, const char *path)
+{
+	zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1);
+
+	if (zhp == NULL)
+		return (NULL);
+
+	zhp->zfs_hdl = hdl;
+
+top:
+	(void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name));
+
+	if (get_stats(zhp) != 0) {
+		free(zhp);
+		return (NULL);
+	}
+
+	if (zhp->zfs_dmustats.dds_inconsistent) {
+		zfs_cmd_t zc = { 0 };
+
+		/*
+		 * If it is dds_inconsistent, then we've caught it in
+		 * the middle of a 'zfs receive' or 'zfs destroy', and
+		 * it is inconsistent from the ZPL's point of view, so
+		 * can't be mounted.  However, it could also be that we
+		 * have crashed in the middle of one of those
+		 * operations, in which case we need to get rid of the
+		 * inconsistent state.  We do that by either rolling
+		 * back to the previous snapshot (which will fail if
+		 * there is none), or destroying the filesystem.  Note
+		 * that if we are still in the middle of an active
+		 * 'receive' or 'destroy', then the rollback and destroy
+		 * will fail with EBUSY and we will drive on as usual.
+		 */
+
+		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+		if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
+			(void) zvol_remove_link(hdl, zhp->zfs_name);
+			zc.zc_objset_type = DMU_OST_ZVOL;
+		} else {
+			zc.zc_objset_type = DMU_OST_ZFS;
+		}
+
+		/* If we can successfully roll it back, reget the stats */
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0)
+			goto top;
+		/*
+		 * If we can successfully destroy it, pretend that it
+		 * never existed.
+		 */
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc) == 0) {
+			free(zhp);
+			errno = ENOENT;
+			return (NULL);
+		}
+	}
+
+	/*
+	 * We've managed to open the dataset and gather statistics.  Determine
+	 * the high-level type.
+	 */
+	if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
+		zhp->zfs_head_type = ZFS_TYPE_VOLUME;
+	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
+		zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM;
+	else
+		abort();
+
+	if (zhp->zfs_dmustats.dds_is_snapshot)
+		zhp->zfs_type = ZFS_TYPE_SNAPSHOT;
+	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
+		zhp->zfs_type = ZFS_TYPE_VOLUME;
+	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
+		zhp->zfs_type = ZFS_TYPE_FILESYSTEM;
+	else
+		abort();	/* we should never see any other types */
+
+	return (zhp);
+}
+
+/*
+ * Opens the given snapshot, filesystem, or volume.   The 'types'
+ * argument is a mask of acceptable types.  The function will print an
+ * appropriate error message and return NULL if it can't be opened.
+ */
+zfs_handle_t *
+zfs_open(libzfs_handle_t *hdl, const char *path, int types)
+{
+	zfs_handle_t *zhp;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot open '%s'"), path);
+
+	/*
+	 * Validate the name before we even try to open it.
+	 */
+	if (!zfs_validate_name(hdl, path, ZFS_TYPE_ANY)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid dataset name"));
+		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
+		return (NULL);
+	}
+
+	/*
+	 * Try to get stats for the dataset, which will tell us if it exists.
+	 */
+	errno = 0;
+	if ((zhp = make_dataset_handle(hdl, path)) == NULL) {
+		(void) zfs_standard_error(hdl, errno, errbuf);
+		return (NULL);
+	}
+
+	if (!(types & zhp->zfs_type)) {
+		(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+		zfs_close(zhp);
+		return (NULL);
+	}
+
+	return (zhp);
+}
+
+/*
+ * Release a ZFS handle.  Nothing to do but free the associated memory.
+ */
+void
+zfs_close(zfs_handle_t *zhp)
+{
+	if (zhp->zfs_mntopts)
+		free(zhp->zfs_mntopts);
+	nvlist_free(zhp->zfs_props);
+	nvlist_free(zhp->zfs_user_props);
+	free(zhp);
+}
+
+/*
+ * Given a numeric suffix, convert the value into a number of bits that the
+ * resulting value must be shifted.
+ */
+static int
+str2shift(libzfs_handle_t *hdl, const char *buf)
+{
+	const char *ends = "BKMGTPEZ";
+	int i;
+
+	if (buf[0] == '\0')
+		return (0);
+	for (i = 0; i < strlen(ends); i++) {
+		if (toupper(buf[0]) == ends[i])
+			break;
+	}
+	if (i == strlen(ends)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid numeric suffix '%s'"), buf);
+		return (-1);
+	}
+
+	/*
+	 * We want to allow trailing 'b' characters for 'GB' or 'Mb'.  But don't
+	 * allow 'BB' - that's just weird.
+	 */
+	if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0' &&
+	    toupper(buf[0]) != 'B'))
+		return (10*i);
+
+	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+	    "invalid numeric suffix '%s'"), buf);
+	return (-1);
+}
+
+/*
+ * Convert a string of the form '100G' into a real number.  Used when setting
+ * properties or creating a volume.  An extended error message is recorded via
+ * zfs_error_aux() for the caller to use.
+ */
+static int
+nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
+{
+	char *end;
+	int shift;
+
+	*num = 0;
+
+	/* Check to see if this looks like a number.  */
+	if ((value[0] < '0' || value[0] > '9') && value[0] != '.') {
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "bad numeric value '%s'"), value);
+		return (-1);
+	}
+
+	/* Rely on strtoll() to process the numeric portion.  */
+	errno = 0;
+	*num = strtoll(value, &end, 10);
+
+	/*
+	 * Check for ERANGE, which indicates that the value is too large to fit
+	 * in a 64-bit value.
+	 */
+	if (errno == ERANGE) {
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "numeric value is too large"));
+		return (-1);
+	}
+
+	/*
+	 * If we have a decimal value, then do the computation with floating
+	 * point arithmetic.  Otherwise, use standard arithmetic.
+	 */
+	if (*end == '.') {
+		double fval = strtod(value, &end);
+
+		if ((shift = str2shift(hdl, end)) == -1)
+			return (-1);
+
+		fval *= pow(2, shift);
+
+		if (fval > UINT64_MAX) {
+			if (hdl)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "numeric value is too large"));
+			return (-1);
+		}
+
+		*num = (uint64_t)fval;
+	} else {
+		if ((shift = str2shift(hdl, end)) == -1)
+			return (-1);
+
+		/* Check for overflow */
+		if (shift >= 64 || (*num << shift) >> shift != *num) {
+			if (hdl)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "numeric value is too large"));
+			return (-1);
+		}
+
+		*num <<= shift;
+	}
+
+	return (0);
+}
+
+int
+zfs_nicestrtonum(libzfs_handle_t *hdl, const char *str, uint64_t *val)
+{
+	return (nicestrtonum(hdl, str, val));
+}
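
As a usage sketch for zfs_nicestrtonum() above: a small standalone consumer that parses a human-readable size. The libzfs_init()/libzfs_fini() entry points are assumed from the public libzfs.h interface, and error reporting is elided; this is a sketch, not part of the commit.

#include <libzfs.h>
#include <stdio.h>

int
main(void)
{
	libzfs_handle_t *hdl;
	uint64_t bytes;

	/* Assumed public entry point from libzfs.h. */
	if ((hdl = libzfs_init()) == NULL)
		return (1);

	/* "1.5G" is parsed via nicestrtonum(): 1.5 * 2^30 bytes. */
	if (zfs_nicestrtonum(hdl, "1.5G", &bytes) == 0)
		(void) printf("1.5G = %llu bytes\n",
		    (unsigned long long)bytes);

	libzfs_fini(hdl);
	return (0);
}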
+
+/*
+ * The prop_parse_*() functions are designed to allow flexibility in callers
+ * when setting properties.  At the DSL layer, all properties are either 64-bit
+ * numbers or strings.  We want the user to be able to ignore this fact and
+ * specify properties as native values (boolean, for example) or as strings (to
+ * simplify command line utilities).  This also handles converting index types
+ * (compression, checksum, etc) from strings to their on-disk index.
+ */
+
+static int
+prop_parse_boolean(libzfs_handle_t *hdl, nvpair_t *elem, uint64_t *val)
+{
+	uint64_t ret;
+
+	switch (nvpair_type(elem)) {
+	case DATA_TYPE_STRING:
+		{
+			char *value;
+			verify(nvpair_value_string(elem, &value) == 0);
+
+			if (strcmp(value, "on") == 0) {
+				ret = 1;
+			} else if (strcmp(value, "off") == 0) {
+				ret = 0;
+			} else {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' must be 'on' or 'off'"),
+				    nvpair_name(elem));
+				return (-1);
+			}
+			break;
+		}
+
+	case DATA_TYPE_UINT64:
+		{
+			verify(nvpair_value_uint64(elem, &ret) == 0);
+			if (ret > 1) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be a boolean value"),
+				    nvpair_name(elem));
+				return (-1);
+			}
+			break;
+		}
+
+	case DATA_TYPE_BOOLEAN_VALUE:
+		{
+			boolean_t value;
+			verify(nvpair_value_boolean_value(elem, &value) == 0);
+			ret = value;
+			break;
+		}
+
+	default:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "'%s' must be a boolean value"),
+		    nvpair_name(elem));
+		return (-1);
+	}
+
+	*val = ret;
+	return (0);
+}
+
+static int
+prop_parse_number(libzfs_handle_t *hdl, nvpair_t *elem, zfs_prop_t prop,
+    uint64_t *val)
+{
+	uint64_t ret;
+	boolean_t isnone = B_FALSE;
+
+	switch (nvpair_type(elem)) {
+	case DATA_TYPE_STRING:
+		{
+			char *value;
+			(void) nvpair_value_string(elem, &value);
+			if (strcmp(value, "none") == 0) {
+				isnone = B_TRUE;
+				ret = 0;
+			} else if (nicestrtonum(hdl, value, &ret) != 0) {
+				return (-1);
+			}
+			break;
+		}
+
+	case DATA_TYPE_UINT64:
+		(void) nvpair_value_uint64(elem, &ret);
+		break;
+
+	default:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "'%s' must be a number"),
+		    nvpair_name(elem));
+		return (-1);
+	}
+
+	/*
+	 * Quota special: force 'none' and don't allow 0.
+	 */
+	if (ret == 0 && !isnone && prop == ZFS_PROP_QUOTA) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "use 'none' to disable quota"));
+		return (-1);
+	}
+
+	*val = ret;
+	return (0);
+}
+
+static int
+prop_parse_index(libzfs_handle_t *hdl, nvpair_t *elem, zfs_prop_t prop,
+    uint64_t *val)
+{
+	char *propname = nvpair_name(elem);
+	char *value;
+
+	if (nvpair_type(elem) != DATA_TYPE_STRING) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "'%s' must be a string"), propname);
+		return (-1);
+	}
+
+	(void) nvpair_value_string(elem, &value);
+
+	if (zfs_prop_string_to_index(prop, value, val) != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "'%s' must be one of '%s'"), propname,
+		    zfs_prop_values(prop));
+		return (-1);
+	}
+
+	return (0);
+}
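
To show the flexibility the prop_parse_*() helpers are written for, here is a hedged sketch of a caller supplying the same nvlist with both string and native values. Only libnvpair calls already used in this file are relied on; the property names and numbers are ordinary illustrative choices, not taken from the commit.

#include <libnvpair.h>

/*
 * Build a property list mixing string and native representations,
 * e.g. to pass to zfs_create().  A sketch; error handling is elided.
 */
static nvlist_t *
example_props(void)
{
	nvlist_t *props = NULL;

	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
		return (NULL);

	/* Index property given by its string name. */
	(void) nvlist_add_string(props, "compression", "on");
	/* Number property given natively: an 8K volume block size. */
	(void) nvlist_add_uint64(props, "volblocksize", 8192);
	/* Boolean property given as a string, as a CLI front end would. */
	(void) nvlist_add_string(props, "atime", "off");

	return (props);
}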
+
+/*
+ * Check that the pool name component of the bootfs value matches the pool
+ * the property is being set on.  Assumes bootfs is a valid dataset name.
+ */
+static boolean_t
+bootfs_poolname_valid(char *pool, char *bootfs)
+{
+	char ch, *pname;
+
+	/* get the pool name from the bootfs name */
+	pname = bootfs;
+	while (*bootfs && !isspace(*bootfs) && *bootfs != '/')
+		bootfs++;
+
+	ch = *bootfs;
+	*bootfs = 0;
+
+	if (strcmp(pool, pname) == 0) {
+		*bootfs = ch;
+		return (B_TRUE);
+	}
+
+	*bootfs = ch;
+	return (B_FALSE);
+}
+
+/*
+ * Given an nvlist of properties to set, validates that they are correct, and
+ * parses any numeric properties (index, boolean, etc) if they are specified as
+ * strings.
+ */
+nvlist_t *
+zfs_validate_properties(libzfs_handle_t *hdl, zfs_type_t type, char *pool_name,
+    nvlist_t *nvl, uint64_t zoned, zfs_handle_t *zhp, const char *errbuf)
+{
+	nvpair_t *elem;
+	const char *propname;
+	zfs_prop_t prop;
+	uint64_t intval;
+	char *strval;
+	nvlist_t *ret;
+	int isuser;
+
+	if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) {
+		(void) no_memory(hdl);
+		return (NULL);
+	}
+
+	if (type == ZFS_TYPE_SNAPSHOT) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshot properties cannot be modified"));
+		(void) zfs_error(hdl, EZFS_PROPTYPE, errbuf);
+		goto error;
+	}
+
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
+		propname = nvpair_name(elem);
+
+		/*
+		 * Make sure this property is valid and applies to this type.
+		 */
+		if ((prop = zfs_name_to_prop_common(propname, type))
+		    == ZFS_PROP_INVAL) {
+			isuser = zfs_prop_user(propname);
+			if (!isuser || (isuser && (type & ZFS_TYPE_POOL))) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "invalid property '%s'"),
+				    propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			} else {
+				/*
+				 * If this is a user property, make sure it's a
+				 * string, and that it's less than
+				 * ZAP_MAXNAMELEN.
+				 */
+				if (nvpair_type(elem) != DATA_TYPE_STRING) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' must be a string"),
+					    propname);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+
+				if (strlen(nvpair_name(elem)) >=
+				    ZAP_MAXNAMELEN) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "property name '%s' is too long"),
+					    propname);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+			}
+
+			(void) nvpair_value_string(elem, &strval);
+			if (nvlist_add_string(ret, propname, strval) != 0) {
+				(void) no_memory(hdl);
+				goto error;
+			}
+			continue;
+		}
+
+		/*
+		 * Normalize the name, to get rid of shorthand abbreviations.
+		 */
+		propname = zfs_prop_to_name(prop);
+
+		if (!zfs_prop_valid_for_type(prop, type)) {
+			zfs_error_aux(hdl,
+			    dgettext(TEXT_DOMAIN, "'%s' does not "
+			    "apply to datasets of this type"), propname);
+			(void) zfs_error(hdl, EZFS_PROPTYPE, errbuf);
+			goto error;
+		}
+
+		if (zfs_prop_readonly(prop) &&
+		    (prop != ZFS_PROP_VOLBLOCKSIZE || zhp != NULL)) {
+			zfs_error_aux(hdl,
+			    dgettext(TEXT_DOMAIN, "'%s' is readonly"),
+			    propname);
+			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
+			goto error;
+		}
+
+		/*
+		 * Convert any properties to the internal DSL value types.
+		 */
+		strval = NULL;
+		switch (zfs_prop_get_type(prop)) {
+		case prop_type_boolean:
+			if (prop_parse_boolean(hdl, elem, &intval) != 0) {
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			break;
+
+		case prop_type_string:
+			if (nvpair_type(elem) != DATA_TYPE_STRING) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be a string"),
+				    propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			(void) nvpair_value_string(elem, &strval);
+			if (strlen(strval) >= ZFS_MAXPROPLEN) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' is too long"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			break;
+
+		case prop_type_number:
+			if (prop_parse_number(hdl, elem, prop, &intval) != 0) {
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			break;
+
+		case prop_type_index:
+			if (prop_parse_index(hdl, elem, prop, &intval) != 0) {
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			break;
+
+		default:
+			abort();
+		}
+
+		/*
+		 * Add the result to our return set of properties.
+		 */
+		if (strval) {
+			if (nvlist_add_string(ret, propname, strval) != 0) {
+				(void) no_memory(hdl);
+				goto error;
+			}
+		} else if (nvlist_add_uint64(ret, propname, intval) != 0) {
+			(void) no_memory(hdl);
+			goto error;
+		}
+
+		/*
+		 * Perform some additional checks for specific properties.
+		 */
+		switch (prop) {
+		case ZFS_PROP_RECORDSIZE:
+		case ZFS_PROP_VOLBLOCKSIZE:
+			/* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */
+			if (intval < SPA_MINBLOCKSIZE ||
+			    intval > SPA_MAXBLOCKSIZE || !ISP2(intval)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be power of 2 from %u "
+				    "to %uk"), propname,
+				    (uint_t)SPA_MINBLOCKSIZE,
+				    (uint_t)SPA_MAXBLOCKSIZE >> 10);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			break;
+
+		case ZFS_PROP_SHAREISCSI:
+			if (strcmp(strval, "off") != 0 &&
+			    strcmp(strval, "on") != 0 &&
+			    strcmp(strval, "type=disk") != 0) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be 'on', 'off', or 'type=disk'"),
+				    propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			break;
+
+		case ZFS_PROP_MOUNTPOINT:
+			if (strcmp(strval, ZFS_MOUNTPOINT_NONE) == 0 ||
+			    strcmp(strval, ZFS_MOUNTPOINT_LEGACY) == 0)
+				break;
+
+			if (strval[0] != '/') {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be an absolute path, "
+				    "'none', or 'legacy'"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			/*FALLTHRU*/
+
+		case ZFS_PROP_SHARENFS:
+			/*
+			 * For the mountpoint and sharenfs properties, check if
+			 * it can be set in a global/non-global zone based on
+			 * the zoned property value:
+			 *
+			 *		global zone	    non-global zone
+			 * --------------------------------------------------
+			 * zoned=on	mountpoint (no)	    mountpoint (yes)
+			 *		sharenfs (no)	    sharenfs (no)
+			 *
+			 * zoned=off	mountpoint (yes)	N/A
+			 *		sharenfs (yes)
+			 */
+			if (zoned) {
+				if (getzoneid() == GLOBAL_ZONEID) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be set on "
+					    "dataset in a non-global zone"),
+					    propname);
+					(void) zfs_error(hdl, EZFS_ZONED,
+					    errbuf);
+					goto error;
+				} else if (prop == ZFS_PROP_SHARENFS) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be set in "
+					    "a non-global zone"), propname);
+					(void) zfs_error(hdl, EZFS_ZONED,
+					    errbuf);
+					goto error;
+				}
+			} else if (getzoneid() != GLOBAL_ZONEID) {
+				/*
+				 * If the zoned property is 'off', this must be
+				 * in the global zone.  If not, something is
+				 * wrong.
+				 */
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' cannot be set while dataset "
+				    "'zoned' property is set"), propname);
+				(void) zfs_error(hdl, EZFS_ZONED, errbuf);
+				goto error;
+			}
+
+			break;
+
+		case ZFS_PROP_BOOTFS:
+			/*
+			 * The bootfs property value has to be a dataset name,
+			 * and that dataset has to be in the same pool the
+			 * property is being set on.
+			 */
+			if (strval[0] != '\0' && (!zfs_name_valid(strval,
+			    ZFS_TYPE_FILESYSTEM) || !bootfs_poolname_valid(
+			    pool_name, strval))) {
+
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
+				    "is an invalid name"), strval);
+				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
+				goto error;
+			}
+			break;
+		}
+
+		/*
+		 * For changes to existing volumes, we have some additional
+		 * checks to enforce.
+		 */
+		if (type == ZFS_TYPE_VOLUME && zhp != NULL) {
+			uint64_t volsize = zfs_prop_get_int(zhp,
+			    ZFS_PROP_VOLSIZE);
+			uint64_t blocksize = zfs_prop_get_int(zhp,
+			    ZFS_PROP_VOLBLOCKSIZE);
+			char buf[64];
+
+			switch (prop) {
+			case ZFS_PROP_RESERVATION:
+				if (intval > volsize) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' is greater than current "
+					    "volume size"), propname);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+				break;
+
+			case ZFS_PROP_VOLSIZE:
+				if (intval % blocksize != 0) {
+					zfs_nicenum(blocksize, buf,
+					    sizeof (buf));
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' must be a multiple of "
+					    "volume block size (%s)"),
+					    propname, buf);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+
+				if (intval == 0) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be zero"),
+					    propname);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+				break;
+			}
+		}
+	}
+
+	/*
+	 * If this is an existing volume, and someone is setting the volsize,
+	 * make sure that it matches the reservation, or add it if necessary.
+	 */
+	if (zhp != NULL && type == ZFS_TYPE_VOLUME &&
+	    nvlist_lookup_uint64(ret, zfs_prop_to_name(ZFS_PROP_VOLSIZE),
+	    &intval) == 0) {
+		uint64_t old_volsize = zfs_prop_get_int(zhp,
+		    ZFS_PROP_VOLSIZE);
+		uint64_t old_reservation = zfs_prop_get_int(zhp,
+		    ZFS_PROP_RESERVATION);
+		uint64_t new_reservation;
+
+		if (old_volsize == old_reservation &&
+		    nvlist_lookup_uint64(ret,
+		    zfs_prop_to_name(ZFS_PROP_RESERVATION),
+		    &new_reservation) != 0) {
+			if (nvlist_add_uint64(ret,
+			    zfs_prop_to_name(ZFS_PROP_RESERVATION),
+			    intval) != 0) {
+				(void) no_memory(hdl);
+				goto error;
+			}
+		}
+	}
+
+	return (ret);
+
+error:
+	nvlist_free(ret);
+	return (NULL);
+}
+
+/*
+ * Given a property name and value, set the property for the given dataset.
+ */
+int
+zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret = -1;
+	prop_changelist_t *cl = NULL;
+	char errbuf[1024];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	nvlist_t *nvl = NULL, *realprops;
+	zfs_prop_t prop;
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
+	    zhp->zfs_name);
+
+	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 ||
+	    nvlist_add_string(nvl, propname, propval) != 0) {
+		(void) no_memory(hdl);
+		goto error;
+	}
+
+	if ((realprops = zfs_validate_properties(hdl, zhp->zfs_type, NULL, nvl,
+	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, errbuf)) == NULL)
+		goto error;
+	nvlist_free(nvl);
+	nvl = realprops;
+
+	prop = zfs_name_to_prop(propname);
+
+	/* We don't support those properties on FreeBSD. */
+	switch (prop) {
+	case ZFS_PROP_SHAREISCSI:
+	case ZFS_PROP_DEVICES:
+	case ZFS_PROP_ACLMODE:
+	case ZFS_PROP_ACLINHERIT:
+	case ZFS_PROP_ISCSIOPTIONS:
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    "property '%s' not supported on FreeBSD", propname);
+		ret = zfs_error(hdl, EZFS_PERM, errbuf);
+		goto error;
+	}
+
+	if ((cl = changelist_gather(zhp, prop, 0)) == NULL)
+		goto error;
+
+	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "child dataset with inherited mountpoint is used "
+		    "in a non-global zone"));
+		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
+		goto error;
+	}
+
+	if ((ret = changelist_prefix(cl)) != 0)
+		goto error;
+
+	/*
+	 * Execute the corresponding ioctl() to set this property.
+	 */
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (zcmd_write_src_nvlist(hdl, &zc, nvl, NULL) != 0)
+		goto error;
+
+	ret = ioctl(hdl->libzfs_fd, ZFS_IOC_SET_PROP, &zc);
+
+	if (ret != 0) {
+		switch (errno) {
+
+		case ENOSPC:
+			/*
+			 * For quotas and reservations, ENOSPC indicates
+			 * something different; setting a quota or reservation
+			 * doesn't use any disk space.
+			 */
+			switch (prop) {
+			case ZFS_PROP_QUOTA:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "size is less than current used or "
+				    "reserved space"));
+				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
+				break;
+
+			case ZFS_PROP_RESERVATION:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "size is greater than available space"));
+				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
+				break;
+
+			default:
+				(void) zfs_standard_error(hdl, errno, errbuf);
+				break;
+			}
+			break;
+
+		case EBUSY:
+			if (prop == ZFS_PROP_VOLBLOCKSIZE)
+				(void) zfs_error(hdl, EZFS_VOLHASDATA, errbuf);
+			else
+				(void) zfs_standard_error(hdl, EBUSY, errbuf);
+			break;
+
+		case EROFS:
+			(void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
+			break;
+
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded to allow gzip compression"));
+			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+			break;
+
+		case EOVERFLOW:
+			/*
+			 * This platform can't address a volume this big.
+			 */
+#ifdef _ILP32
+			if (prop == ZFS_PROP_VOLSIZE) {
+				(void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
+				break;
+			}
+#endif
+			/* FALLTHROUGH */
+		default:
+			(void) zfs_standard_error(hdl, errno, errbuf);
+		}
+	} else {
+		/*
+		 * Refresh the statistics so the new property value
+		 * is reflected.
+		 */
+		if ((ret = changelist_postfix(cl)) == 0)
+			(void) get_stats(zhp);
+	}
+
+error:
+	nvlist_free(nvl);
+	zcmd_free_nvlists(&zc);
+	if (cl)
+		changelist_free(cl);
+	return (ret);
+}
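
A minimal usage sketch for zfs_prop_set(), built only on zfs_open()/zfs_close() defined earlier in this file; the dataset name and error-handling policy are placeholders.

#include <libzfs.h>

/* Turn on compression for a filesystem; a sketch, errors just returned. */
static int
enable_compression(libzfs_handle_t *hdl, const char *dataset)
{
	zfs_handle_t *zhp;
	int ret;

	if ((zhp = zfs_open(hdl, dataset, ZFS_TYPE_FILESYSTEM)) == NULL)
		return (-1);

	/* The string value is converted by zfs_validate_properties(). */
	ret = zfs_prop_set(zhp, "compression", "on");

	zfs_close(zhp);
	return (ret);
}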
+
+/*
+ * Given a property, inherit the value from the parent dataset.
+ */
+int
+zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	prop_changelist_t *cl;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+	zfs_prop_t prop;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot inherit %s for '%s'"), propname, zhp->zfs_name);
+
+	if ((prop = zfs_name_to_prop(propname)) == ZFS_PROP_INVAL) {
+		/*
+		 * For user properties, the amount of work we have to do is very
+		 * small, so just do it here.
+		 */
+		if (!zfs_prop_user(propname)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid property"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+		}
+
+		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+		(void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value));
+
+		if (ioctl(zhp->zfs_hdl->libzfs_fd,
+		    ZFS_IOC_SET_PROP, &zc) != 0)
+			return (zfs_standard_error(hdl, errno, errbuf));
+
+		return (0);
+	}
+
+	/*
+	 * Verify that this property is inheritable.
+	 */
+	if (zfs_prop_readonly(prop))
+		return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf));
+
+	if (!zfs_prop_inheritable(prop))
+		return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf));
+
+	/*
+	 * Check to see if the value applies to this type
+	 */
+	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
+		return (zfs_error(hdl, EZFS_PROPTYPE, errbuf));
+
+	/*
+	 * Normalize the name, to get rid of shorthand abbreviations.
+	 */
+	propname = zfs_prop_to_name(prop);
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value));
+
+	if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID &&
+	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset is used in a non-global zone"));
+		return (zfs_error(hdl, EZFS_ZONED, errbuf));
+	}
+
+	/*
+	 * Determine datasets which will be affected by this change, if any.
+	 */
+	if ((cl = changelist_gather(zhp, prop, 0)) == NULL)
+		return (-1);
+
+	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "child dataset with inherited mountpoint is used "
+		    "in a non-global zone"));
+		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
+		goto error;
+	}
+
+	if ((ret = changelist_prefix(cl)) != 0)
+		goto error;
+
+	if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd,
+	    ZFS_IOC_SET_PROP, &zc)) != 0) {
+		return (zfs_standard_error(hdl, errno, errbuf));
+	} else {
+
+		if ((ret = changelist_postfix(cl)) != 0)
+			goto error;
+
+		/*
+		 * Refresh the statistics so the new property is reflected.
+		 */
+		(void) get_stats(zhp);
+	}
+
+error:
+	changelist_free(cl);
+	return (ret);
+}
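
And the matching sketch for zfs_prop_inherit(): reverting a local mountpoint back to the inherited value. Same assumptions as the zfs_prop_set() sketch above.

#include <libzfs.h>

/* Drop the local 'mountpoint' setting so the parent's value applies. */
static int
reset_mountpoint(libzfs_handle_t *hdl, const char *dataset)
{
	zfs_handle_t *zhp;
	int ret;

	if ((zhp = zfs_open(hdl, dataset, ZFS_TYPE_FILESYSTEM)) == NULL)
		return (-1);
	ret = zfs_prop_inherit(zhp, "mountpoint");
	zfs_close(zhp);
	return (ret);
}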
+
+void
+nicebool(int value, char *buf, size_t buflen)
+{
+	if (value)
+		(void) strlcpy(buf, "on", buflen);
+	else
+		(void) strlcpy(buf, "off", buflen);
+}
+
+/*
+ * True DSL properties are stored in an nvlist.  The following two functions
+ * extract them appropriately.
+ */
+static uint64_t
+getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
+{
+	nvlist_t *nv;
+	uint64_t value;
+
+	*source = NULL;
+	if (nvlist_lookup_nvlist(zhp->zfs_props,
+	    zfs_prop_to_name(prop), &nv) == 0) {
+		verify(nvlist_lookup_uint64(nv, ZFS_PROP_VALUE, &value) == 0);
+		(void) nvlist_lookup_string(nv, ZFS_PROP_SOURCE, source);
+	} else {
+		value = zfs_prop_default_numeric(prop);
+		*source = "";
+	}
+
+	return (value);
+}
+
+static char *
+getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
+{
+	nvlist_t *nv;
+	char *value;
+
+	*source = NULL;
+	if (nvlist_lookup_nvlist(zhp->zfs_props,
+	    zfs_prop_to_name(prop), &nv) == 0) {
+		verify(nvlist_lookup_string(nv, ZFS_PROP_VALUE, &value) == 0);
+		(void) nvlist_lookup_string(nv, ZFS_PROP_SOURCE, source);
+	} else {
+		if ((value = (char *)zfs_prop_default_string(prop)) == NULL)
+			value = "";
+		*source = "";
+	}
+
+	return (value);
+}
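
For reference, a mock-up of the nvlist shape that getprop_uint64()/getprop_string() walk: one nested nvlist per property, carrying its value and its source. The literal "value"/"source" keys, the 128K figure, and the 'tank/home' name are illustrative assumptions, not kernel output.

#include <libnvpair.h>

/* Build an nvlist shaped like zhp->zfs_props; a sketch only. */
static nvlist_t *
mock_dsl_props(void)
{
	nvlist_t *props, *entry;

	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
		return (NULL);
	if (nvlist_alloc(&entry, NV_UNIQUE_NAME, 0) != 0) {
		nvlist_free(props);
		return (NULL);
	}

	/* 128K recordsize, inherited from 'tank/home' (made-up names). */
	(void) nvlist_add_uint64(entry, "value", 131072);
	(void) nvlist_add_string(entry, "source", "tank/home");
	(void) nvlist_add_nvlist(props, "recordsize", entry);
	nvlist_free(entry);

	return (props);
}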
+
+/*
+ * Internal function for getting a numeric property.  Both zfs_prop_get() and
+ * zfs_prop_get_int() are built using this interface.
+ *
+ * Certain properties can be overridden using 'mount -o'.  In this case, scan
+ * the contents of the /etc/mnttab entry, searching for the appropriate options.
+ * If they differ from the on-disk values, report the current values and mark
+ * the source "temporary".
+ */
+static int
+get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src,
+    char **source, uint64_t *val)
+{
+	struct mnttab mnt;
+	char *mntopt_on = NULL;
+	char *mntopt_off = NULL;
+
+	*source = NULL;
+
+	switch (prop) {
+	case ZFS_PROP_ATIME:
+		mntopt_on = MNTOPT_ATIME;
+		mntopt_off = MNTOPT_NOATIME;
+		break;
+
+	case ZFS_PROP_DEVICES:
+		mntopt_on = MNTOPT_DEVICES;
+		mntopt_off = MNTOPT_NODEVICES;
+		break;
+
+	case ZFS_PROP_EXEC:
+		mntopt_on = MNTOPT_EXEC;
+		mntopt_off = MNTOPT_NOEXEC;
+		break;
+
+	case ZFS_PROP_READONLY:
+		mntopt_on = MNTOPT_RO;
+		mntopt_off = MNTOPT_RW;
+		break;
+
+	case ZFS_PROP_SETUID:
+		mntopt_on = MNTOPT_SETUID;
+		mntopt_off = MNTOPT_NOSETUID;
+		break;
+
+	case ZFS_PROP_XATTR:
+		mntopt_on = MNTOPT_XATTR;
+		mntopt_off = MNTOPT_NOXATTR;
+		break;
+	}
+
+	/*
+	 * Because looking up the mount options is potentially expensive
+	 * (iterating over all of /etc/mnttab), we defer its calculation until
+	 * we're looking up a property which requires its presence.
+	 */
+	if (!zhp->zfs_mntcheck &&
+	    (mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) {
+		struct mnttab entry, search = { 0 };
+		FILE *mnttab = zhp->zfs_hdl->libzfs_mnttab;
+
+		search.mnt_special = (char *)zhp->zfs_name;
+		search.mnt_fstype = MNTTYPE_ZFS;
+		rewind(mnttab);
+
+		if (getmntany(mnttab, &entry, &search) == 0) {
+			zhp->zfs_mntopts = zfs_strdup(zhp->zfs_hdl,
+			    entry.mnt_mntopts);
+			if (zhp->zfs_mntopts == NULL)
+				return (-1);
+		}
+
+		zhp->zfs_mntcheck = B_TRUE;
+	}
+
+	if (zhp->zfs_mntopts == NULL)
+		mnt.mnt_mntopts = "";
+	else
+		mnt.mnt_mntopts = zhp->zfs_mntopts;
+
+	switch (prop) {
+	case ZFS_PROP_ATIME:
+	case ZFS_PROP_DEVICES:
+	case ZFS_PROP_EXEC:
+	case ZFS_PROP_READONLY:
+	case ZFS_PROP_SETUID:
+	case ZFS_PROP_XATTR:
+		*val = getprop_uint64(zhp, prop, source);
+
+		if (hasmntopt(&mnt, mntopt_on) && !*val) {
+			*val = B_TRUE;
+			if (src)
+				*src = ZFS_SRC_TEMPORARY;
+		} else if (hasmntopt(&mnt, mntopt_off) && *val) {
+			*val = B_FALSE;
+			if (src)
+				*src = ZFS_SRC_TEMPORARY;
+		}
+		break;
+
+	case ZFS_PROP_RECORDSIZE:
+	case ZFS_PROP_COMPRESSION:
+	case ZFS_PROP_ZONED:
+	case ZFS_PROP_CREATION:
+	case ZFS_PROP_COMPRESSRATIO:
+	case ZFS_PROP_REFERENCED:
+	case ZFS_PROP_USED:
+	case ZFS_PROP_CREATETXG:
+	case ZFS_PROP_AVAILABLE:
+	case ZFS_PROP_VOLSIZE:
+	case ZFS_PROP_VOLBLOCKSIZE:
+		*val = getprop_uint64(zhp, prop, source);
+		break;
+
+	case ZFS_PROP_CANMOUNT:
+		*val = getprop_uint64(zhp, prop, source);
+		if (*val == 0)
+			*source = zhp->zfs_name;
+		else
+			*source = "";	/* default */
+		break;
+
+	case ZFS_PROP_QUOTA:
+	case ZFS_PROP_RESERVATION:
+		*val = getprop_uint64(zhp, prop, source);
+		if (*val == 0)
+			*source = "";	/* default */
+		else
+			*source = zhp->zfs_name;
+		break;
+
+	case ZFS_PROP_MOUNTED:
+		*val = (zhp->zfs_mntopts != NULL);
+		break;
+
+	case ZFS_PROP_NUMCLONES:
+		*val = zhp->zfs_dmustats.dds_num_clones;
+		break;
+
+	default:
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "cannot get non-numeric property"));
+		return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP,
+		    dgettext(TEXT_DOMAIN, "internal error")));
+	}
+
+	return (0);
+}
+
+/*
+ * Calculate the source type, given the raw source string.
+ */
+static void
+get_source(zfs_handle_t *zhp, zfs_source_t *srctype, char *source,
+    char *statbuf, size_t statlen)
+{
+	if (statbuf == NULL || *srctype == ZFS_SRC_TEMPORARY)
+		return;
+
+	if (source == NULL) {
+		*srctype = ZFS_SRC_NONE;
+	} else if (source[0] == '\0') {
+		*srctype = ZFS_SRC_DEFAULT;
+	} else {
+		if (strcmp(source, zhp->zfs_name) == 0) {
+			*srctype = ZFS_SRC_LOCAL;
+		} else {
+			(void) strlcpy(statbuf, source, statlen);
+			*srctype = ZFS_SRC_INHERITED;
+		}
+	}
+
+}
+
+/*
+ * Retrieve a property from the given object.  If 'literal' is specified, then
+ * numbers are left as exact values.  Otherwise, numbers are converted to a
+ * human-readable form.
+ *
+ * Returns 0 on success, or -1 on error.
+ */
+int
+zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
+    zfs_source_t *src, char *statbuf, size_t statlen, boolean_t literal)
+{
+	char *source = NULL;
+	uint64_t val;
+	char *str;
+	const char *root;
+	const char *strval;
+
+	/*
+	 * Check to see if this property applies to our object
+	 */
+	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
+		return (-1);
+
+	if (src)
+		*src = ZFS_SRC_NONE;
+
+	switch (prop) {
+	case ZFS_PROP_ATIME:
+	case ZFS_PROP_READONLY:
+	case ZFS_PROP_SETUID:
+	case ZFS_PROP_ZONED:
+	case ZFS_PROP_DEVICES:
+	case ZFS_PROP_EXEC:
+	case ZFS_PROP_CANMOUNT:
+	case ZFS_PROP_XATTR:
+		/*
+		 * Basic boolean values are built on top of
+		 * get_numeric_property().
+		 */
+		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+			return (-1);
+		nicebool(val, propbuf, proplen);
+
+		break;
+
+	case ZFS_PROP_AVAILABLE:
+	case ZFS_PROP_RECORDSIZE:
+	case ZFS_PROP_CREATETXG:
+	case ZFS_PROP_REFERENCED:
+	case ZFS_PROP_USED:
+	case ZFS_PROP_VOLSIZE:
+	case ZFS_PROP_VOLBLOCKSIZE:
+	case ZFS_PROP_NUMCLONES:
+		/*
+		 * Basic numeric values are built on top of
+		 * get_numeric_property().
+		 */
+		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+			return (-1);
+		if (literal)
+			(void) snprintf(propbuf, proplen, "%llu",
+			    (u_longlong_t)val);
+		else
+			zfs_nicenum(val, propbuf, proplen);
+		break;
+
+	case ZFS_PROP_COMPRESSION:
+	case ZFS_PROP_CHECKSUM:
+	case ZFS_PROP_SNAPDIR:
+#ifndef	ZFS_NO_ACL
+	case ZFS_PROP_ACLMODE:
+	case ZFS_PROP_ACLINHERIT:
+#endif	/* !ZFS_NO_ACL */
+	case ZFS_PROP_COPIES:
+		val = getprop_uint64(zhp, prop, &source);
+		verify(zfs_prop_index_to_string(prop, val, &strval) == 0);
+		(void) strlcpy(propbuf, strval, proplen);
+		break;
+
+#ifdef	ZFS_NO_ACL
+	case ZFS_PROP_ACLMODE:
+	case ZFS_PROP_ACLINHERIT:
+		(void) strlcpy(propbuf, "<unsupported>", proplen);
+		break;
+#endif	/* ZFS_NO_ACL */
+
+	case ZFS_PROP_CREATION:
+		/*
+		 * 'creation' is a time_t stored in the statistics.  We convert
+		 * this into a string unless 'literal' is specified.
+		 */
+		{
+			time_t time;
+			struct tm t;
+
+			val = getprop_uint64(zhp, prop, &source);
+			time = (time_t)val;
+
+			if (literal ||
+			    localtime_r(&time, &t) == NULL ||
+			    strftime(propbuf, proplen, "%a %b %e %k:%M %Y",
+			    &t) == 0)
+				(void) snprintf(propbuf, proplen, "%llu",
+				    (u_longlong_t)val);
+		}
+		break;
+
+	case ZFS_PROP_MOUNTPOINT:
+		/*
+		 * Getting the precise mountpoint can be tricky.
+		 *
+		 *  - for 'none' or 'legacy', return those values.
+		 *  - for default mountpoints, construct it as /zfs/<dataset>
+		 *  - for inherited mountpoints, we want to take everything
+		 *    after our ancestor and append it to the inherited value.
+		 *
+		 * If the pool has an alternate root, we want to prepend that
+		 * root to any values we return.
+		 */
+		root = zhp->zfs_root;
+		str = getprop_string(zhp, prop, &source);
+
+		if (str[0] == '\0') {
+			(void) snprintf(propbuf, proplen, "%s/zfs/%s",
+			    root, zhp->zfs_name);
+		} else if (str[0] == '/') {
+			const char *relpath = zhp->zfs_name + strlen(source);
+
+			if (relpath[0] == '/')
+				relpath++;
+			if (str[1] == '\0')
+				str++;
+
+			if (relpath[0] == '\0')
+				(void) snprintf(propbuf, proplen, "%s%s",
+				    root, str);
+			else
+				(void) snprintf(propbuf, proplen, "%s%s%s%s",
+				    root, str, relpath[0] == '@' ? "" : "/",
+				    relpath);
+		} else {
+			/* 'legacy' or 'none' */
+			(void) strlcpy(propbuf, str, proplen);
+		}
+
+		break;
+
+	case ZFS_PROP_SHARENFS:
+	case ZFS_PROP_SHAREISCSI:
+	case ZFS_PROP_ISCSIOPTIONS:
+		(void) strlcpy(propbuf, getprop_string(zhp, prop, &source),
+		    proplen);
+		break;
+
+	case ZFS_PROP_ORIGIN:
+		(void) strlcpy(propbuf, getprop_string(zhp, prop, &source),
+		    proplen);
+		/*
+		 * If there is no parent at all, return failure to indicate that
+		 * it doesn't apply to this dataset.
+		 */
+		if (propbuf[0] == '\0')
+			return (-1);
+		break;
+
+	case ZFS_PROP_QUOTA:
+	case ZFS_PROP_RESERVATION:
+		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+			return (-1);
+
+		/*
+		 * If quota or reservation is 0, we translate this into 'none'
+		 * (unless literal is set), and indicate that it's the default
+		 * value.  Otherwise, we print the number nicely and indicate
+		 * that it's set locally.
+		 */
+		if (val == 0) {
+			if (literal)
+				(void) strlcpy(propbuf, "0", proplen);
+			else
+				(void) strlcpy(propbuf, "none", proplen);
+		} else {
+			if (literal)
+				(void) snprintf(propbuf, proplen, "%llu",
+				    (u_longlong_t)val);
+			else
+				zfs_nicenum(val, propbuf, proplen);
+		}
+		break;
+
+	case ZFS_PROP_COMPRESSRATIO:
+		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+			return (-1);
+		(void) snprintf(propbuf, proplen, "%lld.%02lldx", (longlong_t)
+		    val / 100, (longlong_t)val % 100);
+		break;
+
+	case ZFS_PROP_TYPE:
+		switch (zhp->zfs_type) {
+		case ZFS_TYPE_FILESYSTEM:
+			str = "filesystem";
+			break;
+		case ZFS_TYPE_VOLUME:
+			str = "volume";
+			break;
+		case ZFS_TYPE_SNAPSHOT:
+			str = "snapshot";
+			break;
+		default:
+			abort();
+		}
+		(void) snprintf(propbuf, proplen, "%s", str);
+		break;
+
+	case ZFS_PROP_MOUNTED:
+		/*
+		 * The 'mounted' property is a pseudo-property that describes
+		 * whether the filesystem is currently mounted.  Even though
+		 * it's a boolean value, the typical values of "on" and "off"
+		 * don't make sense, so we translate to "yes" and "no".
+		 */
+		if (get_numeric_property(zhp, ZFS_PROP_MOUNTED,
+		    src, &source, &val) != 0)
+			return (-1);
+		if (val)
+			(void) strlcpy(propbuf, "yes", proplen);
+		else
+			(void) strlcpy(propbuf, "no", proplen);
+		break;
+
+	case ZFS_PROP_NAME:
+		/*
+		 * The 'name' property is a pseudo-property derived from the
+		 * dataset name.  It is presented as a real property to simplify
+		 * consumers.
+		 */
+		(void) strlcpy(propbuf, zhp->zfs_name, proplen);
+		break;
+
+	default:
+		abort();
+	}
+
+	get_source(zhp, src, source, statbuf, statlen);
+
+	return (0);
+}
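
A usage sketch for zfs_prop_get() above, printing the human-readable 'used' value. Passing NULL for the source arguments and B_FALSE for 'literal' follows the signature defined here; the output format is a placeholder.

#include <libzfs.h>
#include <stdio.h>

/* Print "<name>  used  <nicenum>" for one dataset; a sketch. */
static void
print_used(zfs_handle_t *zhp)
{
	char buf[ZFS_MAXPROPLEN];

	if (zfs_prop_get(zhp, ZFS_PROP_USED, buf, sizeof (buf),
	    NULL, NULL, 0, B_FALSE) == 0)
		(void) printf("%s\tused\t%s\n", zfs_get_name(zhp), buf);
}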
+
+/*
+ * Utility function to get the given numeric property.  Does no validation that
+ * the given property is the appropriate type; should only be used with
+ * hard-coded property types.
+ */
+uint64_t
+zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop)
+{
+	char *source;
+	zfs_source_t sourcetype = ZFS_SRC_NONE;
+	uint64_t val;
+
+	(void) get_numeric_property(zhp, prop, &sourcetype, &source, &val);
+
+	return (val);
+}
+
+/*
+ * Similar to zfs_prop_get(), but returns the value as an integer.
+ */
+int
+zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value,
+    zfs_source_t *src, char *statbuf, size_t statlen)
+{
+	char *source;
+
+	/*
+	 * Check to see if this property applies to our object
+	 */
+	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
+		return (zfs_error_fmt(zhp->zfs_hdl, EZFS_PROPTYPE,
+		    dgettext(TEXT_DOMAIN, "cannot get property '%s'"),
+		    zfs_prop_to_name(prop)));
+
+	if (src)
+		*src = ZFS_SRC_NONE;
+
+	if (get_numeric_property(zhp, prop, src, &source, value) != 0)
+		return (-1);
+
+	get_source(zhp, src, source, statbuf, statlen);
+
+	return (0);
+}
+
+/*
+ * Returns the name of the given zfs handle.
+ */
+const char *
+zfs_get_name(const zfs_handle_t *zhp)
+{
+	return (zhp->zfs_name);
+}
+
+/*
+ * Returns the type of the given zfs handle.
+ */
+zfs_type_t
+zfs_get_type(const zfs_handle_t *zhp)
+{
+	return (zhp->zfs_type);
+}
+
+/*
+ * Iterate over all child filesystems
+ */
+int
+zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
+{
+	zfs_cmd_t zc = { 0 };
+	zfs_handle_t *nzhp;
+	int ret;
+
+	for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	    ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
+	    (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
+		/*
+		 * Ignore private dataset names.
+		 */
+		if (dataset_name_hidden(zc.zc_name))
+			continue;
+
+		/*
+		 * Silently ignore errors, as the only plausible explanation is
+		 * that the pool has since been removed.
+		 */
+		if ((nzhp = make_dataset_handle(zhp->zfs_hdl,
+		    zc.zc_name)) == NULL)
+			continue;
+
+		if ((ret = func(nzhp, data)) != 0)
+			return (ret);
+	}
+
+	/*
+	 * An errno value of ESRCH indicates normal completion.  If ENOENT is
+	 * returned, then the underlying dataset has been removed since we
+	 * obtained the handle.
+	 */
+	if (errno != ESRCH && errno != ENOENT)
+		return (zfs_standard_error(zhp->zfs_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
+
+	return (0);
+}
+
+/*
+ * Iterate over all snapshots
+ */
+int
+zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
+{
+	zfs_cmd_t zc = { 0 };
+	zfs_handle_t *nzhp;
+	int ret;
+
+	for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	    ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
+	    &zc) == 0;
+	    (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
+
+		if ((nzhp = make_dataset_handle(zhp->zfs_hdl,
+		    zc.zc_name)) == NULL)
+			continue;
+
+		if ((ret = func(nzhp, data)) != 0)
+			return (ret);
+	}
+
+	/*
+	 * An errno value of ESRCH indicates normal completion.  If ENOENT is
+	 * returned, then the underlying dataset has been removed since we
+	 * obtained the handle.  Silently ignore this case, and return success.
+	 */
+	if (errno != ESRCH && errno != ENOENT)
+		return (zfs_standard_error(zhp->zfs_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
+
+	return (0);
+}
+
+/*
+ * Iterate over all children, snapshots and filesystems
+ */
+int
+zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data)
+{
+	int ret;
+
+	if ((ret = zfs_iter_filesystems(zhp, func, data)) != 0)
+		return (ret);
+
+	return (zfs_iter_snapshots(zhp, func, data));
+}
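
A sketch of driving the iterators above: a zfs_iter_f callback that prints each dataset and recurses over its children. As in the callbacks in this file, the callback owns the handle it is given and must zfs_close() it; the function names here are placeholders.

#include <libzfs.h>
#include <stdio.h>

/* Callback matching zfs_iter_f: print, recurse, then close the handle. */
static int
print_cb(zfs_handle_t *zhp, void *data)
{
	int ret;

	(void) printf("%s\n", zfs_get_name(zhp));
	ret = zfs_iter_children(zhp, print_cb, data);
	zfs_close(zhp);
	return (ret);
}

/* Walk the subtree rooted at 'dataset'; a sketch. */
static int
walk_tree(libzfs_handle_t *hdl, const char *dataset)
{
	zfs_handle_t *zhp;

	if ((zhp = zfs_open(hdl, dataset, ZFS_TYPE_ANY)) == NULL)
		return (-1);
	return (print_cb(zhp, NULL));
}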
+
+/*
+ * Given a complete name, return just the portion that refers to the parent.
+ * Can return NULL if this is a pool.
+ */
+static int
+parent_name(const char *path, char *buf, size_t buflen)
+{
+	char *loc;
+
+	if ((loc = strrchr(path, '/')) == NULL)
+		return (-1);
+
+	(void) strncpy(buf, path, MIN(buflen, loc - path));
+	buf[loc - path] = '\0';
+
+	return (0);
+}
+
+/*
+ * Checks to make sure that the given path has a parent, and that it exists.  We
+ * also fetch the 'zoned' property, which is used to validate property settings
+ * when creating new datasets.
+ */
+static int
+check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned)
+{
+	zfs_cmd_t zc = { 0 };
+	char parent[ZFS_MAXNAMELEN];
+	char *slash;
+	zfs_handle_t *zhp;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), "cannot create '%s'",
+	    path);
+
+	/* get parent, and check to see if this is just a pool */
+	if (parent_name(path, parent, sizeof (parent)) != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "missing dataset name"));
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+	}
+
+	/* check to see if the pool exists */
+	if ((slash = strchr(parent, '/')) == NULL)
+		slash = parent + strlen(parent);
+	(void) strncpy(zc.zc_name, parent, slash - parent);
+	zc.zc_name[slash - parent] = '\0';
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 &&
+	    errno == ENOENT) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "no such pool '%s'"), zc.zc_name);
+		return (zfs_error(hdl, EZFS_NOENT, errbuf));
+	}
+
+	/* check to see if the parent dataset exists */
+	if ((zhp = make_dataset_handle(hdl, parent)) == NULL) {
+		switch (errno) {
+		case ENOENT:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "parent does not exist"));
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+
+		default:
+			return (zfs_standard_error(hdl, errno, errbuf));
+		}
+	}
+
+	*zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+	/* we are in a non-global zone, but parent is in the global zone */
+	if (getzoneid() != GLOBAL_ZONEID && !(*zoned)) {
+		(void) zfs_standard_error(hdl, EPERM, errbuf);
+		zfs_close(zhp);
+		return (-1);
+	}
+
+	/* make sure parent is a filesystem */
+	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "parent is not a filesystem"));
+		(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+		zfs_close(zhp);
+		return (-1);
+	}
+
+	zfs_close(zhp);
+	return (0);
+}
+
+/*
+ * Create a new filesystem or volume.
+ */
+int
+zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
+    nvlist_t *props)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	uint64_t size = 0;
+	uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
+	char errbuf[1024];
+	uint64_t zoned;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), path);
+
+	/* validate the path, taking care to note the extended error message */
+	if (!zfs_validate_name(hdl, path, type))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	/* validate parents exist */
+	if (check_parents(hdl, path, &zoned) != 0)
+		return (-1);
+
+	/*
+	 * The failure modes when creating a dataset of a different type over
+	 * one that already exists is a little strange.  In particular, if you
+	 * try to create a dataset on top of an existing dataset, the ioctl()
+	 * will return ENOENT, not EEXIST.  To prevent this from happening, we
+	 * first try to see if the dataset exists.
+	 */
+	(void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name));
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset already exists"));
+		return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+	}
+
+	if (type == ZFS_TYPE_VOLUME)
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	else
+		zc.zc_objset_type = DMU_OST_ZFS;
+
+	if (props && (props = zfs_validate_properties(hdl, type, NULL, props,
+	    zoned, NULL, errbuf)) == 0)
+		return (-1);
+
+	if (type == ZFS_TYPE_VOLUME) {
+		/*
+		 * If we are creating a volume, the size and block size must
+		 * satisfy a few constraints.  First, the blocksize must be a
+		 * valid block size between SPA_{MIN,MAX}BLOCKSIZE.  Second, the
+		 * volsize must be a multiple of the block size, and cannot be
+		 * zero.
+		 */
+		if (props == NULL || nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &size) != 0) {
+			nvlist_free(props);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "missing volume size"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+		}
+
+		if ((ret = nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
+		    &blocksize)) != 0) {
+			if (ret == ENOENT) {
+				blocksize = zfs_prop_default_numeric(
+				    ZFS_PROP_VOLBLOCKSIZE);
+			} else {
+				nvlist_free(props);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "missing volume block size"));
+				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+			}
+		}
+
+		if (size == 0) {
+			nvlist_free(props);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "volume size cannot be zero"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+		}
+
+		if (size % blocksize != 0) {
+			nvlist_free(props);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "volume size must be a multiple of volume block "
+			    "size"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+		}
+	}
+
+	if (props &&
+	    zcmd_write_src_nvlist(hdl, &zc, props, NULL) != 0)
+		return (-1);
+	nvlist_free(props);
+
+	/* create the dataset */
+	ret = ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE, &zc);
+
+	if (ret == 0 && type == ZFS_TYPE_VOLUME) {
+		ret = zvol_create_link(hdl, path);
+		if (ret) {
+			(void) zfs_standard_error(hdl, errno,
+			    dgettext(TEXT_DOMAIN,
+			    "Volume successfully created, but device links "
+			    "were not created"));
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	/* check for failure */
+	if (ret != 0) {
+		char parent[ZFS_MAXNAMELEN];
+		(void) parent_name(path, parent, sizeof (parent));
+
+		switch (errno) {
+		case ENOENT:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "no such parent '%s'"), parent);
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+
+		case EINVAL:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "parent '%s' is not a filesystem"), parent);
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+
+		case EDOM:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "volume block size must be power of 2 from "
+			    "%u to %uk"),
+			    (uint_t)SPA_MINBLOCKSIZE,
+			    (uint_t)SPA_MAXBLOCKSIZE >> 10);
+
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+
+#ifdef _ILP32
+		case EOVERFLOW:
+			/*
+			 * This platform can't address a volume this big.
+			 */
+			if (type == ZFS_TYPE_VOLUME)
+				return (zfs_error(hdl, EZFS_VOLTOOBIG,
+				    errbuf));
+#endif
+			/* FALLTHROUGH */
+		default:
+			return (zfs_standard_error(hdl, errno, errbuf));
+		}
+	}
+
+	return (0);
+}
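
A hedged sketch of creating a volume with zfs_create(), honoring the volsize/volblocksize constraints spelled out above. The properties are passed as a string and a native number exactly as zfs_validate_properties() accepts them; the 1G/8K figures are arbitrary examples.

#include <libzfs.h>
#include <libnvpair.h>

/* Create a 1G zvol with an 8K block size; a sketch, errors just returned. */
static int
create_small_zvol(libzfs_handle_t *hdl, const char *path)
{
	nvlist_t *props = NULL;
	int ret;

	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
		return (-1);

	/* volsize must be a non-zero multiple of volblocksize. */
	(void) nvlist_add_uint64(props, "volblocksize", 8192);
	(void) nvlist_add_string(props, "volsize", "1G");

	ret = zfs_create(hdl, path, ZFS_TYPE_VOLUME, props);
	nvlist_free(props);
	return (ret);
}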
+
+/*
+ * Destroys the given dataset.  The caller must make sure that the filesystem
+ * isn't mounted, and that there are no active dependents.
+ */
+int
+zfs_destroy(zfs_handle_t *zhp)
+{
+	zfs_cmd_t zc = { 0 };
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (ZFS_IS_VOLUME(zhp)) {
+		/*
+		 * Unconditionally unshare this zvol ignoring failure as it
+		 * indicates only that the volume wasn't shared initially.
+		 */
+		(void) zfs_unshare_iscsi(zhp);
+
+		if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
+			return (-1);
+
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	} else {
+		zc.zc_objset_type = DMU_OST_ZFS;
+	}
+
+	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc) != 0) {
+		return (zfs_standard_error_fmt(zhp->zfs_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot destroy '%s'"),
+		    zhp->zfs_name));
+	}
+
+	remove_mountpoint(zhp);
+
+	return (0);
+}
+
+struct destroydata {
+	char *snapname;
+	boolean_t gotone;
+	boolean_t closezhp;
+};
+
+static int
+zfs_remove_link_cb(zfs_handle_t *zhp, void *arg)
+{
+	struct destroydata *dd = arg;
+	zfs_handle_t *szhp;
+	char name[ZFS_MAXNAMELEN];
+	boolean_t closezhp = dd->closezhp;
+	int rv;
+
+	(void) strlcpy(name, zhp->zfs_name, sizeof (name));
+	(void) strlcat(name, "@", sizeof (name));
+	(void) strlcat(name, dd->snapname, sizeof (name));
+
+	szhp = make_dataset_handle(zhp->zfs_hdl, name);
+	if (szhp) {
+		dd->gotone = B_TRUE;
+		zfs_close(szhp);
+	}
+
+	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		(void) zvol_remove_link(zhp->zfs_hdl, name);
+		/*
+		 * NB: this is simply a best-effort.  We don't want to
+		 * return an error, because then we wouldn't visit all
+		 * the volumes.
+		 */
+	}
+
+	dd->closezhp = B_TRUE;
+	rv = zfs_iter_filesystems(zhp, zfs_remove_link_cb, arg);
+	if (closezhp)
+		zfs_close(zhp);
+	return (rv);
+}
+
+/*
+ * Destroys all snapshots with the given name in zhp & descendants.
+ */
+int
+zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	struct destroydata dd = { 0 };
+
+	dd.snapname = snapname;
+	(void) zfs_remove_link_cb(zhp, &dd);
+
+	if (!dd.gotone) {
+		return (zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT,
+		    dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
+		    zhp->zfs_name, snapname));
+	}
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
+
+	ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY_SNAPS, &zc);
+	if (ret != 0) {
+		char errbuf[1024];
+
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot destroy '%s@%s'"), zc.zc_name, snapname);
+
+		switch (errno) {
+		case EEXIST:
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "snapshot is cloned"));
+			return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf));
+
+		default:
+			return (zfs_standard_error(zhp->zfs_hdl, errno,
+			    errbuf));
+		}
+	}
+
+	return (0);
+}
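
A usage sketch for zfs_destroy_snaps(): removing one snapshot name from a dataset and all of its descendants. The '@backup' snapshot name is a made-up example.

#include <libzfs.h>

/* Destroy '<dataset>@backup' and the same snapshot on all descendants. */
static int
prune_backup_snaps(libzfs_handle_t *hdl, const char *dataset)
{
	zfs_handle_t *zhp;
	char snapname[] = "backup";
	int ret;

	if ((zhp = zfs_open(hdl, dataset, ZFS_TYPE_FILESYSTEM)) == NULL)
		return (-1);
	ret = zfs_destroy_snaps(zhp, snapname);
	zfs_close(zhp);
	return (ret);
}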
+
+/*
+ * Clones the given dataset.  The target must be of the same type as the source.
+ */
+int
+zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
+{
+	zfs_cmd_t zc = { 0 };
+	char parent[ZFS_MAXNAMELEN];
+	int ret;
+	char errbuf[1024];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_type_t type;
+	uint64_t zoned;
+
+	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), target);
+
+	/* validate the target name */
+	if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	/* validate parents exist */
+	if (check_parents(hdl, target, &zoned) != 0)
+		return (-1);
+
+	(void) parent_name(target, parent, sizeof (parent));
+
+	/* do the clone */
+	if (ZFS_IS_VOLUME(zhp)) {
+		zc.zc_objset_type = DMU_OST_ZVOL;
+		type = ZFS_TYPE_VOLUME;
+	} else {
+		zc.zc_objset_type = DMU_OST_ZFS;
+		type = ZFS_TYPE_FILESYSTEM;
+	}
+
+	if (props) {
+		if ((props = zfs_validate_properties(hdl, type, NULL, props,
+		    zoned, zhp, errbuf)) == NULL)
+			return (-1);
+
+		if (zcmd_write_src_nvlist(hdl, &zc, props, NULL) != 0) {
+			nvlist_free(props);
+			return (-1);
+		}
+
+		nvlist_free(props);
+	}
+
+	(void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value));
+	ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_CREATE, &zc);
+
+	zcmd_free_nvlists(&zc);
+
+	if (ret != 0) {
+		switch (errno) {
+
+		case ENOENT:
+			/*
+			 * The parent doesn't exist.  We should have caught this
+			 * above, but there may be a race condition that has
+			 * since destroyed the parent.
+			 *
+			 * At this point, we don't know whether it's the source
+			 * that doesn't exist anymore, or whether the target
+			 * dataset doesn't exist.
+			 */
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "no such parent '%s'"), parent);
+			return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
+
+		case EXDEV:
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "source and target pools differ"));
+			return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET,
+			    errbuf));
+
+		default:
+			return (zfs_standard_error(zhp->zfs_hdl, errno,
+			    errbuf));
+		}
+	} else if (ZFS_IS_VOLUME(zhp)) {
+		ret = zvol_create_link(zhp->zfs_hdl, target);
+	}
+
+	return (ret);
+}
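
A matching sketch for zfs_clone(): open the source snapshot and clone it to a new filesystem, with no extra properties. Names and error policy are placeholders.

#include <libzfs.h>

/* Clone 'snap' (a snapshot name) to the new filesystem 'target'. */
static int
clone_snapshot(libzfs_handle_t *hdl, const char *snap, const char *target)
{
	zfs_handle_t *zhp;
	int ret;

	if ((zhp = zfs_open(hdl, snap, ZFS_TYPE_SNAPSHOT)) == NULL)
		return (-1);
	ret = zfs_clone(zhp, target, NULL);
	zfs_close(zhp);
	return (ret);
}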
+
+typedef struct promote_data {
+	char cb_mountpoint[MAXPATHLEN];
+	const char *cb_target;
+	const char *cb_errbuf;
+	uint64_t cb_pivot_txg;
+} promote_data_t;
+
+static int
+promote_snap_cb(zfs_handle_t *zhp, void *data)
+{
+	promote_data_t *pd = data;
+	zfs_handle_t *szhp;
+	char snapname[MAXPATHLEN];
+	int rv = 0;
+
+	/* We don't care about snapshots after the pivot point */
+	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	/* Remove the device link if it's a zvol. */
+	if (ZFS_IS_VOLUME(zhp))
+		(void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name);
+
+	/* Check for conflicting names */
+	(void) strlcpy(snapname, pd->cb_target, sizeof (snapname));
+	(void) strlcat(snapname, strchr(zhp->zfs_name, '@'), sizeof (snapname));
+	szhp = make_dataset_handle(zhp->zfs_hdl, snapname);
+	if (szhp != NULL) {
+		zfs_close(szhp);
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "snapshot name '%s' from origin \n"
+		    "conflicts with '%s' from target"),
+		    zhp->zfs_name, snapname);
+		rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf);
+	}
+	zfs_close(zhp);
+	return (rv);
+}
+
+static int
+promote_snap_done_cb(zfs_handle_t *zhp, void *data)
+{
+	promote_data_t *pd = data;
+
+	/* We don't care about snapshots after the pivot point */
+	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) {
+		/* Create the device link if it's a zvol. */
+		if (ZFS_IS_VOLUME(zhp))
+			(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
+	}
+
+	zfs_close(zhp);
+	return (0);
+}
+
+/*
+ * Promotes the given clone fs to be the clone parent.
+ */
+int
+zfs_promote(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t zc = { 0 };
+	char parent[MAXPATHLEN];
+	char *cp;
+	int ret;
+	zfs_handle_t *pzhp;
+	promote_data_t pd;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot promote '%s'"), zhp->zfs_name);
+
+	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshots can not be promoted"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	}
+
+	(void) strlcpy(parent, zhp->zfs_dmustats.dds_clone_of, sizeof (parent));
+	if (parent[0] == '\0') {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "not a cloned filesystem"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	}
+	cp = strchr(parent, '@');
+	*cp = '\0';
+
+	/* Walk the snapshots we will be moving */
+	pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_clone_of, ZFS_TYPE_SNAPSHOT);
+	if (pzhp == NULL)
+		return (-1);
+	pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
+	zfs_close(pzhp);
+	pd.cb_target = zhp->zfs_name;
+	pd.cb_errbuf = errbuf;
+	pzhp = zfs_open(hdl, parent, ZFS_TYPE_ANY);
+	if (pzhp == NULL)
+		return (-1);
+	(void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint,
+	    sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE);
+	ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd);
+	if (ret != 0) {
+		zfs_close(pzhp);
+		return (-1);
+	}
+
+	/* issue the ioctl */
+	(void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_clone_of,
+	    sizeof (zc.zc_value));
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	ret = ioctl(hdl->libzfs_fd, ZFS_IOC_PROMOTE, &zc);
+
+	if (ret != 0) {
+		int save_errno = errno;
+
+		(void) zfs_iter_snapshots(pzhp, promote_snap_done_cb, &pd);
+		zfs_close(pzhp);
+
+		switch (save_errno) {
+		case EEXIST:
+			/*
+			 * There is a conflicting snapshot name.  We
+			 * should have caught this above, but they could
+			 * have renamed something in the mean time.
+			 */
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "conflicting snapshot name from parent '%s'"),
+			    parent);
+			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+
+		default:
+			return (zfs_standard_error(hdl, save_errno, errbuf));
+		}
+	} else {
+		(void) zfs_iter_snapshots(zhp, promote_snap_done_cb, &pd);
+	}
+
+	zfs_close(pzhp);
+	return (ret);
+}
+
+struct createdata {
+	const char *cd_snapname;
+	int cd_ifexists;
+};
+
+static int
+zfs_create_link_cb(zfs_handle_t *zhp, void *arg)
+{
+	struct createdata *cd = arg;
+	int ret;
+
+	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		char name[MAXPATHLEN];
+
+		(void) strlcpy(name, zhp->zfs_name, sizeof (name));
+		(void) strlcat(name, "@", sizeof (name));
+		(void) strlcat(name, cd->cd_snapname, sizeof (name));
+		(void) zvol_create_link_common(zhp->zfs_hdl, name,
+		    cd->cd_ifexists);
+		/*
+		 * NB: this is simply a best-effort.  We don't want to
+		 * return an error, because then we wouldn't visit all
+		 * the volumes.
+		 */
+	}
+
+	ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd);
+
+	zfs_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * Takes a snapshot of the given dataset.
+ */
+int
+zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive)
+{
+	const char *delim;
+	char *parent;
+	zfs_handle_t *zhp;
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot snapshot '%s'"), path);
+
+	/* validate the target name */
+	if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	/* make sure the parent exists and is of the appropriate type */
+	delim = strchr(path, '@');
+	if ((parent = zfs_alloc(hdl, delim - path + 1)) == NULL)
+		return (-1);
+	(void) strncpy(parent, path, delim - path);
+	parent[delim - path] = '\0';
+
+	if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM |
+	    ZFS_TYPE_VOLUME)) == NULL) {
+		free(parent);
+		return (-1);
+	}
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value));
+	zc.zc_cookie = recursive;
+	ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT, &zc);
+
+	/*
+	 * if it was recursive, the one that actually failed will be in
+	 * zc.zc_name.
+	 */
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value);
+	if (ret == 0 && recursive) {
+		struct createdata cd;
+
+		cd.cd_snapname = delim + 1;
+		cd.cd_ifexists = B_FALSE;
+		(void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd);
+	}
+	if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		ret = zvol_create_link(zhp->zfs_hdl, path);
+		if (ret != 0) {
+			(void) ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY,
+			    &zc);
+		}
+	}
+
+	if (ret != 0)
+		(void) zfs_standard_error(hdl, errno, errbuf);
+
+	free(parent);
+	zfs_close(zhp);
+
+	return (ret);
+}
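
A one-line usage sketch for zfs_snapshot(); the 'tank/home@nightly' name is hypothetical, and recursive=B_TRUE asks for the descendant snapshots and device links handled above.

#include <libzfs.h>

/* Take a recursive snapshot of a (hypothetical) dataset; a sketch. */
static int
nightly_snapshot(libzfs_handle_t *hdl)
{
	return (zfs_snapshot(hdl, "tank/home@nightly", B_TRUE));
}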
+
+/*
+ * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
+ * NULL) to the file descriptor specified by outfd.
+ */
+int
+zfs_send(zfs_handle_t *zhp, const char *fromsnap, int outfd)
+{
+	zfs_cmd_t zc = { 0 };
+	char errbuf[1024];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	if (fromsnap)
+		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value));
+	zc.zc_cookie = outfd;
+
+	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SENDBACKUP, &zc) != 0) {
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot send '%s'"), zhp->zfs_name);
+
+		switch (errno) {
+
+		case EXDEV:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "not an earlier snapshot from the same fs"));
+			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
+
+		case EDQUOT:
+		case EFBIG:
+		case EIO:
+		case ENOLINK:
+		case ENOSPC:
+		case ENXIO:
+		case EPIPE:
+		case ERANGE:
+		case EFAULT:
+		case EROFS:
+			zfs_error_aux(hdl, strerror(errno));
+			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
+
+		default:
+			return (zfs_standard_error(hdl, errno, errbuf));
+		}
+	}
+
+	return (0);
+}
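
A usage sketch for zfs_send(): write a (possibly incremental) stream for an already-open snapshot handle to a plain file descriptor. The output path and the choice of incremental source are placeholders.

#include <libzfs.h>
#include <fcntl.h>
#include <unistd.h>

/* Dump a send stream to 'outfile'; a sketch, errors just returned. */
static int
dump_stream(zfs_handle_t *snap_zhp, const char *fromsnap, const char *outfile)
{
	int fd, ret;

	if ((fd = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, 0644)) == -1)
		return (-1);
	ret = zfs_send(snap_zhp, fromsnap, fd);
	(void) close(fd);
	return (ret);
}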
+
+/*
+ * Create ancestors of 'target', but not target itself, and not
+ * ancestors whose names are shorter than prefixlen.  Die if
+ * prefixlen-ancestor does not exist.
+ */
+static int
+create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
+{
+	zfs_handle_t *h;
+	char *cp;
+
+	/* make sure prefix exists */
+	cp = strchr(target + prefixlen, '/');
+	*cp = '\0';
+	h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+	*cp = '/';
+	if (h == NULL)
+		return (-1);
+	zfs_close(h);
+
+	/*
+	 * Attempt to create, mount, and share any ancestor filesystems,
+	 * up to the prefixlen-long one.
+	 */
+	for (cp = target + prefixlen + 1;
+	    (cp = strchr(cp, '/')) != NULL; *cp = '/', cp++) {
+		const char *opname;
+
+		*cp = '\0';
+
+		h = make_dataset_handle(hdl, target);
+		if (h) {
+			/* it already exists, nothing to do here */
+			zfs_close(h);
+			continue;
+		}
+
+		opname = dgettext(TEXT_DOMAIN, "create");
+		if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM,
+		    NULL) != 0)
+			goto ancestorerr;
+
+		opname = dgettext(TEXT_DOMAIN, "open");
+		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+		if (h == NULL)
+			goto ancestorerr;
+
+		opname = dgettext(TEXT_DOMAIN, "mount");
+		if (zfs_mount(h, NULL, 0) != 0)
+			goto ancestorerr;
+
+		opname = dgettext(TEXT_DOMAIN, "share");
+		if (zfs_share(h) != 0)
+			goto ancestorerr;
+
+		zfs_close(h);
+
+		continue;
+ancestorerr:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "failed to %s ancestor '%s'"), opname, target);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Restores a backup of tosnap from the file descriptor specified by infd.
+ */
+int
+zfs_receive(libzfs_handle_t *hdl, const char *tosnap, int isprefix,
+    int verbose, int dryrun, boolean_t force, int infd)
+{
+	zfs_cmd_t zc = { 0 };
+	time_t begin_time;
+	int ioctl_err, err, bytes, size, choplen;
+	char *cp;
+	dmu_replay_record_t drr;
+	struct drr_begin *drrb = &zc.zc_begin_record;
+	char errbuf[1024];
+	prop_changelist_t *clp;
+	char chopprefix[ZFS_MAXNAMELEN];
+
+	begin_time = time(NULL);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot receive"));
+
+	/* read in the BEGIN record */
+	cp = (char *)&drr;
+	bytes = 0;
+	do {
+		size = read(infd, cp, sizeof (drr) - bytes);
+		cp += size;
+		bytes += size;
+	} while (size > 0);
+
+	if (size < 0 || bytes != sizeof (drr)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (failed to read first record)"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
+	zc.zc_begin_record = drr.drr_u.drr_begin;
+
+	if (drrb->drr_magic != DMU_BACKUP_MAGIC &&
+	    drrb->drr_magic != BSWAP_64(DMU_BACKUP_MAGIC)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (bad magic number)"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
+	if (drrb->drr_version != DMU_BACKUP_VERSION &&
+	    drrb->drr_version != BSWAP_64(DMU_BACKUP_VERSION)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only version "
+		    "0x%llx is supported (stream is version 0x%llx)"),
+		    DMU_BACKUP_VERSION, drrb->drr_version);
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
+	if (strchr(drr.drr_u.drr_begin.drr_toname, '@') == NULL) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (bad snapshot name)"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+	/*
+	 * Determine how much of the snapshot name stored in the stream
+	 * we are going to tack on to the name they specified on the
+	 * command line, and how much we are going to chop off.
+	 *
+	 * If they specified a snapshot, chop the entire name stored in
+	 * the stream.
+	 */
+	(void) strcpy(chopprefix, drr.drr_u.drr_begin.drr_toname);
+	if (isprefix) {
+		/*
+		 * They specified a fs with -d, we want to tack on
+		 * everything but the pool name stored in the stream
+		 */
+		if (strchr(tosnap, '@')) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+			    "argument - snapshot not allowed with -d"));
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+		}
+		cp = strchr(chopprefix, '/');
+		if (cp == NULL)
+			cp = strchr(chopprefix, '@');
+		*cp = '\0';
+	} else if (strchr(tosnap, '@') == NULL) {
+		/*
+		 * If they specified a filesystem without -d, we want to
+		 * tack on everything after the fs specified in the
+		 * first name from the stream.
+		 */
+		cp = strchr(chopprefix, '@');
+		*cp = '\0';
+	}
+	choplen = strlen(chopprefix);
+
+	/*
+	 * Determine name of destination snapshot, store in zc_value.
+	 */
+	(void) strcpy(zc.zc_value, tosnap);
+	(void) strlcat(zc.zc_value, drr.drr_u.drr_begin.drr_toname+choplen,
+	    sizeof (zc.zc_value));
+	if (!zfs_validate_name(hdl, zc.zc_value, ZFS_TYPE_SNAPSHOT))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	(void) strcpy(zc.zc_name, zc.zc_value);
+	if (drrb->drr_fromguid) {
+		/* incremental backup stream */
+		zfs_handle_t *h;
+
+		/* do the recvbackup ioctl to the containing fs */
+		*strchr(zc.zc_name, '@') = '\0';
+
+		/* make sure destination fs exists */
+		h = zfs_open(hdl, zc.zc_name,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		if (h == NULL)
+			return (-1);
+		if (!dryrun) {
+			/*
+			 * We need to unmount all the dependents of the dataset
+			 * and the dataset itself. If it's a volume
+			 * then remove device link.
+			 */
+			if (h->zfs_type == ZFS_TYPE_FILESYSTEM) {
+				clp = changelist_gather(h, ZFS_PROP_NAME, 0);
+				if (clp == NULL)
+					return (-1);
+				if (changelist_prefix(clp) != 0) {
+					changelist_free(clp);
+					return (-1);
+				}
+			} else {
+				(void) zvol_remove_link(hdl, h->zfs_name);
+			}
+		}
+		zfs_close(h);
+	} else {
+		/* full backup stream */
+
+		/* Make sure destination fs does not exist */
+		*strchr(zc.zc_name, '@') = '\0';
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination '%s' exists"), zc.zc_name);
+			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+		}
+
+		if (strchr(zc.zc_name, '/') == NULL) {
+			/*
+			 * They're trying to do a recv into a
+			 * nonexistent topmost filesystem.
+			 */
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination '%s' does not exist"), zc.zc_name);
+			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+		}
+
+		/* Do the recvbackup ioctl to the fs's parent. */
+		*strrchr(zc.zc_name, '/') = '\0';
+
+		if (isprefix && (err = create_parents(hdl,
+		    zc.zc_value, strlen(tosnap))) != 0) {
+			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
+		}
+
+	}
+
+	zc.zc_cookie = infd;
+	zc.zc_guid = force;
+	if (verbose) {
+		(void) printf("%s %s stream of %s into %s\n",
+		    dryrun ? "would receive" : "receiving",
+		    drrb->drr_fromguid ? "incremental" : "full",
+		    drr.drr_u.drr_begin.drr_toname,
+		    zc.zc_value);
+		(void) fflush(stdout);
+	}
+	if (dryrun)
+		return (0);
+	err = ioctl_err = ioctl(hdl->libzfs_fd, ZFS_IOC_RECVBACKUP, &zc);
+	if (ioctl_err != 0) {
+		switch (errno) {
+		case ENODEV:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "most recent snapshot does not match incremental "
+			    "source"));
+			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
+			break;
+		case ETXTBSY:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination has been modified since most recent "
+			    "snapshot"));
+			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
+			break;
+		case EEXIST:
+			if (drrb->drr_fromguid == 0) {
+				/* it's the containing fs that exists */
+				cp = strchr(zc.zc_value, '@');
+				*cp = '\0';
+			}
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination already exists"));
+			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
+			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
+			    zc.zc_value);
+			break;
+		case EINVAL:
+			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+			break;
+		case ECKSUM:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid stream (checksum mismatch)"));
+			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+			break;
+		default:
+			(void) zfs_standard_error(hdl, errno, errbuf);
+		}
+	}
+
+	/*
+	 * Mount or recreate the /dev links for the target filesystem
+	 * (if created, or if we tore them down to do an incremental
+	 * restore), and the /dev links for the new snapshot (if
+	 * created). Also mount any children of the target filesystem
+	 * if we did an incremental receive.
+	 */
+	cp = strchr(zc.zc_value, '@');
+	if (cp && (ioctl_err == 0 || drrb->drr_fromguid)) {
+		zfs_handle_t *h;
+
+		*cp = '\0';
+		h = zfs_open(hdl, zc.zc_value,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		*cp = '@';
+		if (h) {
+			if (h->zfs_type == ZFS_TYPE_VOLUME) {
+				err = zvol_create_link(hdl, h->zfs_name);
+				if (err == 0 && ioctl_err == 0)
+					err = zvol_create_link(hdl,
+					    zc.zc_value);
+			} else {
+				if (drrb->drr_fromguid) {
+					err = changelist_postfix(clp);
+					changelist_free(clp);
+				} else {
+					err = zfs_mount(h, NULL, 0);
+				}
+			}
+			zfs_close(h);
+		}
+	}
+
+	if (err || ioctl_err)
+		return (-1);
+
+	if (verbose) {
+		char buf1[64];
+		char buf2[64];
+		uint64_t bytes = zc.zc_cookie;
+		time_t delta = time(NULL) - begin_time;
+		if (delta == 0)
+			delta = 1;
+		zfs_nicenum(bytes, buf1, sizeof (buf1));
+		zfs_nicenum(bytes/delta, buf2, sizeof (buf2));
+
+		(void) printf("received %sb stream in %lu seconds (%sb/sec)\n",
+		    buf1, delta, buf2);
+	}
+
+	return (0);
+}
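
To make the parameters above concrete, a hypothetical caller might look like the sketch below; the stream path and the destination "tank/restore" are made up, and a non-zero isprefix corresponds to the 'zfs receive -d' behaviour handled by the name-chopping logic above.

#include <fcntl.h>
#include <unistd.h>
#include <libzfs.h>

/* Illustrative sketch: receive a stream from a file into "tank/restore". */
int
example_receive(libzfs_handle_t *hdl)
{
	int fd, ret;

	if ((fd = open("/tmp/stream.zfs", O_RDONLY)) < 0)
		return (-1);

	/*
	 * isprefix != 0 behaves like 'zfs receive -d': everything after the
	 * pool name in the stream's snapshot name is appended to the target.
	 */
	ret = zfs_receive(hdl, "tank/restore", 1, 1 /* verbose */,
	    0 /* dryrun */, B_FALSE /* force */, fd);

	(void) close(fd);
	return (ret);
}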
+
+/*
+ * Destroy any more recent snapshots.  We invoke this callback on any
+ * dependents of the snapshot first.  If the 'cb_dependent' member is
+ * non-zero, then this is a dependent and we should just destroy it without
+ * checking the transaction group.
+ */
+typedef struct rollback_data {
+	const char	*cb_target;		/* the snapshot */
+	uint64_t	cb_create;		/* creation time reference */
+	prop_changelist_t *cb_clp;		/* changelist pointer */
+	int		cb_error;
+	boolean_t	cb_dependent;
+} rollback_data_t;
+
+static int
+rollback_destroy(zfs_handle_t *zhp, void *data)
+{
+	rollback_data_t *cbp = data;
+
+	if (!cbp->cb_dependent) {
+		if (strcmp(zhp->zfs_name, cbp->cb_target) != 0 &&
+		    zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
+		    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) >
+		    cbp->cb_create) {
+
+			cbp->cb_dependent = B_TRUE;
+			if (zfs_iter_dependents(zhp, B_FALSE, rollback_destroy,
+			    cbp) != 0)
+				cbp->cb_error = 1;
+			cbp->cb_dependent = B_FALSE;
+
+			if (zfs_destroy(zhp) != 0)
+				cbp->cb_error = 1;
+			else
+				changelist_remove(zhp, cbp->cb_clp);
+		}
+	} else {
+		if (zfs_destroy(zhp) != 0)
+			cbp->cb_error = 1;
+		else
+			changelist_remove(zhp, cbp->cb_clp);
+	}
+
+	zfs_close(zhp);
+	return (0);
+}
+
+/*
+ * Rollback the dataset to its latest snapshot.
+ */
+static int
+do_rollback(zfs_handle_t *zhp)
+{
+	int ret;
+	zfs_cmd_t zc = { 0 };
+
+	assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM ||
+	    zhp->zfs_type == ZFS_TYPE_VOLUME);
+
+	if (zhp->zfs_type == ZFS_TYPE_VOLUME &&
+	    zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
+		return (-1);
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (ZFS_IS_VOLUME(zhp))
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	else
+		zc.zc_objset_type = DMU_OST_ZFS;
+
+	/*
+	 * We rely on the consumer to verify that there are no newer snapshots
+	 * for the given dataset.  Given these constraints, we can simply pass
+	 * the name on to the ioctl() call.  There is still an unlikely race
+	 * condition where the user has taken a snapshot since we verified that
+	 * this was the most recent.
+	 */
+	if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_ROLLBACK,
+	    &zc)) != 0) {
+		(void) zfs_standard_error_fmt(zhp->zfs_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot rollback '%s'"),
+		    zhp->zfs_name);
+	} else if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		ret = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
+	}
+
+	return (ret);
+}
+
+/*
+ * Given a dataset, rollback to a specific snapshot, discarding any
+ * data changes since then and making it the active dataset.
+ *
+ * Any snapshots more recent than the target are destroyed, along with
+ * their dependents.
+ */
+int
+zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, int flag)
+{
+	int ret;
+	rollback_data_t cb = { 0 };
+	prop_changelist_t *clp;
+
+	/*
+	 * Unmount all dependents of the dataset and the dataset itself.
+	 * The list we need to gather is the same as for doing a rename.
+	 */
+	clp = changelist_gather(zhp, ZFS_PROP_NAME, flag ? MS_FORCE: 0);
+	if (clp == NULL)
+		return (-1);
+
+	if ((ret = changelist_prefix(clp)) != 0)
+		goto out;
+
+	/*
+	 * Destroy all snapshots more recent than the target, along with
+	 * their dependents.
+	 */
+	cb.cb_target = snap->zfs_name;
+	cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
+	cb.cb_clp = clp;
+	(void) zfs_iter_children(zhp, rollback_destroy, &cb);
+
+	if ((ret = cb.cb_error) != 0) {
+		(void) changelist_postfix(clp);
+		goto out;
+	}
+
+	/*
+	 * Now that we have verified that the snapshot is the latest,
+	 * roll back to the given snapshot.
+	 */
+	ret = do_rollback(zhp);
+
+	if (ret != 0) {
+		(void) changelist_postfix(clp);
+		goto out;
+	}
+
+	/*
+	 * We only want to re-mount the filesystem if it was mounted in the
+	 * first place.
+	 */
+	ret = changelist_postfix(clp);
+
+out:
+	changelist_free(clp);
+	return (ret);
+}
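
A hedged usage sketch: do_rollback() above relies on the caller to have verified that the snapshot is the most recent one, so a consumer opens both handles and only then calls zfs_rollback(). The names below are hypothetical.

#include <libzfs.h>

/* Illustrative sketch: roll "tank/home" back to its snapshot "nightly". */
int
example_rollback(libzfs_handle_t *hdl)
{
	zfs_handle_t *zhp, *snap;
	int ret;

	if ((zhp = zfs_open(hdl, "tank/home", ZFS_TYPE_FILESYSTEM)) == NULL)
		return (-1);
	if ((snap = zfs_open(hdl, "tank/home@nightly",
	    ZFS_TYPE_SNAPSHOT)) == NULL) {
		zfs_close(zhp);
		return (-1);
	}

	/* A non-zero flag forces unmounts (MS_FORCE) during the teardown. */
	ret = zfs_rollback(zhp, snap, 0);

	zfs_close(snap);
	zfs_close(zhp);
	return (ret);
}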
+
+/*
+ * Iterate over all dependents for a given dataset.  This includes both
+ * hierarchical dependents (children) and data dependents (snapshots and
+ * clones).  The bulk of the processing occurs in get_dependents() in
+ * libzfs_graph.c.
+ */
+int
+zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion,
+    zfs_iter_f func, void *data)
+{
+	char **dependents;
+	size_t count;
+	int i;
+	zfs_handle_t *child;
+	int ret = 0;
+
+	if (get_dependents(zhp->zfs_hdl, allowrecursion, zhp->zfs_name,
+	    &dependents, &count) != 0)
+		return (-1);
+
+	for (i = 0; i < count; i++) {
+		if ((child = make_dataset_handle(zhp->zfs_hdl,
+		    dependents[i])) == NULL)
+			continue;
+
+		if ((ret = func(child, data)) != 0)
+			break;
+	}
+
+	for (i = 0; i < count; i++)
+		free(dependents[i]);
+	free(dependents);
+
+	return (ret);
+}
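
A short sketch of a callback, mirroring how rollback_destroy() above consumes the handles it is given: the callback owns each handle and must zfs_close() it. The printing callback is purely illustrative.

#include <stdio.h>
#include <libzfs.h>

/*ARGSUSED*/
static int
print_dependent(zfs_handle_t *zhp, void *data)
{
	/* The callback owns the handle and must close it when done. */
	(void) printf("dependent: %s\n", zfs_get_name(zhp));
	zfs_close(zhp);
	return (0);
}

/* Illustrative sketch: list every dependent (child, snapshot, clone). */
int
example_list_dependents(zfs_handle_t *zhp)
{
	return (zfs_iter_dependents(zhp, B_FALSE, print_dependent, NULL));
}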
+
+/*
+ * Renames the given dataset.
+ */
+int
+zfs_rename(zfs_handle_t *zhp, const char *target, int recursive)
+{
+	int ret;
+	zfs_cmd_t zc = { 0 };
+	char *delim;
+	prop_changelist_t *cl = NULL;
+	zfs_handle_t *zhrp = NULL;
+	char *parentname = NULL;
+	char parent[ZFS_MAXNAMELEN];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+
+	/* if we have the same exact name, just return success */
+	if (strcmp(zhp->zfs_name, target) == 0)
+		return (0);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot rename to '%s'"), target);
+
+	/*
+	 * Make sure the target name is valid
+	 */
+	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
+		if ((strchr(target, '@') == NULL) ||
+		    *target == '@') {
+			/*
+			 * Snapshot target name is abbreviated,
+			 * reconstruct full dataset name
+			 */
+			(void) strlcpy(parent, zhp->zfs_name,
+			    sizeof (parent));
+			delim = strchr(parent, '@');
+			if (strchr(target, '@') == NULL)
+				*(++delim) = '\0';
+			else
+				*delim = '\0';
+			(void) strlcat(parent, target, sizeof (parent));
+			target = parent;
+		} else {
+			/*
+			 * Make sure we're renaming within the same dataset.
+			 */
+			delim = strchr(target, '@');
+			if (strncmp(zhp->zfs_name, target, delim - target)
+			    != 0 || zhp->zfs_name[delim - target] != '@') {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "snapshots must be part of same "
+				    "dataset"));
+				return (zfs_error(hdl, EZFS_CROSSTARGET,
+				    errbuf));
+			}
+		}
+		if (!zfs_validate_name(hdl, target, zhp->zfs_type))
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+	} else {
+		if (recursive) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "recursive rename must be a snapshot"));
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+		}
+
+		if (!zfs_validate_name(hdl, target, zhp->zfs_type))
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+		uint64_t unused;
+
+		/* validate parents */
+		if (check_parents(hdl, target, &unused) != 0)
+			return (-1);
+
+		(void) parent_name(target, parent, sizeof (parent));
+
+		/* make sure we're in the same pool */
+		verify((delim = strchr(target, '/')) != NULL);
+		if (strncmp(zhp->zfs_name, target, delim - target) != 0 ||
+		    zhp->zfs_name[delim - target] != '/') {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "datasets must be within same pool"));
+			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
+		}
+
+		/* new name cannot be a child of the current dataset name */
+		if (strncmp(parent, zhp->zfs_name,
+		    strlen(zhp->zfs_name)) == 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "new dataset name cannot be a descendant of "
+			    "current dataset name"));
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+		}
+	}
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name);
+
+	if (getzoneid() == GLOBAL_ZONEID &&
+	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset is used in a non-global zone"));
+		return (zfs_error(hdl, EZFS_ZONED, errbuf));
+	}
+
+	if (recursive) {
+		struct destroydata dd;
+
+		parentname = strdup(zhp->zfs_name);
+		delim = strchr(parentname, '@');
+		*delim = '\0';
+		zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_ANY);
+		if (zhrp == NULL) {
+			return (-1);
+		}
+
+		dd.snapname = delim + 1;
+		dd.gotone = B_FALSE;
+		dd.closezhp = B_FALSE;
+
+		/* We remove any zvol links prior to renaming them */
+		ret = zfs_iter_filesystems(zhrp, zfs_remove_link_cb, &dd);
+		if (ret) {
+			goto error;
+		}
+	} else {
+		if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0)) == NULL)
+			return (-1);
+
+		if (changelist_haszonedchild(cl)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "child dataset with inherited mountpoint is used "
+			    "in a non-global zone"));
+			(void) zfs_error(hdl, EZFS_ZONED, errbuf);
+			goto error;
+		}
+
+		if ((ret = changelist_prefix(cl)) != 0)
+			goto error;
+	}
+
+	if (ZFS_IS_VOLUME(zhp))
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	else
+		zc.zc_objset_type = DMU_OST_ZFS;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
+
+	zc.zc_cookie = recursive;
+
+	if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, &zc)) != 0) {
+		/*
+		 * if it was recursive, the one that actually failed will
+		 * be in zc.zc_name
+		 */
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot rename to '%s'"), zc.zc_name);
+
+		if (recursive && errno == EEXIST) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "a child dataset already has a snapshot "
+			    "with the new name"));
+			(void) zfs_error(hdl, EZFS_CROSSTARGET, errbuf);
+		} else {
+			(void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf);
+		}
+
+		/*
+		 * On failure, we still want to remount any filesystems that
+		 * were previously mounted, so we don't alter the system state.
+		 */
+		if (recursive) {
+			struct createdata cd;
+
+			/* only create links for datasets that had existed */
+			cd.cd_snapname = delim + 1;
+			cd.cd_ifexists = B_TRUE;
+			(void) zfs_iter_filesystems(zhrp, zfs_create_link_cb,
+			    &cd);
+		} else {
+			(void) changelist_postfix(cl);
+		}
+	} else {
+		if (recursive) {
+			struct createdata cd;
+
+			/* only create links for datasets that had existed */
+			cd.cd_snapname = strchr(target, '@') + 1;
+			cd.cd_ifexists = B_TRUE;
+			ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb,
+			    &cd);
+		} else {
+			changelist_rename(cl, zfs_get_name(zhp), target);
+			ret = changelist_postfix(cl);
+		}
+	}
+
+error:
+	if (parentname) {
+		free(parentname);
+	}
+	if (zhrp) {
+		zfs_close(zhrp);
+	}
+	if (cl) {
+		changelist_free(cl);
+	}
+	return (ret);
+}
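
A minimal sketch of a rename within one pool, matching the constraints enforced above (same pool, recursive only for snapshots); the dataset names are hypothetical.

#include <libzfs.h>

/* Illustrative sketch: rename a filesystem within the same pool. */
int
example_rename(libzfs_handle_t *hdl)
{
	zfs_handle_t *zhp;
	int ret;

	if ((zhp = zfs_open(hdl, "tank/projects/old",
	    ZFS_TYPE_FILESYSTEM)) == NULL)
		return (-1);

	/* recursive (last argument) is only meaningful for snapshots. */
	ret = zfs_rename(zhp, "tank/projects/new", 0);

	zfs_close(zhp);
	return (ret);
}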
+
+/*
+ * Given a zvol dataset, issue the ioctl to create the appropriate minor node,
+ * poke devfsadm to create the /dev link, and then wait for the link to appear.
+ */
+int
+zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
+{
+	return (zvol_create_link_common(hdl, dataset, B_FALSE));
+}
+
+static int
+zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists)
+{
+	zfs_cmd_t zc = { 0 };
+#if 0
+	di_devlink_handle_t dhdl;
+#endif
+
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+
+	/*
+	 * Issue the appropriate ioctl.
+	 */
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) {
+		switch (errno) {
+		case EEXIST:
+			/*
+			 * Silently ignore the case where the link already
+			 * exists.  This allows 'zfs volinit' to be run multiple
+			 * times without errors.
+			 */
+			return (0);
+
+		case ENOENT:
+			/*
+			 * Dataset does not exist in the kernel.  If we
+			 * don't care (see zfs_rename), then ignore the
+			 * error quietly.
+			 */
+			if (ifexists) {
+				return (0);
+			}
+
+			/* FALLTHROUGH */
+
+		default:
+			return (zfs_standard_error_fmt(hdl, errno,
+			    dgettext(TEXT_DOMAIN, "cannot create device links "
+			    "for '%s'"), dataset));
+		}
+	}
+
+#if 0
+	/*
+	 * Call devfsadm and wait for the links to magically appear.
+	 */
+	if ((dhdl = di_devlink_init(ZFS_DRIVER, DI_MAKE_LINK)) == NULL) {
+		zfs_error_aux(hdl, strerror(errno));
+		(void) zfs_error_fmt(hdl, EZFS_DEVLINKS,
+		    dgettext(TEXT_DOMAIN, "cannot create device links "
+		    "for '%s'"), dataset);
+		(void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc);
+		return (-1);
+	} else {
+		(void) di_devlink_fini(&dhdl);
+	}
+#endif
+
+	return (0);
+}
+
+/*
+ * Remove a minor node for the given zvol and the associated /dev links.
+ */
+int
+zvol_remove_link(libzfs_handle_t *hdl, const char *dataset)
+{
+	zfs_cmd_t zc = { 0 };
+
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) != 0) {
+		switch (errno) {
+		case ENXIO:
+			/*
+			 * Silently ignore the case where the link no longer
+			 * exists, so that 'zfs volfini' can be run multiple
+			 * times without errors.
+			 */
+			return (0);
+
+		default:
+			return (zfs_standard_error_fmt(hdl, errno,
+			    dgettext(TEXT_DOMAIN, "cannot remove device "
+			    "links for '%s'"), dataset));
+		}
+	}
+
+	return (0);
+}
+
+nvlist_t *
+zfs_get_user_props(zfs_handle_t *zhp)
+{
+	return (zhp->zfs_user_props);
+}
+
+/*
+ * Given a comma-separated list of properties, construct a property list
+ * containing both user-defined and native properties.  This function will
+ * return a NULL list if 'all' is specified, which can later be expanded on a
+ * per-dataset basis by zfs_expand_proplist().
+ */
+int
+zfs_get_proplist_common(libzfs_handle_t *hdl, char *fields,
+    zfs_proplist_t **listp, zfs_type_t type)
+{
+	size_t len;
+	char *s, *p;
+	char c;
+	zfs_prop_t prop;
+	zfs_proplist_t *entry;
+	zfs_proplist_t **last;
+
+	*listp = NULL;
+	last = listp;
+
+	/*
+	 * If 'all' is specified, return a NULL list.
+	 */
+	if (strcmp(fields, "all") == 0)
+		return (0);
+
+	/*
+	 * If no fields were specified, return an error.
+	 */
+	if (fields[0] == '\0') {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "no properties specified"));
+		return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN,
+		    "bad property list")));
+	}
+
+	/*
+	 * It would be nice to use getsubopt() here, but the inclusion of column
+	 * aliases makes this more effort than it's worth.
+	 */
+	s = fields;
+	while (*s != '\0') {
+		if ((p = strchr(s, ',')) == NULL) {
+			len = strlen(s);
+			p = s + len;
+		} else {
+			len = p - s;
+		}
+
+		/*
+		 * Check for empty options.
+		 */
+		if (len == 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "empty property name"));
+			return (zfs_error(hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN, "bad property list")));
+		}
+
+		/*
+		 * Check all regular property names.
+		 */
+		c = s[len];
+		s[len] = '\0';
+		prop = zfs_name_to_prop_common(s, type);
+
+		if (prop != ZFS_PROP_INVAL &&
+		    !zfs_prop_valid_for_type(prop, type))
+			prop = ZFS_PROP_INVAL;
+
+		/*
+		 * When no property table entry can be found, return failure if
+		 * this is a pool property or if this isn't a user-defined
+		 * dataset property.
+		 */
+		if (prop == ZFS_PROP_INVAL &&
+		    (type & ZFS_TYPE_POOL || !zfs_prop_user(s))) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid property '%s'"), s);
+			return (zfs_error(hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN, "bad property list")));
+		}
+
+		if ((entry = zfs_alloc(hdl, sizeof (zfs_proplist_t))) == NULL)
+			return (-1);
+
+		entry->pl_prop = prop;
+		if (prop == ZFS_PROP_INVAL) {
+			if ((entry->pl_user_prop =
+			    zfs_strdup(hdl, s)) == NULL) {
+				free(entry);
+				return (-1);
+			}
+			entry->pl_width = strlen(s);
+		} else {
+			entry->pl_width = zfs_prop_width(prop,
+			    &entry->pl_fixed);
+		}
+
+		*last = entry;
+		last = &entry->pl_next;
+
+		s = p;
+		if (c == ',')
+			s++;
+	}
+
+	return (0);
+}
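
To illustrate the parsing above, a hypothetical caller might build and walk a list like this; note that the fields string is modified in place, so it must be writable, and "my:comment" stands in for an arbitrary user-defined property.

#include <stdio.h>
#include <libzfs.h>

/* Illustrative sketch: build a property list from a comma-separated string. */
int
example_proplist(libzfs_handle_t *hdl)
{
	zfs_proplist_t *pl, *entry;
	char fields[] = "name,used,mountpoint,my:comment";

	if (zfs_get_proplist(hdl, fields, &pl) != 0)
		return (-1);

	for (entry = pl; entry != NULL; entry = entry->pl_next) {
		if (entry->pl_prop == ZFS_PROP_INVAL)
			(void) printf("user property: %s\n",
			    entry->pl_user_prop);
		else
			(void) printf("native property #%d\n",
			    (int)entry->pl_prop);
	}

	zfs_free_proplist(pl);
	return (0);
}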
+
+int
+zfs_get_proplist(libzfs_handle_t *hdl, char *fields, zfs_proplist_t **listp)
+{
+	return (zfs_get_proplist_common(hdl, fields, listp, ZFS_TYPE_ANY));
+}
+
+void
+zfs_free_proplist(zfs_proplist_t *pl)
+{
+	zfs_proplist_t *next;
+
+	while (pl != NULL) {
+		next = pl->pl_next;
+		free(pl->pl_user_prop);
+		free(pl);
+		pl = next;
+	}
+}
+
+typedef struct expand_data {
+	zfs_proplist_t	**last;
+	libzfs_handle_t	*hdl;
+} expand_data_t;
+
+static zfs_prop_t
+zfs_expand_proplist_cb(zfs_prop_t prop, void *cb)
+{
+	zfs_proplist_t *entry;
+	expand_data_t *edp = cb;
+
+	if ((entry = zfs_alloc(edp->hdl, sizeof (zfs_proplist_t))) == NULL)
+		return (ZFS_PROP_INVAL);
+
+	entry->pl_prop = prop;
+	entry->pl_width = zfs_prop_width(prop, &entry->pl_fixed);
+	entry->pl_all = B_TRUE;
+
+	*(edp->last) = entry;
+	edp->last = &entry->pl_next;
+
+	return (ZFS_PROP_CONT);
+}
+
+int
+zfs_expand_proplist_common(libzfs_handle_t *hdl, zfs_proplist_t **plp,
+	zfs_type_t type)
+{
+	zfs_proplist_t *entry;
+	zfs_proplist_t **last;
+	expand_data_t exp;
+
+	if (*plp == NULL) {
+		/*
+		 * If this is the very first time we've been called for an 'all'
+		 * specification, expand the list to include all native
+		 * properties.
+		 */
+		last = plp;
+
+		exp.last = last;
+		exp.hdl = hdl;
+
+		if (zfs_prop_iter_common(zfs_expand_proplist_cb, &exp, type,
+		    B_FALSE) == ZFS_PROP_INVAL)
+			return (-1);
+
+		/*
+		 * Add 'name' to the beginning of the list, which is handled
+		 * specially.
+		 */
+		if ((entry = zfs_alloc(hdl,
+		    sizeof (zfs_proplist_t))) == NULL)
+			return (-1);
+
+		entry->pl_prop = ZFS_PROP_NAME;
+		entry->pl_width = zfs_prop_width(ZFS_PROP_NAME,
+		    &entry->pl_fixed);
+		entry->pl_all = B_TRUE;
+		entry->pl_next = *plp;
+		*plp = entry;
+	}
+	return (0);
+}
+
+/*
+ * This function is used by 'zfs list' to determine the exact set of columns to
+ * display, and their maximum widths.  This does two main things:
+ *
+ *      - If this is a list of all properties, then expand the list to include
+ *        all native properties, and set a flag so that for each dataset we look
+ *        for new unique user properties and add them to the list.
+ *
+ *      - For non-fixed-width properties, keep track of the maximum width seen
+ *        so that we can size the column appropriately.
+ */
+int
+zfs_expand_proplist(zfs_handle_t *zhp, zfs_proplist_t **plp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_proplist_t *entry;
+	zfs_proplist_t **last, **start;
+	nvlist_t *userprops, *propval;
+	nvpair_t *elem;
+	char *strval;
+	char buf[ZFS_MAXPROPLEN];
+
+	if (zfs_expand_proplist_common(hdl, plp, ZFS_TYPE_ANY) != 0)
+		return (-1);
+
+	userprops = zfs_get_user_props(zhp);
+
+	entry = *plp;
+	if (entry->pl_all && nvlist_next_nvpair(userprops, NULL) != NULL) {
+		/*
+		 * Go through and add any user properties as necessary.  We
+		 * start by incrementing our list pointer to the first
+		 * non-native property.
+		 */
+		start = plp;
+		while (*start != NULL) {
+			if ((*start)->pl_prop == ZFS_PROP_INVAL)
+				break;
+			start = &(*start)->pl_next;
+		}
+
+		elem = NULL;
+		while ((elem = nvlist_next_nvpair(userprops, elem)) != NULL) {
+			/*
+			 * See if we've already found this property in our list.
+			 */
+			for (last = start; *last != NULL;
+			    last = &(*last)->pl_next) {
+				if (strcmp((*last)->pl_user_prop,
+				    nvpair_name(elem)) == 0)
+					break;
+			}
+
+			if (*last == NULL) {
+				if ((entry = zfs_alloc(hdl,
+				    sizeof (zfs_proplist_t))) == NULL ||
+				    ((entry->pl_user_prop = zfs_strdup(hdl,
+				    nvpair_name(elem)))) == NULL) {
+					free(entry);
+					return (-1);
+				}
+
+				entry->pl_prop = ZFS_PROP_INVAL;
+				entry->pl_width = strlen(nvpair_name(elem));
+				entry->pl_all = B_TRUE;
+				*last = entry;
+			}
+		}
+	}
+
+	/*
+	 * Now go through and check the width of any non-fixed columns
+	 */
+	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
+		if (entry->pl_fixed)
+			continue;
+
+		if (entry->pl_prop != ZFS_PROP_INVAL) {
+			if (zfs_prop_get(zhp, entry->pl_prop,
+			    buf, sizeof (buf), NULL, NULL, 0, B_FALSE) == 0) {
+				if (strlen(buf) > entry->pl_width)
+					entry->pl_width = strlen(buf);
+			}
+		} else if (nvlist_lookup_nvlist(userprops,
+		    entry->pl_user_prop, &propval) == 0) {
+			verify(nvlist_lookup_string(propval,
+			    ZFS_PROP_VALUE, &strval) == 0);
+			if (strlen(strval) > entry->pl_width)
+				entry->pl_width = strlen(strval);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Attach/detach the given filesystem to/from the given jail.
+ */
+int
+zfs_jail(zfs_handle_t *zhp, int jailid, int attach)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t zc = { 0 };
+	char errbuf[1024];
+	int cmd, ret;
+
+	if (attach) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot jail '%s'"), zhp->zfs_name);
+	} else {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot unjail '%s'"), zhp->zfs_name);
+	}
+
+	switch (zhp->zfs_type) {
+	case ZFS_TYPE_VOLUME:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "volumes cannot be jailed"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	case ZFS_TYPE_SNAPSHOT:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshots cannot be jailed"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	}
+	assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	zc.zc_objset_type = DMU_OST_ZFS;
+	zc.zc_jailid = jailid;
+
+	cmd = attach ? ZFS_IOC_JAIL : ZFS_IOC_UNJAIL;
+	if ((ret = ioctl(hdl->libzfs_fd, cmd, &zc)) != 0)
+		(void) zfs_standard_error(hdl, errno, errbuf);
+
+	return (ret);
+}
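
A small sketch of the attach path; the jail id is assumed to come from the caller (however the jail was created) and the dataset name is hypothetical.

#include <libzfs.h>

/* Illustrative sketch: attach a filesystem to an already running jail. */
int
example_jail_attach(libzfs_handle_t *hdl, int jid)
{
	zfs_handle_t *zhp;
	int ret;

	if ((zhp = zfs_open(hdl, "tank/jails/www",
	    ZFS_TYPE_FILESYSTEM)) == NULL)
		return (-1);

	ret = zfs_jail(zhp, jid, 1);	/* attach; pass 0 to detach */

	zfs_close(zhp);
	return (ret);
}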
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c
@@ -0,0 +1,599 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <libintl.h>
+#include <libuutil.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <zone.h>
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+
+/*
+ * Structure to keep track of dataset state.  Before changing the 'sharenfs' or
+ * 'mountpoint' property, we record whether the filesystem was previously
+ * mounted/shared.  This prior state dictates whether we remount/reshare the
+ * dataset after the property has been changed.
+ *
+ * The interface consists of the following sequence of functions:
+ *
+ * 	changelist_gather()
+ * 	changelist_prefix()
+ * 	< change property >
+ * 	changelist_postfix()
+ * 	changelist_free()
+ *
+ * Other interfaces:
+ *
+ * changelist_remove() - remove a node from a gathered list
+ * changelist_rename() - renames all datasets appropriately when doing a rename
+ * changelist_unshare() - unshares all the nodes in a given changelist
+ * changelist_haszonedchild() - check if there is any child exported to
+ *				a local zone
+ */
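
As a rough, library-internal sketch of the sequence listed above (not code in this commit), a property-changing path is expected to bracket the actual update like this; the placeholder comment marks where the real property change would happen.

static int
example_set_mountpoint(zfs_handle_t *zhp)
{
	prop_changelist_t *clp;
	int ret;

	if ((clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0)) == NULL)
		return (-1);

	if (changelist_prefix(clp) != 0) {
		changelist_free(clp);
		return (-1);
	}

	/* < change property > -- the actual update happens here. */

	ret = changelist_postfix(clp);
	changelist_free(clp);
	return (ret);
}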
+typedef struct prop_changenode {
+	zfs_handle_t		*cn_handle;
+	int			cn_shared;
+	int			cn_mounted;
+	int			cn_zoned;
+	uu_list_node_t		cn_listnode;
+} prop_changenode_t;
+
+struct prop_changelist {
+	zfs_prop_t		cl_prop;
+	zfs_prop_t		cl_realprop;
+	uu_list_pool_t		*cl_pool;
+	uu_list_t		*cl_list;
+	boolean_t		cl_waslegacy;
+	boolean_t		cl_allchildren;
+	boolean_t		cl_alldependents;
+	int			cl_flags;
+	boolean_t		cl_haszonedchild;
+	boolean_t		cl_sorted;
+};
+
+/*
+ * If the property is 'mountpoint', go through and unmount filesystems as
+ * necessary.  We don't do the same for 'sharenfs', because we can just re-share
+ * with different options without interrupting service.
+ */
+int
+changelist_prefix(prop_changelist_t *clp)
+{
+	prop_changenode_t *cn;
+	int ret = 0;
+
+	if (clp->cl_prop != ZFS_PROP_MOUNTPOINT)
+		return (0);
+
+	for (cn = uu_list_first(clp->cl_list); cn != NULL;
+	    cn = uu_list_next(clp->cl_list, cn)) {
+		/*
+		 * If we are in the global zone, but this dataset is exported
+		 * to a local zone, do nothing.
+		 */
+		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
+			continue;
+
+		if (ZFS_IS_VOLUME(cn->cn_handle)) {
+			switch (clp->cl_realprop) {
+			case ZFS_PROP_NAME:
+				/*
+				 * If this was a rename, unshare the zvol, and
+				 * remove the /dev/zvol links.
+				 */
+				(void) zfs_unshare_iscsi(cn->cn_handle);
+
+				if (zvol_remove_link(cn->cn_handle->zfs_hdl,
+				    cn->cn_handle->zfs_name) != 0)
+					ret = -1;
+				break;
+
+			case ZFS_PROP_VOLSIZE:
+				/*
+				 * If this was a change to the volume size, we
+				 * need to unshare and reshare the volume.
+				 */
+				(void) zfs_unshare_iscsi(cn->cn_handle);
+				break;
+			}
+		} else if (zfs_unmount(cn->cn_handle, NULL, clp->cl_flags) != 0)
+			ret = -1;
+	}
+
+	return (ret);
+}
+
+/*
+ * If the property is 'mountpoint' or 'sharenfs', go through and remount and/or
+ * reshare the filesystems as necessary.  In changelist_gather() we recorded
+ * whether the filesystem was previously shared or mounted.  The action we take
+ * depends on the previous state, and whether the value was previously 'legacy'.
+ * For non-legacy properties, we only remount/reshare the filesystem if it was
+ * previously mounted/shared.  Otherwise, we always remount/reshare the
+ * filesystem.
+ */
+int
+changelist_postfix(prop_changelist_t *clp)
+{
+	prop_changenode_t *cn;
+	char shareopts[ZFS_MAXPROPLEN];
+	int ret = 0;
+
+	/*
+	 * If we're changing the mountpoint, attempt to destroy the underlying
+	 * mountpoint.  All other datasets will have inherited from this dataset
+	 * (in which case their mountpoints exist in the filesystem in the new
+	 * location), or have explicit mountpoints set (in which case they won't
+	 * be in the changelist).
+	 */
+	if ((cn = uu_list_last(clp->cl_list)) == NULL)
+		return (0);
+
+	if (clp->cl_prop == ZFS_PROP_MOUNTPOINT)
+		remove_mountpoint(cn->cn_handle);
+
+	/*
+	 * We walk the datasets in reverse, because we want to mount any parent
+	 * datasets before mounting the children.
+	 */
+	for (cn = uu_list_last(clp->cl_list); cn != NULL;
+	    cn = uu_list_prev(clp->cl_list, cn)) {
+		/*
+		 * If we are in the global zone, but this dataset is exported
+		 * to a local zone, do nothing.
+		 */
+		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
+			continue;
+
+		zfs_refresh_properties(cn->cn_handle);
+
+		if (ZFS_IS_VOLUME(cn->cn_handle)) {
+			/*
+			 * If we're doing a rename, recreate the /dev/zvol
+			 * links.
+			 */
+			if (clp->cl_realprop == ZFS_PROP_NAME &&
+			    zvol_create_link(cn->cn_handle->zfs_hdl,
+			    cn->cn_handle->zfs_name) != 0) {
+				ret = -1;
+			} else if (cn->cn_shared ||
+			    clp->cl_prop == ZFS_PROP_SHAREISCSI) {
+				if (zfs_prop_get(cn->cn_handle,
+				    ZFS_PROP_SHAREISCSI, shareopts,
+				    sizeof (shareopts), NULL, NULL, 0,
+				    B_FALSE) == 0 &&
+				    strcmp(shareopts, "off") == 0) {
+					ret = zfs_unshare_iscsi(cn->cn_handle);
+				} else {
+					ret = zfs_share_iscsi(cn->cn_handle);
+				}
+			}
+
+			continue;
+		}
+
+		if ((clp->cl_waslegacy || cn->cn_mounted) &&
+		    !zfs_is_mounted(cn->cn_handle, NULL) &&
+		    zfs_mount(cn->cn_handle, NULL, 0) != 0)
+			ret = -1;
+
+		/*
+		 * We always re-share even if the filesystem is currently
+		 * shared, so that we can adopt any new options.
+		 */
+		if (cn->cn_shared ||
+		    (clp->cl_prop == ZFS_PROP_SHARENFS && clp->cl_waslegacy)) {
+			if (zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARENFS,
+			    shareopts, sizeof (shareopts), NULL, NULL, 0,
+			    B_FALSE) == 0 && strcmp(shareopts, "off") == 0) {
+				ret = zfs_unshare_nfs(cn->cn_handle, NULL);
+			} else {
+				ret = zfs_share_nfs(cn->cn_handle);
+			}
+		}
+	}
+
+	return (ret);
+}
+
+/*
+ * Is this "dataset" a child of "parent"?
+ */
+static boolean_t
+isa_child_of(const char *dataset, const char *parent)
+{
+	int len;
+
+	len = strlen(parent);
+
+	if (strncmp(dataset, parent, len) == 0 &&
+	    (dataset[len] == '@' || dataset[len] == '/' ||
+	    dataset[len] == '\0'))
+		return (B_TRUE);
+	else
+		return (B_FALSE);
+}
+
+/*
+ * If we rename a filesystem, child filesystem handles are no longer valid
+ * since we identify each dataset by its name in the ZFS namespace.  As a
+ * result, we have to go through and fix up all the names appropriately.  We
+ * could do this automatically if libzfs kept track of all open handles, but
+ * this is a lot less work.
+ */
+void
+changelist_rename(prop_changelist_t *clp, const char *src, const char *dst)
+{
+	prop_changenode_t *cn;
+	char newname[ZFS_MAXNAMELEN];
+
+	for (cn = uu_list_first(clp->cl_list); cn != NULL;
+	    cn = uu_list_next(clp->cl_list, cn)) {
+		/*
+		 * Do not rename a clone that's not in the source hierarchy.
+		 */
+		if (!isa_child_of(cn->cn_handle->zfs_name, src))
+			continue;
+
+		/*
+		 * Destroy the previous mountpoint if needed.
+		 */
+		remove_mountpoint(cn->cn_handle);
+
+		(void) strlcpy(newname, dst, sizeof (newname));
+		(void) strlcat(newname, cn->cn_handle->zfs_name + strlen(src),
+		    sizeof (newname));
+
+		(void) strlcpy(cn->cn_handle->zfs_name, newname,
+		    sizeof (cn->cn_handle->zfs_name));
+	}
+}
+
+/*
+ * Given a gathered changelist for the 'sharenfs' property, unshare all the
+ * datasets in the list.
+ */
+int
+changelist_unshare(prop_changelist_t *clp)
+{
+	prop_changenode_t *cn;
+	int ret = 0;
+
+	if (clp->cl_prop != ZFS_PROP_SHARENFS)
+		return (0);
+
+	for (cn = uu_list_first(clp->cl_list); cn != NULL;
+	    cn = uu_list_next(clp->cl_list, cn)) {
+		if (zfs_unshare_nfs(cn->cn_handle, NULL) != 0)
+			ret = -1;
+	}
+
+	return (ret);
+}
+
+/*
+ * Check if there is any child exported to a local zone in a given changelist.
+ * This information has already been recorded while gathering the changelist
+ * via changelist_gather().
+ */
+int
+changelist_haszonedchild(prop_changelist_t *clp)
+{
+	return (clp->cl_haszonedchild);
+}
+
+/*
+ * Remove a node from a gathered list.
+ */
+void
+changelist_remove(zfs_handle_t *zhp, prop_changelist_t *clp)
+{
+	prop_changenode_t *cn;
+
+	for (cn = uu_list_first(clp->cl_list); cn != NULL;
+	    cn = uu_list_next(clp->cl_list, cn)) {
+
+		if (strcmp(cn->cn_handle->zfs_name, zhp->zfs_name) == 0) {
+			uu_list_remove(clp->cl_list, cn);
+			zfs_close(cn->cn_handle);
+			free(cn);
+			return;
+		}
+	}
+}
+
+/*
+ * Release any memory associated with a changelist.
+ */
+void
+changelist_free(prop_changelist_t *clp)
+{
+	prop_changenode_t *cn;
+	void *cookie;
+
+	if (clp->cl_list) {
+		cookie = NULL;
+		while ((cn = uu_list_teardown(clp->cl_list, &cookie)) != NULL) {
+			zfs_close(cn->cn_handle);
+			free(cn);
+		}
+
+		uu_list_destroy(clp->cl_list);
+	}
+	if (clp->cl_pool)
+		uu_list_pool_destroy(clp->cl_pool);
+
+	free(clp);
+}
+
+static int
+change_one(zfs_handle_t *zhp, void *data)
+{
+	prop_changelist_t *clp = data;
+	char property[ZFS_MAXPROPLEN];
+	char where[64];
+	prop_changenode_t *cn;
+	zfs_source_t sourcetype;
+
+	/*
+	 * We only want to unmount/unshare those filesystems that may inherit
+	 * from the target filesystem.  If we find any filesystem with a
+	 * locally set mountpoint, we ignore any children since changing the
+	 * property will not affect them.  If this is a rename, we iterate
+	 * over all children regardless, since we need them unmounted in
+	 * order to do the rename.  Also, if this is a volume and we're doing
+	 * a rename, then always add it to the changelist.
+	 */
+
+	if (!(ZFS_IS_VOLUME(zhp) && clp->cl_realprop == ZFS_PROP_NAME) &&
+	    zfs_prop_get(zhp, clp->cl_prop, property,
+	    sizeof (property), &sourcetype, where, sizeof (where),
+	    B_FALSE) != 0) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	if (clp->cl_alldependents || clp->cl_allchildren ||
+	    sourcetype == ZFS_SRC_DEFAULT || sourcetype == ZFS_SRC_INHERITED) {
+		if ((cn = zfs_alloc(zfs_get_handle(zhp),
+		    sizeof (prop_changenode_t))) == NULL) {
+			zfs_close(zhp);
+			return (-1);
+		}
+
+		cn->cn_handle = zhp;
+		cn->cn_mounted = zfs_is_mounted(zhp, NULL);
+		cn->cn_shared = zfs_is_shared(zhp);
+		cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+
+		/* Indicate if any child is exported to a local zone. */
+		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
+			clp->cl_haszonedchild = B_TRUE;
+
+		uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
+
+		if (clp->cl_sorted) {
+			uu_list_index_t idx;
+
+			(void) uu_list_find(clp->cl_list, cn, NULL,
+			    &idx);
+			uu_list_insert(clp->cl_list, cn, idx);
+		} else {
+			ASSERT(!clp->cl_alldependents);
+			verify(uu_list_insert_before(clp->cl_list,
+			    uu_list_first(clp->cl_list), cn) == 0);
+		}
+
+		if (!clp->cl_alldependents)
+			return (zfs_iter_children(zhp, change_one, data));
+	} else {
+		zfs_close(zhp);
+	}
+
+	return (0);
+}
+
+/*ARGSUSED*/
+static int
+compare_mountpoints(const void *a, const void *b, void *unused)
+{
+	const prop_changenode_t *ca = a;
+	const prop_changenode_t *cb = b;
+
+	char mounta[MAXPATHLEN];
+	char mountb[MAXPATHLEN];
+
+	boolean_t hasmounta, hasmountb;
+
+	/*
+	 * When unsharing or unmounting filesystems, we need to do it in
+	 * mountpoint order.  This allows the user to have a mountpoint
+	 * hierarchy that is different from the dataset hierarchy, and still
+	 * allow it to be changed.  However, if either dataset doesn't have a
+	 * mountpoint (because it is a volume or a snapshot), we place it at the
+	 * end of the list, because it doesn't affect our change at all.
+	 */
+	hasmounta = (zfs_prop_get(ca->cn_handle, ZFS_PROP_MOUNTPOINT, mounta,
+	    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
+	hasmountb = (zfs_prop_get(cb->cn_handle, ZFS_PROP_MOUNTPOINT, mountb,
+	    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
+
+	if (!hasmounta && hasmountb)
+		return (-1);
+	else if (hasmounta && !hasmountb)
+		return (1);
+	else if (!hasmounta && !hasmountb)
+		return (0);
+	else
+		return (strcmp(mountb, mounta));
+}
+
+/*
+ * Given a ZFS handle and a property, construct a complete list of datasets
+ * that need to be modified as part of this process.  For anything but the
+ * 'mountpoint' and 'sharenfs' properties, this just returns an empty list.
+ * Otherwise, we iterate over all children and look for any datasets that
+ * inherit the property.  For each such dataset, we add it to the list and
+ * mark whether it was shared beforehand.
+ */
+prop_changelist_t *
+changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
+{
+	prop_changelist_t *clp;
+	prop_changenode_t *cn;
+	zfs_handle_t *temp;
+	char property[ZFS_MAXPROPLEN];
+	uu_compare_fn_t *compare = NULL;
+
+	if ((clp = zfs_alloc(zhp->zfs_hdl, sizeof (prop_changelist_t))) == NULL)
+		return (NULL);
+
+	/*
+	 * For mountpoint-related tasks, we want to sort everything by
+	 * mountpoint, so that we mount and unmount them in the appropriate
+	 * order, regardless of their position in the hierarchy.
+	 */
+	if (prop == ZFS_PROP_NAME || prop == ZFS_PROP_ZONED ||
+	    prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS) {
+		compare = compare_mountpoints;
+		clp->cl_sorted = B_TRUE;
+	}
+
+	clp->cl_pool = uu_list_pool_create("changelist_pool",
+	    sizeof (prop_changenode_t),
+	    offsetof(prop_changenode_t, cn_listnode),
+	    compare, 0);
+	if (clp->cl_pool == NULL) {
+		assert(uu_error() == UU_ERROR_NO_MEMORY);
+		(void) zfs_error(zhp->zfs_hdl, EZFS_NOMEM, "internal error");
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	clp->cl_list = uu_list_create(clp->cl_pool, NULL,
+	    clp->cl_sorted ? UU_LIST_SORTED : 0);
+	clp->cl_flags = flags;
+
+	if (clp->cl_list == NULL) {
+		assert(uu_error() == UU_ERROR_NO_MEMORY);
+		(void) zfs_error(zhp->zfs_hdl, EZFS_NOMEM, "internal error");
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	/*
+	 * If this is a rename or the 'zoned' property, we pretend we're
+	 * changing the mountpoint and flag it so we can catch all children in
+	 * change_one().
+	 *
+	 * Flag cl_alldependents to catch all children plus the dependents
+	 * (clones) that are not in the hierarchy.
+	 */
+	if (prop == ZFS_PROP_NAME) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+		clp->cl_alldependents = B_TRUE;
+	} else if (prop == ZFS_PROP_ZONED) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+		clp->cl_allchildren = B_TRUE;
+	} else if (prop == ZFS_PROP_CANMOUNT) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+	} else if (prop == ZFS_PROP_VOLSIZE) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+	} else {
+		clp->cl_prop = prop;
+	}
+	clp->cl_realprop = prop;
+
+	if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
+	    clp->cl_prop != ZFS_PROP_SHARENFS &&
+	    clp->cl_prop != ZFS_PROP_SHAREISCSI)
+		return (clp);
+
+	if (clp->cl_alldependents) {
+		if (zfs_iter_dependents(zhp, B_TRUE, change_one, clp) != 0) {
+			changelist_free(clp);
+			return (NULL);
+		}
+	} else if (zfs_iter_children(zhp, change_one, clp) != 0) {
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	/*
+	 * We have to re-open ourselves because we auto-close all the handles
+	 * and can't tell the difference.
+	 */
+	if ((temp = zfs_open(zhp->zfs_hdl, zfs_get_name(zhp),
+	    ZFS_TYPE_ANY)) == NULL) {
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	/*
+	 * Always add ourself to the list.  We add ourselves to the end so that
+	 * we're the last to be unmounted.
+	 */
+	if ((cn = zfs_alloc(zhp->zfs_hdl,
+	    sizeof (prop_changenode_t))) == NULL) {
+		zfs_close(temp);
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	cn->cn_handle = temp;
+	cn->cn_mounted = zfs_is_mounted(temp, NULL);
+	cn->cn_shared = zfs_is_shared(temp);
+	cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+
+	uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
+	if (clp->cl_sorted) {
+		uu_list_index_t idx;
+		(void) uu_list_find(clp->cl_list, cn, NULL, &idx);
+		uu_list_insert(clp->cl_list, cn, idx);
+	} else {
+		verify(uu_list_insert_after(clp->cl_list,
+		    uu_list_last(clp->cl_list), cn) == 0);
+	}
+
+	/*
+	 * If the property was previously 'legacy' or 'none', record this fact,
+	 * as the behavior of changelist_postfix() will be different.
+	 */
+	if (zfs_prop_get(zhp, prop, property, sizeof (property),
+	    NULL, NULL, 0, B_FALSE) == 0 &&
+	    (strcmp(property, "legacy") == 0 || strcmp(property, "none") == 0 ||
+	    strcmp(property, "off") == 0))
+		clp->cl_waslegacy = B_TRUE;
+
+	return (clp);
+}
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
@@ -0,0 +1,853 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Internal utility routines for the ZFS library.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <sys/mnttab.h>
+#include <sys/mntent.h>
+#include <sys/types.h>
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+
+int
+libzfs_errno(libzfs_handle_t *hdl)
+{
+	return (hdl->libzfs_error);
+}
+
+const char *
+libzfs_error_action(libzfs_handle_t *hdl)
+{
+	return (hdl->libzfs_action);
+}
+
+const char *
+libzfs_error_description(libzfs_handle_t *hdl)
+{
+	if (hdl->libzfs_desc[0] != '\0')
+		return (hdl->libzfs_desc);
+
+	switch (hdl->libzfs_error) {
+	case EZFS_NOMEM:
+		return (dgettext(TEXT_DOMAIN, "out of memory"));
+	case EZFS_BADPROP:
+		return (dgettext(TEXT_DOMAIN, "invalid property value"));
+	case EZFS_PROPREADONLY:
+		return (dgettext(TEXT_DOMAIN, "read only property"));
+	case EZFS_PROPTYPE:
+		return (dgettext(TEXT_DOMAIN, "property doesn't apply to "
+		    "datasets of this type"));
+	case EZFS_PROPNONINHERIT:
+		return (dgettext(TEXT_DOMAIN, "property cannot be inherited"));
+	case EZFS_PROPSPACE:
+		return (dgettext(TEXT_DOMAIN, "invalid quota or reservation"));
+	case EZFS_BADTYPE:
+		return (dgettext(TEXT_DOMAIN, "operation not applicable to "
+		    "datasets of this type"));
+	case EZFS_BUSY:
+		return (dgettext(TEXT_DOMAIN, "pool or dataset is busy"));
+	case EZFS_EXISTS:
+		return (dgettext(TEXT_DOMAIN, "pool or dataset exists"));
+	case EZFS_NOENT:
+		return (dgettext(TEXT_DOMAIN, "no such pool or dataset"));
+	case EZFS_BADSTREAM:
+		return (dgettext(TEXT_DOMAIN, "invalid backup stream"));
+	case EZFS_DSREADONLY:
+		return (dgettext(TEXT_DOMAIN, "dataset is read only"));
+	case EZFS_VOLTOOBIG:
+		return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for "
+		    "this system"));
+	case EZFS_VOLHASDATA:
+		return (dgettext(TEXT_DOMAIN, "volume has data"));
+	case EZFS_INVALIDNAME:
+		return (dgettext(TEXT_DOMAIN, "invalid name"));
+	case EZFS_BADRESTORE:
+		return (dgettext(TEXT_DOMAIN, "unable to restore to "
+		    "destination"));
+	case EZFS_BADBACKUP:
+		return (dgettext(TEXT_DOMAIN, "backup failed"));
+	case EZFS_BADTARGET:
+		return (dgettext(TEXT_DOMAIN, "invalid target vdev"));
+	case EZFS_NODEVICE:
+		return (dgettext(TEXT_DOMAIN, "no such device in pool"));
+	case EZFS_BADDEV:
+		return (dgettext(TEXT_DOMAIN, "invalid device"));
+	case EZFS_NOREPLICAS:
+		return (dgettext(TEXT_DOMAIN, "no valid replicas"));
+	case EZFS_RESILVERING:
+		return (dgettext(TEXT_DOMAIN, "currently resilvering"));
+	case EZFS_BADVERSION:
+		return (dgettext(TEXT_DOMAIN, "unsupported version"));
+	case EZFS_POOLUNAVAIL:
+		return (dgettext(TEXT_DOMAIN, "pool is unavailable"));
+	case EZFS_DEVOVERFLOW:
+		return (dgettext(TEXT_DOMAIN, "too many devices in one vdev"));
+	case EZFS_BADPATH:
+		return (dgettext(TEXT_DOMAIN, "must be an absolute path"));
+	case EZFS_CROSSTARGET:
+		return (dgettext(TEXT_DOMAIN, "operation crosses datasets or "
+		    "pools"));
+	case EZFS_ZONED:
+		return (dgettext(TEXT_DOMAIN, "dataset in use by local zone"));
+	case EZFS_MOUNTFAILED:
+		return (dgettext(TEXT_DOMAIN, "mount failed"));
+	case EZFS_UMOUNTFAILED:
+		return (dgettext(TEXT_DOMAIN, "umount failed"));
+	case EZFS_UNSHARENFSFAILED:
+		return (dgettext(TEXT_DOMAIN, "unshare(1M) failed"));
+	case EZFS_SHARENFSFAILED:
+		return (dgettext(TEXT_DOMAIN, "share(1M) failed"));
+	case EZFS_DEVLINKS:
+		return (dgettext(TEXT_DOMAIN, "failed to create /dev links"));
+	case EZFS_PERM:
+		return (dgettext(TEXT_DOMAIN, "permission denied"));
+	case EZFS_NOSPC:
+		return (dgettext(TEXT_DOMAIN, "out of space"));
+	case EZFS_IO:
+		return (dgettext(TEXT_DOMAIN, "I/O error"));
+	case EZFS_INTR:
+		return (dgettext(TEXT_DOMAIN, "signal received"));
+	case EZFS_ISSPARE:
+		return (dgettext(TEXT_DOMAIN, "device is reserved as a hot "
+		    "spare"));
+	case EZFS_INVALCONFIG:
+		return (dgettext(TEXT_DOMAIN, "invalid vdev configuration"));
+	case EZFS_RECURSIVE:
+		return (dgettext(TEXT_DOMAIN, "recursive dataset dependency"));
+	case EZFS_NOHISTORY:
+		return (dgettext(TEXT_DOMAIN, "no history available"));
+	case EZFS_UNSHAREISCSIFAILED:
+		return (dgettext(TEXT_DOMAIN,
+		    "iscsitgtd failed request to unshare"));
+	case EZFS_SHAREISCSIFAILED:
+		return (dgettext(TEXT_DOMAIN,
+		    "iscsitgtd failed request to share"));
+	case EZFS_POOLPROPS:
+		return (dgettext(TEXT_DOMAIN, "failed to retrieve "
+		    "pool properties"));
+	case EZFS_POOL_NOTSUP:
+		return (dgettext(TEXT_DOMAIN, "operation not supported "
+		    "on this type of pool"));
+	case EZFS_POOL_INVALARG:
+		return (dgettext(TEXT_DOMAIN, "invalid argument for "
+		    "this pool operation"));
+	case EZFS_NAMETOOLONG:
+		return (dgettext(TEXT_DOMAIN, "dataset name is too long"));
+	case EZFS_UNKNOWN:
+		return (dgettext(TEXT_DOMAIN, "unknown error"));
+	default:
+		assert(hdl->libzfs_error == 0);
+		return (dgettext(TEXT_DOMAIN, "no error"));
+	}
+}
+
+/*PRINTFLIKE2*/
+void
+zfs_error_aux(libzfs_handle_t *hdl, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	(void) vsnprintf(hdl->libzfs_desc, sizeof (hdl->libzfs_desc),
+	    fmt, ap);
+	hdl->libzfs_desc_active = 1;
+
+	va_end(ap);
+}
+
+static void
+zfs_verror(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap)
+{
+	(void) vsnprintf(hdl->libzfs_action, sizeof (hdl->libzfs_action),
+	    fmt, ap);
+	hdl->libzfs_error = error;
+
+	if (hdl->libzfs_desc_active)
+		hdl->libzfs_desc_active = 0;
+	else
+		hdl->libzfs_desc[0] = '\0';
+
+	if (hdl->libzfs_printerr) {
+		if (error == EZFS_UNKNOWN) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "internal "
+			    "error: %s\n"), libzfs_error_description(hdl));
+			abort();
+		}
+
+		(void) fprintf(stderr, "%s: %s\n", hdl->libzfs_action,
+		    libzfs_error_description(hdl));
+		if (error == EZFS_NOMEM)
+			exit(1);
+	}
+}
+
+int
+zfs_error(libzfs_handle_t *hdl, int error, const char *msg)
+{
+	return (zfs_error_fmt(hdl, error, "%s", msg));
+}
+
+/*PRINTFLIKE3*/
+int
+zfs_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	zfs_verror(hdl, error, fmt, ap);
+
+	va_end(ap);
+
+	return (-1);
+}
+
+static int
+zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
+    va_list ap)
+{
+	switch (error) {
+	case EPERM:
+	case EACCES:
+		zfs_verror(hdl, EZFS_PERM, fmt, ap);
+		return (-1);
+
+	case EIO:
+		zfs_verror(hdl, EZFS_IO, fmt, ap);
+		return (-1);
+
+	case EINTR:
+		zfs_verror(hdl, EZFS_INTR, fmt, ap);
+		return (-1);
+	}
+
+	return (0);
+}
+
+int
+zfs_standard_error(libzfs_handle_t *hdl, int error, const char *msg)
+{
+	return (zfs_standard_error_fmt(hdl, error, "%s", msg));
+}
+
+/*PRINTFLIKE3*/
+int
+zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	if (zfs_common_error(hdl, error, fmt, ap) != 0) {
+		va_end(ap);
+		return (-1);
+	}
+
+	switch (error) {
+	case ENXIO:
+		zfs_verror(hdl, EZFS_IO, fmt, ap);
+		break;
+
+	case ENOENT:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset does not exist"));
+		zfs_verror(hdl, EZFS_NOENT, fmt, ap);
+		break;
+
+	case ENOSPC:
+	case EDQUOT:
+		zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
+		break;
+
+	case EEXIST:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset already exists"));
+		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+		break;
+
+	case EBUSY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset is busy"));
+		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
+		break;
+
+	case ENAMETOOLONG:
+		zfs_verror(hdl, EZFS_NAMETOOLONG, fmt, ap);
+		break;
+
+	default:
+		zfs_error_aux(hdl, strerror(errno));
+		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
+		break;
+	}
+
+	va_end(ap);
+	return (-1);
+}
+
+int
+zpool_standard_error(libzfs_handle_t *hdl, int error, const char *msg)
+{
+	return (zpool_standard_error_fmt(hdl, error, "%s", msg));
+}
+
+/*PRINTFLIKE3*/
+int
+zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	if (zfs_common_error(hdl, error, fmt, ap) != 0) {
+		va_end(ap);
+		return (-1);
+	}
+
+	switch (error) {
+	case ENODEV:
+		zfs_verror(hdl, EZFS_NODEVICE, fmt, ap);
+		break;
+
+	case ENOENT:
+		zfs_error_aux(hdl,
+		    dgettext(TEXT_DOMAIN, "no such pool or dataset"));
+		zfs_verror(hdl, EZFS_NOENT, fmt, ap);
+		break;
+
+	case EEXIST:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "pool already exists"));
+		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+		break;
+
+	case EBUSY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy"));
+		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+		break;
+
+	case ENXIO:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "one or more devices is currently unavailable"));
+		zfs_verror(hdl, EZFS_BADDEV, fmt, ap);
+		break;
+
+	case ENAMETOOLONG:
+		zfs_verror(hdl, EZFS_DEVOVERFLOW, fmt, ap);
+		break;
+
+	case ENOTSUP:
+		zfs_verror(hdl, EZFS_POOL_NOTSUP, fmt, ap);
+		break;
+
+	case EINVAL:
+		zfs_verror(hdl, EZFS_POOL_INVALARG, fmt, ap);
+		break;
+
+	default:
+		zfs_error_aux(hdl, strerror(error));
+		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
+	}
+
+	va_end(ap);
+	return (-1);
+}
+
+/*
+ * Report an out-of-memory error; exits if error printing is enabled.
+ */
+int
+no_memory(libzfs_handle_t *hdl)
+{
+	return (zfs_error(hdl, EZFS_NOMEM, "internal error"));
+}
+
+/*
+ * A safe form of malloc() which will die if the allocation fails.
+ */
+void *
+zfs_alloc(libzfs_handle_t *hdl, size_t size)
+{
+	void *data;
+
+	if ((data = calloc(1, size)) == NULL)
+		(void) no_memory(hdl);
+
+	return (data);
+}
+
+/*
+ * A safe form of realloc(), which also zeroes newly allocated space.
+ */
+void *
+zfs_realloc(libzfs_handle_t *hdl, void *ptr, size_t oldsize, size_t newsize)
+{
+	void *ret;
+
+	if ((ret = realloc(ptr, newsize)) == NULL) {
+		(void) no_memory(hdl);
+		free(ptr);
+		return (NULL);
+	}
+
+	bzero((char *)ret + oldsize, (newsize - oldsize));
+	return (ret);
+}
+
+/*
+ * A safe form of strdup() which will die if the allocation fails.
+ */
+char *
+zfs_strdup(libzfs_handle_t *hdl, const char *str)
+{
+	char *ret;
+
+	if ((ret = strdup(str)) == NULL)
+		(void) no_memory(hdl);
+
+	return (ret);
+}
+
+/*
+ * Convert a number to an appropriately human-readable output.
+ */
+void
+zfs_nicenum(uint64_t num, char *buf, size_t buflen)
+{
+	uint64_t n = num;
+	int index = 0;
+	char u;
+
+	while (n >= 1024) {
+		n /= 1024;
+		index++;
+	}
+
+	u = " KMGTPE"[index];
+
+	if (index == 0) {
+		(void) snprintf(buf, buflen, "%llu", n);
+	} else if ((num & ((1ULL << 10 * index) - 1)) == 0) {
+		/*
+		 * If this is an even multiple of the base, always display
+		 * without any decimal precision.
+		 */
+		(void) snprintf(buf, buflen, "%llu%c", n, u);
+	} else {
+		/*
+		 * We want to choose a precision that reflects the best choice
+		 * for fitting in 5 characters.  This can get rather tricky when
+		 * we have numbers that are very close to an order of magnitude.
+		 * For example, when displaying 10239 (which is really 9.999K),
+		 * we want only a single place of precision for 10.0K.  We could
+		 * develop some complex heuristics for this, but it's much
+		 * easier just to try each combination in turn.
+		 */
+		int i;
+		for (i = 2; i >= 0; i--) {
+			(void) snprintf(buf, buflen, "%.*f%c", i,
+			    (double)num / (1ULL << 10 * index), u);
+			if (strlen(buf) <= 5)
+				break;
+		}
+	}
+}
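+/*
+ * For example, a value of 1536 is expected to format as "1.50K", while an
+ * exact multiple of the base such as 2097152 formats as "2M".
+ */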
+
+void
+libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr)
+{
+	hdl->libzfs_printerr = printerr;
+}
+
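+/*
+ * Make sure the zfs kernel module is present, loading it with kldload()
+ * if it has not been loaded yet.
+ */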
+static int
+libzfs_load(void)
+{
+	int error;
+
+	if (modfind("zfs") < 0) {
+		/* Not present in kernel, try loading it. */
+		if (kldload("zfs") < 0 || modfind("zfs") < 0) {
+			error = errno;
+			if (error != EEXIST)
+				return (error);
+		}
+	}
+	return (0);
+}
+
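+/*
+ * Allocate a library handle, open /dev/zfs (loading the kernel module on
+ * demand) and the mount table, and open the sharetab if it is available.
+ */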
+libzfs_handle_t *
+libzfs_init(void)
+{
+	libzfs_handle_t *hdl;
+
+	if ((hdl = calloc(sizeof (libzfs_handle_t), 1)) == NULL) {
+		return (NULL);
+	}
+
+	if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
+		if (libzfs_load() == 0)
+			hdl->libzfs_fd = open(ZFS_DEV, O_RDWR);
+		if (hdl->libzfs_fd < 0) {
+			free(hdl);
+			return (NULL);
+		}
+	}
+
+	if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) {
+		(void) close(hdl->libzfs_fd);
+		free(hdl);
+		return (NULL);
+	}
+
+	hdl->libzfs_sharetab = fopen(ZFS_EXPORTS_PATH, "r");
+
+	return (hdl);
+}
+
+void
+libzfs_fini(libzfs_handle_t *hdl)
+{
+	(void) close(hdl->libzfs_fd);
+	if (hdl->libzfs_mnttab)
+		(void) fclose(hdl->libzfs_mnttab);
+	if (hdl->libzfs_sharetab)
+		(void) fclose(hdl->libzfs_sharetab);
+	namespace_clear(hdl);
+	free(hdl);
+}
+
+libzfs_handle_t *
+zpool_get_handle(zpool_handle_t *zhp)
+{
+	return (zhp->zpool_hdl);
+}
+
+libzfs_handle_t *
+zfs_get_handle(zfs_handle_t *zhp)
+{
+	return (zhp->zfs_hdl);
+}
+
+/*
+ * Given a name, determine whether or not it's a valid path
+ * (starts with '/' or "./").  If so, walk the mnttab trying
+ * to match the device number.  If not, treat the path as an
+ * fs/vol/snap name.
+ */
+zfs_handle_t *
+zfs_path_to_zhandle(libzfs_handle_t *hdl, char *path, zfs_type_t argtype)
+{
+	struct statfs statbuf;
+
+	if (path[0] != '/' && strncmp(path, "./", strlen("./")) != 0) {
+		/*
+		 * It's not a valid path; assume it's a name of type 'argtype'.
+		 */
+		return (zfs_open(hdl, path, argtype));
+	}
+
+	if (statfs(path, &statbuf) != 0) {
+		(void) fprintf(stderr, "%s: %s\n", path, strerror(errno));
+		return (NULL);
+	}
+
+	if (strcmp(statbuf.f_fstypename, MNTTYPE_ZFS) != 0) {
+		(void) fprintf(stderr, gettext("'%s': not a ZFS filesystem\n"),
+		    path);
+		return (NULL);
+	}
+
+	return (zfs_open(hdl, statbuf.f_mntfromname, ZFS_TYPE_FILESYSTEM));
+}
+
+/*
+ * Initialize the zc_nvlist_dst member to prepare for receiving an nvlist from
+ * an ioctl().
+ */
+int
+zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
+{
+	if (len == 0)
+		len = 2048;
+	zc->zc_nvlist_dst_size = len;
+	if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t)
+	    zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Called when an ioctl() which returns an nvlist fails with ENOMEM.  This will
+ * expand the nvlist to the size specified in 'zc_nvlist_dst_size', which was
+ * filled in by the kernel to indicate the actual required size.
+ */
+int
+zcmd_expand_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc)
+{
+	free((void *)(uintptr_t)zc->zc_nvlist_dst);
+	if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t)
+	    zfs_alloc(hdl, zc->zc_nvlist_dst_size))
+	    == 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Called to free the src and dst nvlists stored in the command structure.
+ */
+void
+zcmd_free_nvlists(zfs_cmd_t *zc)
+{
+	free((void *)(uintptr_t)zc->zc_nvlist_src);
+	free((void *)(uintptr_t)zc->zc_nvlist_dst);
+}
+
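+/*
+ * Pack the given nvlist into a contiguous buffer and attach it to the
+ * command structure as the source nvlist for a subsequent ioctl().  The
+ * packed length is optionally returned through 'size'.
+ */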
+int
+zcmd_write_src_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl,
+    size_t *size)
+{
+	char *packed;
+	size_t len;
+
+	verify(nvlist_size(nvl, &len, NV_ENCODE_NATIVE) == 0);
+
+	if ((packed = zfs_alloc(hdl, len)) == NULL)
+		return (-1);
+
+	verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_NATIVE, 0) == 0);
+
+	zc->zc_nvlist_src = (uint64_t)(uintptr_t)packed;
+	zc->zc_nvlist_src_size = len;
+
+	if (size)
+		*size = len;
+	return (0);
+}
+
+/*
+ * Unpacks an nvlist from the ZFS ioctl command structure.
+ */
+int
+zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp)
+{
+	if (nvlist_unpack((void *)(uintptr_t)zc->zc_nvlist_dst,
+	    zc->zc_nvlist_dst_size, nvlp, 0) != 0)
+		return (no_memory(hdl));
+
+	return (0);
+}
+
+static void
+zfs_print_prop_headers(libzfs_get_cbdata_t *cbp)
+{
+	zfs_proplist_t *pl = cbp->cb_proplist;
+	int i;
+	char *title;
+	size_t len;
+
+	cbp->cb_first = B_FALSE;
+	if (cbp->cb_scripted)
+		return;
+
+	/*
+	 * Start with the length of the column headers.
+	 */
+	cbp->cb_colwidths[GET_COL_NAME] = strlen(dgettext(TEXT_DOMAIN, "NAME"));
+	cbp->cb_colwidths[GET_COL_PROPERTY] = strlen(dgettext(TEXT_DOMAIN,
+	    "PROPERTY"));
+	cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN,
+	    "VALUE"));
+	cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN,
+	    "SOURCE"));
+
+	/*
+	 * Go through and calculate the widths for each column.  For the
+	 * 'source' column, we kludge it up by taking the worst-case scenario of
+	 * inheriting from the longest name.  This is acceptable because in the
+	 * majority of cases 'SOURCE' is the last column displayed, and we don't
+	 * use the width anyway.  Note that the 'VALUE' column can be oversized,
+	 * if the name of the property is much longer than any values we find.
+	 */
+	for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) {
+		/*
+		 * 'PROPERTY' column
+		 */
+		if (pl->pl_prop != ZFS_PROP_INVAL) {
+			len = strlen(zfs_prop_to_name(pl->pl_prop));
+			if (len > cbp->cb_colwidths[GET_COL_PROPERTY])
+				cbp->cb_colwidths[GET_COL_PROPERTY] = len;
+		} else {
+			len = strlen(pl->pl_user_prop);
+			if (len > cbp->cb_colwidths[GET_COL_PROPERTY])
+				cbp->cb_colwidths[GET_COL_PROPERTY] = len;
+		}
+
+		/*
+		 * 'VALUE' column
+		 */
+		if ((pl->pl_prop != ZFS_PROP_NAME || !pl->pl_all) &&
+		    pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE])
+			cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width;
+
+		/*
+		 * 'NAME' and 'SOURCE' columns
+		 */
+		if (pl->pl_prop == ZFS_PROP_NAME &&
+		    pl->pl_width > cbp->cb_colwidths[GET_COL_NAME]) {
+			cbp->cb_colwidths[GET_COL_NAME] = pl->pl_width;
+			cbp->cb_colwidths[GET_COL_SOURCE] = pl->pl_width +
+			    strlen(dgettext(TEXT_DOMAIN, "inherited from"));
+		}
+	}
+
+	/*
+	 * Now go through and print the headers.
+	 */
+	for (i = 0; i < 4; i++) {
+		switch (cbp->cb_columns[i]) {
+		case GET_COL_NAME:
+			title = dgettext(TEXT_DOMAIN, "NAME");
+			break;
+		case GET_COL_PROPERTY:
+			title = dgettext(TEXT_DOMAIN, "PROPERTY");
+			break;
+		case GET_COL_VALUE:
+			title = dgettext(TEXT_DOMAIN, "VALUE");
+			break;
+		case GET_COL_SOURCE:
+			title = dgettext(TEXT_DOMAIN, "SOURCE");
+			break;
+		default:
+			title = NULL;
+		}
+
+		if (title != NULL) {
+			if (i == 3 || cbp->cb_columns[i + 1] == 0)
+				(void) printf("%s", title);
+			else
+				(void) printf("%-*s  ",
+				    cbp->cb_colwidths[cbp->cb_columns[i]],
+				    title);
+		}
+	}
+	(void) printf("\n");
+}
+
+/*
+ * Display a single line of output, according to the settings in the callback
+ * structure.
+ */
+void
+libzfs_print_one_property(const char *name, libzfs_get_cbdata_t *cbp,
+    const char *propname, const char *value, zfs_source_t sourcetype,
+    const char *source)
+{
+	int i;
+	const char *str;
+	char buf[128];
+
+	/*
+	 * Ignore those source types that the user has chosen to ignore.
+	 */
+	if ((sourcetype & cbp->cb_sources) == 0)
+		return;
+
+	if (cbp->cb_first)
+		zfs_print_prop_headers(cbp);
+
+	for (i = 0; i < 4; i++) {
+		switch (cbp->cb_columns[i]) {
+		case GET_COL_NAME:
+			str = name;
+			break;
+
+		case GET_COL_PROPERTY:
+			str = propname;
+			break;
+
+		case GET_COL_VALUE:
+			str = value;
+			break;
+
+		case GET_COL_SOURCE:
+			switch (sourcetype) {
+			case ZFS_SRC_NONE:
+				str = "-";
+				break;
+
+			case ZFS_SRC_DEFAULT:
+				str = "default";
+				break;
+
+			case ZFS_SRC_LOCAL:
+				str = "local";
+				break;
+
+			case ZFS_SRC_TEMPORARY:
+				str = "temporary";
+				break;
+
+			case ZFS_SRC_INHERITED:
+				(void) snprintf(buf, sizeof (buf),
+				    "inherited from %s", source);
+				str = buf;
+				break;
+			}
+			break;
+
+		default:
+			continue;
+		}
+
+		if (cbp->cb_columns[i + 1] == 0)
+			(void) printf("%s", str);
+		else if (cbp->cb_scripted)
+			(void) printf("%s\t", str);
+		else
+			(void) printf("%-*s  ",
+			    cbp->cb_colwidths[cbp->cb_columns[i]],
+			    str);
+
+	}
+
+	(void) printf("\n");
+}
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
@@ -0,0 +1,2055 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <devid.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zio.h>
+#include <strings.h>
+#include <umem.h>
+
+#include "zfs_namecheck.h"
+#include "zfs_prop.h"
+#include "libzfs_impl.h"
+
+/*
+ * Validate the given pool name, optionally putting an extended error message in
+ * 'buf'.
+ */
+static boolean_t
+zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
+{
+	namecheck_err_t why;
+	char what;
+	int ret;
+
+	ret = pool_namecheck(pool, &why, &what);
+
+	/*
+	 * The rules for reserved pool names were extended at a later point.
+	 * But we need to support users with existing pools that may now be
+	 * invalid.  So we only check for this expanded set of names during a
+	 * create (or import), and only in userland.
+	 */
+	if (ret == 0 && !isopen &&
+	    (strncmp(pool, "mirror", 6) == 0 ||
+	    strncmp(pool, "raidz", 5) == 0 ||
+	    strncmp(pool, "spare", 5) == 0)) {
+		zfs_error_aux(hdl,
+		    dgettext(TEXT_DOMAIN, "name is reserved"));
+		return (B_FALSE);
+	}
+
+	if (ret != 0) {
+		if (hdl != NULL) {
+			switch (why) {
+			case NAME_ERR_TOOLONG:
+				zfs_error_aux(hdl,
+				    dgettext(TEXT_DOMAIN, "name is too long"));
+				break;
+
+			case NAME_ERR_INVALCHAR:
+				zfs_error_aux(hdl,
+				    dgettext(TEXT_DOMAIN, "invalid character "
+				    "'%c' in pool name"), what);
+				break;
+
+			case NAME_ERR_NOLETTER:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "name must begin with a letter"));
+				break;
+
+			case NAME_ERR_RESERVED:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "name is reserved"));
+				break;
+
+			case NAME_ERR_DISKLIKE:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "pool name is reserved"));
+				break;
+
+			case NAME_ERR_LEADING_SLASH:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "leading slash in name"));
+				break;
+
+			case NAME_ERR_EMPTY_COMPONENT:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "empty component in name"));
+				break;
+
+			case NAME_ERR_TRAILING_SLASH:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "trailing slash in name"));
+				break;
+
+			case NAME_ERR_MULTIPLE_AT:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "multiple '@' delimiters in name"));
+				break;
+
+			}
+		}
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
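+/*
+ * Fetch all pool properties from the kernel, retrying with a larger
+ * destination buffer on ENOMEM, and cache the result in zhp->zpool_props.
+ */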
+static int
+zpool_get_all_props(zpool_handle_t *zhp)
+{
+	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
+		return (-1);
+
+	while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
+		if (errno == ENOMEM) {
+			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+				zcmd_free_nvlists(&zc);
+				return (-1);
+			}
+		} else {
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (-1);
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	return (0);
+}
+
+/*
+ * Open a handle to the given pool, even if the pool is currently in the FAULTED
+ * state.
+ */
+zpool_handle_t *
+zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
+{
+	zpool_handle_t *zhp;
+	boolean_t missing;
+
+	/*
+	 * Make sure the pool name is valid.
+	 */
+	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
+		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
+		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+		    pool);
+		return (NULL);
+	}
+
+	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
+		return (NULL);
+
+	zhp->zpool_hdl = hdl;
+	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
+
+	if (zpool_refresh_stats(zhp, &missing) != 0) {
+		zpool_close(zhp);
+		return (NULL);
+	}
+
+	if (missing) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "no such pool"));
+		(void) zfs_error_fmt(hdl, EZFS_NOENT,
+		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+		    pool);
+		zpool_close(zhp);
+		return (NULL);
+	}
+
+	return (zhp);
+}
+
+/*
+ * Like the above, but silent on error.  Used when iterating over pools (because
+ * the configuration cache may be out of date).
+ */
+int
+zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
+{
+	zpool_handle_t *zhp;
+	boolean_t missing;
+
+	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
+		return (-1);
+
+	zhp->zpool_hdl = hdl;
+	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
+
+	if (zpool_refresh_stats(zhp, &missing) != 0) {
+		zpool_close(zhp);
+		return (-1);
+	}
+
+	if (missing) {
+		zpool_close(zhp);
+		*ret = NULL;
+		return (0);
+	}
+
+	*ret = zhp;
+	return (0);
+}
+
+/*
+ * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
+ * state.
+ */
+zpool_handle_t *
+zpool_open(libzfs_handle_t *hdl, const char *pool)
+{
+	zpool_handle_t *zhp;
+
+	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
+		return (NULL);
+
+	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
+		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
+		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
+		zpool_close(zhp);
+		return (NULL);
+	}
+
+	return (zhp);
+}
+
+/*
+ * Close the handle.  Simply frees the memory associated with the handle.
+ */
+void
+zpool_close(zpool_handle_t *zhp)
+{
+	if (zhp->zpool_config)
+		nvlist_free(zhp->zpool_config);
+	if (zhp->zpool_old_config)
+		nvlist_free(zhp->zpool_old_config);
+	if (zhp->zpool_props)
+		nvlist_free(zhp->zpool_props);
+	free(zhp);
+}
+
+/*
+ * Return the name of the pool.
+ */
+const char *
+zpool_get_name(zpool_handle_t *zhp)
+{
+	return (zhp->zpool_name);
+}
+
+/*
+ * Return the GUID of the pool.
+ */
+uint64_t
+zpool_get_guid(zpool_handle_t *zhp)
+{
+	uint64_t guid;
+
+	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
+	    &guid) == 0);
+	return (guid);
+}
+
+/*
+ * Return the version of the pool.
+ */
+uint64_t
+zpool_get_version(zpool_handle_t *zhp)
+{
+	uint64_t version;
+
+	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_VERSION,
+	    &version) == 0);
+
+	return (version);
+}
+
+/*
+ * Return the amount of space currently consumed by the pool.
+ */
+uint64_t
+zpool_get_space_used(zpool_handle_t *zhp)
+{
+	nvlist_t *nvroot;
+	vdev_stat_t *vs;
+	uint_t vsc;
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&vs, &vsc) == 0);
+
+	return (vs->vs_alloc);
+}
+
+/*
+ * Return the total space in the pool.
+ */
+uint64_t
+zpool_get_space_total(zpool_handle_t *zhp)
+{
+	nvlist_t *nvroot;
+	vdev_stat_t *vs;
+	uint_t vsc;
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&vs, &vsc) == 0);
+
+	return (vs->vs_space);
+}
+
+/*
+ * Return the alternate root for this pool, if any.
+ */
+int
+zpool_get_root(zpool_handle_t *zhp, char *buf, size_t buflen)
+{
+	zfs_cmd_t zc = { 0 };
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 ||
+	    zc.zc_value[0] == '\0')
+		return (-1);
+
+	(void) strlcpy(buf, zc.zc_value, buflen);
+
+	return (0);
+}
+
+/*
+ * Return the state of the pool (ACTIVE or UNAVAILABLE)
+ */
+int
+zpool_get_state(zpool_handle_t *zhp)
+{
+	return (zhp->zpool_state);
+}
+
+/*
+ * Create the named pool, using the provided vdev list.  It is assumed
+ * that the consumer has already validated the contents of the nvlist, so we
+ * don't have to worry about error semantics.
+ */
+int
+zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
+    const char *altroot)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+
+	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), pool);
+
+	if (!zpool_name_valid(hdl, B_FALSE, pool))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
+
+	if (altroot != NULL && altroot[0] != '/')
+		return (zfs_error_fmt(hdl, EZFS_BADPATH,
+		    dgettext(TEXT_DOMAIN, "bad alternate root '%s'"), altroot));
+
+	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
+		return (-1);
+
+	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
+
+	if (altroot != NULL)
+		(void) strlcpy(zc.zc_value, altroot, sizeof (zc.zc_value));
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CREATE, &zc) != 0) {
+		zcmd_free_nvlists(&zc);
+
+		switch (errno) {
+		case EBUSY:
+			/*
+			 * This can happen if the user has specified the same
+			 * device multiple times.  We can't reliably detect this
+			 * until we try to add it and see we already have a
+			 * label.
+			 */
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more vdevs refer to the same device"));
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
+
+		case EOVERFLOW:
+			/*
+			 * This occurs when one of the devices is below
+			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
+			 * device was the problem device since there's no
+			 * reliable way to determine device size from userland.
+			 */
+			{
+				char buf[64];
+
+				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
+
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "one or more devices is less than the "
+				    "minimum size (%s)"), buf);
+			}
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
+
+		case ENOSPC:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more devices is out of space"));
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
+
+		default:
+			return (zpool_standard_error(hdl, errno, msg));
+		}
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	/*
+	 * If this is an alternate root pool, then we automatically set the
+	 * mountpoint of the root dataset to be '/'.
+	 */
+	if (altroot != NULL) {
+		zfs_handle_t *zhp;
+
+		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_ANY)) != NULL);
+		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
+		    "/") == 0);
+
+		zfs_close(zhp);
+	}
+
+	return (0);
+}
+
+/*
+ * Destroy the given pool.  It is up to the caller to ensure that there are no
+ * datasets left in the pool.
+ */
+int
+zpool_destroy(zpool_handle_t *zhp)
+{
+	zfs_cmd_t zc = { 0 };
+	zfs_handle_t *zfp = NULL;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char msg[1024];
+
+	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
+	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
+	    ZFS_TYPE_FILESYSTEM)) == NULL)
+		return (-1);
+
+	if (zpool_remove_zvol_links(zhp) != 0)
+		return (-1);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
+		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+		    "cannot destroy '%s'"), zhp->zpool_name);
+
+		if (errno == EROFS) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more devices is read only"));
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		} else {
+			(void) zpool_standard_error(hdl, errno, msg);
+		}
+
+		if (zfp)
+			zfs_close(zfp);
+		return (-1);
+	}
+
+	if (zfp) {
+		remove_mountpoint(zfp);
+		zfs_close(zfp);
+	}
+
+	return (0);
+}
+
+/*
+ * Add the given vdevs to the pool.  The caller must have already performed the
+ * necessary verification to ensure that the vdev specification is well-formed.
+ */
+int
+zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char msg[1024];
+	nvlist_t **spares;
+	uint_t nspares;
+
+	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+	    "cannot add to '%s'"), zhp->zpool_name);
+
+	if (zpool_get_version(zhp) < ZFS_VERSION_SPARES &&
+	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
+		    "upgraded to add hot spares"));
+		return (zfs_error(hdl, EZFS_BADVERSION, msg));
+	}
+
+	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
+		return (-1);
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ADD, &zc) != 0) {
+		switch (errno) {
+		case EBUSY:
+			/*
+			 * This can happen if the user has specified the same
+			 * device multiple times.  We can't reliably detect this
+			 * until we try to add it and see we already have a
+			 * label.
+			 */
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more vdevs refer to the same device"));
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+			break;
+
+		case EOVERFLOW:
+			/*
+			 * This occurs when one of the devices is below
+			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
+			 * device was the problem device since there's no
+			 * reliable way to determine device size from userland.
+			 */
+			{
+				char buf[64];
+
+				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
+
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "device is less than the minimum "
+				    "size (%s)"), buf);
+			}
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+			break;
+
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded to add raidz2 vdevs"));
+			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
+			break;
+
+		case EDOM:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "root pool can not have concatenated devices"));
+			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
+			break;
+
+		default:
+			(void) zpool_standard_error(hdl, errno, msg);
+		}
+
+		ret = -1;
+	} else {
+		ret = 0;
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	return (ret);
+}
+
+/*
+ * Exports the pool from the system.  The caller must ensure that there are no
+ * mounted datasets in the pool.
+ */
+int
+zpool_export(zpool_handle_t *zhp)
+{
+	zfs_cmd_t zc = { 0 };
+
+	if (zpool_remove_zvol_links(zhp) != 0)
+		return (-1);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_EXPORT, &zc) != 0)
+		return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot export '%s'"),
+		    zhp->zpool_name));
+	return (0);
+}
+
+/*
+ * Import the given pool using the known configuration.  The configuration
+ * should have come from zpool_find_import().  The 'newname' and 'altroot'
+ * parameters control whether the pool is imported with a different name or with
+ * an alternate root, respectively.
+ */
+int
+zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
+    const char *altroot)
+{
+	zfs_cmd_t zc = { 0 };
+	char *thename;
+	char *origname;
+	int ret;
+
+	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+	    &origname) == 0);
+
+	if (newname != NULL) {
+		if (!zpool_name_valid(hdl, B_FALSE, newname))
+			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
+			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
+			    newname));
+		thename = (char *)newname;
+	} else {
+		thename = origname;
+	}
+
+	if (altroot != NULL && altroot[0] != '/')
+		return (zfs_error_fmt(hdl, EZFS_BADPATH,
+		    dgettext(TEXT_DOMAIN, "bad alternate root '%s'"),
+		    altroot));
+
+	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
+
+	if (altroot != NULL)
+		(void) strlcpy(zc.zc_value, altroot, sizeof (zc.zc_value));
+	else
+		zc.zc_value[0] = '\0';
+
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+	    &zc.zc_guid) == 0);
+
+	if (zcmd_write_src_nvlist(hdl, &zc, config, NULL) != 0)
+		return (-1);
+
+	ret = 0;
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
+		char desc[1024];
+		if (newname == NULL)
+			(void) snprintf(desc, sizeof (desc),
+			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
+			    thename);
+		else
+			(void) snprintf(desc, sizeof (desc),
+			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
+			    origname, thename);
+
+		switch (errno) {
+		case ENOTSUP:
+			/*
+			 * Unsupported version.
+			 */
+			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
+			break;
+
+		case EINVAL:
+			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
+			break;
+
+		default:
+			(void) zpool_standard_error(hdl, errno, desc);
+		}
+
+		ret = -1;
+	} else {
+		zpool_handle_t *zhp;
+		/*
+		 * This should never fail, but play it safe anyway.
+		 */
+		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
+			ret = -1;
+		} else if (zhp != NULL) {
+			ret = zpool_create_zvol_links(zhp);
+			zpool_close(zhp);
+		}
+	}
+
+	zcmd_free_nvlists(&zc);
+	return (ret);
+}
+
+/*
+ * Scrub the pool.
+ */
+int
+zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_cookie = type;
+
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_SCRUB, &zc) == 0)
+		return (0);
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
+
+	if (errno == EBUSY)
+		return (zfs_error(hdl, EZFS_RESILVERING, msg));
+	else
+		return (zpool_standard_error(hdl, errno, msg));
+}
+
+/*
+ * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
+ * spare, and FALSE if it's an INUSE spare.
+ */
+static nvlist_t *
+vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
+    boolean_t *avail_spare)
+{
+	uint_t c, children;
+	nvlist_t **child;
+	uint64_t theguid, present;
+	char *path;
+	uint64_t wholedisk = 0;
+	nvlist_t *ret;
+
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
+
+	if (search == NULL &&
+	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
+		/*
+		 * If the device has never been present since import, the only
+		 * reliable way to match the vdev is by GUID.
+		 */
+		if (theguid == guid)
+			return (nv);
+	} else if (search != NULL &&
+	    nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
+		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+		    &wholedisk);
+		if (wholedisk) {
+			/*
+			 * For whole disks, the internal path has 's0', but the
+			 * path passed in by the user doesn't.
+			 */
+			if (strlen(search) == strlen(path) - 2 &&
+			    strncmp(search, path, strlen(search)) == 0)
+				return (nv);
+		} else if (strcmp(search, path) == 0) {
+			return (nv);
+		}
+	}
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0)
+		return (NULL);
+
+	for (c = 0; c < children; c++)
+		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+		    avail_spare)) != NULL)
+			return (ret);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++) {
+			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+			    avail_spare)) != NULL) {
+				*avail_spare = B_TRUE;
+				return (ret);
+			}
+		}
+	}
+
+	return (NULL);
+}
+
+nvlist_t *
+zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare)
+{
+	char buf[MAXPATHLEN];
+	const char *search;
+	char *end;
+	nvlist_t *nvroot;
+	uint64_t guid;
+
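+	/*
+	 * 'path' may be a vdev GUID, an absolute device path, or a bare
+	 * device name; bare names are prefixed with the /dev directory
+	 * before searching the vdev tree.
+	 */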
+	guid = strtoull(path, &end, 10);
+	if (guid != 0 && *end == '\0') {
+		search = NULL;
+	} else if (path[0] != '/') {
+		(void) snprintf(buf, sizeof (buf), "%s%s", _PATH_DEV, path);
+		search = buf;
+	} else {
+		search = path;
+	}
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+
+	*avail_spare = B_FALSE;
+	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare));
+}
+
+/*
+ * Returns TRUE if the given guid corresponds to a spare (INUSE or not).
+ */
+static boolean_t
+is_spare(zpool_handle_t *zhp, uint64_t guid)
+{
+	uint64_t spare_guid;
+	nvlist_t *nvroot;
+	nvlist_t **spares;
+	uint_t nspares;
+	int i;
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) == 0) {
+		for (i = 0; i < nspares; i++) {
+			verify(nvlist_lookup_uint64(spares[i],
+			    ZPOOL_CONFIG_GUID, &spare_guid) == 0);
+			if (guid == spare_guid)
+				return (B_TRUE);
+		}
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Bring the specified vdev online
+ */
+int
+zpool_vdev_online(zpool_handle_t *zhp, const char *path)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	nvlist_t *tgt;
+	boolean_t avail_spare;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+
+	if (avail_spare || is_spare(zhp, zc.zc_guid) == B_TRUE)
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ONLINE, &zc) == 0)
+		return (0);
+
+	return (zpool_standard_error(hdl, errno, msg));
+}
+
+/*
+ * Take the specified vdev offline
+ */
+int
+zpool_vdev_offline(zpool_handle_t *zhp, const char *path, int istmp)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	nvlist_t *tgt;
+	boolean_t avail_spare;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+
+	if (avail_spare || is_spare(zhp, zc.zc_guid) == B_TRUE)
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+	zc.zc_cookie = istmp;
+
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_OFFLINE, &zc) == 0)
+		return (0);
+
+	switch (errno) {
+	case EBUSY:
+		/*
+		 * There are no other replicas of this device.
+		 */
+		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
+
+	default:
+		return (zpool_standard_error(hdl, errno, msg));
+	}
+}
+
+/*
+ * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
+ * a hot spare.
+ */
+static boolean_t
+is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	char *type;
+
+	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
+	    &children) == 0) {
+		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
+		    &type) == 0);
+
+		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
+		    children == 2 && child[which] == tgt)
+			return (B_TRUE);
+
+		for (c = 0; c < children; c++)
+			if (is_replacing_spare(child[c], tgt, which))
+				return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Attach new_disk (fully described by nvroot) to old_disk.
+ * If 'replacing' is specified, the new disk will replace the old one.
+ */
+int
+zpool_vdev_attach(zpool_handle_t *zhp,
+    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	int ret;
+	nvlist_t *tgt;
+	boolean_t avail_spare;
+	uint64_t val;
+	char *path;
+	nvlist_t **child;
+	uint_t children;
+	nvlist_t *config_root;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	if (replacing)
+		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+		    "cannot replace %s with %s"), old_disk, new_disk);
+	else
+		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+		    "cannot attach %s to %s"), new_disk, old_disk);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare)) == 0)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	if (avail_spare)
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+	zc.zc_cookie = replacing;
+
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0 || children != 1) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "new device must be a single disk"));
+		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
+	}
+
+	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
+	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
+
+	/*
+	 * If the target is a hot spare that has been swapped in, we can only
+	 * replace it with another hot spare.
+	 */
+	if (replacing &&
+	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
+	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
+	    (zpool_find_vdev(zhp, path, &avail_spare) == NULL ||
+	    !avail_spare) && is_replacing_spare(config_root, tgt, 1)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "can only be replaced by another hot spare"));
+		return (zfs_error(hdl, EZFS_BADTARGET, msg));
+	}
+
+	/*
+	 * If we are attempting to replace a spare, it cannot be applied to an
+	 * already spared device.
+	 */
+	if (replacing &&
+	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
+	    zpool_find_vdev(zhp, path, &avail_spare) != NULL && avail_spare &&
+	    is_replacing_spare(config_root, tgt, 0)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "device has already been replaced with a spare"));
+		return (zfs_error(hdl, EZFS_BADTARGET, msg));
+	}
+
+	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
+		return (-1);
+
+	ret = ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ATTACH, &zc);
+
+	zcmd_free_nvlists(&zc);
+
+	if (ret == 0)
+		return (0);
+
+	switch (errno) {
+	case ENOTSUP:
+		/*
+		 * Can't attach to or replace this type of vdev.
+		 */
+		if (replacing)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "cannot replace a replacing device"));
+		else
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "can only attach to mirrors and top-level "
+			    "disks"));
+		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
+		break;
+
+	case EINVAL:
+		/*
+		 * The new device must be a single disk.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "new device must be a single disk"));
+		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
+		break;
+
+	case EBUSY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
+		    new_disk);
+		(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		break;
+
+	case EOVERFLOW:
+		/*
+		 * The new device is too small.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "device is too small"));
+		(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		break;
+
+	case EDOM:
+		/*
+		 * The new device has a different alignment requirement.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "devices have different sector alignment"));
+		(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		break;
+
+	case ENAMETOOLONG:
+		/*
+		 * The resulting top-level vdev spec won't fit in the label.
+		 */
+		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
+		break;
+
+	default:
+		(void) zpool_standard_error(hdl, errno, msg);
+	}
+
+	return (-1);
+}
+
+/*
+ * Detach the specified device.
+ */
+int
+zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	nvlist_t *tgt;
+	boolean_t avail_spare;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	if (avail_spare)
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_DETACH, &zc) == 0)
+		return (0);
+
+	switch (errno) {
+
+	case ENOTSUP:
+		/*
+		 * Can't detach from this type of vdev.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
+		    "applicable to mirror and replacing vdevs"));
+		(void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
+		break;
+
+	case EBUSY:
+		/*
+		 * There are no other replicas of this device.
+		 */
+		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
+		break;
+
+	default:
+		(void) zpool_standard_error(hdl, errno, msg);
+	}
+
+	return (-1);
+}
+
+/*
+ * Remove the given device.  Currently, this is supported only for hot spares.
+ */
+int
+zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	nvlist_t *tgt;
+	boolean_t avail_spare;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	if (!avail_spare) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "only inactive hot spares can be removed"));
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+	}
+
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
+		return (0);
+
+	return (zpool_standard_error(hdl, errno, msg));
+}
+
+/*
+ * Clear the errors for the pool, or the particular device if specified.
+ */
+int
+zpool_clear(zpool_handle_t *zhp, const char *path)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	nvlist_t *tgt;
+	boolean_t avail_spare;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	if (path)
+		(void) snprintf(msg, sizeof (msg),
+		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
+		    path);
+	else
+		(void) snprintf(msg, sizeof (msg),
+		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
+		    zhp->zpool_name);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	if (path) {
+		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
+			return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+		if (avail_spare)
+			return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
+		    &zc.zc_guid) == 0);
+	}
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
+		return (0);
+
+	return (zpool_standard_error(hdl, errno, msg));
+}
+
+/*
+ * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
+ * hierarchy.
+ */
+int
+zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
+    void *data)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char (*paths)[MAXPATHLEN];
+	char path[MAXPATHLEN];
+	size_t size = 4;
+	int curr, fd, base, ret = 0;
+	DIR *dirp;
+	struct dirent *dp;
+	struct stat st;
+
+	if ((base = open(ZVOL_FULL_DEV_DIR, O_RDONLY)) < 0)
+		return (errno == ENOENT ? 0 : -1);
+
+	snprintf(path, sizeof(path), "%s/%s", ZVOL_FULL_DEV_DIR,
+	    zhp->zpool_name);
+	if (stat(path, &st) != 0) {
+		int err = errno;
+		(void) close(base);
+		return (err == ENOENT ? 0 : -1);
+	}
+
+	/*
+	 * Oddly this wasn't a directory -- ignore that failure since we
+	 * know there are no links lower in the (non-existent) hierarchy.
+	 */
+	if (!S_ISDIR(st.st_mode)) {
+		(void) close(base);
+		return (0);
+	}
+
+	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
+		(void) close(base);
+		return (-1);
+	}
+
+	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
+	curr = 0;
+
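+	/*
+	 * Walk the /dev/zvol hierarchy depth first, using 'paths' as an
+	 * explicit stack of pathnames (relative to ZVOL_FULL_DEV_DIR) that
+	 * remain to be visited.
+	 */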
+	while (curr >= 0) {
+		snprintf(path, sizeof(path), "%s/%s", ZVOL_FULL_DEV_DIR,
+		    paths[curr]);
+		if (lstat(path, &st) != 0)
+			goto err;
+
+		if (S_ISDIR(st.st_mode)) {
+			if ((dirp = opendir(path)) == NULL) {
+				goto err;
+			}
+
+			while ((dp = readdir(dirp)) != NULL) {
+				if (dp->d_name[0] == '.')
+					continue;
+
+				if (curr + 1 == size) {
+					paths = zfs_realloc(hdl, paths,
+					    size * sizeof (paths[0]),
+					    size * 2 * sizeof (paths[0]));
+					if (paths == NULL) {
+						(void) closedir(dirp);
+						goto err;
+					}
+
+					size *= 2;
+				}
+
+				(void) strlcpy(paths[curr + 1], paths[curr],
+				    sizeof (paths[curr + 1]));
+				(void) strlcat(paths[curr], "/",
+				    sizeof (paths[curr]));
+				(void) strlcat(paths[curr], dp->d_name,
+				    sizeof (paths[curr]));
+				curr++;
+			}
+
+			(void) closedir(dirp);
+
+		} else {
+			if ((ret = cb(paths[curr], data)) != 0)
+				break;
+		}
+
+		curr--;
+	}
+
+	free(paths);
+	(void) close(base);
+
+	return (ret);
+
+err:
+	free(paths);
+	(void) close(base);
+	return (-1);
+}
+
+typedef struct zvol_cb {
+	zpool_handle_t *zcb_pool;
+	boolean_t zcb_create;
+} zvol_cb_t;
+
+/*ARGSUSED*/
+static int
+do_zvol_create(zfs_handle_t *zhp, void *data)
+{
+	int ret;
+
+	if (ZFS_IS_VOLUME(zhp))
+		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
+
+	ret = zfs_iter_children(zhp, do_zvol_create, NULL);
+
+	zfs_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * Iterate over all zvols in the pool and make any necessary minor nodes.
+ */
+int
+zpool_create_zvol_links(zpool_handle_t *zhp)
+{
+	zfs_handle_t *zfp;
+	int ret;
+
+	/*
+	 * If the pool is unavailable, just return success.
+	 */
+	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
+	    zhp->zpool_name)) == NULL)
+		return (0);
+
+	ret = zfs_iter_children(zfp, do_zvol_create, NULL);
+
+	zfs_close(zfp);
+	return (ret);
+}
+
+static int
+do_zvol_remove(const char *dataset, void *data)
+{
+	zpool_handle_t *zhp = data;
+
+	return (zvol_remove_link(zhp->zpool_hdl, dataset));
+}
+
+/*
+ * Iterate over all zvols in the pool and remove any minor nodes.  We iterate
+ * by examining the /dev links so that a corrupted pool doesn't impede this
+ * operation.
+ */
+int
+zpool_remove_zvol_links(zpool_handle_t *zhp)
+{
+	return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
+}
+
+/*
+ * Convert from a devid string to a path.
+ */
+static char *
+devid_to_path(char *devid_str)
+{
+	ddi_devid_t devid;
+	char *minor;
+	char *path;
+	devid_nmlist_t *list = NULL;
+	int ret;
+
+	if (devid_str_decode(devid_str, &devid, &minor) != 0)
+		return (NULL);
+
+	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
+
+	devid_str_free(minor);
+	devid_free(devid);
+
+	if (ret != 0)
+		return (NULL);
+
+	if ((path = strdup(list[0].devname)) == NULL)
+		return (NULL);
+
+	devid_free_nmlist(list);
+
+	return (path);
+}
+
+/*
+ * Convert from a path to a devid string.
+ */
+static char *
+path_to_devid(const char *path)
+{
+	int fd;
+	ddi_devid_t devid;
+	char *minor, *ret;
+
+	if ((fd = open(path, O_RDONLY)) < 0)
+		return (NULL);
+
+	minor = NULL;
+	ret = NULL;
+	if (devid_get(fd, &devid) == 0) {
+		if (devid_get_minor_name(fd, &minor) == 0)
+			ret = devid_str_encode(devid, minor);
+		if (minor != NULL)
+			devid_str_free(minor);
+		devid_free(devid);
+	}
+	(void) close(fd);
+
+	return (ret);
+}
+
+/*
+ * Issue the necessary ioctl() to update the stored path value for the vdev.  We
+ * ignore any failure here, since a common case is for an unprivileged user to
+ * type 'zpool status', and we'll display the correct information anyway.
+ */
+static void
+set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
+{
+	zfs_cmd_t zc = { 0 };
+
+	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+	    &zc.zc_guid) == 0);
+
+	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
+}
+
+/*
+ * Given a vdev, return the name to display in iostat.  If the vdev has a path,
+ * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
+ * We also check if this is a whole disk, in which case we strip off the
+ * trailing 's0' slice name.
+ *
+ * This routine is also responsible for identifying when disks have been
+ * reconfigured in a new location.  The kernel will have opened the device by
+ * devid, but the path will still refer to the old location.  To catch this, we
+ * first do a path -> devid translation (which is fast for the common case).  If
+ * the devid matches, we're done.  If not, we do a reverse devid -> path
+ * translation and issue the appropriate ioctl() to update the path of the vdev.
+ * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
+ * of these checks.
+ */
+char *
+zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
+{
+	char *path, *devid;
+	uint64_t value;
+	char buf[64];
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
+	    &value) == 0) {
+		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+		    &value) == 0);
+		(void) snprintf(buf, sizeof (buf), "%llu",
+		    (u_longlong_t)value);
+		path = buf;
+	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
+
+		if (zhp != NULL &&
+		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
+			/*
+			 * Determine if the current path is correct.
+			 */
+			char *newdevid = path_to_devid(path);
+
+			if (newdevid == NULL ||
+			    strcmp(devid, newdevid) != 0) {
+				char *newpath;
+
+				if ((newpath = devid_to_path(devid)) != NULL) {
+					/*
+					 * Update the path appropriately.
+					 */
+					set_path(zhp, nv, newpath);
+					if (nvlist_add_string(nv,
+					    ZPOOL_CONFIG_PATH, newpath) == 0)
+						verify(nvlist_lookup_string(nv,
+						    ZPOOL_CONFIG_PATH,
+						    &path) == 0);
+					free(newpath);
+				}
+			}
+
+			if (newdevid)
+				devid_str_free(newdevid);
+		}
+
+		if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+			path += sizeof(_PATH_DEV) - 1;
+
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+		    &value) == 0 && value) {
+			char *tmp = zfs_strdup(hdl, path);
+			if (tmp == NULL)
+				return (NULL);
+			tmp[strlen(path) - 2] = '\0';
+			return (tmp);
+		}
+	} else {
+		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
+
+		/*
+		 * If it's a raidz device, we need to stick in the parity level.
+		 */
+		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
+			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
+			    &value) == 0);
+			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
+			    (u_longlong_t)value);
+			path = buf;
+		}
+	}
+
+	return (zfs_strdup(hdl, path));
+}
+
+static int
+zbookmark_compare(const void *a, const void *b)
+{
+	return (memcmp(a, b, sizeof (zbookmark_t)));
+}
+
+/*
+ * Retrieve the persistent error log, uniquify the members, and return to the
+ * caller.
+ */
+int
+zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
+{
+	zfs_cmd_t zc = { 0 };
+	uint64_t count;
+	zbookmark_t *zb = NULL;
+	int i;
+
+	/*
+	 * Retrieve the raw error list from the kernel.  If the number of errors
+	 * has increased, allocate more space and continue until we get the
+	 * entire list.
+	 */
+	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
+	    &count) == 0);
+	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
+	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
+		return (-1);
+	zc.zc_nvlist_dst_size = count;
+	(void) strcpy(zc.zc_name, zhp->zpool_name);
+	for (;;) {
+		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
+		    &zc) != 0) {
+			free((void *)(uintptr_t)zc.zc_nvlist_dst);
+			if (errno == ENOMEM) {
+				count = zc.zc_nvlist_dst_size;
+				if ((zc.zc_nvlist_dst = (uintptr_t)
+				    zfs_alloc(zhp->zpool_hdl, count *
+				    sizeof (zbookmark_t))) == (uintptr_t)NULL)
+					return (-1);
+			} else {
+				return (-1);
+			}
+		} else {
+			break;
+		}
+	}
+
+	/*
+	 * Sort the resulting bookmarks.  This is a little confusing due to the
+	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
+	 * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
+	 * _not_ copied as part of the process.  So we point the start of our
+	 * array appropriately and decrement the total number of elements.
+	 */
+	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
+	    zc.zc_nvlist_dst_size;
+	count -= zc.zc_nvlist_dst_size;
+
+	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
+
+	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
+
+	/*
+	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
+	 */
+	for (i = 0; i < count; i++) {
+		nvlist_t *nv;
+
+		/* ignoring zb_blkid and zb_level for now */
+		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
+		    zb[i-1].zb_object == zb[i].zb_object)
+			continue;
+
+		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
+			goto nomem;
+		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
+		    zb[i].zb_objset) != 0) {
+			nvlist_free(nv);
+			goto nomem;
+		}
+		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
+		    zb[i].zb_object) != 0) {
+			nvlist_free(nv);
+			goto nomem;
+		}
+		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
+			nvlist_free(nv);
+			goto nomem;
+		}
+		nvlist_free(nv);
+	}
+
+	free((void *)(uintptr_t)zc.zc_nvlist_dst);
+	return (0);
+
+nomem:
+	free((void *)(uintptr_t)zc.zc_nvlist_dst);
+	return (no_memory(zhp->zpool_hdl));
+}
+
+/*
+ * Upgrade a ZFS pool to the latest on-disk version.
+ */
+int
+zpool_upgrade(zpool_handle_t *zhp)
+{
+	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	(void) strcpy(zc.zc_name, zhp->zpool_name);
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
+		return (zpool_standard_error_fmt(hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
+		    zhp->zpool_name));
+
+	return (0);
+}
+
+/*
+ * Log command history.
+ *
+ * 'pool' is B_TRUE if we are logging a command for 'zpool'; B_FALSE
+ * otherwise ('zfs').  'pool_create' is B_TRUE if we are logging the creation
+ * of the pool; B_FALSE otherwise.  'path' is the pathname containing the
+ * poolname.  'argc' and 'argv' are used to construct the command string.
+ */
+void
+zpool_log_history(libzfs_handle_t *hdl, int argc, char **argv, const char *path,
+	boolean_t pool, boolean_t pool_create)
+{
+	char cmd_buf[HIS_MAX_RECORD_LEN];
+	char *dspath;
+	zfs_cmd_t zc = { 0 };
+	int i;
+
+	/* construct the command string */
+	(void) strcpy(cmd_buf, pool ? "zpool" : "zfs");
+	for (i = 0; i < argc; i++) {
+		if (strlen(cmd_buf) + 1 + strlen(argv[i]) > HIS_MAX_RECORD_LEN)
+			break;
+		(void) strcat(cmd_buf, " ");
+		(void) strcat(cmd_buf, argv[i]);
+	}
+
+	/* figure out the poolname */
+	dspath = strpbrk(path, "/@");
+	if (dspath == NULL) {
+		(void) strcpy(zc.zc_name, path);
+	} else {
+		(void) strncpy(zc.zc_name, path, dspath - path);
+		zc.zc_name[dspath-path] = '\0';
+	}
+
+	zc.zc_history = (uint64_t)(uintptr_t)cmd_buf;
+	zc.zc_history_len = strlen(cmd_buf);
+
+	/* overloading zc_history_offset */
+	zc.zc_history_offset = pool_create;
+
+	(void) ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_LOG_HISTORY, &zc);
+}
+
+/*
+ * Perform ioctl to get some command history of a pool.
+ *
+ * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
+ * logical offset of the history buffer to start reading from.
+ *
+ * Upon return, 'off' is the next logical offset to read from and
+ * 'len' is the actual amount of bytes read into 'buf'.
+ */
+static int
+get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
+{
+	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	zc.zc_history = (uint64_t)(uintptr_t)buf;
+	zc.zc_history_len = *len;
+	zc.zc_history_offset = *off;
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
+		switch (errno) {
+		case EPERM:
+			return (zfs_error_fmt(hdl, EZFS_PERM,
+			    dgettext(TEXT_DOMAIN,
+			    "cannot show history for pool '%s'"),
+			    zhp->zpool_name));
+		case ENOENT:
+			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
+			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
+			    "'%s'"), zhp->zpool_name));
+		case ENOTSUP:
+			return (zfs_error_fmt(hdl, EZFS_BADVERSION,
+			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
+			    "'%s', pool must be upgraded"), zhp->zpool_name));
+		default:
+			return (zpool_standard_error_fmt(hdl, errno,
+			    dgettext(TEXT_DOMAIN,
+			    "cannot get history for '%s'"), zhp->zpool_name));
+		}
+	}
+
+	*len = zc.zc_history_len;
+	*off = zc.zc_history_offset;
+
+	return (0);
+}
+
+/*
+ * Process the buffer of nvlists, unpacking and storing each nvlist record
+ * into 'records'.  'leftover' is set to the number of bytes that weren't
+ * processed as there wasn't a complete record.
+ */
+static int
+zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
+    nvlist_t ***records, uint_t *numrecords)
+{
+	uint64_t reclen;
+	nvlist_t *nv;
+	int i;
+
+	while (bytes_read > sizeof (reclen)) {
+
+		/* get length of packed record (stored as little endian) */
+		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
+			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
+
+		if (bytes_read < sizeof (reclen) + reclen)
+			break;
+
+		/* unpack record */
+		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
+			return (ENOMEM);
+		bytes_read -= sizeof (reclen) + reclen;
+		buf += sizeof (reclen) + reclen;
+
+		/* add record to nvlist array */
+		(*numrecords)++;
+		if (ISP2(*numrecords + 1)) {
+			*records = realloc(*records,
+			    *numrecords * 2 * sizeof (nvlist_t *));
+		}
+		(*records)[*numrecords - 1] = nv;
+	}
+
+	*leftover = bytes_read;
+	return (0);
+}
+
+#define	HIS_BUF_LEN	(128*1024)
+
+/*
+ * Retrieve the command history of a pool.
+ */
+int
+zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
+{
+	char buf[HIS_BUF_LEN];
+	uint64_t off = 0;
+	nvlist_t **records = NULL;
+	uint_t numrecords = 0;
+	int err, i;
+
+	do {
+		uint64_t bytes_read = sizeof (buf);
+		uint64_t leftover;
+
+		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
+			break;
+
+		/* if nothing else was read in, we're at EOF, just return */
+		if (!bytes_read)
+			break;
+
+		if ((err = zpool_history_unpack(buf, bytes_read,
+		    &leftover, &records, &numrecords)) != 0)
+			break;
+		off -= leftover;
+
+		/* CONSTCOND */
+	} while (1);
+
+	if (!err) {
+		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
+		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
+		    records, numrecords) == 0);
+	}
+	for (i = 0; i < numrecords; i++)
+		nvlist_free(records[i]);
+	free(records);
+
+	return (err);
+}
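
A sketch of how a consumer might walk the returned history.  The
per-record keys (ZPOOL_HIST_TIME, ZPOOL_HIST_CMD) are assumed from the
zpool(8) history consumer and are not shown in this diff:

	nvlist_t *nvhis, **records;
	uint_t nrecords, i;

	if (zpool_get_history(zhp, &nvhis) == 0) {
		verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD,
		    &records, &nrecords) == 0);
		for (i = 0; i < nrecords; i++) {
			uint64_t tsec;
			char *cmd;

			if (nvlist_lookup_uint64(records[i], ZPOOL_HIST_TIME,
			    &tsec) == 0 &&
			    nvlist_lookup_string(records[i], ZPOOL_HIST_CMD,
			    &cmd) == 0)
				(void) printf("%llu %s\n",
				    (u_longlong_t)tsec, cmd);
		}
		nvlist_free(nvhis);
	}
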
+
+void
+zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
+    char *pathname, size_t len)
+{
+	zfs_cmd_t zc = { 0 };
+	boolean_t mounted = B_FALSE;
+	char *mntpnt = NULL;
+	char dsname[MAXNAMELEN];
+
+	if (dsobj == 0) {
+		/* special case for the MOS */
+		(void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj);
+		return;
+	}
+
+	/* get the dataset's name */
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_obj = dsobj;
+	if (ioctl(zhp->zpool_hdl->libzfs_fd,
+	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
+		/* just write out a path of two object numbers */
+		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
+		    dsobj, obj);
+		return;
+	}
+	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
+
+	/* find out if the dataset is mounted */
+	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
+
+	/* get the corrupted object's path */
+	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
+	zc.zc_obj = obj;
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
+	    &zc) == 0) {
+		if (mounted) {
+			(void) snprintf(pathname, len, "%s%s", mntpnt,
+			    zc.zc_value);
+		} else {
+			(void) snprintf(pathname, len, "%s:%s",
+			    dsname, zc.zc_value);
+		}
+	} else {
+		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj);
+	}
+	free(mntpnt);
+}
+
+int
+zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret = -1;
+	char errbuf[1024];
+	nvlist_t *nvl = NULL;
+	nvlist_t *realprops;
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
+	    zhp->zpool_name);
+
+	if (zpool_get_version(zhp) < ZFS_VERSION_BOOTFS) {
+		zfs_error_aux(zhp->zpool_hdl,
+		    dgettext(TEXT_DOMAIN, "pool must be "
+		    "upgraded to support pool properties"));
+		return (zfs_error(zhp->zpool_hdl, EZFS_BADVERSION, errbuf));
+	}
+
+	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
+		return (zfs_error(zhp->zpool_hdl, EZFS_POOLPROPS, errbuf));
+
+	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 ||
+	    nvlist_add_string(nvl, propname, propval) != 0) {
+		return (no_memory(zhp->zpool_hdl));
+	}
+
+	if ((realprops = zfs_validate_properties(zhp->zpool_hdl, ZFS_TYPE_POOL,
+	    zhp->zpool_name, nvl, 0, NULL, errbuf)) == NULL) {
+		nvlist_free(nvl);
+		return (-1);
+	}
+
+	nvlist_free(nvl);
+	nvl = realprops;
+
+	/*
+	 * Execute the corresponding ioctl() to set this property.
+	 */
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl, NULL) != 0)
+		return (-1);
+
+	ret = ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_SET_PROPS, &zc);
+	zcmd_free_nvlists(&zc);
+
+	if (ret)
+		(void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
+
+	return (ret);
+}
+
+int
+zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *propbuf,
+    size_t proplen, zfs_source_t *srctype)
+{
+	uint64_t value;
+	char msg[1024], *strvalue;
+	nvlist_t *nvp;
+	zfs_source_t src = ZFS_SRC_NONE;
+
+	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+	    "cannot get property '%s'"), zpool_prop_to_name(prop));
+
+	if (zpool_get_version(zhp) < ZFS_VERSION_BOOTFS) {
+		zfs_error_aux(zhp->zpool_hdl,
+		    dgettext(TEXT_DOMAIN, "pool must be "
+		    "upgraded to support pool properties"));
+		return (zfs_error(zhp->zpool_hdl, EZFS_BADVERSION, msg));
+	}
+
+	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
+		return (zfs_error(zhp->zpool_hdl, EZFS_POOLPROPS, msg));
+
+	/*
+	 * the "name" property is special cased
+	 */
+	if (!zfs_prop_valid_for_type(prop, ZFS_TYPE_POOL) &&
+	    prop != ZFS_PROP_NAME)
+		return (-1);
+
+	switch (prop) {
+	case ZFS_PROP_NAME:
+		(void) strlcpy(propbuf, zhp->zpool_name, proplen);
+		break;
+
+	case ZFS_PROP_BOOTFS:
+		if (nvlist_lookup_nvlist(zhp->zpool_props,
+		    zpool_prop_to_name(prop), &nvp) != 0) {
+			strvalue = (char *)zfs_prop_default_string(prop);
+			if (strvalue == NULL)
+				strvalue = "-";
+			src = ZFS_SRC_DEFAULT;
+		} else {
+			VERIFY(nvlist_lookup_uint64(nvp,
+			    ZFS_PROP_SOURCE, &value) == 0);
+			src = value;
+			VERIFY(nvlist_lookup_string(nvp, ZFS_PROP_VALUE,
+			    &strvalue) == 0);
+			if (strlen(strvalue) >= proplen)
+				return (-1);
+		}
+		(void) strcpy(propbuf, strvalue);
+		break;
+
+	default:
+		return (-1);
+	}
+	if (srctype)
+		*srctype = src;
+	return (0);
+}
+
+int
+zpool_get_proplist(libzfs_handle_t *hdl, char *fields, zpool_proplist_t **listp)
+{
+	return (zfs_get_proplist_common(hdl, fields, listp, ZFS_TYPE_POOL));
+}
+
+
+int
+zpool_expand_proplist(zpool_handle_t *zhp, zpool_proplist_t **plp)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zpool_proplist_t *entry;
+	char buf[ZFS_MAXPROPLEN];
+
+	if (zfs_expand_proplist_common(hdl, plp, ZFS_TYPE_POOL) != 0)
+		return (-1);
+
+	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
+
+		if (entry->pl_fixed)
+			continue;
+
+		if (entry->pl_prop != ZFS_PROP_INVAL &&
+		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
+		    NULL) == 0) {
+			if (strlen(buf) > entry->pl_width)
+				entry->pl_width = strlen(buf);
+		}
+	}
+
+	return (0);
+}
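
A short usage sketch for the property entry points above; the pool
handle 'zhp' and the dataset name "tank/boot" are assumptions for
illustration:

	char bootfs[ZFS_MAXPROPLEN];
	zfs_source_t src;

	if (zpool_set_prop(zhp, "bootfs", "tank/boot") == 0 &&
	    zpool_get_prop(zhp, ZFS_PROP_BOOTFS, bootfs, sizeof (bootfs),
	    &src) == 0)
		(void) printf("bootfs=%s (source=%d)\n", bootfs, (int)src);
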
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c
@@ -0,0 +1,986 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Routines to manage ZFS mounts.  We separate all the nasty routines that have
+ * to deal with the OS.  The following functions are the main entry points --
+ * they are used by mount and unmount and when changing a filesystem's
+ * mountpoint.
+ *
+ * 	zfs_is_mounted()
+ * 	zfs_mount()
+ * 	zfs_unmount()
+ * 	zfs_unmountall()
+ *
+ * This file also contains the functions used to manage sharing filesystems via
+ * NFS and iSCSI:
+ *
+ * 	zfs_is_shared()
+ * 	zfs_share()
+ * 	zfs_unshare()
+ *
+ * 	zfs_is_shared_nfs()
+ * 	zfs_share_nfs()
+ * 	zfs_unshare_nfs()
+ * 	zfs_unshareall_nfs()
+ * 	zfs_is_shared_iscsi()
+ * 	zfs_share_iscsi()
+ * 	zfs_unshare_iscsi()
+ *
+ * The following functions are available for pool consumers, and will
+ * mount/unmount and share/unshare all datasets within the pool:
+ *
+ * 	zpool_enable_datasets()
+ * 	zpool_disable_datasets()
+ */
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <libgen.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <zone.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+
+static int (*iscsitgt_zfs_share)(const char *);
+static int (*iscsitgt_zfs_unshare)(const char *);
+static int (*iscsitgt_zfs_is_shared)(const char *);
+
+#pragma init(zfs_iscsi_init)
+static void
+zfs_iscsi_init(void)
+{
+	void *libiscsitgt;
+
+	if ((libiscsitgt = dlopen("/lib/libiscsitgt.so.1",
+	    RTLD_LAZY | RTLD_GLOBAL)) == NULL ||
+	    (iscsitgt_zfs_share = (int (*)(const char *))dlsym(libiscsitgt,
+	    "iscsitgt_zfs_share")) == NULL ||
+	    (iscsitgt_zfs_unshare = (int (*)(const char *))dlsym(libiscsitgt,
+	    "iscsitgt_zfs_unshare")) == NULL ||
+	    (iscsitgt_zfs_is_shared = (int (*)(const char *))dlsym(libiscsitgt,
+	    "iscsitgt_zfs_is_shared")) == NULL) {
+		iscsitgt_zfs_share = NULL;
+		iscsitgt_zfs_unshare = NULL;
+		iscsitgt_zfs_is_shared = NULL;
+	}
+}
+
+/*
+ * Search the sharetab for the given mountpoint, returning true if it is found.
+ */
+static boolean_t
+is_shared(libzfs_handle_t *hdl, const char *mountpoint)
+{
+	char buf[MAXPATHLEN], *tab;
+
+	if (hdl->libzfs_sharetab == NULL)
+		return (0);
+
+	(void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET);
+
+	while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) {
+
+		/* the mountpoint is the first entry on each line */
+		if ((tab = strchr(buf, '\t')) != NULL) {
+			*tab = '\0';
+			if (strcmp(buf, mountpoint) == 0)
+				return (B_TRUE);
+		}
+	}
+
+	return (B_FALSE);
+}
+
+#if 0
+/*
+ * Returns true if the specified directory is empty.  If we can't open the
+ * directory at all, return true so that the mount can fail with a more
+ * informative error message.
+ */
+static boolean_t
+dir_is_empty(const char *dirname)
+{
+	DIR *dirp;
+	struct dirent64 *dp;
+
+	if ((dirp = opendir(dirname)) == NULL)
+		return (B_TRUE);
+
+	while ((dp = readdir64(dirp)) != NULL) {
+
+		if (strcmp(dp->d_name, ".") == 0 ||
+		    strcmp(dp->d_name, "..") == 0)
+			continue;
+
+		(void) closedir(dirp);
+		return (B_FALSE);
+	}
+
+	(void) closedir(dirp);
+	return (B_TRUE);
+}
+#endif
+
+/*
+ * Checks to see if the mount is active.  If the filesystem is mounted, we fill
+ * in 'where' with the current mountpoint, and return 1.  Otherwise, we return
+ * 0.
+ */
+boolean_t
+is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
+{
+	struct mnttab search = { 0 }, entry;
+
+	/*
+	 * Search for the entry in /etc/mnttab.  We don't bother getting the
+	 * mountpoint, as we can just search for the special device.  This will
+	 * also let us find mounts when the mountpoint is 'legacy'.
+	 */
+	search.mnt_special = (char *)special;
+	search.mnt_fstype = MNTTYPE_ZFS;
+
+	rewind(zfs_hdl->libzfs_mnttab);
+	if (getmntany(zfs_hdl->libzfs_mnttab, &entry, &search) != 0)
+		return (B_FALSE);
+
+	if (where != NULL)
+		*where = zfs_strdup(zfs_hdl, entry.mnt_mountp);
+
+	return (B_TRUE);
+}
+
+boolean_t
+zfs_is_mounted(zfs_handle_t *zhp, char **where)
+{
+	return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
+}
+
+/*
+ * Returns true if the given dataset is mountable, false otherwise.  Returns the
+ * mountpoint in 'buf'.
+ */
+static boolean_t
+zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
+    zfs_source_t *source)
+{
+	char sourceloc[ZFS_MAXNAMELEN];
+	zfs_source_t sourcetype;
+
+	if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type))
+		return (B_FALSE);
+
+	verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,
+	    &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0);
+
+	if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||
+	    strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)
+		return (B_FALSE);
+
+	if (!zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT))
+		return (B_FALSE);
+
+	if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
+	    getzoneid() == GLOBAL_ZONEID)
+		return (B_FALSE);
+
+	if (source)
+		*source = sourcetype;
+
+	return (B_TRUE);
+}
+
+/*
+ * Mount the given filesystem.
+ */
+int
+zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
+{
+	struct stat buf;
+	char mountpoint[ZFS_MAXPROPLEN];
+	char mntopts[MNT_LINE_MAX];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+	if (options == NULL)
+		mntopts[0] = '\0';
+	else
+		(void) strlcpy(mntopts, options, sizeof (mntopts));
+
+	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
+		return (0);
+
+	/* Create the directory if it doesn't already exist */
+	if (lstat(mountpoint, &buf) != 0) {
+		if (mkdirp(mountpoint, 0755) != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "failed to create mountpoint"));
+			return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
+			    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+			    mountpoint));
+		}
+	}
+
+#if 0	/* FreeBSD: overlay mounts are not checked. */
+	/*
+	 * Determine if the mountpoint is empty.  If so, refuse to perform the
+	 * mount.  We don't perform this check if MS_OVERLAY is specified, which
+	 * would defeat the point.  We also avoid this check if 'remount' is
+	 * specified.
+	 */
+	if ((flags & MS_OVERLAY) == 0 &&
+	    strstr(mntopts, MNTOPT_REMOUNT) == NULL &&
+	    !dir_is_empty(mountpoint)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "directory is not empty"));
+		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
+	}
+#endif
+
+	/* perform the mount */
+	if (zmount(zfs_get_name(zhp), mountpoint, flags,
+	    MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) {
+		/*
+		 * Generic errors are nasty, but there are just way too many
+		 * from mount(), and they're well-understood.  We pick a few
+		 * common ones to improve upon.
+		 */
+		if (errno == EBUSY)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "mountpoint or dataset is busy"));
+		else
+			zfs_error_aux(hdl, strerror(errno));
+
+		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+		    zhp->zfs_name));
+	}
+
+	return (0);
+}
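
A minimal mount sketch, assuming an open libzfs handle 'hdl' and an
illustrative dataset name; NULL options and flags of 0 take the
defaults described by the signature above:

	zfs_handle_t *zfsp;

	if ((zfsp = zfs_open(hdl, "tank/home",
	    ZFS_TYPE_FILESYSTEM)) != NULL) {
		if (!zfs_is_mounted(zfsp, NULL))
			(void) zfs_mount(zfsp, NULL, 0);
		zfs_close(zfsp);
	}
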
+
+/*
+ * Unmount a single filesystem.
+ */
+static int
+unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
+{
+	if (unmount(mountpoint, flags) != 0) {
+		zfs_error_aux(hdl, strerror(errno));
+		return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
+		    mountpoint));
+	}
+
+	return (0);
+}
+
+/*
+ * Unmount the given filesystem.
+ */
+int
+zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
+{
+	struct mnttab search = { 0 }, entry;
+
+	/* check to see if we need to unmount the filesystem */
+	search.mnt_special = zhp->zfs_name;
+	search.mnt_fstype = MNTTYPE_ZFS;
+	rewind(zhp->zfs_hdl->libzfs_mnttab);
+	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
+	    getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) {
+
+		if (mountpoint == NULL)
+			mountpoint = entry.mnt_mountp;
+
+		/*
+		 * Unshare and unmount the filesystem
+		 */
+		if (zfs_unshare_nfs(zhp, mountpoint) != 0 ||
+		    unmount_one(zhp->zfs_hdl, mountpoint, flags) != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Unmount this filesystem and any children inheriting the mountpoint property.
+ * To do this, just act like we're changing the mountpoint property, but don't
+ * remount the filesystems afterwards.
+ */
+int
+zfs_unmountall(zfs_handle_t *zhp, int flags)
+{
+	prop_changelist_t *clp;
+	int ret;
+
+	clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, flags);
+	if (clp == NULL)
+		return (-1);
+
+	ret = changelist_prefix(clp);
+	changelist_free(clp);
+
+	return (ret);
+}
+
+boolean_t
+zfs_is_shared(zfs_handle_t *zhp)
+{
+	if (ZFS_IS_VOLUME(zhp))
+		return (zfs_is_shared_iscsi(zhp));
+
+	return (zfs_is_shared_nfs(zhp, NULL));
+}
+
+int
+zfs_share(zfs_handle_t *zhp)
+{
+	if (ZFS_IS_VOLUME(zhp))
+		return (zfs_share_iscsi(zhp));
+
+	return (zfs_share_nfs(zhp));
+}
+
+int
+zfs_unshare(zfs_handle_t *zhp)
+{
+	if (ZFS_IS_VOLUME(zhp))
+		return (zfs_unshare_iscsi(zhp));
+
+	return (zfs_unshare_nfs(zhp, NULL));
+}
+
+/*
+ * Check to see if the filesystem is currently shared.
+ */
+boolean_t
+zfs_is_shared_nfs(zfs_handle_t *zhp, char **where)
+{
+	char *mountpoint;
+
+	if (!zfs_is_mounted(zhp, &mountpoint))
+		return (B_FALSE);
+
+	if (is_shared(zhp->zfs_hdl, mountpoint)) {
+		if (where != NULL)
+			*where = mountpoint;
+		else
+			free(mountpoint);
+		return (B_TRUE);
+	} else {
+		free(mountpoint);
+		return (B_FALSE);
+	}
+}
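
A sketch of checking the NFS share state of an open dataset handle;
note that the caller owns the returned mountpoint string when 'where'
is non-NULL:

	char *where;

	if (zfs_is_shared_nfs(zfsp, &where)) {
		(void) printf("%s exported at %s\n",
		    zfs_get_name(zfsp), where);
		free(where);	/* returned mountpoint is caller-owned */
	}
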
+
+/*
+ * Share the given filesystem according to the options in 'sharenfs'.  We rely
+ * on share(1M) to do the dirty work for us.
+ */
+int
+zfs_share_nfs(zfs_handle_t *zhp)
+{
+	char mountpoint[ZFS_MAXPROPLEN];
+	char shareopts[ZFS_MAXPROPLEN];
+	char buf[MAXPATHLEN];
+	FILE *fp;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
+		return (0);
+
+	/*
+	 * Return success if there are no share options.
+	 */
+	if (zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, sizeof (shareopts),
+	    NULL, NULL, 0, B_FALSE) != 0 ||
+	    strcmp(shareopts, "off") == 0)
+		return (0);
+
+	/*
+	 * If the 'zoned' property is set, then zfs_is_mountable() will have
+	 * already bailed out if we are in the global zone.  But local
+	 * zones cannot be NFS servers, so we ignore it for local zones as well.
+	 */
+	if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
+		return (0);
+
+#ifdef __FreeBSD__
+	{
+	int error;
+
+	if (strcmp(shareopts, "on") == 0)
+		error = fsshare(ZFS_EXPORTS_PATH, mountpoint, "");
+	else
+		error = fsshare(ZFS_EXPORTS_PATH, mountpoint, shareopts);
+	if (error != 0) {
+		zfs_error_aux(hdl, "%s", strerror(error));
+		(void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
+		    zfs_get_name(zhp));
+		return (-1);
+	}
+	}
+#else
+	/*
+	 * Invoke the share(1M) command.  We always do this, even if it's
+	 * currently shared, as the options may have changed.
+	 */
+	if (strcmp(shareopts, "on") == 0)
+		(void) snprintf(buf, sizeof (buf), "/usr/sbin/share "
+		    "-F nfs \"%s\" 2>&1", mountpoint);
+	else
+		(void) snprintf(buf, sizeof (buf), "/usr/sbin/share "
+		    "-F nfs -o \"%s\" \"%s\" 2>&1", shareopts,
+		    mountpoint);
+
+	if ((fp = popen(buf, "r")) == NULL)
+		return (zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
+		    zfs_get_name(zhp)));
+
+	/*
+	 * share(1M) should only produce output if there is some kind
+	 * of error.  All output begins with "share_nfs: ", so we trim
+	 * this off to get to the real error.
+	 */
+	if (fgets(buf, sizeof (buf), fp) != NULL) {
+		char *colon = strchr(buf, ':');
+
+		while (buf[strlen(buf) - 1] == '\n')
+			buf[strlen(buf) - 1] = '\0';
+
+		if (colon != NULL)
+			zfs_error_aux(hdl, colon + 2);
+
+		(void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
+		    zfs_get_name(zhp));
+
+		verify(pclose(fp) != 0);
+		return (-1);
+	}
+
+	verify(pclose(fp) == 0);
+#endif
+
+	return (0);
+}
+
+/*
+ * Unshare a filesystem by mountpoint.
+ */
+static int
+unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint)
+{
+	char buf[MAXPATHLEN];
+	FILE *fp;
+
+#ifdef __FreeBSD__
+	{
+	int error;
+
+	error = fsunshare(ZFS_EXPORTS_PATH, mountpoint);
+	if (error != 0) {
+		zfs_error_aux(hdl, "%s", strerror(error));
+		return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
+		    dgettext(TEXT_DOMAIN,
+		    "cannot unshare '%s'"), name));
+	}
+	}
+#else
+	(void) snprintf(buf, sizeof (buf),
+	    "/usr/sbin/unshare  \"%s\" 2>&1",
+	    mountpoint);
+
+	if ((fp = popen(buf, "r")) == NULL)
+		return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
+		    dgettext(TEXT_DOMAIN,
+		    "cannot unshare '%s'"), name));
+
+	/*
+	 * unshare(1M) should only produce output if there is
+	 * some kind of error.  All output begins with "unshare
+	 * nfs: ", so we trim this off to get to the real error.
+	 */
+	if (fgets(buf, sizeof (buf), fp) != NULL) {
+		char *colon = strchr(buf, ':');
+
+		while (buf[strlen(buf) - 1] == '\n')
+			buf[strlen(buf) - 1] = '\0';
+
+		if (colon != NULL)
+			zfs_error_aux(hdl, colon + 2);
+
+		verify(pclose(fp) != 0);
+
+		return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
+		    dgettext(TEXT_DOMAIN,
+		    "cannot unshare '%s'"), name));
+	}
+
+	verify(pclose(fp) == 0);
+#endif
+
+	return (0);
+}
+
+/*
+ * Unshare the given filesystem.
+ */
+int
+zfs_unshare_nfs(zfs_handle_t *zhp, const char *mountpoint)
+{
+	struct mnttab search = { 0 }, entry;
+
+	/* check to see if we need to unshare the filesystem */
+	search.mnt_special = (char *)zfs_get_name(zhp);
+	search.mnt_fstype = MNTTYPE_ZFS;
+	rewind(zhp->zfs_hdl->libzfs_mnttab);
+	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
+	    getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) {
+
+		if (mountpoint == NULL)
+			mountpoint = entry.mnt_mountp;
+
+		if (is_shared(zhp->zfs_hdl, mountpoint) &&
+		    unshare_one(zhp->zfs_hdl, zhp->zfs_name, mountpoint) != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Same as zfs_unmountall(), but for NFS unshares.
+ */
+int
+zfs_unshareall_nfs(zfs_handle_t *zhp)
+{
+	prop_changelist_t *clp;
+	int ret;
+
+	clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0);
+	if (clp == NULL)
+		return (-1);
+
+	ret = changelist_unshare(clp);
+	changelist_free(clp);
+
+	return (ret);
+}
+
+/*
+ * Remove the mountpoint associated with the current dataset, if necessary.
+ * We only remove the underlying directory if:
+ *
+ *	- The mountpoint is not 'none' or 'legacy'
+ *	- The mountpoint is non-empty
+ *	- The mountpoint is the default or inherited
+ *	- The 'zoned' property is set, or we're in a local zone
+ *
+ * Any other directories we leave alone.
+ */
+void
+remove_mountpoint(zfs_handle_t *zhp)
+{
+	char mountpoint[ZFS_MAXPROPLEN];
+	zfs_source_t source;
+
+	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
+	    &source))
+		return;
+
+	if (source == ZFS_SRC_DEFAULT ||
+	    source == ZFS_SRC_INHERITED) {
+		/*
+		 * Try to remove the directory, silently ignoring any errors.
+		 * The filesystem may have since been removed or moved around,
+		 * and this error isn't really useful to the administrator in
+		 * any way.
+		 */
+		(void) rmdir(mountpoint);
+	}
+}
+
+boolean_t
+zfs_is_shared_iscsi(zfs_handle_t *zhp)
+{
+	return (iscsitgt_zfs_is_shared != NULL &&
+	    iscsitgt_zfs_is_shared(zhp->zfs_name) != 0);
+}
+
+int
+zfs_share_iscsi(zfs_handle_t *zhp)
+{
+	char shareopts[ZFS_MAXPROPLEN];
+	const char *dataset = zhp->zfs_name;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+	/*
+	 * Return success if there are no share options.
+	 */
+	if (zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts,
+	    sizeof (shareopts), NULL, NULL, 0, B_FALSE) != 0 ||
+	    strcmp(shareopts, "off") == 0)
+		return (0);
+
+/* We don't support iSCSI on FreeBSD yet. */
+#ifdef TODO
+	if (iscsitgt_zfs_share == NULL || iscsitgt_zfs_share(dataset) != 0)
+		return (zfs_error_fmt(hdl, EZFS_SHAREISCSIFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot share '%s'"), dataset));
+#endif
+
+	return (0);
+}
+
+int
+zfs_unshare_iscsi(zfs_handle_t *zhp)
+{
+	const char *dataset = zfs_get_name(zhp);
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+/* We don't support iSCSI on FreeBSD yet. */
+#ifdef TODO
+	/*
+	 * Return if the volume is not shared
+	 */
+	if (!zfs_is_shared_iscsi(zhp))
+		return (0);
+
+	/*
+	 * If this fails with ENODEV it indicates that the zvol wasn't shared,
+	 * so we should return success in that case.
+	 */
+	if (iscsitgt_zfs_unshare == NULL ||
+	    (iscsitgt_zfs_unshare(dataset) != 0 && errno != ENODEV))
+		return (zfs_error_fmt(hdl, EZFS_UNSHAREISCSIFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot unshare '%s'"), dataset));
+#endif
+
+	return (0);
+}
+
+typedef struct mount_cbdata {
+	zfs_handle_t	**cb_datasets;
+	int 		cb_used;
+	int		cb_alloc;
+} mount_cbdata_t;
+
+static int
+mount_cb(zfs_handle_t *zhp, void *data)
+{
+	mount_cbdata_t *cbp = data;
+
+	if (!(zfs_get_type(zhp) & (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME))) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	if (cbp->cb_alloc == cbp->cb_used) {
+		void *ptr;
+
+		if ((ptr = zfs_realloc(zhp->zfs_hdl,
+		    cbp->cb_datasets, cbp->cb_alloc * sizeof (void *),
+		    cbp->cb_alloc * 2 * sizeof (void *))) == NULL)
+			return (-1);
+		cbp->cb_datasets = ptr;
+
+		cbp->cb_alloc *= 2;
+	}
+
+	cbp->cb_datasets[cbp->cb_used++] = zhp;
+
+	return (zfs_iter_children(zhp, mount_cb, cbp));
+}
+
+static int
+dataset_cmp(const void *a, const void *b)
+{
+	zfs_handle_t **za = (zfs_handle_t **)a;
+	zfs_handle_t **zb = (zfs_handle_t **)b;
+	char mounta[MAXPATHLEN];
+	char mountb[MAXPATHLEN];
+	boolean_t gota, gotb;
+
+	if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0)
+		verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
+		    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
+	if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0)
+		verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
+		    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
+
+	if (gota && gotb)
+		return (strcmp(mounta, mountb));
+
+	if (gota)
+		return (-1);
+	if (gotb)
+		return (1);
+
+	return (strcmp(zfs_get_name(a), zfs_get_name(b)));
+}
+
+/*
+ * Mount and share all datasets within the given pool.  This assumes that no
+ * datasets within the pool are currently mounted.  Because users can create
+ * complicated nested hierarchies of mountpoints, we first gather all the
+ * datasets and mountpoints within the pool, and sort them by mountpoint.  Once
+ * we have the list of all filesystems, we iterate over them in order and mount
+ * and/or share each one.
+ */
+#pragma weak zpool_mount_datasets = zpool_enable_datasets
+int
+zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
+{
+	mount_cbdata_t cb = { 0 };
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_handle_t *zfsp;
+	int i, ret = -1;
+
+	/*
+	 * Gather all datasets within the pool.
+	 */
+	if ((cb.cb_datasets = zfs_alloc(hdl, 4 * sizeof (void *))) == NULL)
+		return (-1);
+	cb.cb_alloc = 4;
+
+	if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_ANY)) == NULL)
+		goto out;
+
+	cb.cb_datasets[0] = zfsp;
+	cb.cb_used = 1;
+
+	if (zfs_iter_children(zfsp, mount_cb, &cb) != 0)
+		goto out;
+
+	/*
+	 * Sort the datasets by mountpoint.
+	 */
+	qsort(cb.cb_datasets, cb.cb_used, sizeof (void *), dataset_cmp);
+
+	/*
+	 * And mount all the datasets.
+	 */
+	ret = 0;
+	for (i = 0; i < cb.cb_used; i++) {
+		if (zfs_mount(cb.cb_datasets[i], mntopts, flags) != 0 ||
+		    zfs_share(cb.cb_datasets[i]) != 0)
+			ret = -1;
+	}
+
+out:
+	for (i = 0; i < cb.cb_used; i++)
+		zfs_close(cb.cb_datasets[i]);
+	free(cb.cb_datasets);
+
+	return (ret);
+}
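
A sketch of the pool-consumer side, e.g. right after a successful
import; NULL mount options and flags of 0 take the defaults, and the
warning text is illustrative:

	if (zpool_enable_datasets(zhp, NULL, 0) != 0)
		(void) fprintf(stderr,
		    "warning: some datasets were not mounted or shared\n");
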
+
+
+static int
+zvol_cb(const char *dataset, void *data)
+{
+	libzfs_handle_t *hdl = data;
+	zfs_handle_t *zhp;
+
+	/*
+	 * Ignore snapshots and ignore failures from non-existent datasets.
+	 */
+	if (strchr(dataset, '@') != NULL ||
+	    (zhp = zfs_open(hdl, dataset, ZFS_TYPE_VOLUME)) == NULL)
+		return (0);
+
+	(void) zfs_unshare_iscsi(zhp);
+
+	zfs_close(zhp);
+
+	return (0);
+}
+
+static int
+mountpoint_compare(const void *a, const void *b)
+{
+	const char *mounta = *((char **)a);
+	const char *mountb = *((char **)b);
+
+	return (strcmp(mountb, mounta));
+}
+
+/*
+ * Unshare and unmount all datasets within the given pool.  We don't want to
+ * rely on traversing the DSL to discover the filesystems within the pool,
+ * because this may be expensive (if not all of them are mounted), and can fail
+ * arbitrarily (on I/O error, for example).  Instead, we walk /etc/mnttab and
+ * gather all the filesystems that are currently mounted.
+ */
+#pragma weak zpool_unmount_datasets = zpool_disable_datasets
+int
+zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
+{
+	int used, alloc;
+	struct statfs *sfs;
+	size_t namelen;
+	char **mountpoints = NULL;
+	zfs_handle_t **datasets = NULL;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	int i, j, n;
+	int ret = -1;
+	int flags = (force ? MS_FORCE : 0);
+
+	/*
+	 * First unshare all zvols.
+	 */
+	if (zpool_iter_zvol(zhp, zvol_cb, hdl) != 0)
+		return (-1);
+
+	namelen = strlen(zhp->zpool_name);
+
+	used = alloc = 0;
+	if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) {
+		fprintf(stderr, "getmntinfo(): %s\n", strerror(errno));
+		return (-1);
+	}
+	for (j = 0; j < n; j++) {
+		/*
+		 * Ignore non-ZFS entries.
+		 */
+		if (strcmp(sfs[j].f_fstypename, MNTTYPE_ZFS) != 0)
+			continue;
+
+		/*
+		 * Ignore filesystems not within this pool.
+		 */
+		if (strncmp(sfs[j].f_mntfromname, zhp->zpool_name, namelen) != 0 ||
+		    (sfs[j].f_mntfromname[namelen] != '/' &&
+		    sfs[j].f_mntfromname[namelen] != '\0'))
+			continue;
+
+		/*
+		 * At this point we've found a filesystem within our pool.  Add
+		 * it to our growing list.
+		 */
+		if (used == alloc) {
+			if (alloc == 0) {
+				if ((mountpoints = zfs_alloc(hdl,
+				    8 * sizeof (void *))) == NULL)
+					goto out;
+
+				if ((datasets = zfs_alloc(hdl,
+				    8 * sizeof (void *))) == NULL)
+					goto out;
+
+				alloc = 8;
+			} else {
+				void *ptr;
+
+				if ((ptr = zfs_realloc(hdl, mountpoints,
+				    alloc * sizeof (void *),
+				    alloc * 2 * sizeof (void *))) == NULL)
+					goto out;
+				mountpoints = ptr;
+
+				if ((ptr = zfs_realloc(hdl, datasets,
+				    alloc * sizeof (void *),
+				    alloc * 2 * sizeof (void *))) == NULL)
+					goto out;
+				datasets = ptr;
+
+				alloc *= 2;
+			}
+		}
+
+		if ((mountpoints[used] = zfs_strdup(hdl,
+		    sfs[j].f_mntonname)) == NULL)
+			goto out;
+
+		/*
+		 * This is allowed to fail, in case there is some I/O error.  It
+		 * is only used to determine if we need to remove the underlying
+		 * mountpoint, so failure is not fatal.
+		 */
+		datasets[used] = make_dataset_handle(hdl, sfs[j].f_mntfromname);
+
+		used++;
+	}
+
+	/*
+	 * At this point, we have the entire list of filesystems, so sort it by
+	 * mountpoint.
+	 */
+	qsort(mountpoints, used, sizeof (char *), mountpoint_compare);
+
+	/*
+	 * Walk through and first unshare everything.
+	 */
+	for (i = 0; i < used; i++) {
+		if (is_shared(hdl, mountpoints[i]) &&
+		    unshare_one(hdl, mountpoints[i], mountpoints[i]) != 0)
+			goto out;
+	}
+
+	/*
+	 * Now unmount everything, removing the underlying directories as
+	 * appropriate.
+	 */
+	for (i = 0; i < used; i++) {
+		if (unmount_one(hdl, mountpoints[i], flags) != 0)
+			goto out;
+	}
+
+	for (i = 0; i < used; i++) {
+		if (datasets[i])
+			remove_mountpoint(datasets[i]);
+	}
+
+	ret = 0;
+out:
+	for (i = 0; i < used; i++) {
+		if (datasets[i])
+			zfs_close(datasets[i]);
+		free(mountpoints[i]);
+	}
+	free(datasets);
+	free(mountpoints);
+
+	return (ret);
+}
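
And the matching teardown sketch, e.g. before exporting or destroying
the pool; 'force' here stands for a boolean_t supplied by the caller:

	if (zpool_disable_datasets(zhp, force) != 0)
		return (1);	/* something stayed mounted or shared */
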
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
@@ -0,0 +1,171 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBFS_IMPL_H
+#define	_LIBFS_IMPL_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/dmu.h>
+#include <sys/fs/zfs.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_acl.h>
+#include <sys/nvpair.h>
+
+#include <libuutil.h>
+#include <libzfs.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+struct libzfs_handle {
+	int libzfs_error;
+	int libzfs_fd;
+	FILE *libzfs_mnttab;
+	FILE *libzfs_sharetab;
+	uu_avl_pool_t *libzfs_ns_avlpool;
+	uu_avl_t *libzfs_ns_avl;
+	uint64_t libzfs_ns_gen;
+	int libzfs_desc_active;
+	char libzfs_action[1024];
+	char libzfs_desc[1024];
+	int libzfs_printerr;
+};
+
+struct zfs_handle {
+	libzfs_handle_t *zfs_hdl;
+	char zfs_name[ZFS_MAXNAMELEN];
+	zfs_type_t zfs_type; /* type including snapshot */
+	zfs_type_t zfs_head_type; /* type excluding snapshot */
+	dmu_objset_stats_t zfs_dmustats;
+	nvlist_t *zfs_props;
+	nvlist_t *zfs_user_props;
+	boolean_t zfs_mntcheck;
+	char *zfs_mntopts;
+	char zfs_root[MAXPATHLEN];
+};
+
+/*
+ * This is different from checking zfs_type, because it will also catch
+ * snapshots of volumes.
+ */
+#define	ZFS_IS_VOLUME(zhp) ((zhp)->zfs_head_type == ZFS_TYPE_VOLUME)
+
+struct zpool_handle {
+	libzfs_handle_t *zpool_hdl;
+	char zpool_name[ZPOOL_MAXNAMELEN];
+	int zpool_state;
+	size_t zpool_config_size;
+	nvlist_t *zpool_config;
+	nvlist_t *zpool_old_config;
+	nvlist_t *zpool_props;
+};
+
+int zfs_error(libzfs_handle_t *, int, const char *);
+int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...);
+void zfs_error_aux(libzfs_handle_t *, const char *, ...);
+void *zfs_alloc(libzfs_handle_t *, size_t);
+void *zfs_realloc(libzfs_handle_t *, void *, size_t, size_t);
+char *zfs_strdup(libzfs_handle_t *, const char *);
+int no_memory(libzfs_handle_t *);
+
+int zfs_standard_error(libzfs_handle_t *, int, const char *);
+int zfs_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
+int zpool_standard_error(libzfs_handle_t *, int, const char *);
+int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
+
+int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***,
+    size_t *);
+
+int zfs_expand_proplist_common(libzfs_handle_t *, zfs_proplist_t **,
+    zfs_type_t);
+int zfs_get_proplist_common(libzfs_handle_t *, char *, zfs_proplist_t **,
+    zfs_type_t);
+zfs_prop_t zfs_prop_iter_common(zfs_prop_f, void *, zfs_type_t, boolean_t);
+zfs_prop_t zfs_name_to_prop_common(const char *, zfs_type_t);
+
+nvlist_t *zfs_validate_properties(libzfs_handle_t *, zfs_type_t, char *,
+	nvlist_t *, uint64_t, zfs_handle_t *zhp, const char *errbuf);
+
+typedef struct prop_changelist prop_changelist_t;
+
+int zcmd_alloc_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, size_t);
+int zcmd_write_src_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *, size_t *);
+int zcmd_expand_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *);
+int zcmd_read_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t **);
+void zcmd_free_nvlists(zfs_cmd_t *);
+
+int changelist_prefix(prop_changelist_t *);
+int changelist_postfix(prop_changelist_t *);
+void changelist_rename(prop_changelist_t *, const char *, const char *);
+void changelist_remove(zfs_handle_t *, prop_changelist_t *);
+void changelist_free(prop_changelist_t *);
+prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int);
+int changelist_unshare(prop_changelist_t *);
+int changelist_haszonedchild(prop_changelist_t *);
+
+void remove_mountpoint(zfs_handle_t *);
+
+zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
+
+int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **);
+
+int zvol_create_link(libzfs_handle_t *, const char *);
+int zvol_remove_link(libzfs_handle_t *, const char *);
+int zpool_iter_zvol(zpool_handle_t *, int (*)(const char *, void *), void *);
+
+void namespace_clear(libzfs_handle_t *);
+
+#ifdef	__FreeBSD__
+/*
+ * This is the FreeBSD version of ioctl().  Solaris' ioctl() updates
+ * zc_nvlist_dst_size even if an error is returned; on FreeBSD, if an
+ * error is returned, zc_nvlist_dst_size won't be updated.
+ */
+static __inline int
+zcmd_ioctl(int fd, unsigned long cmd, zfs_cmd_t *zc)
+{
+	size_t oldsize;
+	int ret;
+
+	oldsize = zc->zc_nvlist_dst_size;
+	ret = ioctl(fd, cmd, zc);
+	if (ret == 0 && oldsize < zc->zc_nvlist_dst_size) {
+		ret = -1;
+		errno = ENOMEM;
+	}
+
+	return (ret);
+}
+#define	ioctl(fd, cmd, zc)	zcmd_ioctl((fd), (cmd), (zc))
+#endif
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBFS_IMPL_H */
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c
@@ -0,0 +1,303 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * This file contains the functions which analyze the status of a pool.  This
+ * include both the status of an active pool, as well as the status exported
+ * pools.  Returns one of the ZPOOL_STATUS_* defines describing the status of
+ * the pool.  This status is independent (to a certain degree) from the state of
+ * the pool.  A pool's state descsribes only whether or not it is capable of
+ * providing the necessary fault tolerance for data.  The status describes the
+ * overall status of devices.  A pool that is online can still have a device
+ * that is experiencing errors.
+ *
+ * Only a subset of the possible faults can be detected using 'zpool status',
+ * and not all possible errors correspond to a FMA message ID.  The explanation
+ * is left up to the caller, depending on whether it is a live pool or an
+ * import.
+ */
+
+#include <libzfs.h>
+#include <string.h>
+#include <unistd.h>
+#include "libzfs_impl.h"
+
+/*
+ * Message ID table.  This must be kept in sync with the ZPOOL_STATUS_* defines
+ * in libzfs.h.  Note that there are some status results which go past the end
+ * of this table, and hence have no associated message ID.
+ */
+static char *zfs_msgid_table[] = {
+	"ZFS-8000-14",
+	"ZFS-8000-2Q",
+	"ZFS-8000-3C",
+	"ZFS-8000-4J",
+	"ZFS-8000-5E",
+	"ZFS-8000-6X",
+	"ZFS-8000-72",
+	"ZFS-8000-8A",
+	"ZFS-8000-9P",
+	"ZFS-8000-A5",
+	"ZFS-8000-EY"
+};
+
+/*
+ * If the pool is active, a certain class of static errors is overridden by the
+ * faults as analyzed by FMA.  These faults have separate knowledge articles,
+ * and the article referred to by 'zpool status' must match that indicated by
+ * the syslog error message.  We override missing data as well as corrupt pool.
+ */
+static char *zfs_msgid_table_active[] = {
+	"ZFS-8000-14",
+	"ZFS-8000-D3",		/* overridden */
+	"ZFS-8000-D3",		/* overridden */
+	"ZFS-8000-4J",
+	"ZFS-8000-5E",
+	"ZFS-8000-6X",
+	"ZFS-8000-CS",		/* overridden */
+	"ZFS-8000-8A",
+	"ZFS-8000-9P",
+	"ZFS-8000-CS",		/* overridden */
+};
+
+#define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
+
+/* ARGSUSED */
+static int
+vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	return (state == VDEV_STATE_CANT_OPEN &&
+	    aux == VDEV_AUX_OPEN_FAILED);
+}
+
+/* ARGSUSED */
+static int
+vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	return (errs != 0);
+}
+
+/* ARGSUSED */
+static int
+vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	return (state == VDEV_STATE_CANT_OPEN);
+}
+
+/* ARGSUSED */
+static int
+vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	return (state == VDEV_STATE_OFFLINE);
+}
+
+/*
+ * Detect if any leaf devices have seen errors or could not be opened.
+ */
+static boolean_t
+find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
+{
+	nvlist_t **child;
+	vdev_stat_t *vs;
+	uint_t c, children;
+	char *type;
+
+	/*
+	 * Ignore problems within a 'replacing' vdev, since we're presumably in
+	 * the process of repairing any such errors, and don't want to call them
+	 * out again.  We'll pick up the fact that a resilver is happening
+	 * later.
+	 */
+	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
+	if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
+		return (B_FALSE);
+
+	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
+	    &children) == 0) {
+		for (c = 0; c < children; c++)
+			if (find_vdev_problem(child[c], func))
+				return (B_TRUE);
+	} else {
+		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
+		    (uint64_t **)&vs, &c) == 0);
+
+		if (func(vs->vs_state, vs->vs_aux,
+		    vs->vs_read_errors +
+		    vs->vs_write_errors +
+		    vs->vs_checksum_errors))
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Active pool health status.
+ *
+ * To determine the status for a pool, we make several passes over the config,
+ * picking the most egregious error we find.  In order of importance, we do the
+ * following:
+ *
+ *	- Check for a complete and valid configuration
+ *	- Look for any missing devices in a non-replicated config
+ *	- Check for any data errors
+ *	- Check for any missing devices in a replicated config
+ *	- Look for any devices showing errors
+ *	- Check for any resilvering devices
+ *
+ * There can obviously be multiple errors within a single pool, so this routine
+ * only picks the most damaging of all the current errors to report.
+ */
+static zpool_status_t
+check_status(nvlist_t *config, boolean_t isimport)
+{
+	nvlist_t *nvroot;
+	vdev_stat_t *vs;
+	uint_t vsc;
+	uint64_t nerr;
+	uint64_t version;
+	uint64_t stateval;
+	uint64_t hostid = 0;
+
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
+	    &version) == 0);
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&vs, &vsc) == 0);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    &stateval) == 0);
+	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
+
+	/*
+	 * Pool last accessed by another system.
+	 */
+	if (hostid != 0 && (unsigned long)hostid != gethostid() &&
+	    stateval == POOL_STATE_ACTIVE)
+		return (ZPOOL_STATUS_HOSTID_MISMATCH);
+
+	/*
+	 * Newer on-disk version.
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    vs->vs_aux == VDEV_AUX_VERSION_NEWER)
+		return (ZPOOL_STATUS_VERSION_NEWER);
+
+	/*
+	 * Check that the config is complete.
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
+		return (ZPOOL_STATUS_BAD_GUID_SUM);
+
+	/*
+	 * Missing devices in non-replicated config.
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    find_vdev_problem(nvroot, vdev_missing))
+		return (ZPOOL_STATUS_MISSING_DEV_NR);
+
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    find_vdev_problem(nvroot, vdev_broken))
+		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
+
+	/*
+	 * Corrupted pool metadata
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
+		return (ZPOOL_STATUS_CORRUPT_POOL);
+
+	/*
+	 * Persistent data errors.
+	 */
+	if (!isimport) {
+		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
+		    &nerr) == 0 && nerr != 0)
+			return (ZPOOL_STATUS_CORRUPT_DATA);
+	}
+
+	/*
+	 * Missing devices in a replicated config.
+	 */
+	if (find_vdev_problem(nvroot, vdev_missing))
+		return (ZPOOL_STATUS_MISSING_DEV_R);
+	if (find_vdev_problem(nvroot, vdev_broken))
+		return (ZPOOL_STATUS_CORRUPT_LABEL_R);
+
+	/*
+	 * Devices with errors
+	 */
+	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
+		return (ZPOOL_STATUS_FAILING_DEV);
+
+	/*
+	 * Offlined devices
+	 */
+	if (find_vdev_problem(nvroot, vdev_offlined))
+		return (ZPOOL_STATUS_OFFLINE_DEV);
+
+	/*
+	 * Currently resilvering
+	 */
+	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
+		return (ZPOOL_STATUS_RESILVERING);
+
+	/*
+	 * Outdated, but usable, version
+	 */
+	if (version < ZFS_VERSION)
+		return (ZPOOL_STATUS_VERSION_OLDER);
+
+	return (ZPOOL_STATUS_OK);
+}
+
+zpool_status_t
+zpool_get_status(zpool_handle_t *zhp, char **msgid)
+{
+	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE);
+
+	if (ret >= NMSGID)
+		*msgid = NULL;
+	else
+		*msgid = zfs_msgid_table_active[ret];
+
+	return (ret);
+}
+
+zpool_status_t
+zpool_import_status(nvlist_t *config, char **msgid)
+{
+	zpool_status_t ret = check_status(config, B_TRUE);
+
+	if (ret >= NMSGID)
+		*msgid = NULL;
+	else
+		*msgid = zfs_msgid_table[ret];
+
+	return (ret);
+}
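
A sketch of how a status consumer might use the two functions above;
the knowledge-article URL format is the one printed by zpool(8) and is
shown here only for illustration:

	char *msgid;
	zpool_status_t status;

	status = zpool_get_status(zhp, &msgid);
	if (status == ZPOOL_STATUS_OK)
		(void) printf("pool is healthy\n");
	else if (msgid != NULL)
		(void) printf("see: http://www.sun.com/msg/%s\n", msgid);
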
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c
@@ -0,0 +1,360 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * The pool configuration repository is stored in /etc/zfs/zpool.cache as a
+ * single packed nvlist.  While it would be nice to just read in this
+ * file from userland, this wouldn't work from a local zone.  So we have to have
+ * a zpool ioctl to return the complete configuration for all pools.  In the
+ * global zone, this will be identical to reading the file and unpacking it in
+ * userland.
+ */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <libuutil.h>
+
+#include "libzfs_impl.h"
+
+typedef struct config_node {
+	char		*cn_name;
+	nvlist_t	*cn_config;
+	uu_avl_node_t	cn_avl;
+} config_node_t;
+
+/* ARGSUSED */
+static int
+config_node_compare(const void *a, const void *b, void *unused)
+{
+	int ret;
+
+	const config_node_t *ca = (config_node_t *)a;
+	const config_node_t *cb = (config_node_t *)b;
+
+	ret = strcmp(ca->cn_name, cb->cn_name);
+
+	if (ret < 0)
+		return (-1);
+	else if (ret > 0)
+		return (1);
+	else
+		return (0);
+}
+
+void
+namespace_clear(libzfs_handle_t *hdl)
+{
+	if (hdl->libzfs_ns_avl) {
+		config_node_t *cn;
+		void *cookie = NULL;
+
+		while ((cn = uu_avl_teardown(hdl->libzfs_ns_avl,
+		    &cookie)) != NULL) {
+			nvlist_free(cn->cn_config);
+			free(cn->cn_name);
+			free(cn);
+		}
+
+		uu_avl_destroy(hdl->libzfs_ns_avl);
+		hdl->libzfs_ns_avl = NULL;
+	}
+
+	if (hdl->libzfs_ns_avlpool) {
+		uu_avl_pool_destroy(hdl->libzfs_ns_avlpool);
+		hdl->libzfs_ns_avlpool = NULL;
+	}
+}
+
+/*
+ * Loads the pool namespace, or re-loads it if the cache has changed.
+ */
+static int
+namespace_reload(libzfs_handle_t *hdl)
+{
+	nvlist_t *config;
+	config_node_t *cn;
+	nvpair_t *elem;
+	zfs_cmd_t zc = { 0 };
+	void *cookie;
+
+	if (hdl->libzfs_ns_gen == 0) {
+		/*
+		 * This is the first time we've accessed the configuration
+		 * cache.  Initialize the AVL tree and then fall through to the
+		 * common code.
+		 */
+		if ((hdl->libzfs_ns_avlpool = uu_avl_pool_create("config_pool",
+		    sizeof (config_node_t),
+		    offsetof(config_node_t, cn_avl),
+		    config_node_compare, UU_DEFAULT)) == NULL)
+			return (no_memory(hdl));
+
+		if ((hdl->libzfs_ns_avl = uu_avl_create(hdl->libzfs_ns_avlpool,
+		    NULL, UU_DEFAULT)) == NULL)
+			return (no_memory(hdl));
+	}
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
+		return (-1);
+
+	for (;;) {
+		zc.zc_cookie = hdl->libzfs_ns_gen;
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CONFIGS, &zc) != 0) {
+			switch (errno) {
+			case EEXIST:
+				/*
+				 * The namespace hasn't changed.
+				 */
+				zcmd_free_nvlists(&zc);
+				return (0);
+
+			case ENOMEM:
+				if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+					zcmd_free_nvlists(&zc);
+					return (-1);
+				}
+				break;
+
+			default:
+				zcmd_free_nvlists(&zc);
+				return (zfs_standard_error(hdl, errno,
+				    dgettext(TEXT_DOMAIN, "failed to read "
+				    "pool configuration")));
+			}
+		} else {
+			hdl->libzfs_ns_gen = zc.zc_cookie;
+			break;
+		}
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (-1);
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	/*
+	 * Clear out any existing configuration information.
+	 */
+	cookie = NULL;
+	while ((cn = uu_avl_teardown(hdl->libzfs_ns_avl, &cookie)) != NULL) {
+		nvlist_free(cn->cn_config);
+		free(cn->cn_name);
+		free(cn);
+	}
+
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(config, elem)) != NULL) {
+		nvlist_t *child;
+		uu_avl_index_t where;
+
+		if ((cn = zfs_alloc(hdl, sizeof (config_node_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+
+		if ((cn->cn_name = zfs_strdup(hdl,
+		    nvpair_name(elem))) == NULL) {
+			free(cn);
+			nvlist_free(config);
+			return (-1);
+		}
+
+		verify(nvpair_value_nvlist(elem, &child) == 0);
+		if (nvlist_dup(child, &cn->cn_config, 0) != 0) {
+			free(cn->cn_name);
+			free(cn);
+			nvlist_free(config);
+			return (no_memory(hdl));
+		}
+		verify(uu_avl_find(hdl->libzfs_ns_avl, cn, NULL, &where)
+		    == NULL);
+
+		uu_avl_insert(hdl->libzfs_ns_avl, cn, where);
+	}
+
+	nvlist_free(config);
+	return (0);
+}
+
+/*
+ * Retrieve the configuration for the given pool.  The configuration is an nvlist
+ * describing the vdevs, as well as the statistics associated with each one.
+ */
+nvlist_t *
+zpool_get_config(zpool_handle_t *zhp, nvlist_t **oldconfig)
+{
+	if (oldconfig)
+		*oldconfig = zhp->zpool_old_config;
+	return (zhp->zpool_config);
+}
+
+/*
+ * Refresh the vdev statistics associated with the given pool.  This is used in
+ * iostat to show configuration changes and determine the delta from the last
+ * time the function was called.  This function can fail, in case the pool has
+ * been destroyed.
+ */
+int
+zpool_refresh_stats(zpool_handle_t *zhp, boolean_t *missing)
+{
+	zfs_cmd_t zc = { 0 };
+	int error;
+	nvlist_t *config;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	*missing = B_FALSE;
+	(void) strcpy(zc.zc_name, zhp->zpool_name);
+
+	if (zhp->zpool_config_size == 0)
+		zhp->zpool_config_size = 1 << 16;
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size) != 0)
+		return (-1);
+
+	for (;;) {
+		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_STATS,
+		    &zc) == 0) {
+			/*
+			 * The real error is returned in the zc_cookie field.
+			 */
+			error = zc.zc_cookie;
+			break;
+		}
+
+		if (errno == ENOMEM) {
+			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+				zcmd_free_nvlists(&zc);
+				return (-1);
+			}
+		} else {
+			zcmd_free_nvlists(&zc);
+			if (errno == ENOENT || errno == EINVAL)
+				*missing = B_TRUE;
+			zhp->zpool_state = POOL_STATE_UNAVAIL;
+			return (0);
+		}
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (-1);
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	zhp->zpool_config_size = zc.zc_nvlist_dst_size;
+
+	if (zhp->zpool_config != NULL) {
+		uint64_t oldtxg, newtxg;
+
+		verify(nvlist_lookup_uint64(zhp->zpool_config,
+		    ZPOOL_CONFIG_POOL_TXG, &oldtxg) == 0);
+		verify(nvlist_lookup_uint64(config,
+		    ZPOOL_CONFIG_POOL_TXG, &newtxg) == 0);
+
+		if (zhp->zpool_old_config != NULL)
+			nvlist_free(zhp->zpool_old_config);
+
+		if (oldtxg != newtxg) {
+			nvlist_free(zhp->zpool_config);
+			zhp->zpool_old_config = NULL;
+		} else {
+			zhp->zpool_old_config = zhp->zpool_config;
+		}
+	}
+
+	zhp->zpool_config = config;
+	if (error)
+		zhp->zpool_state = POOL_STATE_UNAVAIL;
+	else
+		zhp->zpool_state = POOL_STATE_ACTIVE;
+
+	return (0);
+}
+
+/*
+ * Iterate over all pools in the system.
+ */
+int
+zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data)
+{
+	config_node_t *cn;
+	zpool_handle_t *zhp;
+	int ret;
+
+	if (namespace_reload(hdl) != 0)
+		return (-1);
+
+	for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
+	    cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
+
+		if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0)
+			return (-1);
+
+		if (zhp == NULL)
+			continue;
+
+		if ((ret = func(zhp, data)) != 0)
+			return (ret);
+	}
+
+	return (0);
+}
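
A callback sketch for zpool_iter(); as in the zpool(8) consumers, the
callback is responsible for closing each handle it receives:

	static int
	count_pools_cb(zpool_handle_t *zhp, void *data)
	{
		int *count = data;

		(*count)++;
		zpool_close(zhp);	/* callback owns the handle */
		return (0);
	}

	/* ... then, from the caller: */
	int npools = 0;
	(void) zpool_iter(hdl, count_pools_cb, &npools);
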
+
+/*
+ * Iterate over root datasets, calling the given function for each.  The zfs
+ * handle passed each time must be explicitly closed by the callback.
+ */
+int
+zfs_iter_root(libzfs_handle_t *hdl, zfs_iter_f func, void *data)
+{
+	config_node_t *cn;
+	zfs_handle_t *zhp;
+	int ret;
+
+	if (namespace_reload(hdl) != 0)
+		return (-1);
+
+	for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
+	    cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
+
+		if ((zhp = make_dataset_handle(hdl, cn->cn_name)) == NULL)
+			continue;
+
+		if ((ret = func(zhp, data)) != 0)
+			return (ret);
+	}
+
+	return (0);
+}
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
@@ -0,0 +1,1023 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Pool import support functions.
+ *
+ * To import a pool, we rely on reading the configuration information from the
+ * ZFS label of each device.  If we successfully read the label, then we
+ * organize the configuration information in the following hierarchy:
+ *
+ * 	pool guid -> toplevel vdev guid -> label txg
+ *
+ * Duplicate entries matching this same tuple will be discarded.  Once we have
+ * examined every device, we pick the best label txg config for each toplevel
+ * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
+ * update any paths that have changed.  Finally, we attempt to import the pool
+ * using our derived config, and record the results.
+ */
+
+#include <devid.h>
+#include <dirent.h>
+#include <errno.h>
+#include <libintl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <libgeom.h>
+
+#include <sys/vdev_impl.h>
+
+#include "libzfs.h"
+#include "libzfs_impl.h"
+
+/*
+ * Intermediate structures used to gather configuration information.
+ */
+typedef struct config_entry {
+	uint64_t		ce_txg;
+	nvlist_t		*ce_config;
+	struct config_entry	*ce_next;
+} config_entry_t;
+
+typedef struct vdev_entry {
+	uint64_t		ve_guid;
+	config_entry_t		*ve_configs;
+	struct vdev_entry	*ve_next;
+} vdev_entry_t;
+
+typedef struct pool_entry {
+	uint64_t		pe_guid;
+	vdev_entry_t		*pe_vdevs;
+	struct pool_entry	*pe_next;
+} pool_entry_t;
+
+typedef struct name_entry {
+	char			*ne_name;
+	uint64_t		ne_guid;
+	struct name_entry	*ne_next;
+} name_entry_t;
+
+typedef struct pool_list {
+	pool_entry_t		*pools;
+	name_entry_t		*names;
+} pool_list_t;
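+
+/*
+ * Editorial note: these intermediate lists mirror the
+ * "pool guid -> toplevel vdev guid -> label txg" hierarchy described above.
+ * With hypothetical guids and txgs, a pool whose first toplevel vdev was seen
+ * with two different label txgs would be gathered as:
+ *
+ *	pool_entry (pe_guid = pool guid)
+ *	    vdev_entry (ve_guid = toplevel vdev 1)
+ *		config_entry (ce_txg = 10)
+ *		config_entry (ce_txg = 12)
+ *	    vdev_entry (ve_guid = toplevel vdev 2)
+ *		config_entry (ce_txg = 12)
+ *
+ * The name_entry list is a flat vdev guid -> path mapping consumed later by
+ * fix_paths().
+ */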
+
+static char *
+get_devid(const char *path)
+{
+	int fd;
+	ddi_devid_t devid;
+	char *minor, *ret;
+
+	if ((fd = open(path, O_RDONLY)) < 0)
+		return (NULL);
+
+	minor = NULL;
+	ret = NULL;
+	if (devid_get(fd, &devid) == 0) {
+		if (devid_get_minor_name(fd, &minor) == 0)
+			ret = devid_str_encode(devid, minor);
+		if (minor != NULL)
+			devid_str_free(minor);
+		devid_free(devid);
+	}
+	(void) close(fd);
+
+	return (ret);
+}
+
+/*
+ * Go through and fix up any path and/or devid information for the given vdev
+ * configuration.
+ */
+static int
+fix_paths(nvlist_t *nv, name_entry_t *names)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	uint64_t guid;
+	name_entry_t *ne, *best;
+	char *path, *devid;
+	int matched;
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			if (fix_paths(child[c], names) != 0)
+				return (-1);
+		return (0);
+	}
+
+	/*
+	 * This is a leaf (file or disk) vdev.  In either case, go through
+	 * the name list and see if we find a matching guid.  If so, replace
+	 * the path and see if we can calculate a new devid.
+	 *
+	 * There may be multiple names associated with a particular guid, in
+	 * which case we have overlapping slices or multiple paths to the same
+	 * disk.  If this is the case, then we want to pick the path that is
+	 * the most similar to the original, where "most similar" is the number
+	 * of matching characters starting from the end of the path.  This will
+	 * preserve slice numbers even if the disks have been reorganized, and
+	 * will also catch preferred disk names if multiple paths exist.
+	 */
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
+	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
+		path = NULL;
+
+	matched = 0;
+	best = NULL;
+	for (ne = names; ne != NULL; ne = ne->ne_next) {
+		if (ne->ne_guid == guid) {
+			const char *src, *dst;
+			int count;
+
+			if (path == NULL) {
+				best = ne;
+				break;
+			}
+
+			src = ne->ne_name + strlen(ne->ne_name) - 1;
+			dst = path + strlen(path) - 1;
+			for (count = 0; src >= ne->ne_name && dst >= path;
+			    src--, dst--, count++)
+				if (*src != *dst)
+					break;
+
+			/*
+			 * At this point, 'count' is the number of characters
+			 * matched from the end.
+			 */
+			if (count > matched || best == NULL) {
+				best = ne;
+				matched = count;
+			}
+		}
+	}
+
+	if (best == NULL)
+		return (0);
+
+	if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
+		return (-1);
+
+	if ((devid = get_devid(best->ne_name)) == NULL) {
+		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
+	} else {
+		if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0)
+			return (-1);
+		devid_str_free(devid);
+	}
+
+	return (0);
+}
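+
+/*
+ * Editorial example of the suffix-matching heuristic above, using
+ * hypothetical device names: if the label recorded "/dev/da1s1d" for a guid
+ * that is now visible as both "/dev/da3s1d" and "/dev/da3s1e", then
+ * "/dev/da3s1d" matches three trailing characters ("s1d") while
+ * "/dev/da3s1e" matches none, so the slice-preserving name is chosen.
+ */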
+
+/*
+ * Add the given configuration to the list of known devices.
+ */
+static int
+add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
+    nvlist_t *config)
+{
+	uint64_t pool_guid, vdev_guid, top_guid, txg, state;
+	pool_entry_t *pe;
+	vdev_entry_t *ve;
+	config_entry_t *ce;
+	name_entry_t *ne;
+
+	/*
+	 * If this is a hot spare not currently in use, add it to the list of
+	 * names to translate, but don't do anything else.
+	 */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    &state) == 0 && state == POOL_STATE_SPARE &&
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
+		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+			return (-1);
+
+		if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+			free(ne);
+			return (-1);
+		}
+		ne->ne_guid = vdev_guid;
+		ne->ne_next = pl->names;
+		pl->names = ne;
+		return (0);
+	}
+
+	/*
+	 * If we have a valid config but cannot read any of these fields, then
+	 * it means we have a half-initialized label.  In vdev_label_init()
+	 * we write a label with txg == 0 so that we can identify the device
+	 * in case the user refers to the same disk later on.  If we fail to
+	 * create the pool, we'll be left with a label in this state
+	 * which should not be considered part of a valid pool.
+	 */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+	    &pool_guid) != 0 ||
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
+	    &vdev_guid) != 0 ||
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
+	    &top_guid) != 0 ||
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
+	    &txg) != 0 || txg == 0) {
+		nvlist_free(config);
+		return (0);
+	}
+
+	/*
+	 * First, see if we know about this pool.  If not, then add it to the
+	 * list of known pools.
+	 */
+	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
+		if (pe->pe_guid == pool_guid)
+			break;
+	}
+
+	if (pe == NULL) {
+		if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+		pe->pe_guid = pool_guid;
+		pe->pe_next = pl->pools;
+		pl->pools = pe;
+	}
+
+	/*
+	 * Second, see if we know about this toplevel vdev.  Add it if it's
+	 * missing.
+	 */
+	for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
+		if (ve->ve_guid == top_guid)
+			break;
+	}
+
+	if (ve == NULL) {
+		if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+		ve->ve_guid = top_guid;
+		ve->ve_next = pe->pe_vdevs;
+		pe->pe_vdevs = ve;
+	}
+
+	/*
+	 * Third, see if we have a config with a matching transaction group.  If
+	 * so, then we do nothing.  Otherwise, add it to the list of known
+	 * configs.
+	 */
+	for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
+		if (ce->ce_txg == txg)
+			break;
+	}
+
+	if (ce == NULL) {
+		if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+		ce->ce_txg = txg;
+		ce->ce_config = config;
+		ce->ce_next = ve->ve_configs;
+		ve->ve_configs = ce;
+	} else {
+		nvlist_free(config);
+	}
+
+	/*
+	 * At this point we've successfully added our config to the list of
+	 * known configs.  The last thing to do is add the vdev guid -> path
+	 * mappings so that we can fix up the configuration as necessary before
+	 * doing the import.
+	 */
+	if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+		return (-1);
+
+	if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+		free(ne);
+		return (-1);
+	}
+
+	ne->ne_guid = vdev_guid;
+	ne->ne_next = pl->names;
+	pl->names = ne;
+
+	return (0);
+}
+
+/*
+ * Determine whether there is an active pool with the given name whose GUID
+ * matches the given GUID, returning the result in 'isactive'.
+ */
+static int
+pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
+    boolean_t *isactive)
+{
+	zpool_handle_t *zhp;
+	uint64_t theguid;
+
+	if (zpool_open_silent(hdl, name, &zhp) != 0)
+		return (-1);
+
+	if (zhp == NULL) {
+		*isactive = B_FALSE;
+		return (0);
+	}
+
+	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
+	    &theguid) == 0);
+
+	zpool_close(zhp);
+
+	*isactive = (theguid == guid);
+	return (0);
+}
+
+/*
+ * Convert our list of pools into the definitive set of configurations.  We
+ * start by picking the best config for each toplevel vdev.  Once that's done,
+ * we assemble the toplevel vdevs into a full config for the pool.  We make a
+ * pass to fix up any incorrect paths, and then add it to the main list to
+ * return to the user.
+ */
+static nvlist_t *
+get_configs(libzfs_handle_t *hdl, pool_list_t *pl)
+{
+	pool_entry_t *pe;
+	vdev_entry_t *ve;
+	config_entry_t *ce;
+	nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
+	nvlist_t **spares;
+	uint_t i, nspares;
+	boolean_t config_seen;
+	uint64_t best_txg;
+	char *name, *hostname;
+	zfs_cmd_t zc = { 0 };
+	uint64_t version, guid;
+	size_t len;
+	int err;
+	uint_t children = 0;
+	nvlist_t **child = NULL;
+	uint_t c;
+	boolean_t isactive;
+	uint64_t hostid;
+
+	if (nvlist_alloc(&ret, 0, 0) != 0)
+		goto nomem;
+
+	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
+		uint64_t id;
+
+		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
+			goto nomem;
+		config_seen = B_FALSE;
+
+		/*
+		 * Iterate over all toplevel vdevs.  Grab the pool configuration
+		 * from the first one we find, and then go through the rest and
+		 * add them as necessary to the 'vdevs' member of the config.
+		 */
+		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
+
+			/*
+			 * Determine the best configuration for this vdev by
+			 * selecting the config with the latest transaction
+			 * group.
+			 */
+			best_txg = 0;
+			for (ce = ve->ve_configs; ce != NULL;
+			    ce = ce->ce_next) {
+
+				if (ce->ce_txg > best_txg) {
+					tmp = ce->ce_config;
+					best_txg = ce->ce_txg;
+				}
+			}
+
+			if (!config_seen) {
+				/*
+				 * Copy the relevant pieces of data to the pool
+				 * configuration:
+				 *
+				 *	version
+				 * 	pool guid
+				 * 	name
+				 * 	pool state
+				 *	hostid (if available)
+				 *	hostname (if available)
+				 */
+				uint64_t state;
+
+				verify(nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_VERSION, &version) == 0);
+				if (nvlist_add_uint64(config,
+				    ZPOOL_CONFIG_VERSION, version) != 0)
+					goto nomem;
+				verify(nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
+				if (nvlist_add_uint64(config,
+				    ZPOOL_CONFIG_POOL_GUID, guid) != 0)
+					goto nomem;
+				verify(nvlist_lookup_string(tmp,
+				    ZPOOL_CONFIG_POOL_NAME, &name) == 0);
+				if (nvlist_add_string(config,
+				    ZPOOL_CONFIG_POOL_NAME, name) != 0)
+					goto nomem;
+				verify(nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_POOL_STATE, &state) == 0);
+				if (nvlist_add_uint64(config,
+				    ZPOOL_CONFIG_POOL_STATE, state) != 0)
+					goto nomem;
+				hostid = 0;
+				if (nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
+					if (nvlist_add_uint64(config,
+					    ZPOOL_CONFIG_HOSTID, hostid) != 0)
+						goto nomem;
+					verify(nvlist_lookup_string(tmp,
+					    ZPOOL_CONFIG_HOSTNAME,
+					    &hostname) == 0);
+					if (nvlist_add_string(config,
+					    ZPOOL_CONFIG_HOSTNAME,
+					    hostname) != 0)
+						goto nomem;
+				}
+
+				config_seen = B_TRUE;
+			}
+
+			/*
+			 * Add this top-level vdev to the child array.
+			 */
+			verify(nvlist_lookup_nvlist(tmp,
+			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
+			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
+			    &id) == 0);
+			if (id >= children) {
+				nvlist_t **newchild;
+
+				newchild = zfs_alloc(hdl, (id + 1) *
+				    sizeof (nvlist_t *));
+				if (newchild == NULL)
+					goto nomem;
+
+				for (c = 0; c < children; c++)
+					newchild[c] = child[c];
+
+				free(child);
+				child = newchild;
+				children = id + 1;
+			}
+			if (nvlist_dup(nvtop, &child[id], 0) != 0)
+				goto nomem;
+
+		}
+
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+		    &guid) == 0);
+
+		/*
+		 * Look for any missing top-level vdevs.  If any are found,
+		 * create a faked-up 'missing' vdev as a placeholder.  We cannot
+		 * simply compress the child array, because the kernel performs
+		 * certain checks to make sure the vdev IDs match their location
+		 * in the configuration.
+		 */
+		for (c = 0; c < children; c++)
+			if (child[c] == NULL) {
+				nvlist_t *missing;
+				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
+				    0) != 0)
+					goto nomem;
+				if (nvlist_add_string(missing,
+				    ZPOOL_CONFIG_TYPE,
+				    VDEV_TYPE_MISSING) != 0 ||
+				    nvlist_add_uint64(missing,
+				    ZPOOL_CONFIG_ID, c) != 0 ||
+				    nvlist_add_uint64(missing,
+				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
+					nvlist_free(missing);
+					goto nomem;
+				}
+				child[c] = missing;
+			}
+
+		/*
+		 * Put all of this pool's top-level vdevs into a root vdev.
+		 */
+		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
+			goto nomem;
+		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
+		    VDEV_TYPE_ROOT) != 0 ||
+		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
+		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
+		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+		    child, children) != 0) {
+			nvlist_free(nvroot);
+			goto nomem;
+		}
+
+		for (c = 0; c < children; c++)
+			nvlist_free(child[c]);
+		free(child);
+		children = 0;
+		child = NULL;
+
+		/*
+		 * Go through and fix up any paths and/or devids based on our
+		 * known list of vdev GUID -> path mappings.
+		 */
+		if (fix_paths(nvroot, pl->names) != 0) {
+			nvlist_free(nvroot);
+			goto nomem;
+		}
+
+		/*
+		 * Add the root vdev to this pool's configuration.
+		 */
+		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+		    nvroot) != 0) {
+			nvlist_free(nvroot);
+			goto nomem;
+		}
+		nvlist_free(nvroot);
+
+		/*
+		 * Determine if this pool is currently active, in which case we
+		 * can't actually import it.
+		 */
+		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+		    &name) == 0);
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+		    &guid) == 0);
+
+		if (pool_active(hdl, name, guid, &isactive) != 0)
+			goto error;
+
+		if (isactive) {
+			nvlist_free(config);
+			config = NULL;
+			continue;
+		}
+
+		/*
+		 * Try to do the import in order to get vdev state.
+		 */
+		if (zcmd_write_src_nvlist(hdl, &zc, config, &len) != 0)
+			goto error;
+
+		nvlist_free(config);
+		config = NULL;
+
+		if (zcmd_alloc_dst_nvlist(hdl, &zc, len * 2) != 0) {
+			zcmd_free_nvlists(&zc);
+			goto error;
+		}
+
+		while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
+		    &zc)) != 0 && errno == ENOMEM) {
+			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+				zcmd_free_nvlists(&zc);
+				goto error;
+			}
+		}
+
+		if (err) {
+			(void) zpool_standard_error(hdl, errno,
+			    dgettext(TEXT_DOMAIN, "cannot discover pools"));
+			zcmd_free_nvlists(&zc);
+			goto error;
+		}
+
+		if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) {
+			zcmd_free_nvlists(&zc);
+			goto error;
+		}
+
+		zcmd_free_nvlists(&zc);
+
+		/*
+		 * Go through and update the paths for spares, now that we have
+		 * them.
+		 */
+		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+		    &nvroot) == 0);
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    &spares, &nspares) == 0) {
+			for (i = 0; i < nspares; i++) {
+				if (fix_paths(spares[i], pl->names) != 0)
+					goto nomem;
+			}
+		}
+
+		/*
+		 * Restore the original information read from the actual label.
+		 */
+		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
+		    DATA_TYPE_UINT64);
+		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
+		    DATA_TYPE_STRING);
+		if (hostid != 0) {
+			verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
+			    hostid) == 0);
+			verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
+			    hostname) == 0);
+		}
+
+		/*
+		 * Add this pool to the list of configs.
+		 */
+		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+		    &name) == 0);
+		if (nvlist_add_nvlist(ret, name, config) != 0)
+			goto nomem;
+
+		nvlist_free(config);
+		config = NULL;
+	}
+
+	return (ret);
+
+nomem:
+	(void) no_memory(hdl);
+error:
+	nvlist_free(config);
+	nvlist_free(ret);
+	for (c = 0; c < children; c++)
+		nvlist_free(child[c]);
+	free(child);
+
+	return (NULL);
+}
+
+/*
+ * Return the offset of the given label.
+ */
+static uint64_t
+label_offset(size_t size, int l)
+{
+	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
+	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
+}
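+
+/*
+ * Editorial note: with VDEV_LABELS == 4, the expression above places labels
+ * 0 and 1 at the front of the device and labels 2 and 3 at the end:
+ *
+ *	label_offset(size, 0) == 0
+ *	label_offset(size, 1) == sizeof (vdev_label_t)
+ *	label_offset(size, 2) == size - 2 * sizeof (vdev_label_t)
+ *	label_offset(size, 3) == size - sizeof (vdev_label_t)
+ */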
+
+/*
+ * Given a file descriptor, read the label information and return an nvlist
+ * describing the configuration, if there is one.
+ */
+int
+zpool_read_label(int fd, nvlist_t **config)
+{
+	struct stat64 statbuf;
+	int l;
+	vdev_label_t *label;
+	uint64_t state, txg;
+
+	*config = NULL;
+
+	if (fstat64(fd, &statbuf) == -1)
+		return (0);
+
+	if ((label = malloc(sizeof (vdev_label_t))) == NULL)
+		return (-1);
+
+	for (l = 0; l < VDEV_LABELS; l++) {
+		if (pread(fd, label, sizeof (vdev_label_t),
+		    label_offset(statbuf.st_size, l)) != sizeof (vdev_label_t))
+			continue;
+
+		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
+		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
+			continue;
+
+		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
+		    &state) != 0 || state > POOL_STATE_SPARE) {
+			nvlist_free(*config);
+			continue;
+		}
+
+		if (state != POOL_STATE_SPARE &&
+		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
+		    &txg) != 0 || txg == 0)) {
+			nvlist_free(*config);
+			continue;
+		}
+
+		free(label);
+		return (0);
+	}
+
+	free(label);
+	*config = NULL;
+	return (0);
+}
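+
+/*
+ * Editorial usage sketch (hypothetical device path): a zero return with a
+ * NULL config simply means no valid label was found on the device.
+ *
+ *	nvlist_t *config;
+ *	int fd;
+ *
+ *	if ((fd = open("/dev/da0", O_RDONLY)) >= 0) {
+ *		if (zpool_read_label(fd, &config) == 0 && config != NULL) {
+ *			dump_nvlist(config, 0);
+ *			nvlist_free(config);
+ *		}
+ *		(void) close(fd);
+ *	}
+ */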
+
+/*
+ * Given a list of directories to search, find all pools stored on disk.  This
+ * includes partial pools which are not available to import.  If no args are
+ * given (argc is 0), then the default directory (/dev) is searched.
+ */
+nvlist_t *
+zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
+{
+	int i;
+	char path[MAXPATHLEN];
+	nvlist_t *ret = NULL, *config;
+	int fd;
+	pool_list_t pools = { 0 };
+	pool_entry_t *pe, *penext;
+	vdev_entry_t *ve, *venext;
+	config_entry_t *ce, *cenext;
+	name_entry_t *ne, *nenext;
+	struct gmesh mesh;
+	struct gclass *mp;
+	struct ggeom *gp;
+	struct gprovider *pp;
+
+	/*
+	 * Go through and read the label configuration information from every
+	 * possible device, organizing the information according to pool GUID
+	 * and toplevel GUID.
+	 */
+
+	fd = geom_gettree(&mesh);
+	assert(fd == 0);
+
+	LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
+		LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
+			LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
+
+				(void) snprintf(path, sizeof (path), "%s%s",
+				    _PATH_DEV, pp->lg_name);
+
+				if ((fd = open64(path, O_RDONLY)) < 0)
+					continue;
+
+				if ((zpool_read_label(fd, &config)) != 0) {
+					(void) no_memory(hdl);
+					goto error;
+				}
+
+				(void) close(fd);
+
+				if (config == NULL)
+					continue;
+
+				if (add_config(hdl, &pools, path, config) != 0)
+					goto error;
+			}
+		}
+	}
+
+	geom_deletetree(&mesh);
+
+	ret = get_configs(hdl, &pools);
+
+error:
+	for (pe = pools.pools; pe != NULL; pe = penext) {
+		penext = pe->pe_next;
+		for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
+			venext = ve->ve_next;
+			for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
+				cenext = ce->ce_next;
+				if (ce->ce_config)
+					nvlist_free(ce->ce_config);
+				free(ce);
+			}
+			free(ve);
+		}
+		free(pe);
+	}
+
+	for (ne = pools.names; ne != NULL; ne = nenext) {
+		nenext = ne->ne_next;
+		if (ne->ne_name)
+			free(ne->ne_name);
+		free(ne);
+	}
+
+	return (ret);
+}
+
+boolean_t
+find_guid(nvlist_t *nv, uint64_t guid)
+{
+	uint64_t tmp;
+	nvlist_t **child;
+	uint_t c, children;
+
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
+	if (tmp == guid)
+		return (B_TRUE);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			if (find_guid(child[c], guid))
+				return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+typedef struct spare_cbdata {
+	uint64_t	cb_guid;
+	zpool_handle_t	*cb_zhp;
+} spare_cbdata_t;
+
+static int
+find_spare(zpool_handle_t *zhp, void *data)
+{
+	spare_cbdata_t *cbp = data;
+	nvlist_t **spares;
+	uint_t i, nspares;
+	uint64_t guid;
+	nvlist_t *nvroot;
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) == 0) {
+		for (i = 0; i < nspares; i++) {
+			verify(nvlist_lookup_uint64(spares[i],
+			    ZPOOL_CONFIG_GUID, &guid) == 0);
+			if (guid == cbp->cb_guid) {
+				cbp->cb_zhp = zhp;
+				return (1);
+			}
+		}
+	}
+
+	zpool_close(zhp);
+	return (0);
+}
+
+/*
+ * Determines if the given device is part of a pool.  If so, it returns true
+ * along with the state of the pool and the name of the pool.  The name string
+ * is allocated and must be freed by the caller.
+ */
+int
+zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
+    boolean_t *inuse)
+{
+	nvlist_t *config;
+	char *name;
+	boolean_t ret;
+	uint64_t guid, vdev_guid;
+	zpool_handle_t *zhp;
+	nvlist_t *pool_config;
+	uint64_t stateval, isspare;
+	spare_cbdata_t cb = { 0 };
+	boolean_t isactive;
+
+	*inuse = B_FALSE;
+
+	if (zpool_read_label(fd, &config) != 0) {
+		(void) no_memory(hdl);
+		return (-1);
+	}
+
+	if (config == NULL)
+		return (0);
+
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    &stateval) == 0);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
+	    &vdev_guid) == 0);
+
+	if (stateval != POOL_STATE_SPARE) {
+		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+		    &name) == 0);
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+		    &guid) == 0);
+	}
+
+	switch (stateval) {
+	case POOL_STATE_EXPORTED:
+		ret = B_TRUE;
+		break;
+
+	case POOL_STATE_ACTIVE:
+		/*
+		 * For an active pool, we have to determine if it's really part
+		 * of a currently active pool (in which case the pool will exist
+		 * and the guid will be the same), or whether it's part of an
+		 * active pool that was disconnected without being explicitly
+		 * exported.
+		 */
+		if (pool_active(hdl, name, guid, &isactive) != 0) {
+			nvlist_free(config);
+			return (-1);
+		}
+
+		if (isactive) {
+			/*
+			 * Because the device may have been removed while
+			 * offlined, we only report it as active if the vdev is
+			 * still present in the config.  Otherwise, pretend like
+			 * it's not in use.
+			 */
+			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
+			    (pool_config = zpool_get_config(zhp, NULL))
+			    != NULL) {
+				nvlist_t *nvroot;
+
+				verify(nvlist_lookup_nvlist(pool_config,
+				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+				ret = find_guid(nvroot, vdev_guid);
+			} else {
+				ret = B_FALSE;
+			}
+
+			/*
+			 * If this is an active spare within another pool, we
+			 * treat it like an unused hot spare.  This allows the
+			 * user to create a pool with a hot spare that is
+			 * currently in use within another pool.  Since we
+			 * return B_TRUE, libdiskmgt will continue to prevent
+			 * generic consumers from using the device.
+			 */
+			if (ret && nvlist_lookup_uint64(config,
+			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
+				stateval = POOL_STATE_SPARE;
+
+			if (zhp != NULL)
+				zpool_close(zhp);
+		} else {
+			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
+			ret = B_TRUE;
+		}
+		break;
+
+	case POOL_STATE_SPARE:
+		/*
+		 * For a hot spare, it can be either definitively in use, or
+		 * potentially active.  To determine if it's in use, we iterate
+		 * over all pools in the system and search for one with a spare
+		 * with a matching guid.
+		 *
+		 * Due to the shared nature of spares, we don't actually report
+		 * the potentially active case as in use.  This means the user
+		 * can freely create pools on the hot spares of exported pools,
+		 * but to do otherwise makes the resulting code complicated, and
+		 * we end up having to deal with this case anyway.
+		 */
+		cb.cb_zhp = NULL;
+		cb.cb_guid = vdev_guid;
+		if (zpool_iter(hdl, find_spare, &cb) == 1) {
+			name = (char *)zpool_get_name(cb.cb_zhp);
+			ret = B_TRUE;
+		} else {
+			ret = B_FALSE;
+		}
+		break;
+
+	default:
+		ret = B_FALSE;
+	}
+
+
+	if (ret) {
+		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+		*state = (pool_state_t)stateval;
+	}
+
+	if (cb.cb_zhp)
+		zpool_close(cb.cb_zhp);
+
+	nvlist_free(config);
+	*inuse = ret;
+	return (0);
+}
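+
+/*
+ * Editorial usage sketch (hypothetical device path): checking a candidate
+ * device before reusing it.  The returned name is allocated and must be
+ * freed by the caller.
+ *
+ *	pool_state_t state;
+ *	boolean_t inuse;
+ *	char *name;
+ *	int fd;
+ *
+ *	if ((fd = open("/dev/da2", O_RDONLY)) >= 0) {
+ *		if (zpool_in_use(hdl, fd, &state, &name, &inuse) == 0 && inuse) {
+ *			(void) fprintf(stderr, "device belongs to '%s'\n", name);
+ *			free(name);
+ *		}
+ *		(void) close(fd);
+ *	}
+ */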
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
@@ -0,0 +1,443 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBZFS_H
+#define	_LIBZFS_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <assert.h>
+#include <libnvpair.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/varargs.h>
+#include <sys/fs/zfs.h>
+#include <sys/zfs_ioctl.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Miscellaneous ZFS constants
+ */
+#define	ZFS_MAXNAMELEN		MAXNAMELEN
+#define	ZPOOL_MAXNAMELEN	MAXNAMELEN
+#define	ZFS_MAXPROPLEN		MAXPATHLEN
+
+/*
+ * libzfs errors
+ */
+enum {
+	EZFS_NOMEM = 2000,	/* out of memory */
+	EZFS_BADPROP,		/* invalid property value */
+	EZFS_PROPREADONLY,	/* cannot set readonly property */
+	EZFS_PROPTYPE,		/* property does not apply to dataset type */
+	EZFS_PROPNONINHERIT,	/* property is not inheritable */
+	EZFS_PROPSPACE,		/* bad quota or reservation */
+	EZFS_BADTYPE,		/* dataset is not of appropriate type */
+	EZFS_BUSY,		/* pool or dataset is busy */
+	EZFS_EXISTS,		/* pool or dataset already exists */
+	EZFS_NOENT,		/* no such pool or dataset */
+	EZFS_BADSTREAM,		/* bad backup stream */
+	EZFS_DSREADONLY,	/* dataset is readonly */
+	EZFS_VOLTOOBIG,		/* volume is too large for 32-bit system */
+	EZFS_VOLHASDATA,	/* volume already contains data */
+	EZFS_INVALIDNAME,	/* invalid dataset name */
+	EZFS_BADRESTORE,	/* unable to restore to destination */
+	EZFS_BADBACKUP,		/* backup failed */
+	EZFS_BADTARGET,		/* bad attach/detach/replace target */
+	EZFS_NODEVICE,		/* no such device in pool */
+	EZFS_BADDEV,		/* invalid device to add */
+	EZFS_NOREPLICAS,	/* no valid replicas */
+	EZFS_RESILVERING,	/* currently resilvering */
+	EZFS_BADVERSION,	/* unsupported version */
+	EZFS_POOLUNAVAIL,	/* pool is currently unavailable */
+	EZFS_DEVOVERFLOW,	/* too many devices in one vdev */
+	EZFS_BADPATH,		/* must be an absolute path */
+	EZFS_CROSSTARGET,	/* rename or clone across pool or dataset */
+	EZFS_ZONED,		/* used improperly in local zone */
+	EZFS_MOUNTFAILED,	/* failed to mount dataset */
+	EZFS_UMOUNTFAILED,	/* failed to unmount dataset */
+	EZFS_UNSHARENFSFAILED,	/* unshare(1M) failed */
+	EZFS_SHARENFSFAILED,	/* share(1M) failed */
+	EZFS_DEVLINKS,		/* failed to create zvol links */
+	EZFS_PERM,		/* permission denied */
+	EZFS_NOSPC,		/* out of space */
+	EZFS_IO,		/* I/O error */
+	EZFS_INTR,		/* signal received */
+	EZFS_ISSPARE,		/* device is a hot spare */
+	EZFS_INVALCONFIG,	/* invalid vdev configuration */
+	EZFS_RECURSIVE,		/* recursive dependency */
+	EZFS_NOHISTORY,		/* no history object */
+	EZFS_UNSHAREISCSIFAILED, /* iscsitgtd failed request to unshare */
+	EZFS_SHAREISCSIFAILED,	/* iscsitgtd failed request to share */
+	EZFS_POOLPROPS,		/* couldn't retrieve pool props */
+	EZFS_POOL_NOTSUP,	/* ops not supported for this type of pool */
+	EZFS_POOL_INVALARG,	/* invalid argument for this pool operation */
+	EZFS_NAMETOOLONG,	/* dataset name is too long */
+	EZFS_UNKNOWN
+};
+
+/*
+ * Basic handle types
+ */
+typedef struct zfs_handle zfs_handle_t;
+typedef struct zpool_handle zpool_handle_t;
+typedef struct libzfs_handle libzfs_handle_t;
+
+/*
+ * Library initialization
+ */
+extern libzfs_handle_t *libzfs_init(void);
+extern void libzfs_fini(libzfs_handle_t *);
+
+extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *);
+extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *);
+
+extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t);
+
+extern int libzfs_errno(libzfs_handle_t *);
+extern const char *libzfs_error_action(libzfs_handle_t *);
+extern const char *libzfs_error_description(libzfs_handle_t *);
+
+/*
+ * Basic handle functions
+ */
+extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *);
+extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *);
+extern void zpool_close(zpool_handle_t *);
+extern const char *zpool_get_name(zpool_handle_t *);
+extern uint64_t zpool_get_guid(zpool_handle_t *);
+extern uint64_t zpool_get_space_used(zpool_handle_t *);
+extern uint64_t zpool_get_space_total(zpool_handle_t *);
+extern int zpool_get_root(zpool_handle_t *, char *, size_t);
+extern int zpool_get_state(zpool_handle_t *);
+extern uint64_t zpool_get_version(zpool_handle_t *);
+
+/*
+ * Iterate over all active pools in the system.
+ */
+typedef int (*zpool_iter_f)(zpool_handle_t *, void *);
+extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *);
+
+/*
+ * Functions to create and destroy pools
+ */
+extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
+    const char *);
+extern int zpool_destroy(zpool_handle_t *);
+extern int zpool_add(zpool_handle_t *, nvlist_t *);
+
+/*
+ * Functions to manipulate pool and vdev state
+ */
+extern int zpool_scrub(zpool_handle_t *, pool_scrub_type_t);
+
+extern int zpool_vdev_online(zpool_handle_t *, const char *);
+extern int zpool_vdev_offline(zpool_handle_t *, const char *, int);
+extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *,
+    nvlist_t *, int);
+extern int zpool_vdev_detach(zpool_handle_t *, const char *);
+extern int zpool_vdev_remove(zpool_handle_t *, const char *);
+extern int zpool_clear(zpool_handle_t *, const char *);
+extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *);
+
+/*
+ * Functions to manage pool properties
+ */
+extern int zpool_set_prop(zpool_handle_t *, const char *, const char *);
+extern int zpool_get_prop(zpool_handle_t *, zfs_prop_t, char *,
+	size_t proplen, zfs_source_t *);
+extern const char *zpool_prop_to_name(zpool_prop_t);
+extern const char *zpool_prop_values(zpool_prop_t);
+
+/*
+ * Pool health statistics.
+ */
+typedef enum {
+	/*
+	 * The following correspond to faults as defined in the (fault.fs.zfs.*)
+	 * event namespace.  Each is associated with a corresponding message ID.
+	 */
+	ZPOOL_STATUS_CORRUPT_CACHE,	/* corrupt /kernel/drv/zpool.cache */
+	ZPOOL_STATUS_MISSING_DEV_R,	/* missing device with replicas */
+	ZPOOL_STATUS_MISSING_DEV_NR,	/* missing device with no replicas */
+	ZPOOL_STATUS_CORRUPT_LABEL_R,	/* bad device label with replicas */
+	ZPOOL_STATUS_CORRUPT_LABEL_NR,	/* bad device label with no replicas */
+	ZPOOL_STATUS_BAD_GUID_SUM,	/* sum of device guids didn't match */
+	ZPOOL_STATUS_CORRUPT_POOL,	/* pool metadata is corrupted */
+	ZPOOL_STATUS_CORRUPT_DATA,	/* data errors in user (meta)data */
+	ZPOOL_STATUS_FAILING_DEV,	/* device experiencing errors */
+	ZPOOL_STATUS_VERSION_NEWER,	/* newer on-disk version */
+	ZPOOL_STATUS_HOSTID_MISMATCH,	/* last accessed by another system */
+
+	/*
+	 * The following are not faults per se, but are still errors possibly
+	 * requiring administrative attention.  There is no corresponding
+	 * message ID.
+	 */
+	ZPOOL_STATUS_VERSION_OLDER,	/* older on-disk version */
+	ZPOOL_STATUS_RESILVERING,	/* device being resilvered */
+	ZPOOL_STATUS_OFFLINE_DEV,	/* device offline */
+
+	/*
+	 * Finally, the following indicates a healthy pool.
+	 */
+	ZPOOL_STATUS_OK
+} zpool_status_t;
+
+extern zpool_status_t zpool_get_status(zpool_handle_t *, char **);
+extern zpool_status_t zpool_import_status(nvlist_t *, char **);
+
+/*
+ * Statistics and configuration functions.
+ */
+extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **);
+extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *);
+extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **);
+
+/*
+ * Import and export functions
+ */
+extern int zpool_export(zpool_handle_t *);
+extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
+    const char *);
+
+/*
+ * Search for pools to import
+ */
+extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
+
+/*
+ * Miscellaneous pool functions
+ */
+extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *);
+extern int zpool_upgrade(zpool_handle_t *);
+extern int zpool_get_history(zpool_handle_t *, nvlist_t **);
+extern void zpool_log_history(libzfs_handle_t *, int, char **, const char *,
+    boolean_t, boolean_t);
+extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
+    size_t len);
+
+/*
+ * Basic handle manipulations.  These functions do not create or destroy the
+ * underlying datasets, only the references to them.
+ */
+extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int);
+extern void zfs_close(zfs_handle_t *);
+extern zfs_type_t zfs_get_type(const zfs_handle_t *);
+extern const char *zfs_get_name(const zfs_handle_t *);
+
+/*
+ * Property management functions.  Some functions are shared with the kernel,
+ * and are found in sys/fs/zfs.h.
+ */
+extern const char *zfs_prop_to_name(zfs_prop_t);
+extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
+extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
+    zfs_source_t *, char *, size_t, boolean_t);
+extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
+    zfs_source_t *, char *, size_t);
+extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
+extern const char *zfs_prop_get_string(zfs_handle_t *, zfs_prop_t);
+extern int zfs_prop_inherit(zfs_handle_t *, const char *);
+extern const char *zfs_prop_values(zfs_prop_t);
+extern int zfs_prop_valid_for_type(zfs_prop_t, int);
+extern const char *zfs_prop_default_string(zfs_prop_t prop);
+extern uint64_t zfs_prop_default_numeric(zfs_prop_t);
+extern int zfs_prop_is_string(zfs_prop_t prop);
+extern const char *zfs_prop_column_name(zfs_prop_t);
+extern boolean_t zfs_prop_align_right(zfs_prop_t);
+extern void nicebool(int value, char *buf, size_t buflen);
+
+typedef struct zfs_proplist {
+	zfs_prop_t	pl_prop;
+	char		*pl_user_prop;
+	struct zfs_proplist *pl_next;
+	boolean_t	pl_all;
+	size_t		pl_width;
+	boolean_t	pl_fixed;
+} zfs_proplist_t;
+
+typedef zfs_proplist_t zpool_proplist_t;
+
+extern int zfs_get_proplist(libzfs_handle_t *, char *, zfs_proplist_t **);
+extern int zpool_get_proplist(libzfs_handle_t *, char *, zpool_proplist_t **);
+extern int zfs_expand_proplist(zfs_handle_t *, zfs_proplist_t **);
+extern int zpool_expand_proplist(zpool_handle_t *, zpool_proplist_t **);
+extern void zfs_free_proplist(zfs_proplist_t *);
+extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
+
+#define	ZFS_MOUNTPOINT_NONE	"none"
+#define	ZFS_MOUNTPOINT_LEGACY	"legacy"
+
+/*
+ * Functions for printing properties from zfs/zpool
+ */
+typedef struct libzfs_get_cbdata {
+	int cb_sources;
+	int cb_columns[4];
+	int cb_colwidths[5];
+	boolean_t cb_scripted;
+	boolean_t cb_literal;
+	boolean_t cb_first;
+	zfs_proplist_t *cb_proplist;
+} libzfs_get_cbdata_t;
+
+void libzfs_print_one_property(const char *, libzfs_get_cbdata_t *,
+    const char *, const char *, zfs_source_t, const char *);
+
+#define	GET_COL_NAME		1
+#define	GET_COL_PROPERTY	2
+#define	GET_COL_VALUE		3
+#define	GET_COL_SOURCE		4
+
+/*
+ * Iterator functions.
+ */
+typedef int (*zfs_iter_f)(zfs_handle_t *, void *);
+extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *);
+extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *);
+extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *);
+extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *);
+extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
+
+/*
+ * Functions to create and destroy datasets.
+ */
+extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
+    nvlist_t *);
+extern int zfs_destroy(zfs_handle_t *);
+extern int zfs_destroy_snaps(zfs_handle_t *, char *);
+extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
+extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t);
+extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, int);
+extern int zfs_rename(zfs_handle_t *, const char *, int);
+extern int zfs_send(zfs_handle_t *, const char *, int);
+extern int zfs_receive(libzfs_handle_t *, const char *, int, int, int,
+    boolean_t, int);
+extern int zfs_promote(zfs_handle_t *);
+
+/*
+ * Miscellaneous functions.
+ */
+extern const char *zfs_type_to_name(zfs_type_t);
+extern void zfs_refresh_properties(zfs_handle_t *);
+extern int zfs_name_valid(const char *, zfs_type_t);
+extern int zfs_disable(zfs_handle_t *);
+extern int zfs_enable(zfs_handle_t *);
+extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t);
+
+/*
+ * Mount support functions.
+ */
+extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **);
+extern boolean_t zfs_is_mounted(zfs_handle_t *, char **);
+extern int zfs_mount(zfs_handle_t *, const char *, int);
+extern int zfs_unmount(zfs_handle_t *, const char *, int);
+extern int zfs_unmountall(zfs_handle_t *, int);
+
+/*
+ * Share support functions.
+ */
+extern boolean_t zfs_is_shared(zfs_handle_t *);
+extern int zfs_share(zfs_handle_t *);
+extern int zfs_unshare(zfs_handle_t *);
+
+/*
+ * Protocol-specific share support functions.
+ */
+extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **);
+extern int zfs_share_nfs(zfs_handle_t *);
+extern int zfs_unshare_nfs(zfs_handle_t *, const char *);
+extern int zfs_unshareall_nfs(zfs_handle_t *);
+extern boolean_t zfs_is_shared_iscsi(zfs_handle_t *);
+extern int zfs_share_iscsi(zfs_handle_t *);
+extern int zfs_unshare_iscsi(zfs_handle_t *);
+
+/*
+ * FreeBSD-specific jail support function.
+ */
+extern int zfs_jail(zfs_handle_t *, int, int);
+
+/*
+ * When dealing with nvlists, verify() is extremely useful
+ */
+#ifndef verify
+#ifdef NDEBUG
+#define	verify(EX)	((void)(EX))
+#else
+#define	verify(EX)	assert(EX)
+#endif
+#endif
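+
+/*
+ * Editorial example: libzfs uses verify() for nvlist lookups that cannot fail
+ * on a well-formed config.  Unlike a plain assert(), the expression is still
+ * evaluated when NDEBUG is defined.
+ *
+ *	uint64_t guid;
+ *	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
+ */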
+
+/*
+ * Utility function to convert a number to a human-readable form.
+ */
+extern void zfs_nicenum(uint64_t, char *, size_t);
+extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);
+
+/*
+ * Pool destroy special.  Remove the device information without destroying
+ * the underlying dataset.
+ */
+extern int zfs_remove_link(zfs_handle_t *);
+
+/*
+ * Given a device or file, determine if it is part of a pool.
+ */
+extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
+    boolean_t *);
+
+/*
+ * ftyp special.  Read the label from a given device.
+ */
+extern int zpool_read_label(int, nvlist_t **);
+
+/*
+ * Create and remove zvol /dev links.
+ */
+extern int zpool_create_zvol_links(zpool_handle_t *);
+extern int zpool_remove_zvol_links(zpool_handle_t *);
+
+/*
+ * Enable and disable datasets within a pool by mounting/unmounting and
+ * sharing/unsharing them.
+ */
+extern int zpool_enable_datasets(zpool_handle_t *, const char *, int);
+extern int zpool_disable_datasets(zpool_handle_t *, boolean_t);
+
+#ifdef	__FreeBSD__
+extern int zmount(const char *, const char *, int, char *, char *, int, char *,
+    int);
+#endif
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBZFS_H */
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c
@@ -0,0 +1,646 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Iterate over all children of the current object.  This includes the normal
+ * dataset hierarchy, but also arbitrary hierarchies due to clones.  We want to
+ * walk all datasets in the pool, and construct a directed graph of the form:
+ *
+ * 			home
+ *                        |
+ *                   +----+----+
+ *                   |         |
+ *                   v         v             ws
+ *                  bar       baz             |
+ *                             |              |
+ *                             v              v
+ *                          @yesterday ----> foo
+ *
+ * In order to construct this graph, we have to walk every dataset in the pool,
+ * because the clone parent is stored as a property of the child, not the
+ * parent.  The parent only keeps track of the number of clones.
+ *
+ * In the normal case (without clones) this would be rather expensive.  To avoid
+ * unnecessary computation, we first try a walk of the subtree hierarchy
+ * starting from the initial node.  At each dataset, we construct a node in the
+ * graph and an edge leading from its parent.  If we don't see any snapshots
+ * with a non-zero clone count, then we are finished.
+ *
+ * If we do find a cloned snapshot, then we finish the walk of the current
+ * subtree, but indicate that we need to do a complete walk.  We then perform a
+ * global walk of all datasets, avoiding the subtree we already processed.
+ *
+ * At the end of this, we'll end up with a directed graph of all relevant (and
+ * possibly some irrelevant) datasets in the system.  We need to both find our
+ * limiting subgraph and determine a safe ordering in which to destroy the
+ * datasets.  We do a topological ordering of our graph starting at our target
+ * dataset, and then walk the results in reverse.
+ *
+ * It's possible for the graph to have cycles if, for example, the user renames
+ * a clone to be the parent of its origin snapshot.  The user can request to
+ * generate an error in this case, or ignore the cycle and continue.
+ *
+ * When removing datasets, we want to destroy the snapshots in chronological
+ * order (because this is the most efficient method).  In order to accomplish
+ * this, we store the creation transaction group with each vertex and keep each
+ * vertex's edges sorted according to this value.  The topological sort will
+ * automatically walk the snapshots in the correct order.
+ */
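+
+/*
+ * Editorial note: for the example graph above, one possible depth-first
+ * post-order starting from 'home' is
+ *
+ *	bar, foo, @yesterday, baz, home
+ *
+ * (the relative order of siblings depends on their creation txg).  The
+ * deepest dependents come first, and get_dependents() strips the trailing
+ * starting vertex ('home') from the result.
+ */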
+
+#include <assert.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+#include "zfs_namecheck.h"
+
+#define	MIN_EDGECOUNT	4
+
+/*
+ * Vertex structure.  Indexed by dataset name, this structure maintains a list
+ * of edges to other vertices.
+ */
+struct zfs_edge;
+typedef struct zfs_vertex {
+	char			zv_dataset[ZFS_MAXNAMELEN];
+	struct zfs_vertex	*zv_next;
+	int			zv_visited;
+	uint64_t		zv_txg;
+	struct zfs_edge		**zv_edges;
+	int			zv_edgecount;
+	int			zv_edgealloc;
+} zfs_vertex_t;
+
+enum {
+	VISIT_SEEN = 1,
+	VISIT_SORT_PRE,
+	VISIT_SORT_POST
+};
+
+/*
+ * Edge structure.  Simply maintains a pointer to the destination vertex.  There
+ * is no need to store the source vertex, since we only use edges in the context
+ * of the source vertex.
+ */
+typedef struct zfs_edge {
+	zfs_vertex_t		*ze_dest;
+	struct zfs_edge		*ze_next;
+} zfs_edge_t;
+
+#define	ZFS_GRAPH_SIZE		1027	/* this could be dynamic some day */
+
+/*
+ * Graph structure.  Vertices are maintained in a hash indexed by dataset name.
+ */
+typedef struct zfs_graph {
+	zfs_vertex_t		**zg_hash;
+	size_t			zg_size;
+	size_t			zg_nvertex;
+} zfs_graph_t;
+
+/*
+ * Allocate a new edge pointing to the target vertex.
+ */
+static zfs_edge_t *
+zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
+{
+	zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t));
+
+	if (zep == NULL)
+		return (NULL);
+
+	zep->ze_dest = dest;
+
+	return (zep);
+}
+
+/*
+ * Destroy an edge.
+ */
+static void
+zfs_edge_destroy(zfs_edge_t *zep)
+{
+	free(zep);
+}
+
+/*
+ * Allocate a new vertex with the given name.
+ */
+static zfs_vertex_t *
+zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
+{
+	zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t));
+
+	if (zvp == NULL)
+		return (NULL);
+
+	assert(strlen(dataset) < ZFS_MAXNAMELEN);
+
+	(void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));
+
+	if ((zvp->zv_edges = zfs_alloc(hdl,
+	    MIN_EDGECOUNT * sizeof (void *))) == NULL) {
+		free(zvp);
+		return (NULL);
+	}
+
+	zvp->zv_edgealloc = MIN_EDGECOUNT;
+
+	return (zvp);
+}
+
+/*
+ * Destroy a vertex.  Frees up any associated edges.
+ */
+static void
+zfs_vertex_destroy(zfs_vertex_t *zvp)
+{
+	int i;
+
+	for (i = 0; i < zvp->zv_edgecount; i++)
+		zfs_edge_destroy(zvp->zv_edges[i]);
+
+	free(zvp->zv_edges);
+	free(zvp);
+}
+
+/*
+ * Given a vertex, add an edge to the destination vertex.
+ */
+static int
+zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
+    zfs_vertex_t *dest)
+{
+	zfs_edge_t *zep = zfs_edge_create(hdl, dest);
+
+	if (zep == NULL)
+		return (-1);
+
+	if (zvp->zv_edgecount == zvp->zv_edgealloc) {
+		void *ptr;
+
+		if ((ptr = zfs_realloc(hdl, zvp->zv_edges,
+		    zvp->zv_edgealloc * sizeof (void *),
+		    zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL)
+			return (-1);
+
+		zvp->zv_edges = ptr;
+		zvp->zv_edgealloc *= 2;
+	}
+
+	zvp->zv_edges[zvp->zv_edgecount++] = zep;
+
+	return (0);
+}
+
+static int
+zfs_edge_compare(const void *a, const void *b)
+{
+	const zfs_edge_t *ea = *((zfs_edge_t **)a);
+	const zfs_edge_t *eb = *((zfs_edge_t **)b);
+
+	if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg)
+		return (-1);
+	if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg)
+		return (1);
+	return (0);
+}
+
+/*
+ * Sort the given vertex edges according to the creation txg of each vertex.
+ */
+static void
+zfs_vertex_sort_edges(zfs_vertex_t *zvp)
+{
+	if (zvp->zv_edgecount == 0)
+		return;
+
+	qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
+	    zfs_edge_compare);
+}
+
+/*
+ * Construct a new graph object.  We allow the size to be specified as a
+ * parameter so in the future we can size the hash according to the number of
+ * datasets in the pool.
+ */
+static zfs_graph_t *
+zfs_graph_create(libzfs_handle_t *hdl, size_t size)
+{
+	zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));
+
+	if (zgp == NULL)
+		return (NULL);
+
+	zgp->zg_size = size;
+	if ((zgp->zg_hash = zfs_alloc(hdl,
+	    size * sizeof (zfs_vertex_t *))) == NULL) {
+		free(zgp);
+		return (NULL);
+	}
+
+	return (zgp);
+}
+
+/*
+ * Destroy a graph object.  We have to iterate over all the hash chains,
+ * destroying each vertex in the process.
+ */
+static void
+zfs_graph_destroy(zfs_graph_t *zgp)
+{
+	int i;
+	zfs_vertex_t *current, *next;
+
+	for (i = 0; i < zgp->zg_size; i++) {
+		current = zgp->zg_hash[i];
+		while (current != NULL) {
+			next = current->zv_next;
+			zfs_vertex_destroy(current);
+			current = next;
+		}
+	}
+
+	free(zgp->zg_hash);
+	free(zgp);
+}
+
+/*
+ * Graph hash function.  Classic bernstein k=33 hash function, taken from
+ * usr/src/cmd/sgs/tools/common/strhash.c
+ */
+static size_t
+zfs_graph_hash(zfs_graph_t *zgp, const char *str)
+{
+	size_t hash = 5381;
+	int c;
+
+	while ((c = *str++) != 0)
+		hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
+
+	return (hash % zgp->zg_size);
+}
+
+/*
+ * Given a dataset name, finds the associated vertex, creating it if necessary.
+ */
+static zfs_vertex_t *
+zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
+    uint64_t txg)
+{
+	size_t idx = zfs_graph_hash(zgp, dataset);
+	zfs_vertex_t *zvp;
+
+	for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) {
+		if (strcmp(zvp->zv_dataset, dataset) == 0) {
+			if (zvp->zv_txg == 0)
+				zvp->zv_txg = txg;
+			return (zvp);
+		}
+	}
+
+	if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL)
+		return (NULL);
+
+	zvp->zv_next = zgp->zg_hash[idx];
+	zvp->zv_txg = txg;
+	zgp->zg_hash[idx] = zvp;
+	zgp->zg_nvertex++;
+
+	return (zvp);
+}
+
+/*
+ * Given two dataset names, create an edge between them.  For the source vertex,
+ * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
+ * created it as a destination of another edge.  If 'dest' is NULL, then this
+ * is an individual vertex (i.e. the starting vertex), so don't add an edge.
+ */
+static int
+zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
+    const char *dest, uint64_t txg)
+{
+	zfs_vertex_t *svp, *dvp;
+
+	if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL)
+		return (-1);
+	svp->zv_visited = VISIT_SEEN;
+	if (dest != NULL) {
+		dvp = zfs_graph_lookup(hdl, zgp, dest, txg);
+		if (dvp == NULL)
+			return (-1);
+		if (zfs_vertex_add_edge(hdl, svp, dvp) != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Iterate over all children of the given dataset, adding any vertices as
+ * necessary.  Returns 0 if no cloned snapshots were seen, -1 if there was an
+ * error, or 1 otherwise.  This is a simple recursive algorithm - the ZFS
+ * namespace typically is very flat.  We manually invoke the necessary ioctl()
+ * calls to avoid the overhead and additional semantics of zfs_open().
+ */
+static int
+iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret = 0, err;
+	zfs_vertex_t *zvp;
+
+	/*
+	 * Look up the source vertex, and avoid it if we've seen it before.
+	 */
+	zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
+	if (zvp == NULL)
+		return (-1);
+	if (zvp->zv_visited == VISIT_SEEN)
+		return (0);
+
+	/*
+	 * We check the clone parent here instead of within the loop, so that if
+	 * the root dataset has been promoted from a clone, we find its parent
+	 * appropriately.
+	 */
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0 &&
+	    zc.zc_objset_stats.dds_clone_of[0] != '\0') {
+		if (zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_clone_of,
+		    zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (-1);
+	}
+
+	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	    ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
+	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
+
+		/*
+		 * Ignore private dataset names.
+		 */
+		if (dataset_name_hidden(zc.zc_name))
+			continue;
+
+		/*
+		 * Get statistics for this dataset, to determine the type of the
+		 * dataset and clone statistics.  If this fails, the dataset has
+		 * since been removed, and we're pretty much screwed anyway.
+		 */
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
+			continue;
+
+		/*
+		 * Add an edge between the parent and the child.
+		 */
+		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
+		    zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (-1);
+
+		/*
+		 * Iterate over all children
+		 */
+		err = iterate_children(hdl, zgp, zc.zc_name);
+		if (err == -1)
+			return (-1);
+		else if (err == 1)
+			ret = 1;
+
+		/*
+		 * Indicate if we found a dataset with a non-zero clone count.
+		 */
+		if (zc.zc_objset_stats.dds_num_clones != 0)
+			ret = 1;
+	}
+
+	/*
+	 * Now iterate over all snapshots.
+	 */
+	bzero(&zc, sizeof (zc));
+
+	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	    ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
+	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
+
+		/*
+		 * Get statistics for this dataset, to determine the type of the
+		 * dataset and clone statistics.  If this fails, the dataset has
+		 * since been removed, and we're pretty much screwed anyway.
+		 */
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
+			continue;
+
+		/*
+		 * Add an edge between the parent and the child.
+		 */
+		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
+		    zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (-1);
+
+		/*
+		 * Indicate if we found a dataset with a non-zero clone count.
+		 */
+		if (zc.zc_objset_stats.dds_num_clones != 0)
+			ret = 1;
+	}
+
+	zvp->zv_visited = VISIT_SEEN;
+
+	return (ret);
+}
+
+/*
+ * Construct a complete graph of all necessary vertices.  First, we iterate over
+ * only our object's children.  If we don't find any cloned snapshots, then we
+ * simply return that.  Otherwise, we have to start at the pool root and iterate
+ * over all datasets.
+ */
+static zfs_graph_t *
+construct_graph(libzfs_handle_t *hdl, const char *dataset)
+{
+	zfs_graph_t *zgp = zfs_graph_create(hdl, ZFS_GRAPH_SIZE);
+	zfs_cmd_t zc = { 0 };
+	int ret = 0;
+
+	if (zgp == NULL)
+		return (zgp);
+
+	/*
+	 * We need to explicitly check whether this dataset has clones or not,
+	 * since iterate_children() only checks the children.
+	 */
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	(void) ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc);
+
+	if (zc.zc_objset_stats.dds_num_clones != 0 ||
+	    (ret = iterate_children(hdl, zgp, dataset)) != 0) {
+		/*
+		 * Determine pool name and try again.
+		 */
+		char *pool, *slash;
+
+		if ((slash = strchr(dataset, '/')) != NULL ||
+		    (slash = strchr(dataset, '@')) != NULL) {
+			pool = zfs_alloc(hdl, slash - dataset + 1);
+			if (pool == NULL) {
+				zfs_graph_destroy(zgp);
+				return (NULL);
+			}
+			(void) strncpy(pool, dataset, slash - dataset);
+			pool[slash - dataset] = '\0';
+
+			if (iterate_children(hdl, zgp, pool) == -1 ||
+			    zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
+				free(pool);
+				zfs_graph_destroy(zgp);
+				return (NULL);
+			}
+
+			free(pool);
+		}
+	}
+
+	if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
+		zfs_graph_destroy(zgp);
+		return (NULL);
+	}
+
+	return (zgp);
+}
+
+/*
+ * Given a graph, do a recursive topological sort into the given array.  This is
+ * really just a depth-first search, so that the deepest nodes appear first.
+ * We hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
+ */
+static int
+topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result,
+    size_t *idx, zfs_vertex_t *zgv)
+{
+	int i;
+
+	if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) {
+		/*
+		 * If we've already seen this vertex as part of our depth-first
+		 * search, then we have a cyclic dependency, and we must return
+		 * an error.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "recursive dependency at '%s'"),
+		    zgv->zv_dataset);
+		return (zfs_error(hdl, EZFS_RECURSIVE,
+		    dgettext(TEXT_DOMAIN,
+		    "cannot determine dependent datasets")));
+	} else if (zgv->zv_visited >= VISIT_SORT_PRE) {
+		/*
+		 * If we've already processed this as part of the topological
+		 * sort, then don't bother doing so again.
+		 */
+		return (0);
+	}
+
+	zgv->zv_visited = VISIT_SORT_PRE;
+
+	/* avoid doing a search if we don't have to */
+	zfs_vertex_sort_edges(zgv);
+	for (i = 0; i < zgv->zv_edgecount; i++) {
+		if (topo_sort(hdl, allowrecursion, result, idx,
+		    zgv->zv_edges[i]->ze_dest) != 0)
+			return (-1);
+	}
+
+	/* we may have visited this in the course of the above */
+	if (zgv->zv_visited == VISIT_SORT_POST)
+		return (0);
+
+	if ((result[*idx] = zfs_alloc(hdl,
+	    strlen(zgv->zv_dataset) + 1)) == NULL)
+		return (-1);
+
+	(void) strcpy(result[*idx], zgv->zv_dataset);
+	*idx += 1;
+	zgv->zv_visited = VISIT_SORT_POST;
+	return (0);
+}
+
+/*
+ * The only public interface for this file.  Do the dirty work of constructing a
+ * child list for the given object.  Construct the graph, do the topological
+ * sort, and then return the array of strings to the caller.
+ *
+ * The 'allowrecursion' parameter controls behavior when cycles are found.  If
+ * it is set, then the cycle is ignored and the results returned as if the cycle
+ * did not exist.  If it is not set, then the routine will generate an error if
+ * a cycle is found.
+ */
+int
+get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion,
+    const char *dataset, char ***result, size_t *count)
+{
+	zfs_graph_t *zgp;
+	zfs_vertex_t *zvp;
+
+	if ((zgp = construct_graph(hdl, dataset)) == NULL)
+		return (-1);
+
+	if ((*result = zfs_alloc(hdl,
+	    zgp->zg_nvertex * sizeof (char *))) == NULL) {
+		zfs_graph_destroy(zgp);
+		return (-1);
+	}
+
+	if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
+		free(*result);
+		zfs_graph_destroy(zgp);
+		return (-1);
+	}
+
+	*count = 0;
+	if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) {
+		free(*result);
+		zfs_graph_destroy(zgp);
+		return (-1);
+	}
+
+	/*
+	 * Get rid of the last entry, which is our starting vertex and not
+	 * strictly a dependent.
+	 */
+	assert(*count > 0);
+	free((*result)[*count - 1]);
+	(*count)--;
+
+	zfs_graph_destroy(zgp);
+
+	return (0);
+}
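
The walk above is a conventional three-state depth-first search: VISIT_SORT_PRE marks a vertex that is still on the DFS stack (reaching it again means a cyclic dependency), VISIT_SORT_POST marks one already emitted, and the start vertex necessarily comes out last, which is why get_dependents() trims the final entry. A minimal standalone sketch of the same idea, not part of this commit, using a hypothetical fixed-size adjacency matrix instead of the zfs_graph_t machinery:

#include <stdio.h>

#define	NVERT	4

/* visit states, analogous to VISIT_SORT_PRE / VISIT_SORT_POST above */
enum { UNSEEN, IN_PROGRESS, DONE };

static int adj[NVERT][NVERT];	/* adj[a][b] != 0 means a depends on b */
static int state[NVERT];
static int order[NVERT];
static int nsorted;

/* Depth-first post-order; returns -1 if a cycle is found. */
static int
dfs(int v)
{
	int w;

	if (state[v] == IN_PROGRESS)
		return (-1);			/* cyclic dependency */
	if (state[v] == DONE)
		return (0);			/* already emitted */

	state[v] = IN_PROGRESS;
	for (w = 0; w < NVERT; w++)
		if (adj[v][w] && dfs(w) != 0)
			return (-1);
	state[v] = DONE;
	order[nsorted++] = v;
	return (0);
}

int
main(void)
{
	int i;

	adj[0][1] = adj[1][2] = adj[1][3] = 1;	/* 0 -> 1 -> {2, 3} */

	if (dfs(0) != 0)
		return (1);
	for (i = 0; i < nsorted; i++)
		(void) printf("%d ", order[i]);	/* "2 3 1 0": deepest first */
	(void) printf("\n");
	return (0);
}

Run, it prints the leaves first and the start vertex last, mirroring the order libzfs hands back before dropping the final element.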
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzpool/common/util.c
@@ -0,0 +1,144 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <assert.h>
+#include <sys/zfs_context.h>
+#include <sys/avl.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/spa.h>
+#include <sys/fs/zfs.h>
+#include <sys/refcount.h>
+
+/*
+ * Routines needed by more than one client of libzpool.
+ */
+
+void
+nicenum(uint64_t num, char *buf)
+{
+	uint64_t n = num;
+	int index = 0;
+	char u;
+
+	while (n >= 1024) {
+		n = (n + (1024 / 2)) / 1024; /* Round up or down */
+		index++;
+	}
+
+	u = " KMGTPE"[index];
+
+	if (index == 0) {
+		(void) sprintf(buf, "%llu", (u_longlong_t)n);
+	} else if (n < 10 && (num & (num - 1)) != 0) {
+		(void) sprintf(buf, "%.2f%c",
+		    (double)num / (1ULL << 10 * index), u);
+	} else if (n < 100 && (num & (num - 1)) != 0) {
+		(void) sprintf(buf, "%.1f%c",
+		    (double)num / (1ULL << 10 * index), u);
+	} else {
+		(void) sprintf(buf, "%llu%c", (u_longlong_t)n, u);
+	}
+}
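
As a concrete illustration of the rounding and the power-of-two special case above, a few sample calls and the strings they produce (a sketch that assumes it is compiled together with this util.c; the expected outputs are in the comments):

#include <stdio.h>
#include <inttypes.h>

extern void nicenum(uint64_t num, char *buf);

int
main(void)
{
	char buf[32];

	nicenum(512, buf);		/* "512"   - below 1K, printed as-is      */
	(void) printf("%s\n", buf);
	nicenum(1536, buf);		/* "1.50K" - small and not a power of two */
	(void) printf("%s\n", buf);
	nicenum(1ULL << 20, buf);	/* "1M"    - exact power of two           */
	(void) printf("%s\n", buf);
	nicenum(1ULL << 40, buf);	/* "1T"                                   */
	(void) printf("%s\n", buf);
	return (0);
}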
+
+static void
+show_vdev_stats(const char *desc, nvlist_t *nv, int indent)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	vdev_stat_t *vs;
+	uint64_t sec;
+	char used[6], avail[6];
+	char rops[6], wops[6], rbytes[6], wbytes[6], rerr[6], werr[6], cerr[6];
+
+	if (indent == 0) {
+		(void) printf("                     "
+		    " capacity   operations   bandwidth  ---- errors ----\n");
+		(void) printf("description          "
+		    "used avail  read write  read write  read write cksum\n");
+	}
+
+	VERIFY(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&vs, &c) == 0);
+
+	sec = MAX(1, vs->vs_timestamp / NANOSEC);
+
+	nicenum(vs->vs_alloc, used);
+	nicenum(vs->vs_space - vs->vs_alloc, avail);
+	nicenum(vs->vs_ops[ZIO_TYPE_READ] / sec, rops);
+	nicenum(vs->vs_ops[ZIO_TYPE_WRITE] / sec, wops);
+	nicenum(vs->vs_bytes[ZIO_TYPE_READ] / sec, rbytes);
+	nicenum(vs->vs_bytes[ZIO_TYPE_WRITE] / sec, wbytes);
+	nicenum(vs->vs_read_errors, rerr);
+	nicenum(vs->vs_write_errors, werr);
+	nicenum(vs->vs_checksum_errors, cerr);
+
+	(void) printf("%*s%*s%*s%*s %5s %5s %5s %5s %5s %5s %5s\n",
+	    indent, "",
+	    indent - 19 - (vs->vs_space ? 0 : 12), desc,
+	    vs->vs_space ? 6 : 0, vs->vs_space ? used : "",
+	    vs->vs_space ? 6 : 0, vs->vs_space ? avail : "",
+	    rops, wops, rbytes, wbytes, rerr, werr, cerr);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0)
+		return;
+
+	for (c = 0; c < children; c++) {
+		nvlist_t *cnv = child[c];
+		char *cname, *tname;
+		uint64_t np;
+		if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) &&
+		    nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname))
+			cname = "<unknown>";
+		tname = calloc(1, strlen(cname) + 2);
+		(void) strcpy(tname, cname);
+		if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0)
+			tname[strlen(tname)] = '0' + np;
+		show_vdev_stats(tname, cnv, indent + 2);
+		free(tname);
+	}
+}
+
+void
+show_pool_stats(spa_t *spa)
+{
+	nvlist_t *config, *nvroot;
+	char *name;
+
+	spa_config_enter(spa, RW_READER, FTAG);
+	config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
+	spa_config_exit(spa, FTAG);
+
+	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+	    &name) == 0);
+
+	show_vdev_stats(name, nvroot, 0);
+}
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
@@ -0,0 +1,852 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <assert.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <sys/spa.h>
+#include <sys/stat.h>
+#include <sys/processor.h>
+#include <sys/zfs_context.h>
+#include <sys/zmod.h>
+#include <sys/utsname.h>
+
+/*
+ * Emulation of kernel services in userland.
+ */
+
+int hz = 119;	/* frequency when using gethrtime() >> 23 for lbolt */
+uint64_t physmem;
+vnode_t *rootdir = (vnode_t *)0xabcd1234;
+char hw_serial[11];
+
+struct utsname utsname = {
+	"userland", "libzpool", "1", "1", "na"
+};
+
+/*
+ * =========================================================================
+ * threads
+ * =========================================================================
+ */
+/*ARGSUSED*/
+kthread_t *
+zk_thread_create(void (*func)(), void *arg)
+{
+	thread_t tid;
+
+	VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
+	    &tid) == 0);
+
+	return ((void *)(uintptr_t)tid);
+}
+
+/*
+ * =========================================================================
+ * kstats
+ * =========================================================================
+ */
+/*ARGSUSED*/
+kstat_t *
+kstat_create(char *module, int instance, char *name, char *class,
+    uchar_t type, ulong_t ndata, uchar_t ks_flag)
+{
+	return (NULL);
+}
+
+/*ARGSUSED*/
+void
+kstat_install(kstat_t *ksp)
+{}
+
+/*ARGSUSED*/
+void
+kstat_delete(kstat_t *ksp)
+{}
+
+/*
+ * =========================================================================
+ * mutexes
+ * =========================================================================
+ */
+void
+zmutex_init(kmutex_t *mp)
+{
+	mp->m_owner = NULL;
+	(void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
+}
+
+void
+zmutex_destroy(kmutex_t *mp)
+{
+	ASSERT(mp->m_owner == NULL);
+	(void) _mutex_destroy(&(mp)->m_lock);
+	mp->m_owner = (void *)-1UL;
+}
+
+void
+mutex_enter(kmutex_t *mp)
+{
+	ASSERT(mp->m_owner != (void *)-1UL);
+	ASSERT(mp->m_owner != curthread);
+	VERIFY(mutex_lock(&mp->m_lock) == 0);
+	ASSERT(mp->m_owner == NULL);
+	mp->m_owner = curthread;
+}
+
+int
+mutex_tryenter(kmutex_t *mp)
+{
+	ASSERT(mp->m_owner != (void *)-1UL);
+	if (mutex_trylock(&mp->m_lock) == 0) {
+		ASSERT(mp->m_owner == NULL);
+		mp->m_owner = curthread;
+		return (1);
+	} else {
+		return (0);
+	}
+}
+
+void
+mutex_exit(kmutex_t *mp)
+{
+	ASSERT(mp->m_owner == curthread);
+	mp->m_owner = NULL;
+	VERIFY(mutex_unlock(&mp->m_lock) == 0);
+}
+
+void *
+mutex_owner(kmutex_t *mp)
+{
+	return (mp->m_owner);
+}
+
+/*
+ * =========================================================================
+ * rwlocks
+ * =========================================================================
+ */
+/*ARGSUSED*/
+void
+rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
+{
+	rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
+	rwlp->rw_owner = NULL;
+	rwlp->rw_count = 0;
+}
+
+void
+rw_destroy(krwlock_t *rwlp)
+{
+	rwlock_destroy(&rwlp->rw_lock);
+	rwlp->rw_owner = (void *)-1UL;
+	rwlp->rw_count = -2;
+}
+
+void
+rw_enter(krwlock_t *rwlp, krw_t rw)
+{
+	//ASSERT(!RW_LOCK_HELD(rwlp));
+	ASSERT(rwlp->rw_owner != (void *)-1UL);
+	ASSERT(rwlp->rw_owner != curthread);
+
+	if (rw == RW_READER) {
+		(void) rw_rdlock(&rwlp->rw_lock);
+		ASSERT(rwlp->rw_count >= 0);
+		atomic_add_int(&rwlp->rw_count, 1);
+	} else {
+		(void) rw_wrlock(&rwlp->rw_lock);
+		ASSERT(rwlp->rw_count == 0);
+		rwlp->rw_count = -1;
+		rwlp->rw_owner = curthread;
+	}
+}
+
+void
+rw_exit(krwlock_t *rwlp)
+{
+	ASSERT(rwlp->rw_owner != (void *)-1UL);
+
+	if (rwlp->rw_owner == curthread) {
+		/* Write locked. */
+		ASSERT(rwlp->rw_count == -1);
+		rwlp->rw_count = 0;
+		rwlp->rw_owner = NULL;
+	} else {
+		/* Read locked. */
+		ASSERT(rwlp->rw_count > 0);
+		atomic_add_int(&rwlp->rw_count, -1);
+	}
+	(void) rw_unlock(&rwlp->rw_lock);
+}
+
+int
+rw_tryenter(krwlock_t *rwlp, krw_t rw)
+{
+	int rv;
+
+	ASSERT(rwlp->rw_owner != (void *)-1UL);
+	ASSERT(rwlp->rw_owner != curthread);
+
+	if (rw == RW_READER)
+		rv = rw_tryrdlock(&rwlp->rw_lock);
+	else
+		rv = rw_trywrlock(&rwlp->rw_lock);
+
+	if (rv == 0) {
+		ASSERT(rwlp->rw_owner == NULL);
+		if (rw == RW_READER) {
+			ASSERT(rwlp->rw_count >= 0);
+			atomic_add_int(&rwlp->rw_count, 1);
+		} else {
+			ASSERT(rwlp->rw_count == 0);
+			rwlp->rw_count = -1;
+			rwlp->rw_owner = curthread;
+		}
+		return (1);
+	}
+
+	return (0);
+}
+
+/*ARGSUSED*/
+int
+rw_tryupgrade(krwlock_t *rwlp)
+{
+	ASSERT(rwlp->rw_owner != (void *)-1UL);
+
+	return (0);
+}
+
+int
+rw_lock_held(krwlock_t *rwlp)
+{
+
+	return (rwlp->rw_count != 0);
+}
+
+/*
+ * =========================================================================
+ * condition variables
+ * =========================================================================
+ */
+/*ARGSUSED*/
+void
+cv_init(kcondvar_t *cv, char *name, int type, void *arg)
+{
+	VERIFY(cond_init(cv, name, NULL) == 0);
+}
+
+void
+cv_destroy(kcondvar_t *cv)
+{
+	VERIFY(cond_destroy(cv) == 0);
+}
+
+void
+cv_wait(kcondvar_t *cv, kmutex_t *mp)
+{
+	ASSERT(mutex_owner(mp) == curthread);
+	mp->m_owner = NULL;
+	int ret = cond_wait(cv, &mp->m_lock);
+	VERIFY(ret == 0 || ret == EINTR);
+	mp->m_owner = curthread;
+}
+
+clock_t
+cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
+{
+	int error;
+	struct timespec ts;
+	struct timeval tv;
+	clock_t delta;
+
+	ASSERT(abstime > 0);
+top:
+	delta = abstime;
+	if (delta <= 0)
+		return (-1);
+
+	if (gettimeofday(&tv, NULL) != 0)
+		assert(!"gettimeofday() failed");
+
+	ts.tv_sec = tv.tv_sec + delta / hz;
+	ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
+	ASSERT(ts.tv_nsec >= 0);
+
+	if (ts.tv_nsec >= NANOSEC) {
+		ts.tv_sec++;
+		ts.tv_nsec -= NANOSEC;
+	}
+
+	ASSERT(mutex_owner(mp) == curthread);
+	mp->m_owner = NULL;
+	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
+	mp->m_owner = curthread;
+
+	if (error == EINTR)
+		goto top;
+
+	if (error == ETIMEDOUT)
+		return (-1);
+
+	ASSERT(error == 0);
+
+	return (1);
+}
+
+void
+cv_signal(kcondvar_t *cv)
+{
+	VERIFY(cond_signal(cv) == 0);
+}
+
+void
+cv_broadcast(kcondvar_t *cv)
+{
+	VERIFY(cond_broadcast(cv) == 0);
+}
+
+/*
+ * =========================================================================
+ * vnode operations
+ * =========================================================================
+ */
+/*
+ * Note: for the xxxat() versions of these functions, we assume that the
+ * starting vp is always rootdir (which is true for spa_directory.c, the only
+ * ZFS consumer of these interfaces).  We assert this is true, and then emulate
+ * them by adding '/' in front of the path.
+ */
+
+/*ARGSUSED*/
+int
+vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
+{
+	int fd;
+	vnode_t *vp;
+	int old_umask;
+	char realpath[MAXPATHLEN];
+	struct stat64 st;
+
+	/*
+	 * If we're accessing a real disk from userland, we need to use
+	 * the character interface to avoid caching.  This is particularly
+	 * important if we're trying to look at a real in-kernel storage
+	 * pool from userland, e.g. via zdb, because otherwise we won't
+	 * see the changes occurring under the segmap cache.
+	 * On the other hand, the stupid character device returns zero
+	 * for its size.  So -- gag -- we open the block device to get
+	 * its size, and remember it for subsequent VOP_GETATTR().
+	 */
+	if (strncmp(path, "/dev/", 5) == 0) {
+		char *dsk;
+		fd = open64(path, O_RDONLY);
+		if (fd == -1)
+			return (errno);
+		if (fstat64(fd, &st) == -1) {
+			close(fd);
+			return (errno);
+		}
+		close(fd);
+		(void) sprintf(realpath, "%s", path);
+		dsk = strstr(path, "/dsk/");
+		if (dsk != NULL)
+			(void) sprintf(realpath + (dsk - path) + 1, "r%s",
+			    dsk + 1);
+	} else {
+		(void) sprintf(realpath, "%s", path);
+		if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
+			return (errno);
+	}
+
+	if (flags & FCREAT)
+		old_umask = umask(0);
+
+	/*
+	 * The construct 'flags - FREAD' conveniently maps combinations of
+	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
+	 */
+	fd = open64(realpath, flags - FREAD, mode);
+
+	if (flags & FCREAT)
+		(void) umask(old_umask);
+
+	if (fd == -1)
+		return (errno);
+
+	if (fstat64(fd, &st) == -1) {
+		close(fd);
+		return (errno);
+	}
+
+	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
+
+	vp->v_fd = fd;
+	if (S_ISCHR(st.st_mode))
+		ioctl(fd, DIOCGMEDIASIZE, &vp->v_size);
+	else
+		vp->v_size = st.st_size;
+	vp->v_path = spa_strdup(path);
+
+	return (0);
+}
+
+int
+vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
+    int x3, vnode_t *startvp)
+{
+	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
+	int ret;
+
+	ASSERT(startvp == rootdir);
+	(void) sprintf(realpath, "/%s", path);
+
+	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
+
+	umem_free(realpath, strlen(path) + 2);
+
+	return (ret);
+}
+
+/*ARGSUSED*/
+int
+vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
+	int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
+{
+	ssize_t iolen, split;
+
+	if (uio == UIO_READ) {
+		iolen = pread64(vp->v_fd, addr, len, offset);
+	} else {
+		/*
+		 * To simulate partial disk writes, we split writes into two
+		 * system calls so that the process can be killed in between.
+		 */
+		split = (len > 0 ? rand() % len : 0);
+		iolen = pwrite64(vp->v_fd, addr, split, offset);
+		iolen += pwrite64(vp->v_fd, (char *)addr + split,
+		    len - split, offset + split);
+	}
+
+	if (iolen == -1)
+		return (errno);
+	if (residp)
+		*residp = len - iolen;
+	else if (iolen != len)
+		return (EIO);
+	return (0);
+}
+
+void
+vn_close(vnode_t *vp)
+{
+	close(vp->v_fd);
+	spa_strfree(vp->v_path);
+	umem_free(vp, sizeof (vnode_t));
+}
+
+#ifdef ZFS_DEBUG
+
+/*
+ * =========================================================================
+ * Figure out which debugging statements to print
+ * =========================================================================
+ */
+
+static char *dprintf_string;
+static int dprintf_print_all;
+
+int
+dprintf_find_string(const char *string)
+{
+	char *tmp_str = dprintf_string;
+	int len = strlen(string);
+
+	/*
+	 * Find out if this is a string we want to print.
+	 * String format: file1.c,function_name1,file2.c,file3.c
+	 */
+
+	while (tmp_str != NULL) {
+		if (strncmp(tmp_str, string, len) == 0 &&
+		    (tmp_str[len] == ',' || tmp_str[len] == '\0'))
+			return (1);
+		tmp_str = strchr(tmp_str, ',');
+		if (tmp_str != NULL)
+			tmp_str++; /* Get rid of , */
+	}
+	return (0);
+}
+
+void
+dprintf_setup(int *argc, char **argv)
+{
+	int i, j;
+
+	/*
+	 * Debugging can be specified two ways: by setting the
+	 * environment variable ZFS_DEBUG, or by including a
+	 * "debug=..."  argument on the command line.  The command
+	 * line setting overrides the environment variable.
+	 */
+
+	for (i = 1; i < *argc; i++) {
+		int len = strlen("debug=");
+		/* First look for a command line argument */
+		if (strncmp("debug=", argv[i], len) == 0) {
+			dprintf_string = argv[i] + len;
+			/* Remove from args */
+			for (j = i; j < *argc; j++)
+				argv[j] = argv[j+1];
+			argv[j] = NULL;
+			(*argc)--;
+		}
+	}
+
+	if (dprintf_string == NULL) {
+		/* Look for ZFS_DEBUG environment variable */
+		dprintf_string = getenv("ZFS_DEBUG");
+	}
+
+	/*
+	 * Are we just turning on all debugging?
+	 */
+	if (dprintf_find_string("on"))
+		dprintf_print_all = 1;
+}
+
+/*
+ * =========================================================================
+ * debug printfs
+ * =========================================================================
+ */
+void
+__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
+{
+	const char *newfile;
+	va_list adx;
+
+	/*
+	 * Get rid of annoying "../common/" prefix to filename.
+	 */
+	newfile = strrchr(file, '/');
+	if (newfile != NULL) {
+		newfile = newfile + 1; /* Get rid of leading / */
+	} else {
+		newfile = file;
+	}
+
+	if (dprintf_print_all ||
+	    dprintf_find_string(newfile) ||
+	    dprintf_find_string(func)) {
+		/* Print out just the function name if requested */
+		flockfile(stdout);
+		if (dprintf_find_string("pid"))
+			(void) printf("%d ", getpid());
+		if (dprintf_find_string("tid"))
+			(void) printf("%u ", thr_self());
+#if 0
+		if (dprintf_find_string("cpu"))
+			(void) printf("%u ", getcpuid());
+#endif
+		if (dprintf_find_string("time"))
+			(void) printf("%llu ", gethrtime());
+		if (dprintf_find_string("long"))
+			(void) printf("%s, line %d: ", newfile, line);
+		(void) printf("%s: ", func);
+		va_start(adx, fmt);
+		(void) vprintf(fmt, adx);
+		va_end(adx);
+		funlockfile(stdout);
+	}
+}
+
+#endif /* ZFS_DEBUG */
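
Putting the two pieces together: the debug spec is a comma-separated list matched entry-for-entry by dprintf_find_string(), and entries such as "time" or "long" double as output modifiers in __dprintf(). The sketch below, not part of this commit, restates the matching rule against a hypothetical spec so it compiles on its own:

#include <stdio.h>
#include <string.h>

static const char *spec = "dmu.c,spa_sync,time,long";	/* hypothetical ZFS_DEBUG value */

static int
find_string(const char *string)
{
	const char *tmp = spec;
	size_t len = strlen(string);

	/* an entry matches only when followed by ',' or end of string */
	while (tmp != NULL) {
		if (strncmp(tmp, string, len) == 0 &&
		    (tmp[len] == ',' || tmp[len] == '\0'))
			return (1);
		tmp = strchr(tmp, ',');
		if (tmp != NULL)
			tmp++;
	}
	return (0);
}

int
main(void)
{
	(void) printf("dmu.c    -> %d\n", find_string("dmu.c"));	/* 1 */
	(void) printf("dmu      -> %d\n", find_string("dmu"));		/* 0 */
	(void) printf("spa_sync -> %d\n", find_string("spa_sync"));	/* 1 */
	(void) printf("zio.c    -> %d\n", find_string("zio.c"));	/* 0 */
	return (0);
}

With that spec set via ZFS_DEBUG or a "debug=" argument, messages from dmu.c or from any function named spa_sync would be printed, each prefixed with a timestamp and file/line because "time" and "long" also match.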
+
+/*
+ * =========================================================================
+ * cmn_err() and panic()
+ * =========================================================================
+ */
+static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
+static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
+
+void
+vpanic(const char *fmt, va_list adx)
+{
+	(void) fprintf(stderr, "error: ");
+	(void) vfprintf(stderr, fmt, adx);
+	(void) fprintf(stderr, "\n");
+
+	abort();	/* think of it as a "user-level crash dump" */
+}
+
+void
+panic(const char *fmt, ...)
+{
+	va_list adx;
+
+	va_start(adx, fmt);
+	vpanic(fmt, adx);
+	va_end(adx);
+}
+
+void
+vcmn_err(int ce, const char *fmt, va_list adx)
+{
+	if (ce == CE_PANIC)
+		vpanic(fmt, adx);
+	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
+		(void) fprintf(stderr, "%s", ce_prefix[ce]);
+		(void) vfprintf(stderr, fmt, adx);
+		(void) fprintf(stderr, "%s", ce_suffix[ce]);
+	}
+}
+
+/*PRINTFLIKE2*/
+void
+cmn_err(int ce, const char *fmt, ...)
+{
+	va_list adx;
+
+	va_start(adx, fmt);
+	vcmn_err(ce, fmt, adx);
+	va_end(adx);
+}
+
+/*
+ * =========================================================================
+ * kobj interfaces
+ * =========================================================================
+ */
+struct _buf *
+kobj_open_file(char *name)
+{
+	struct _buf *file;
+	vnode_t *vp;
+
+	/* set vp as the _fd field of the file */
+	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir) != 0)
+		return ((void *)-1UL);
+
+	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
+	file->_fd = (intptr_t)vp;
+	return (file);
+}
+
+int
+kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
+{
+	ssize_t resid;
+
+	vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
+	    UIO_SYSSPACE, 0, 0, 0, &resid);
+
+	return (size - resid);
+}
+
+void
+kobj_close_file(struct _buf *file)
+{
+	vn_close((vnode_t *)file->_fd);
+	umem_free(file, sizeof (struct _buf));
+}
+
+int
+kobj_get_filesize(struct _buf *file, uint64_t *size)
+{
+	struct stat64 st;
+	vnode_t *vp = (vnode_t *)file->_fd;
+
+	if (fstat64(vp->v_fd, &st) == -1) {
+		vn_close(vp);
+		return (errno);
+	}
+	*size = st.st_size;
+	return (0);
+}
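
Taken together, the kobj_* routines above give a minimal open/size/read/close interface on top of the vnode emulation. A usage sketch, assuming it is linked against this kernel.c and the libzpool headers (which supply umem_alloc() and friends); note that paths are resolved relative to the fake rootdir, i.e. a leading '/' is prepended by vn_openat():

#include <sys/zfs_context.h>
#include <stdio.h>

int
dump_file(char *name)
{
	struct _buf *file;
	uint64_t size;
	char *buf;
	int n;

	if ((file = kobj_open_file(name)) == (struct _buf *)-1UL)
		return (-1);

	if (kobj_get_filesize(file, &size) != 0) {
		/* kobj_get_filesize() already closed the vnode on failure */
		umem_free(file, sizeof (struct _buf));
		return (-1);
	}

	buf = umem_alloc(size, UMEM_NOFAIL);
	n = kobj_read_file(file, buf, (unsigned)size, 0);
	(void) printf("read %d of %llu bytes from %s\n", n,
	    (u_longlong_t)size, name);

	umem_free(buf, size);
	kobj_close_file(file);
	return (0);
}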
+
+/*
+ * =========================================================================
+ * misc routines
+ * =========================================================================
+ */
+
+void
+delay(clock_t ticks)
+{
+	poll(0, 0, ticks * (1000 / hz));
+}
+
+#if 0
+/*
+ * Find highest one bit set.
+ *	Returns bit number + 1 of highest bit that is set, otherwise returns 0.
+ * High order bit is 31 (or 63 in _LP64 kernel).
+ */
+int
+highbit(ulong_t i)
+{
+	register int h = 1;
+
+	if (i == 0)
+		return (0);
+#ifdef _LP64
+	if (i & 0xffffffff00000000ul) {
+		h += 32; i >>= 32;
+	}
+#endif
+	if (i & 0xffff0000) {
+		h += 16; i >>= 16;
+	}
+	if (i & 0xff00) {
+		h += 8; i >>= 8;
+	}
+	if (i & 0xf0) {
+		h += 4; i >>= 4;
+	}
+	if (i & 0xc) {
+		h += 2; i >>= 2;
+	}
+	if (i & 0x2) {
+		h += 1;
+	}
+	return (h);
+}
+#endif
+
+static int
+random_get_bytes_common(uint8_t *ptr, size_t len, char *devname)
+{
+	int fd = open(devname, O_RDONLY);
+	size_t resid = len;
+	ssize_t bytes;
+
+	ASSERT(fd != -1);
+
+	while (resid != 0) {
+		bytes = read(fd, ptr, resid);
+		ASSERT(bytes >= 0);
+		ptr += bytes;
+		resid -= bytes;
+	}
+
+	close(fd);
+
+	return (0);
+}
+
+int
+random_get_bytes(uint8_t *ptr, size_t len)
+{
+	return (random_get_bytes_common(ptr, len, "/dev/random"));
+}
+
+int
+random_get_pseudo_bytes(uint8_t *ptr, size_t len)
+{
+	return (random_get_bytes_common(ptr, len, "/dev/urandom"));
+}
+
+int
+ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
+{
+	char *end;
+
+	*result = strtoul(hw_serial, &end, base);
+	if (*result == 0)
+		return (errno);
+	return (0);
+}
+
+/*
+ * =========================================================================
+ * kernel emulation setup & teardown
+ * =========================================================================
+ */
+static int
+umem_out_of_memory(void)
+{
+	char errmsg[] = "out of memory -- generating core dump\n";
+
+	write(fileno(stderr), errmsg, sizeof (errmsg));
+	abort();
+	return (0);
+}
+
+void
+kernel_init(int mode)
+{
+	umem_nofail_callback(umem_out_of_memory);
+
+	physmem = sysconf(_SC_PHYS_PAGES);
+
+	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
+	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
+
+	snprintf(hw_serial, sizeof (hw_serial), "%ld", gethostid());
+
+	spa_init(mode);
+}
+
+void
+kernel_fini(void)
+{
+	spa_fini();
+}
+
+int
+z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
+{
+	int ret;
+	uLongf len = *dstlen;
+
+	if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
+		*dstlen = (size_t)len;
+
+	return (ret);
+}
+
+int
+z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
+    int level)
+{
+	int ret;
+	uLongf len = *dstlen;
+
+	if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
+		*dstlen = (size_t)len;
+
+	return (ret);
+}
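
A quick round-trip through the two zlib wrappers above, assuming the snippet is linked against this kernel.c and -lz; Z_OK is zero, so plain comparisons against 0 suffice:

#include <stdio.h>
#include <string.h>

extern int z_compress_level(void *, size_t *, const void *, size_t, int);
extern int z_uncompress(void *, size_t *, const void *, size_t);

int
main(void)
{
	char src[256], packed[512], unpacked[256];
	size_t packedlen = sizeof (packed), outlen = sizeof (unpacked);

	(void) memset(src, 'z', sizeof (src));	/* highly compressible */

	if (z_compress_level(packed, &packedlen, src, sizeof (src), 9) != 0)
		return (1);
	if (z_uncompress(unpacked, &outlen, packed, packedlen) != 0)
		return (1);

	(void) printf("256 -> %zu -> %zu bytes, match=%d\n",
	    packedlen, outlen, memcmp(src, unpacked, sizeof (src)) == 0);
	return (0);
}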
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
@@ -0,0 +1,250 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/zfs_context.h>
+
+int taskq_now;
+
+typedef struct task {
+	struct task	*task_next;
+	struct task	*task_prev;
+	task_func_t	*task_func;
+	void		*task_arg;
+} task_t;
+
+#define	TASKQ_ACTIVE	0x00010000
+
+struct taskq {
+	kmutex_t	tq_lock;
+	krwlock_t	tq_threadlock;
+	kcondvar_t	tq_dispatch_cv;
+	kcondvar_t	tq_wait_cv;
+	thread_t	*tq_threadlist;
+	int		tq_flags;
+	int		tq_active;
+	int		tq_nthreads;
+	int		tq_nalloc;
+	int		tq_minalloc;
+	int		tq_maxalloc;
+	task_t		*tq_freelist;
+	task_t		tq_task;
+};
+
+static task_t *
+task_alloc(taskq_t *tq, int tqflags)
+{
+	task_t *t;
+
+	if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
+		tq->tq_freelist = t->task_next;
+	} else {
+		mutex_exit(&tq->tq_lock);
+		if (tq->tq_nalloc >= tq->tq_maxalloc) {
+			if (!(tqflags & KM_SLEEP)) {
+				mutex_enter(&tq->tq_lock);
+				return (NULL);
+			}
+			/*
+			 * We don't want to exceed tq_maxalloc, but we can't
+			 * wait for other tasks to complete (and thus free up
+			 * task structures) without risking deadlock with
+			 * the caller.  So, we just delay for one second
+			 * to throttle the allocation rate.
+			 */
+			delay(hz);
+		}
+		t = kmem_alloc(sizeof (task_t), tqflags);
+		mutex_enter(&tq->tq_lock);
+		if (t != NULL)
+			tq->tq_nalloc++;
+	}
+	return (t);
+}
+
+static void
+task_free(taskq_t *tq, task_t *t)
+{
+	if (tq->tq_nalloc <= tq->tq_minalloc) {
+		t->task_next = tq->tq_freelist;
+		tq->tq_freelist = t;
+	} else {
+		tq->tq_nalloc--;
+		mutex_exit(&tq->tq_lock);
+		kmem_free(t, sizeof (task_t));
+		mutex_enter(&tq->tq_lock);
+	}
+}
+
+taskqid_t
+taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
+{
+	task_t *t;
+
+	if (taskq_now) {
+		func(arg);
+		return (1);
+	}
+
+	mutex_enter(&tq->tq_lock);
+	ASSERT(tq->tq_flags & TASKQ_ACTIVE);
+	if ((t = task_alloc(tq, tqflags)) == NULL) {
+		mutex_exit(&tq->tq_lock);
+		return (0);
+	}
+	t->task_next = &tq->tq_task;
+	t->task_prev = tq->tq_task.task_prev;
+	t->task_next->task_prev = t;
+	t->task_prev->task_next = t;
+	t->task_func = func;
+	t->task_arg = arg;
+	cv_signal(&tq->tq_dispatch_cv);
+	mutex_exit(&tq->tq_lock);
+	return (1);
+}
+
+void
+taskq_wait(taskq_t *tq)
+{
+	mutex_enter(&tq->tq_lock);
+	while (tq->tq_task.task_next != &tq->tq_task || tq->tq_active != 0)
+		cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
+	mutex_exit(&tq->tq_lock);
+}
+
+static void *
+taskq_thread(void *arg)
+{
+	taskq_t *tq = arg;
+	task_t *t;
+
+	mutex_enter(&tq->tq_lock);
+	while (tq->tq_flags & TASKQ_ACTIVE) {
+		if ((t = tq->tq_task.task_next) == &tq->tq_task) {
+			if (--tq->tq_active == 0)
+				cv_broadcast(&tq->tq_wait_cv);
+			cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock);
+			tq->tq_active++;
+			continue;
+		}
+		t->task_prev->task_next = t->task_next;
+		t->task_next->task_prev = t->task_prev;
+		mutex_exit(&tq->tq_lock);
+
+		rw_enter(&tq->tq_threadlock, RW_READER);
+		t->task_func(t->task_arg);
+		rw_exit(&tq->tq_threadlock);
+
+		mutex_enter(&tq->tq_lock);
+		task_free(tq, t);
+	}
+	tq->tq_nthreads--;
+	cv_broadcast(&tq->tq_wait_cv);
+	mutex_exit(&tq->tq_lock);
+	return (NULL);
+}
+
+/*ARGSUSED*/
+taskq_t *
+taskq_create(const char *name, int nthreads, pri_t pri,
+	int minalloc, int maxalloc, uint_t flags)
+{
+	taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP);
+	int t;
+
+	rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL);
+	tq->tq_flags = flags | TASKQ_ACTIVE;
+	tq->tq_active = nthreads;
+	tq->tq_nthreads = nthreads;
+	tq->tq_minalloc = minalloc;
+	tq->tq_maxalloc = maxalloc;
+	tq->tq_task.task_next = &tq->tq_task;
+	tq->tq_task.task_prev = &tq->tq_task;
+	tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP);
+
+	if (flags & TASKQ_PREPOPULATE) {
+		mutex_enter(&tq->tq_lock);
+		while (minalloc-- > 0)
+			task_free(tq, task_alloc(tq, KM_SLEEP));
+		mutex_exit(&tq->tq_lock);
+	}
+
+	for (t = 0; t < nthreads; t++)
+		(void) thr_create(0, 0, taskq_thread,
+		    tq, THR_BOUND, &tq->tq_threadlist[t]);
+
+	return (tq);
+}
+
+void
+taskq_destroy(taskq_t *tq)
+{
+	int t;
+	int nthreads = tq->tq_nthreads;
+
+	taskq_wait(tq);
+
+	mutex_enter(&tq->tq_lock);
+
+	tq->tq_flags &= ~TASKQ_ACTIVE;
+	cv_broadcast(&tq->tq_dispatch_cv);
+
+	while (tq->tq_nthreads != 0)
+		cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
+
+	tq->tq_minalloc = 0;
+	while (tq->tq_nalloc != 0) {
+		ASSERT(tq->tq_freelist != NULL);
+		task_free(tq, task_alloc(tq, KM_SLEEP));
+	}
+
+	mutex_exit(&tq->tq_lock);
+
+	for (t = 0; t < nthreads; t++)
+		(void) thr_join(tq->tq_threadlist[t], NULL, NULL);
+
+	kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t));
+
+	rw_destroy(&tq->tq_threadlock);
+
+	kmem_free(tq, sizeof (taskq_t));
+}
+
+int
+taskq_member(taskq_t *tq, void *t)
+{
+	int i;
+
+	if (taskq_now)
+		return (1);
+
+	for (i = 0; i < tq->tq_nthreads; i++)
+		if (tq->tq_threadlist[i] == (thread_t)(uintptr_t)t)
+			return (1);
+
+	return (0);
+}
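
The functions above are the entire consumer-facing surface of this taskq emulation: create a queue, dispatch work items, wait for the queue to drain, destroy it. A minimal usage sketch, assuming it is linked against this taskq.c and the libzpool zfs_context.h (which supplies taskq_t, KM_SLEEP and TASKQ_PREPOPULATE); the priority argument is unused by this userland implementation:

#include <sys/zfs_context.h>
#include <stdio.h>

static void
say_hello(void *arg)
{
	(void) printf("task %d ran\n", (int)(uintptr_t)arg);
}

void
taskq_demo(void)
{
	taskq_t *tq;
	int i;

	/* 4 worker threads, keep between 2 and 8 task_t structures cached */
	tq = taskq_create("demo", 4, 0, 2, 8, TASKQ_PREPOPULATE);

	for (i = 0; i < 16; i++)
		(void) taskq_dispatch(tq, say_hello, (void *)(uintptr_t)i,
		    KM_SLEEP);

	taskq_wait(tq);		/* block until the queue is empty and idle */
	taskq_destroy(tq);	/* joins the worker threads and frees tq */
}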
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/ztest/ztest.c
@@ -0,0 +1,3495 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * The objective of this program is to provide a DMU/ZAP/SPA stress test
+ * that runs entirely in userland, is easy to use, and easy to extend.
+ *
+ * The overall design of the ztest program is as follows:
+ *
+ * (1) For each major functional area (e.g. adding vdevs to a pool,
+ *     creating and destroying datasets, reading and writing objects, etc)
+ *     we have a simple routine to test that functionality.  These
+ *     individual routines do not have to do anything "stressful".
+ *
+ * (2) We turn these simple functionality tests into a stress test by
+ *     running them all in parallel, with as many threads as desired,
+ *     and spread across as many datasets, objects, and vdevs as desired.
+ *
+ * (3) While all this is happening, we inject faults into the pool to
+ *     verify that self-healing data really works.
+ *
+ * (4) Every time we open a dataset, we change its checksum and compression
+ *     functions.  Thus even individual objects vary from block to block
+ *     in which checksum they use and whether they're compressed.
+ *
+ * (5) To verify that we never lose on-disk consistency after a crash,
+ *     we run the entire test in a child of the main process.
+ *     At random times, the child self-immolates with a SIGKILL.
+ *     This is the software equivalent of pulling the power cord.
+ *     The parent then runs the test again, using the existing
+ *     storage pool, as many times as desired.
+ *
+ * (6) To verify that we don't have future leaks or temporal incursions,
+ *     many of the functional tests record the transaction group number
+ *     as part of their data.  When reading old data, they verify that
+ *     the transaction group number is less than the current, open txg.
+ *     If you add a new test, please do this if applicable.
+ *
+ * When run with no arguments, ztest runs for about five minutes and
+ * produces no output if successful.  To get a little bit of information,
+ * specify -V.  To get more information, specify -VV, and so on.
+ *
+ * To turn this into an overnight stress test, use -T to specify run time.
+ *
+ * You can ask for more vdevs [-v], datasets [-d], or threads [-t]
+ * to increase the pool capacity, fanout, and overall stress level.
+ *
+ * The -N(okill) option will suppress kills, so each child runs to completion.
+ * This can be useful when you're trying to distinguish temporal incursions
+ * from plain old race conditions.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/dmu.h>
+#include <sys/txg.h>
+#include <sys/zap.h>
+#include <sys/dmu_traverse.h>
+#include <sys/dmu_objset.h>
+#include <sys/poll.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/zio.h>
+#include <sys/zio_checksum.h>
+#include <sys/zio_compress.h>
+#include <sys/zil.h>
+#include <sys/vdev_impl.h>
+#include <sys/spa_impl.h>
+#include <sys/dsl_prop.h>
+#include <sys/refcount.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <umem.h>
+#include <dlfcn.h>
+#include <ctype.h>
+#include <math.h>
+#include <errno.h>
+#include <sys/fs/zfs.h>
+
+static char cmdname[] = "ztest";
+static char *zopt_pool = cmdname;
+static char *progname;
+
+static uint64_t zopt_vdevs = 5;
+static uint64_t zopt_vdevtime;
+static int zopt_ashift = SPA_MINBLOCKSHIFT;
+static int zopt_mirrors = 2;
+static int zopt_raidz = 4;
+static int zopt_raidz_parity = 1;
+static size_t zopt_vdev_size = SPA_MINDEVSIZE;
+static int zopt_datasets = 7;
+static int zopt_threads = 23;
+static uint64_t zopt_passtime = 60;	/* 60 seconds */
+static uint64_t zopt_killrate = 70;	/* 70% kill rate */
+static int zopt_verbose = 0;
+static int zopt_init = 1;
+static char *zopt_dir = "/tmp";
+static uint64_t zopt_time = 300;	/* 5 minutes */
+static int zopt_maxfaults;
+
+typedef struct ztest_args {
+	char		*za_pool;
+	objset_t	*za_os;
+	zilog_t		*za_zilog;
+	thread_t	za_thread;
+	uint64_t	za_instance;
+	uint64_t	za_random;
+	uint64_t	za_diroff;
+	uint64_t	za_diroff_shared;
+	uint64_t	za_zil_seq;
+	hrtime_t	za_start;
+	hrtime_t	za_stop;
+	hrtime_t	za_kill;
+	traverse_handle_t *za_th;
+} ztest_args_t;
+
+typedef void ztest_func_t(ztest_args_t *);
+
+/*
+ * Note: these aren't static because we want dladdr() to work.
+ */
+ztest_func_t ztest_dmu_read_write;
+ztest_func_t ztest_dmu_write_parallel;
+ztest_func_t ztest_dmu_object_alloc_free;
+ztest_func_t ztest_zap;
+ztest_func_t ztest_zap_parallel;
+ztest_func_t ztest_traverse;
+ztest_func_t ztest_dsl_prop_get_set;
+ztest_func_t ztest_dmu_objset_create_destroy;
+ztest_func_t ztest_dmu_snapshot_create_destroy;
+ztest_func_t ztest_spa_create_destroy;
+ztest_func_t ztest_fault_inject;
+ztest_func_t ztest_vdev_attach_detach;
+ztest_func_t ztest_vdev_LUN_growth;
+ztest_func_t ztest_vdev_add_remove;
+ztest_func_t ztest_scrub;
+ztest_func_t ztest_spa_rename;
+
+typedef struct ztest_info {
+	ztest_func_t	*zi_func;	/* test function */
+	uint64_t	*zi_interval;	/* execute every <interval> seconds */
+	uint64_t	zi_calls;	/* per-pass count */
+	uint64_t	zi_call_time;	/* per-pass time */
+	uint64_t	zi_call_total;	/* cumulative total */
+	uint64_t	zi_call_target;	/* target cumulative total */
+} ztest_info_t;
+
+uint64_t zopt_always = 0;		/* all the time */
+uint64_t zopt_often = 1;		/* every second */
+uint64_t zopt_sometimes = 10;		/* every 10 seconds */
+uint64_t zopt_rarely = 60;		/* every 60 seconds */
+
+ztest_info_t ztest_info[] = {
+	{ ztest_dmu_read_write,			&zopt_always	},
+	{ ztest_dmu_write_parallel,		&zopt_always	},
+	{ ztest_dmu_object_alloc_free,		&zopt_always	},
+	{ ztest_zap,				&zopt_always	},
+	{ ztest_zap_parallel,			&zopt_always	},
+	{ ztest_traverse,			&zopt_often	},
+	{ ztest_dsl_prop_get_set,		&zopt_sometimes	},
+	{ ztest_dmu_objset_create_destroy,	&zopt_sometimes	},
+	{ ztest_dmu_snapshot_create_destroy,	&zopt_rarely	},
+	{ ztest_spa_create_destroy,		&zopt_sometimes	},
+	{ ztest_fault_inject,			&zopt_sometimes	},
+	{ ztest_spa_rename,			&zopt_rarely	},
+	{ ztest_vdev_attach_detach,		&zopt_rarely	},
+	{ ztest_vdev_LUN_growth,		&zopt_rarely	},
+	{ ztest_vdev_add_remove,		&zopt_vdevtime	},
+	{ ztest_scrub,				&zopt_vdevtime	},
+};
+
+#define	ZTEST_FUNCS	(sizeof (ztest_info) / sizeof (ztest_info_t))
+
+#define	ZTEST_SYNC_LOCKS	16
+
+/*
+ * Stuff we need to share writably between parent and child.
+ */
+typedef struct ztest_shared {
+	mutex_t		zs_vdev_lock;
+	rwlock_t	zs_name_lock;
+	uint64_t	zs_vdev_primaries;
+	uint64_t	zs_enospc_count;
+	hrtime_t	zs_start_time;
+	hrtime_t	zs_stop_time;
+	uint64_t	zs_alloc;
+	uint64_t	zs_space;
+	uint64_t	zs_txg;
+	ztest_info_t	zs_info[ZTEST_FUNCS];
+	mutex_t		zs_sync_lock[ZTEST_SYNC_LOCKS];
+	uint64_t	zs_seq[ZTEST_SYNC_LOCKS];
+} ztest_shared_t;
+
+typedef struct ztest_block_tag {
+	uint64_t	bt_objset;
+	uint64_t	bt_object;
+	uint64_t	bt_offset;
+	uint64_t	bt_txg;
+	uint64_t	bt_thread;
+	uint64_t	bt_seq;
+} ztest_block_tag_t;
+
+static char ztest_dev_template[] = "%s/%s.%llua";
+static ztest_shared_t *ztest_shared;
+
+static int ztest_random_fd;
+static int ztest_dump_core = 1;
+
+extern uint64_t zio_gang_bang;
+extern uint16_t zio_zil_fail_shift;
+
+#define	ZTEST_DIROBJ		1
+#define	ZTEST_MICROZAP_OBJ	2
+#define	ZTEST_FATZAP_OBJ	3
+
+#define	ZTEST_DIROBJ_BLOCKSIZE	(1 << 10)
+#define	ZTEST_DIRSIZE		256
+
+static void usage(boolean_t) __NORETURN;
+
+/*
+ * These libumem hooks provide a reasonable set of defaults for the allocator's
+ * debugging facilities.
+ */
+const char *
+_umem_debug_init(void)
+{
+	return ("default,verbose"); /* $UMEM_DEBUG setting */
+}
+
+const char *
+_umem_logging_init(void)
+{
+	return ("fail,contents"); /* $UMEM_LOGGING setting */
+}
+
+#define	FATAL_MSG_SZ	1024
+
+char *fatal_msg;
+
+static void
+fatal(int do_perror, char *message, ...)
+{
+	va_list args;
+	int save_errno = errno;
+	char buf[FATAL_MSG_SZ];
+
+	(void) fflush(stdout);
+
+	va_start(args, message);
+	(void) sprintf(buf, "ztest: ");
+	/* LINTED */
+	(void) vsprintf(buf + strlen(buf), message, args);
+	va_end(args);
+	if (do_perror) {
+		(void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf),
+		    ": %s", strerror(save_errno));
+	}
+	(void) fprintf(stderr, "%s\n", buf);
+	fatal_msg = buf;			/* to ease debugging */
+	if (ztest_dump_core)
+		abort();
+	exit(3);
+}
+
+static int
+str2shift(const char *buf)
+{
+	const char *ends = "BKMGTPEZ";
+	int i;
+
+	if (buf[0] == '\0')
+		return (0);
+	for (i = 0; i < strlen(ends); i++) {
+		if (toupper(buf[0]) == ends[i])
+			break;
+	}
+	if (i == strlen(ends)) {
+		(void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n",
+		    buf);
+		usage(B_FALSE);
+	}
+	if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) {
+		return (10*i);
+	}
+	(void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf);
+	usage(B_FALSE);
+	/* NOTREACHED */
+}
+
+static uint64_t
+nicenumtoull(const char *buf)
+{
+	char *end;
+	uint64_t val;
+
+	val = strtoull(buf, &end, 0);
+	if (end == buf) {
+		(void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf);
+		usage(B_FALSE);
+	} else if (end[0] == '.') {
+		double fval = strtod(buf, &end);
+		fval *= pow(2, str2shift(end));
+		if (fval > UINT64_MAX) {
+			(void) fprintf(stderr, "ztest: value too large: %s\n",
+			    buf);
+			usage(B_FALSE);
+		}
+		val = (uint64_t)fval;
+	} else {
+		int shift = str2shift(end);
+		if (shift >= 64 || (val << shift) >> shift != val) {
+			(void) fprintf(stderr, "ztest: value too large: %s\n",
+			    buf);
+			usage(B_FALSE);
+		}
+		val <<= shift;
+	}
+	return (val);
+}
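
str2shift() and nicenumtoull() together accept plain integers, power-of-two suffixes (B, K, M, G, T, P, E, Z) and fractional forms such as "1.5G". The sketch below restates the arithmetic in simplified, standalone form (no overflow or error checking, unlike the originals, which are static to ztest.c), with two worked values in the comments:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <stdint.h>

static int
suffix_shift(char c)
{
	const char *ends = "BKMGTPEZ";
	const char *p;

	if (c == '\0')
		return (0);
	p = strchr(ends, toupper((unsigned char)c));
	return (p == NULL ? 0 : 10 * (int)(p - ends));
}

static uint64_t
to_bytes(const char *buf)
{
	char *end;
	uint64_t val = strtoull(buf, &end, 0);

	if (end[0] == '.') {			/* fractional, e.g. "1.5G" */
		double fval = strtod(buf, &end);
		return ((uint64_t)(fval * pow(2, suffix_shift(end[0]))));
	}
	return (val << suffix_shift(end[0]));
}

int
main(void)
{
	(void) printf("%llu\n", (unsigned long long)to_bytes("64K"));	/* 65536 */
	(void) printf("%llu\n", (unsigned long long)to_bytes("1.5G"));	/* 1610612736 */
	return (0);
}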
+
+static void
+usage(boolean_t requested)
+{
+	char nice_vdev_size[10];
+	char nice_gang_bang[10];
+	FILE *fp = requested ? stdout : stderr;
+
+	nicenum(zopt_vdev_size, nice_vdev_size);
+	nicenum(zio_gang_bang, nice_gang_bang);
+
+	(void) fprintf(fp, "Usage: %s\n"
+	    "\t[-v vdevs (default: %llu)]\n"
+	    "\t[-s size_of_each_vdev (default: %s)]\n"
+	    "\t[-a alignment_shift (default: %d) (use 0 for random)]\n"
+	    "\t[-m mirror_copies (default: %d)]\n"
+	    "\t[-r raidz_disks (default: %d)]\n"
+	    "\t[-R raidz_parity (default: %d)]\n"
+	    "\t[-d datasets (default: %d)]\n"
+	    "\t[-t threads (default: %d)]\n"
+	    "\t[-g gang_block_threshold (default: %s)]\n"
+	    "\t[-i initialize pool i times (default: %d)]\n"
+	    "\t[-k kill percentage (default: %llu%%)]\n"
+	    "\t[-p pool_name (default: %s)]\n"
+	    "\t[-f file directory for vdev files (default: %s)]\n"
+	    "\t[-V(erbose)] (use multiple times for ever more blather)\n"
+	    "\t[-E(xisting)] (use existing pool instead of creating new one)\n"
+	    "\t[-T time] total run time (default: %llu sec)\n"
+	    "\t[-P passtime] time per pass (default: %llu sec)\n"
+	    "\t[-z zil failure rate (default: fail every 2^%llu allocs)]\n"
+	    "\t[-h] (print help)\n"
+	    "",
+	    cmdname,
+	    (u_longlong_t)zopt_vdevs,		/* -v */
+	    nice_vdev_size,			/* -s */
+	    zopt_ashift,			/* -a */
+	    zopt_mirrors,			/* -m */
+	    zopt_raidz,				/* -r */
+	    zopt_raidz_parity,			/* -R */
+	    zopt_datasets,			/* -d */
+	    zopt_threads,			/* -t */
+	    nice_gang_bang,			/* -g */
+	    zopt_init,				/* -i */
+	    (u_longlong_t)zopt_killrate,	/* -k */
+	    zopt_pool,				/* -p */
+	    zopt_dir,				/* -f */
+	    (u_longlong_t)zopt_time,		/* -T */
+	    (u_longlong_t)zopt_passtime,	/* -P */
+	    (u_longlong_t)zio_zil_fail_shift);	/* -z */
+	exit(requested ? 0 : 1);
+}
+
+static uint64_t
+ztest_random(uint64_t range)
+{
+	uint64_t r;
+
+	if (range == 0)
+		return (0);
+
+	if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r))
+		fatal(1, "short read from /dev/urandom");
+
+	return (r % range);
+}
+
+static void
+ztest_record_enospc(char *s)
+{
+	dprintf("ENOSPC doing: %s\n", s ? s : "<unknown>");
+	ztest_shared->zs_enospc_count++;
+}
+
+static void
+process_options(int argc, char **argv)
+{
+	int opt;
+	uint64_t value;
+
+	/* Remember program name. */
+	progname = argv[0];
+
+	/* By default, test gang blocks for blocks 32K and greater */
+	zio_gang_bang = 32 << 10;
+
+	/* Default value, fail every 32nd allocation */
+	zio_zil_fail_shift = 5;
+
+	while ((opt = getopt(argc, argv,
+	    "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:z:h")) != EOF) {
+		value = 0;
+		switch (opt) {
+		    case 'v':
+		    case 's':
+		    case 'a':
+		    case 'm':
+		    case 'r':
+		    case 'R':
+		    case 'd':
+		    case 't':
+		    case 'g':
+		    case 'i':
+		    case 'k':
+		    case 'T':
+		    case 'P':
+		    case 'z':
+			value = nicenumtoull(optarg);
+		}
+		switch (opt) {
+		    case 'v':
+			zopt_vdevs = value;
+			break;
+		    case 's':
+			zopt_vdev_size = MAX(SPA_MINDEVSIZE, value);
+			break;
+		    case 'a':
+			zopt_ashift = value;
+			break;
+		    case 'm':
+			zopt_mirrors = value;
+			break;
+		    case 'r':
+			zopt_raidz = MAX(1, value);
+			break;
+		    case 'R':
+			zopt_raidz_parity = MIN(MAX(value, 1), 2);
+			break;
+		    case 'd':
+			zopt_datasets = MAX(1, value);
+			break;
+		    case 't':
+			zopt_threads = MAX(1, value);
+			break;
+		    case 'g':
+			zio_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, value);
+			break;
+		    case 'i':
+			zopt_init = value;
+			break;
+		    case 'k':
+			zopt_killrate = value;
+			break;
+		    case 'p':
+			zopt_pool = strdup(optarg);
+			break;
+		    case 'f':
+			zopt_dir = strdup(optarg);
+			break;
+		    case 'V':
+			zopt_verbose++;
+			break;
+		    case 'E':
+			zopt_init = 0;
+			break;
+		    case 'T':
+			zopt_time = value;
+			break;
+		    case 'P':
+			zopt_passtime = MAX(1, value);
+			break;
+		    case 'z':
+			zio_zil_fail_shift = MIN(value, 16);
+			break;
+		    case 'h':
+			usage(B_TRUE);
+			break;
+		    case '?':
+		    default:
+			usage(B_FALSE);
+			break;
+		}
+	}
+
+	zopt_raidz_parity = MIN(zopt_raidz_parity, zopt_raidz - 1);
+
+	zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time / zopt_vdevs : UINT64_MAX);
+	zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz_parity + 1) - 1;
+}
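
The last two lines derive the vdev add/scrub interval and the fault budget from the redundancy options. With the defaults above (5 vdevs, a 300 second run, 2-way mirrors, raidz parity 1) they work out as in this small restatement; the interpretation comments are mine, inferred from how zopt_vdevtime drives ztest_vdev_add_remove and ztest_scrub in ztest_info[]:

#include <stdio.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))

int
main(void)
{
	unsigned long long vdevs = 5, seconds = 300;	/* -v, -T defaults */
	int mirrors = 2, raidz_parity = 1;		/* -m, -R defaults */

	/* interval driving ztest_vdev_add_remove and ztest_scrub */
	unsigned long long vdevtime = vdevs > 0 ? seconds / vdevs : ~0ULL;

	/* zopt_maxfaults with the default redundancy */
	int maxfaults = MAX(mirrors, 1) * (raidz_parity + 1) - 1;

	(void) printf("vdevtime  = %llu sec\n", vdevtime);	/* 60 */
	(void) printf("maxfaults = %d\n", maxfaults);		/* 3 */
	return (0);
}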
+
+static uint64_t
+ztest_get_ashift(void)
+{
+	if (zopt_ashift == 0)
+		return (SPA_MINBLOCKSHIFT + ztest_random(3));
+	return (zopt_ashift);
+}
+
+static nvlist_t *
+make_vdev_file(size_t size)
+{
+	char dev_name[MAXPATHLEN];
+	uint64_t vdev;
+	uint64_t ashift = ztest_get_ashift();
+	int fd;
+	nvlist_t *file;
+
+	if (size == 0) {
+		(void) snprintf(dev_name, sizeof (dev_name), "%s",
+		    "/dev/bogus");
+	} else {
+		vdev = ztest_shared->zs_vdev_primaries++;
+		(void) sprintf(dev_name, ztest_dev_template,
+		    zopt_dir, zopt_pool, vdev);
+
+		fd = open(dev_name, O_RDWR | O_CREAT | O_TRUNC, 0666);
+		if (fd == -1)
+			fatal(1, "can't open %s", dev_name);
+		if (ftruncate(fd, size) != 0)
+			fatal(1, "can't ftruncate %s", dev_name);
+		(void) close(fd);
+	}
+
+	VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
+	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
+	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, dev_name) == 0);
+	VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
+
+	return (file);
+}
+
+static nvlist_t *
+make_vdev_raidz(size_t size, int r)
+{
+	nvlist_t *raidz, **child;
+	int c;
+
+	if (r < 2)
+		return (make_vdev_file(size));
+
+	child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL);
+
+	for (c = 0; c < r; c++)
+		child[c] = make_vdev_file(size);
+
+	VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
+	VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
+	    VDEV_TYPE_RAIDZ) == 0);
+	VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
+	    zopt_raidz_parity) == 0);
+	VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
+	    child, r) == 0);
+
+	for (c = 0; c < r; c++)
+		nvlist_free(child[c]);
+
+	umem_free(child, r * sizeof (nvlist_t *));
+
+	return (raidz);
+}
+
+static nvlist_t *
+make_vdev_mirror(size_t size, int r, int m)
+{
+	nvlist_t *mirror, **child;
+	int c;
+
+	if (m < 1)
+		return (make_vdev_raidz(size, r));
+
+	child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL);
+
+	for (c = 0; c < m; c++)
+		child[c] = make_vdev_raidz(size, r);
+
+	VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
+	VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
+	    VDEV_TYPE_MIRROR) == 0);
+	VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN,
+	    child, m) == 0);
+
+	for (c = 0; c < m; c++)
+		nvlist_free(child[c]);
+
+	umem_free(child, m * sizeof (nvlist_t *));
+
+	return (mirror);
+}
+
+static nvlist_t *
+make_vdev_root(size_t size, int r, int m, int t)
+{
+	nvlist_t *root, **child;
+	int c;
+
+	ASSERT(t > 0);
+
+	child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL);
+
+	for (c = 0; c < t; c++)
+		child[c] = make_vdev_mirror(size, r, m);
+
+	VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
+	VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
+	VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
+	    child, t) == 0);
+
+	for (c = 0; c < t; c++)
+		nvlist_free(child[c]);
+
+	umem_free(child, t * sizeof (nvlist_t *));
+
+	return (root);
+}
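
make_vdev_root() therefore nests three layers: t top-level children, each a mirror of m raidz groups of r file vdevs, with the unmirrored and single-disk cases collapsing down to a bare file vdev. The sketch below, not part of this commit, shows the underlying nvlist pattern for the simplest shape (one file vdev under a root), using only the libnvpair calls that already appear above and the same headers ztest.c includes:

#include <sys/zfs_context.h>
#include <sys/fs/zfs.h>

/*
 * Build a one-disk config: root -> { file vdev at 'path' }.
 * Error handling is collapsed into VERIFY, as in the code above.
 */
nvlist_t *
make_single_file_root(const char *path, uint64_t ashift)
{
	nvlist_t *file, *root;

	VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_FILE) == 0);
	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0);
	VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);

	VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
	VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_ROOT) == 0);
	VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
	    &file, 1) == 0);

	nvlist_free(file);	/* the array add copied it, as above */
	return (root);
}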
+
+static void
+ztest_set_random_blocksize(objset_t *os, uint64_t object, dmu_tx_t *tx)
+{
+	int bs = SPA_MINBLOCKSHIFT +
+	    ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1);
+	int ibs = DN_MIN_INDBLKSHIFT +
+	    ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1);
+	int error;
+
+	error = dmu_object_set_blocksize(os, object, 1ULL << bs, ibs, tx);
+	if (error) {
+		char osname[300];
+		dmu_objset_name(os, osname);
+		fatal(0, "dmu_object_set_blocksize('%s', %llu, %d, %d) = %d",
+		    osname, object, 1 << bs, ibs, error);
+	}
+}
+
+static uint8_t
+ztest_random_checksum(void)
+{
+	uint8_t checksum;
+
+	do {
+		checksum = ztest_random(ZIO_CHECKSUM_FUNCTIONS);
+	} while (zio_checksum_table[checksum].ci_zbt);
+
+	if (checksum == ZIO_CHECKSUM_OFF)
+		checksum = ZIO_CHECKSUM_ON;
+
+	return (checksum);
+}
+
+static uint8_t
+ztest_random_compress(void)
+{
+	return ((uint8_t)ztest_random(ZIO_COMPRESS_FUNCTIONS));
+}
+
+typedef struct ztest_replay {
+	objset_t	*zr_os;
+	uint64_t	zr_assign;
+} ztest_replay_t;
+
+static int
+ztest_replay_create(ztest_replay_t *zr, lr_create_t *lr, boolean_t byteswap)
+{
+	objset_t *os = zr->zr_os;
+	dmu_tx_t *tx;
+	int error;
+
+	if (byteswap)
+		byteswap_uint64_array(lr, sizeof (*lr));
+
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+	error = dmu_tx_assign(tx, zr->zr_assign);
+	if (error) {
+		dmu_tx_abort(tx);
+		return (error);
+	}
+
+	error = dmu_object_claim(os, lr->lr_doid, lr->lr_mode, 0,
+	    DMU_OT_NONE, 0, tx);
+	ASSERT3U(error, ==, 0);
+	dmu_tx_commit(tx);
+
+	if (zopt_verbose >= 5) {
+		char osname[MAXNAMELEN];
+		dmu_objset_name(os, osname);
+		(void) printf("replay create of %s object %llu"
+		    " in txg %llu = %d\n",
+		    osname, (u_longlong_t)lr->lr_doid,
+		    (u_longlong_t)zr->zr_assign, error);
+	}
+
+	return (error);
+}
+
+static int
+ztest_replay_remove(ztest_replay_t *zr, lr_remove_t *lr, boolean_t byteswap)
+{
+	objset_t *os = zr->zr_os;
+	dmu_tx_t *tx;
+	int error;
+
+	if (byteswap)
+		byteswap_uint64_array(lr, sizeof (*lr));
+
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_free(tx, lr->lr_doid, 0, DMU_OBJECT_END);
+	error = dmu_tx_assign(tx, zr->zr_assign);
+	if (error) {
+		dmu_tx_abort(tx);
+		return (error);
+	}
+
+	error = dmu_object_free(os, lr->lr_doid, tx);
+	dmu_tx_commit(tx);
+
+	return (error);
+}
+
+zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
+	NULL,			/* 0 no such transaction type */
+	ztest_replay_create,	/* TX_CREATE */
+	NULL,			/* TX_MKDIR */
+	NULL,			/* TX_MKXATTR */
+	NULL,			/* TX_SYMLINK */
+	ztest_replay_remove,	/* TX_REMOVE */
+	NULL,			/* TX_RMDIR */
+	NULL,			/* TX_LINK */
+	NULL,			/* TX_RENAME */
+	NULL,			/* TX_WRITE */
+	NULL,			/* TX_TRUNCATE */
+	NULL,			/* TX_SETATTR */
+	NULL,			/* TX_ACL */
+};
+
+/*
+ * Verify that we can't destroy an active pool, create an existing pool,
+ * or create a pool with a bad vdev spec.
+ */
+void
+ztest_spa_create_destroy(ztest_args_t *za)
+{
+	int error;
+	spa_t *spa;
+	nvlist_t *nvroot;
+
+	/*
+	 * Attempt to create using a bad file.
+	 */
+	nvroot = make_vdev_root(0, 0, 0, 1);
+	error = spa_create("ztest_bad_file", nvroot, NULL);
+	nvlist_free(nvroot);
+	if (error != ENOENT)
+		fatal(0, "spa_create(bad_file) = %d", error);
+
+	/*
+	 * Attempt to create using a bad mirror.
+	 */
+	nvroot = make_vdev_root(0, 0, 2, 1);
+	error = spa_create("ztest_bad_mirror", nvroot, NULL);
+	nvlist_free(nvroot);
+	if (error != ENOENT)
+		fatal(0, "spa_create(bad_mirror) = %d", error);
+
+	/*
+	 * Attempt to create an existing pool.  It shouldn't matter
+	 * what's in the nvroot; we should fail with EEXIST.
+	 */
+	(void) rw_rdlock(&ztest_shared->zs_name_lock);
+	nvroot = make_vdev_root(0, 0, 0, 1);
+	error = spa_create(za->za_pool, nvroot, NULL);
+	nvlist_free(nvroot);
+	if (error != EEXIST)
+		fatal(0, "spa_create(whatever) = %d", error);
+
+	error = spa_open(za->za_pool, &spa, FTAG);
+	if (error)
+		fatal(0, "spa_open() = %d", error);
+
+	error = spa_destroy(za->za_pool);
+	if (error != EBUSY)
+		fatal(0, "spa_destroy() = %d", error);
+
+	spa_close(spa, FTAG);
+	(void) rw_unlock(&ztest_shared->zs_name_lock);
+}
+
+/*
+ * Verify that vdev_add() works as expected.
+ */
+void
+ztest_vdev_add_remove(ztest_args_t *za)
+{
+	spa_t *spa = dmu_objset_spa(za->za_os);
+	uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+	nvlist_t *nvroot;
+	int error;
+
+	if (zopt_verbose >= 6)
+		(void) printf("adding vdev\n");
+
+	(void) mutex_lock(&ztest_shared->zs_vdev_lock);
+
+	spa_config_enter(spa, RW_READER, FTAG);
+
+	ztest_shared->zs_vdev_primaries =
+	    spa->spa_root_vdev->vdev_children * leaves;
+
+	spa_config_exit(spa, FTAG);
+
+	nvroot = make_vdev_root(zopt_vdev_size, zopt_raidz, zopt_mirrors, 1);
+	error = spa_vdev_add(spa, nvroot);
+	nvlist_free(nvroot);
+
+	(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+
+	if (error == ENOSPC)
+		ztest_record_enospc("spa_vdev_add");
+	else if (error != 0)
+		fatal(0, "spa_vdev_add() = %d", error);
+
+	if (zopt_verbose >= 6)
+		(void) printf("spa_vdev_add = %d, as expected\n", error);
+}
+
+static vdev_t *
+vdev_lookup_by_path(vdev_t *vd, const char *path)
+{
+	int c;
+	vdev_t *mvd;
+
+	if (vd->vdev_path != NULL) {
+		if (vd->vdev_wholedisk == 1) {
+			/*
+			 * For whole disks, the internal path has 's0', but the
+			 * path passed in by the user doesn't.
+			 */
+			if (strlen(path) == strlen(vd->vdev_path) - 2 &&
+			    strncmp(path, vd->vdev_path, strlen(path)) == 0)
+				return (vd);
+		} else if (strcmp(path, vd->vdev_path) == 0) {
+			return (vd);
+		}
+	}
+
+	for (c = 0; c < vd->vdev_children; c++)
+		if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) !=
+		    NULL)
+			return (mvd);
+
+	return (NULL);
+}
+
+/*
+ * Verify that we can attach and detach devices.
+ */
+void
+ztest_vdev_attach_detach(ztest_args_t *za)
+{
+	spa_t *spa = dmu_objset_spa(za->za_os);
+	vdev_t *rvd = spa->spa_root_vdev;
+	vdev_t *oldvd, *newvd, *pvd;
+	nvlist_t *root, *file;
+	uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+	uint64_t leaf, top;
+	uint64_t ashift = ztest_get_ashift();
+	size_t oldsize, newsize;
+	char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
+	int replacing;
+	int error, expected_error;
+	int fd;
+
+	(void) mutex_lock(&ztest_shared->zs_vdev_lock);
+
+	spa_config_enter(spa, RW_READER, FTAG);
+
+	/*
+	 * Decide whether to do an attach or a replace.
+	 */
+	replacing = ztest_random(2);
+
+	/*
+	 * Pick a random top-level vdev.
+	 */
+	top = ztest_random(rvd->vdev_children);
+
+	/*
+	 * Pick a random leaf within it.
+	 */
+	leaf = ztest_random(leaves);
+
+	/*
+	 * Generate the path to this leaf.  The filename will end with 'a'.
+	 * We'll alternate replacements with a filename that ends with 'b'.
+	 */
+	(void) snprintf(oldpath, sizeof (oldpath),
+	    ztest_dev_template, zopt_dir, zopt_pool, top * leaves + leaf);
+
+	bcopy(oldpath, newpath, MAXPATHLEN);
+
+	/*
+	 * If the 'a' file isn't part of the pool, the 'b' file must be.
+	 */
+	if (vdev_lookup_by_path(rvd, oldpath) == NULL)
+		oldpath[strlen(oldpath) - 1] = 'b';
+	else
+		newpath[strlen(newpath) - 1] = 'b';
+
+	/*
+	 * Now oldpath represents something that's already in the pool,
+	 * and newpath is the thing we'll try to attach.
+	 */
+	oldvd = vdev_lookup_by_path(rvd, oldpath);
+	newvd = vdev_lookup_by_path(rvd, newpath);
+	ASSERT(oldvd != NULL);
+	pvd = oldvd->vdev_parent;
+
+	/*
+	 * Make newsize a little bigger or smaller than oldsize.
+	 * If it's smaller, the attach should fail.
+	 * If it's larger, and we're doing a replace,
+	 * we should get dynamic LUN growth when we're done.
+	 */
+	oldsize = vdev_get_rsize(oldvd);
+	newsize = 10 * oldsize / (9 + ztest_random(3));
+
+	/*
+	 * If pvd is not a mirror or root, the attach should fail with ENOTSUP,
+	 * unless it's a replace; in that case any non-replacing parent is OK.
+	 *
+	 * If newvd is already part of the pool, it should fail with EBUSY.
+	 *
+	 * If newvd is too small, it should fail with EOVERFLOW.
+	 */
+	if (newvd != NULL)
+		expected_error = EBUSY;
+	else if (pvd->vdev_ops != &vdev_mirror_ops &&
+	    pvd->vdev_ops != &vdev_root_ops &&
+	    (!replacing || pvd->vdev_ops == &vdev_replacing_ops))
+		expected_error = ENOTSUP;
+	else if (newsize < oldsize)
+		expected_error = EOVERFLOW;
+	else if (ashift > oldvd->vdev_top->vdev_ashift)
+		expected_error = EDOM;
+	else
+		expected_error = 0;
+
+	/*
+	 * If newvd isn't already part of the pool, create it.
+	 */
+	if (newvd == NULL) {
+		fd = open(newpath, O_RDWR | O_CREAT | O_TRUNC, 0666);
+		if (fd == -1)
+			fatal(1, "can't open %s", newpath);
+		if (ftruncate(fd, newsize) != 0)
+			fatal(1, "can't ftruncate %s", newpath);
+		(void) close(fd);
+	}
+
+	spa_config_exit(spa, FTAG);
+
+	/*
+	 * Build the nvlist describing newpath.
+	 */
+	VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
+	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
+	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, newpath) == 0);
+	VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
+
+	VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
+	VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
+	VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
+	    &file, 1) == 0);
+
+	error = spa_vdev_attach(spa, oldvd->vdev_guid, root, replacing);
+
+	nvlist_free(file);
+	nvlist_free(root);
+
+	/*
+	 * If our parent was the replacing vdev, but the replace completed,
+	 * then instead of failing with ENOTSUP we may either succeed,
+	 * fail with ENODEV, or fail with EOVERFLOW.
+	 */
+	if (expected_error == ENOTSUP &&
+	    (error == 0 || error == ENODEV || error == EOVERFLOW))
+		expected_error = error;
+
+	/*
+	 * If someone grew the LUN, the replacement may be too small.
+	 */
+	if (error == EOVERFLOW)
+		expected_error = error;
+
+	if (error != expected_error) {
+		fatal(0, "attach (%s, %s, %d) returned %d, expected %d",
+		    oldpath, newpath, replacing, error, expected_error);
+	}
+
+	(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+}
+
+/*
+ * Verify that dynamic LUN growth works as expected.
+ */
+/* ARGSUSED */
+void
+ztest_vdev_LUN_growth(ztest_args_t *za)
+{
+	spa_t *spa = dmu_objset_spa(za->za_os);
+	char dev_name[MAXPATHLEN];
+	uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+	uint64_t vdev;
+	size_t fsize;
+	int fd;
+
+	(void) mutex_lock(&ztest_shared->zs_vdev_lock);
+
+	/*
+	 * Pick a random leaf vdev.
+	 */
+	spa_config_enter(spa, RW_READER, FTAG);
+	vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves);
+	spa_config_exit(spa, FTAG);
+
+	(void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
+
+	if ((fd = open(dev_name, O_RDWR)) != -1) {
+		/*
+		 * Determine the size.
+		 */
+		fsize = lseek(fd, 0, SEEK_END);
+
+		/*
+		 * If it's less than 2x the original size, grow by up to ~3%.
+		 */
+		if (fsize < 2 * zopt_vdev_size) {
+			size_t newsize = fsize + ztest_random(fsize / 32);
+			(void) ftruncate(fd, newsize);
+			if (zopt_verbose >= 6) {
+				(void) printf("%s grew from %lu to %lu bytes\n",
+				    dev_name, (ulong_t)fsize, (ulong_t)newsize);
+			}
+		}
+		(void) close(fd);
+	}
+
+	(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+}
+
+/* ARGSUSED */
+static void
+ztest_create_cb(objset_t *os, void *arg, dmu_tx_t *tx)
+{
+	/*
+	 * Create the directory object.
+	 */
+	VERIFY(dmu_object_claim(os, ZTEST_DIROBJ,
+	    DMU_OT_UINT64_OTHER, ZTEST_DIROBJ_BLOCKSIZE,
+	    DMU_OT_UINT64_OTHER, sizeof (ztest_block_tag_t), tx) == 0);
+
+	VERIFY(zap_create_claim(os, ZTEST_MICROZAP_OBJ,
+	    DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
+
+	VERIFY(zap_create_claim(os, ZTEST_FATZAP_OBJ,
+	    DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
+}
+
+/* ARGSUSED */
+static int
+ztest_destroy_cb(char *name, void *arg)
+{
+	objset_t *os;
+	dmu_object_info_t doi;
+	int error;
+
+	/*
+	 * Verify that the dataset contains a directory object.
+	 */
+	error = dmu_objset_open(name, DMU_OST_OTHER,
+	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
+	ASSERT3U(error, ==, 0);
+	error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
+	if (error != ENOENT) {
+		/* We could have crashed in the middle of destroying it */
+		ASSERT3U(error, ==, 0);
+		ASSERT3U(doi.doi_type, ==, DMU_OT_UINT64_OTHER);
+		ASSERT3S(doi.doi_physical_blks, >=, 0);
+	}
+	dmu_objset_close(os);
+
+	/*
+	 * Destroy the dataset.
+	 */
+	error = dmu_objset_destroy(name);
+	ASSERT3U(error, ==, 0);
+	return (0);
+}
+
+/*
+ * Verify that dmu_objset_{create,destroy,open,close} work as expected.
+ */
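+/* Helper: log a TX_CREATE record for 'object' to the dataset's intent log. */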
+static uint64_t
+ztest_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t object, int mode)
+{
+	itx_t *itx;
+	lr_create_t *lr;
+	size_t namesize;
+	char name[24];
+
+	(void) sprintf(name, "ZOBJ_%llu", (u_longlong_t)object);
+	namesize = strlen(name) + 1;
+
+	itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize +
+	    ztest_random(ZIL_MAX_BLKSZ));
+	lr = (lr_create_t *)&itx->itx_lr;
+	bzero(lr + 1, lr->lr_common.lrc_reclen - sizeof (*lr));
+	lr->lr_doid = object;
+	lr->lr_foid = 0;
+	lr->lr_mode = mode;
+	lr->lr_uid = 0;
+	lr->lr_gid = 0;
+	lr->lr_gen = dmu_tx_get_txg(tx);
+	lr->lr_crtime[0] = time(NULL);
+	lr->lr_crtime[1] = 0;
+	lr->lr_rdev = 0;
+	bcopy(name, (char *)(lr + 1), namesize);
+
+	return (zil_itx_assign(zilog, itx, tx));
+}
+
+void
+ztest_dmu_objset_create_destroy(ztest_args_t *za)
+{
+	int error;
+	objset_t *os;
+	char name[100];
+	int mode, basemode, expected_error;
+	zilog_t *zilog;
+	uint64_t seq;
+	uint64_t objects;
+	ztest_replay_t zr;
+
+	(void) rw_rdlock(&ztest_shared->zs_name_lock);
+	(void) snprintf(name, 100, "%s/%s_temp_%llu", za->za_pool, za->za_pool,
+	    (u_longlong_t)za->za_instance);
+
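+	/*
+	 * Derive a base open mode from our instance number; DS_MODE_NONE
+	 * isn't a usable mode, so bump it to the next level.
+	 */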
+	basemode = DS_MODE_LEVEL(za->za_instance);
+	if (basemode == DS_MODE_NONE)
+		basemode++;
+
+	/*
+	 * If this dataset exists from a previous run, process its replay log
+	 * half of the time.  If we don't replay it, then dmu_objset_destroy()
+	 * (invoked from ztest_destroy_cb() below) should just throw it away.
+	 */
+	if (ztest_random(2) == 0 &&
+	    dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_PRIMARY, &os) == 0) {
+		zr.zr_os = os;
+		zil_replay(os, &zr, &zr.zr_assign, ztest_replay_vector);
+		dmu_objset_close(os);
+	}
+
+	/*
+	 * There may be an old instance of the dataset we're about to
+	 * create lying around from a previous run.  If so, destroy it
+	 * and all of its snapshots.
+	 */
+	(void) dmu_objset_find(name, ztest_destroy_cb, NULL,
+	    DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
+
+	/*
+	 * Verify that the destroyed dataset is no longer in the namespace.
+	 */
+	error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
+	if (error != ENOENT)
+		fatal(1, "dmu_objset_open(%s) found destroyed dataset %p",
+		    name, os);
+
+	/*
+	 * Verify that we can create a new dataset.
+	 */
+	error = dmu_objset_create(name, DMU_OST_OTHER, NULL, ztest_create_cb,
+	    NULL);
+	if (error) {
+		if (error == ENOSPC) {
+			ztest_record_enospc("dmu_objset_create");
+			(void) rw_unlock(&ztest_shared->zs_name_lock);
+			return;
+		}
+		fatal(0, "dmu_objset_create(%s) = %d", name, error);
+	}
+
+	error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
+	if (error) {
+		fatal(0, "dmu_objset_open(%s) = %d", name, error);
+	}
+
+	/*
+	 * Open the intent log for it.
+	 */
+	zilog = zil_open(os, NULL);
+
+	/*
+	 * Put a random number of objects in there.
+	 */
+	objects = ztest_random(20);
+	seq = 0;
+	while (objects-- != 0) {
+		uint64_t object;
+		dmu_tx_t *tx = dmu_tx_create(os);
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, sizeof (name));
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			dmu_tx_abort(tx);
+		} else {
+			object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
+			    DMU_OT_NONE, 0, tx);
+			ztest_set_random_blocksize(os, object, tx);
+			seq = ztest_log_create(zilog, tx, object,
+			    DMU_OT_UINT64_OTHER);
+			dmu_write(os, object, 0, sizeof (name), name, tx);
+			dmu_tx_commit(tx);
+		}
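+		/*
+		 * Occasionally commit the intent log through the last
+		 * sequence we logged, and once in a while exercise
+		 * zil_suspend()/zil_resume().
+		 */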
+		if (ztest_random(5) == 0) {
+			zil_commit(zilog, seq, object);
+		}
+		if (ztest_random(100) == 0) {
+			error = zil_suspend(zilog);
+			if (error == 0) {
+				zil_resume(zilog);
+			}
+		}
+	}
+
+	/*
+	 * Verify that we cannot create an existing dataset.
+	 */
+	error = dmu_objset_create(name, DMU_OST_OTHER, NULL, NULL, NULL);
+	if (error != EEXIST)
+		fatal(0, "created existing dataset, error = %d", error);
+
+	/*
+	 * Verify that multiple dataset opens are allowed, but only when
+	 * the new access mode is compatible with the base mode.
+	 * We use a mixture of typed and typeless opens, and when the
+	 * open succeeds, verify that the discovered type is correct.
+	 */
+	for (mode = DS_MODE_STANDARD; mode < DS_MODE_LEVELS; mode++) {
+		objset_t *os2;
+		error = dmu_objset_open(name, DMU_OST_OTHER, mode, &os2);
+		expected_error = (basemode + mode < DS_MODE_LEVELS) ? 0 : EBUSY;
+		if (error != expected_error)
+			fatal(0, "dmu_objset_open('%s') = %d, expected %d",
+			    name, error, expected_error);
+		if (error == 0)
+			dmu_objset_close(os2);
+	}
+
+	zil_close(zilog);
+	dmu_objset_close(os);
+
+	error = dmu_objset_destroy(name);
+	if (error)
+		fatal(0, "dmu_objset_destroy(%s) = %d", name, error);
+
+	(void) rw_unlock(&ztest_shared->zs_name_lock);
+}
+
+/*
+ * Verify that dmu_snapshot_{create,destroy,open,close} work as expected.
+ */
+void
+ztest_dmu_snapshot_create_destroy(ztest_args_t *za)
+{
+	int error;
+	objset_t *os = za->za_os;
+	char snapname[100];
+	char osname[MAXNAMELEN];
+
+	(void) rw_rdlock(&ztest_shared->zs_name_lock);
+	dmu_objset_name(os, osname);
+	(void) snprintf(snapname, 100, "%s@%llu", osname,
+	    (u_longlong_t)za->za_instance);
+
+	error = dmu_objset_destroy(snapname);
+	if (error != 0 && error != ENOENT)
+		fatal(0, "dmu_objset_destroy() = %d", error);
+	error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1, FALSE);
+	if (error == ENOSPC)
+		ztest_record_enospc("dmu_take_snapshot");
+	else if (error != 0 && error != EEXIST)
+		fatal(0, "dmu_take_snapshot() = %d", error);
+	(void) rw_unlock(&ztest_shared->zs_name_lock);
+}
+
+#define	ZTEST_TRAVERSE_BLOCKS	1000
+
+static int
+ztest_blk_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
+{
+	ztest_args_t *za = arg;
+	zbookmark_t *zb = &bc->bc_bookmark;
+	blkptr_t *bp = &bc->bc_blkptr;
+	dnode_phys_t *dnp = bc->bc_dnode;
+	traverse_handle_t *th = za->za_th;
+	uint64_t size = BP_GET_LSIZE(bp);
+
+	/*
+	 * Level -1 indicates the objset_phys_t or something in its intent log.
+	 */
+	if (zb->zb_level == -1) {
+		if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
+			ASSERT3U(zb->zb_object, ==, 0);
+			ASSERT3U(zb->zb_blkid, ==, 0);
+			ASSERT3U(size, ==, sizeof (objset_phys_t));
+			za->za_zil_seq = 0;
+		} else if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
+			ASSERT3U(zb->zb_object, ==, 0);
+			ASSERT3U(zb->zb_blkid, >, za->za_zil_seq);
+			za->za_zil_seq = zb->zb_blkid;
+		} else {
+			ASSERT3U(zb->zb_object, !=, 0);	/* lr_write_t */
+		}
+
+		return (0);
+	}
+
+	ASSERT(dnp != NULL);
+
+	if (bc->bc_errno)
+		return (ERESTART);
+
+	/*
+	 * Once in a while, abort the traverse.  We only do this for odd
+	 * instance numbers to ensure that even ones can run to completion.
+	 */
+	if ((za->za_instance & 1) && ztest_random(10000) == 0)
+		return (EINTR);
+
+	if (bp->blk_birth == 0) {
+		ASSERT(th->th_advance & ADVANCE_HOLES);
+		return (0);
+	}
+
+	if (zb->zb_level == 0 && !(th->th_advance & ADVANCE_DATA) &&
+	    bc == &th->th_cache[ZB_DN_CACHE][0]) {
+		ASSERT(bc->bc_data == NULL);
+		return (0);
+	}
+
+	ASSERT(bc->bc_data != NULL);
+
+	/*
+	 * This is an expensive question, so don't ask it too often.
+	 */
+	if (((za->za_random ^ th->th_callbacks) & 0xff) == 0) {
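+		/* Independently re-read the block and verify it matches. */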
+		void *xbuf = umem_alloc(size, UMEM_NOFAIL);
+		if (arc_tryread(spa, bp, xbuf) == 0) {
+			ASSERT(bcmp(bc->bc_data, xbuf, size) == 0);
+		}
+		umem_free(xbuf, size);
+	}
+
+	if (zb->zb_level > 0) {
+		ASSERT3U(size, ==, 1ULL << dnp->dn_indblkshift);
+		return (0);
+	}
+
+	ASSERT(zb->zb_level == 0);
+	ASSERT3U(size, ==, dnp->dn_datablkszsec << DEV_BSHIFT);
+
+	return (0);
+}
+
+/*
+ * Verify that live pool traversal works.
+ */
+void
+ztest_traverse(ztest_args_t *za)
+{
+	spa_t *spa = dmu_objset_spa(za->za_os);
+	traverse_handle_t *th = za->za_th;
+	int rc, advance;
+	uint64_t cbstart, cblimit;
+
+	if (th == NULL) {
+		advance = 0;
+
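+		/* Randomly choose a set of traversal options. */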
+		if (ztest_random(2) == 0)
+			advance |= ADVANCE_PRE;
+
+		if (ztest_random(2) == 0)
+			advance |= ADVANCE_PRUNE;
+
+		if (ztest_random(2) == 0)
+			advance |= ADVANCE_DATA;
+
+		if (ztest_random(2) == 0)
+			advance |= ADVANCE_HOLES;
+
+		if (ztest_random(2) == 0)
+			advance |= ADVANCE_ZIL;
+
+		th = za->za_th = traverse_init(spa, ztest_blk_cb, za, advance,
+		    ZIO_FLAG_CANFAIL);
+
+		traverse_add_pool(th, 0, -1ULL);
+	}
+
+	advance = th->th_advance;
+	cbstart = th->th_callbacks;
+	cblimit = cbstart + ((advance & ADVANCE_DATA) ? 100 : 1000);
+
+	while ((rc = traverse_more(th)) == EAGAIN && th->th_callbacks < cblimit)
+		continue;
+
+	if (zopt_verbose >= 5)
+		(void) printf("traverse %s%s%s%s %llu blocks to "
+		    "<%llu, %llu, %lld, %llx>%s\n",
+		    (advance & ADVANCE_PRE) ? "pre" : "post",
+		    (advance & ADVANCE_PRUNE) ? "|prune" : "",
+		    (advance & ADVANCE_DATA) ? "|data" : "",
+		    (advance & ADVANCE_HOLES) ? "|holes" : "",
+		    (u_longlong_t)(th->th_callbacks - cbstart),
+		    (u_longlong_t)th->th_lastcb.zb_objset,
+		    (u_longlong_t)th->th_lastcb.zb_object,
+		    (u_longlong_t)th->th_lastcb.zb_level,
+		    (u_longlong_t)th->th_lastcb.zb_blkid,
+		    rc == 0 ? " [done]" :
+		    rc == EINTR ? " [aborted]" :
+		    rc == EAGAIN ? "" :
+		    strerror(rc));
+
+	if (rc != EAGAIN) {
+		if (rc != 0 && rc != EINTR)
+			fatal(0, "traverse_more(%p) = %d", th, rc);
+		traverse_fini(th);
+		za->za_th = NULL;
+	}
+}
+
+/*
+ * Verify that dmu_object_{alloc,free} work as expected.
+ */
+void
+ztest_dmu_object_alloc_free(ztest_args_t *za)
+{
+	objset_t *os = za->za_os;
+	dmu_buf_t *db;
+	dmu_tx_t *tx;
+	uint64_t batchobj, object, batchsize, endoff, temp;
+	int b, c, error, bonuslen;
+	dmu_object_info_t doi;
+	char osname[MAXNAMELEN];
+
+	dmu_objset_name(os, osname);
+
+	endoff = -8ULL;
+	batchsize = 2;
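+	/*
+	 * endoff (-8ULL) is a huge 8-byte-aligned offset; writing the
+	 * object number there (see below) forces deep indirection.
+	 */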
+
+	/*
+	 * Create a batch object if necessary, and record it in the directory.
+	 */
+	VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
+	    sizeof (uint64_t), &batchobj));
+	if (batchobj == 0) {
+		tx = dmu_tx_create(os);
+		dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
+		    sizeof (uint64_t));
+		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			ztest_record_enospc("create a batch object");
+			dmu_tx_abort(tx);
+			return;
+		}
+		batchobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
+		    DMU_OT_NONE, 0, tx);
+		ztest_set_random_blocksize(os, batchobj, tx);
+		dmu_write(os, ZTEST_DIROBJ, za->za_diroff,
+		    sizeof (uint64_t), &batchobj, tx);
+		dmu_tx_commit(tx);
+	}
+
+	/*
+	 * Destroy the previous batch of objects.
+	 */
+	for (b = 0; b < batchsize; b++) {
+		VERIFY(0 == dmu_read(os, batchobj, b * sizeof (uint64_t),
+		    sizeof (uint64_t), &object));
+		if (object == 0)
+			continue;
+		/*
+		 * Read and validate contents.
+		 * We expect byte c of the bonus buffer to be
+		 * (uint8_t)(c + bonuslen), matching what we write below.
+		 */
+		VERIFY(0 == dmu_bonus_hold(os, object, FTAG, &db));
+
+		dmu_object_info_from_db(db, &doi);
+		ASSERT(doi.doi_type == DMU_OT_UINT64_OTHER);
+		ASSERT(doi.doi_bonus_type == DMU_OT_PLAIN_OTHER);
+		ASSERT3S(doi.doi_physical_blks, >=, 0);
+
+		bonuslen = db->db_size;
+
+		for (c = 0; c < bonuslen; c++) {
+			if (((uint8_t *)db->db_data)[c] !=
+			    (uint8_t)(c + bonuslen)) {
+				fatal(0,
+				    "bad bonus: %s, obj %llu, off %d: %u != %u",
+				    osname, object, c,
+				    ((uint8_t *)db->db_data)[c],
+				    (uint8_t)(c + bonuslen));
+			}
+		}
+
+		dmu_buf_rele(db, FTAG);
+
+		/*
+		 * We expect the word at endoff to be our object number.
+		 */
+		VERIFY(0 == dmu_read(os, object, endoff,
+		    sizeof (uint64_t), &temp));
+
+		if (temp != object) {
+			fatal(0, "bad data in %s, got %llu, expected %llu",
+			    osname, temp, object);
+		}
+
+		/*
+		 * Destroy old object and clear batch entry.
+		 */
+		tx = dmu_tx_create(os);
+		dmu_tx_hold_write(tx, batchobj,
+		    b * sizeof (uint64_t), sizeof (uint64_t));
+		dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			ztest_record_enospc("free object");
+			dmu_tx_abort(tx);
+			return;
+		}
+		error = dmu_object_free(os, object, tx);
+		if (error) {
+			fatal(0, "dmu_object_free('%s', %llu) = %d",
+			    osname, object, error);
+		}
+		object = 0;
+
+		dmu_object_set_checksum(os, batchobj,
+		    ztest_random_checksum(), tx);
+		dmu_object_set_compress(os, batchobj,
+		    ztest_random_compress(), tx);
+
+		dmu_write(os, batchobj, b * sizeof (uint64_t),
+		    sizeof (uint64_t), &object, tx);
+
+		dmu_tx_commit(tx);
+	}
+
+	/*
+	 * Before creating the new batch of objects, generate a bunch of churn.
+	 */
+	for (b = ztest_random(100); b > 0; b--) {
+		tx = dmu_tx_create(os);
+		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			ztest_record_enospc("churn objects");
+			dmu_tx_abort(tx);
+			return;
+		}
+		object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
+		    DMU_OT_NONE, 0, tx);
+		ztest_set_random_blocksize(os, object, tx);
+		error = dmu_object_free(os, object, tx);
+		if (error) {
+			fatal(0, "dmu_object_free('%s', %llu) = %d",
+			    osname, object, error);
+		}
+		dmu_tx_commit(tx);
+	}
+
+	/*
+	 * Create a new batch of objects with randomly chosen
+	 * blocksizes and record them in the batch directory.
+	 */
+	for (b = 0; b < batchsize; b++) {
+		uint32_t va_blksize;
+		u_longlong_t va_nblocks;
+
+		tx = dmu_tx_create(os);
+		dmu_tx_hold_write(tx, batchobj, b * sizeof (uint64_t),
+		    sizeof (uint64_t));
+		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, endoff,
+		    sizeof (uint64_t));
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			ztest_record_enospc("create batchobj");
+			dmu_tx_abort(tx);
+			return;
+		}
+		bonuslen = (int)ztest_random(dmu_bonus_max()) + 1;
+
+		object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
+		    DMU_OT_PLAIN_OTHER, bonuslen, tx);
+
+		ztest_set_random_blocksize(os, object, tx);
+
+		dmu_object_set_checksum(os, object,
+		    ztest_random_checksum(), tx);
+		dmu_object_set_compress(os, object,
+		    ztest_random_compress(), tx);
+
+		dmu_write(os, batchobj, b * sizeof (uint64_t),
+		    sizeof (uint64_t), &object, tx);
+
+		/*
+		 * Write to both the bonus buffer and the regular data.
+		 */
+		VERIFY(0 == dmu_bonus_hold(os, object, FTAG, &db));
+		ASSERT3U(bonuslen, ==, db->db_size);
+
+		dmu_object_size_from_db(db, &va_blksize, &va_nblocks);
+		ASSERT3S(va_nblocks, >=, 0);
+
+		dmu_buf_will_dirty(db, tx);
+
+		/*
+		 * See comments above regarding the contents of
+		 * the bonus buffer and the word at endoff.
+		 */
+		for (c = 0; c < db->db_size; c++)
+			((uint8_t *)db->db_data)[c] = (uint8_t)(c + bonuslen);
+
+		dmu_buf_rele(db, FTAG);
+
+		/*
+		 * Write to a large offset to increase indirection.
+		 */
+		dmu_write(os, object, endoff, sizeof (uint64_t), &object, tx);
+
+		dmu_tx_commit(tx);
+	}
+}
+
+/*
+ * Verify that dmu_{read,write} work as expected.
+ */
+typedef struct bufwad {
+	uint64_t	bw_index;
+	uint64_t	bw_txg;
+	uint64_t	bw_data;
+} bufwad_t;
+
+typedef struct dmu_read_write_dir {
+	uint64_t	dd_packobj;
+	uint64_t	dd_bigobj;
+	uint64_t	dd_chunk;
+} dmu_read_write_dir_t;
+
+void
+ztest_dmu_read_write(ztest_args_t *za)
+{
+	objset_t *os = za->za_os;
+	dmu_read_write_dir_t dd;
+	dmu_tx_t *tx;
+	int i, freeit, error;
+	uint64_t n, s, txg;
+	bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT;
+	uint64_t packoff, packsize, bigoff, bigsize;
+	uint64_t regions = 997;
+	uint64_t stride = 123456789ULL;
+	uint64_t width = 40;
+	int free_percent = 5;
+
+	/*
+	 * This test uses two objects, packobj and bigobj, that are always
+	 * updated together (i.e. in the same tx) so that their contents are
+	 * in sync and can be compared.  Their contents relate to each other
+	 * in a simple way: packobj is a dense array of 'bufwad' structures,
+	 * while bigobj is a sparse array of the same bufwads.  Specifically,
+	 * for any index n, there are three bufwads that should be identical:
+	 *
+	 *	packobj, at offset n * sizeof (bufwad_t)
+	 *	bigobj, at the head of the nth chunk
+	 *	bigobj, at the tail of the nth chunk
+	 *
+	 * The chunk size is arbitrary. It doesn't have to be a power of two,
+	 * and it doesn't have any relation to the object blocksize.
+	 * The only requirement is that it can hold at least two bufwads.
+	 *
+	 * Normally, we write the bufwad to each of these locations.
+	 * However, free_percent of the time we instead write zeroes to
+	 * packobj and perform a dmu_free_range() on bigobj.  By comparing
+	 * bigobj to packobj, we can verify that the DMU is correctly
+	 * tracking which parts of an object are allocated and free,
+	 * and that the contents of the allocated blocks are correct.
+	 */
+
+	/*
+	 * Read the directory info.  If it's the first time, set things up.
+	 */
+	VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
+	    sizeof (dd), &dd));
+	if (dd.dd_chunk == 0) {
+		ASSERT(dd.dd_packobj == 0);
+		ASSERT(dd.dd_bigobj == 0);
+		tx = dmu_tx_create(os);
+		dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (dd));
+		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			ztest_record_enospc("create r/w directory");
+			dmu_tx_abort(tx);
+			return;
+		}
+
+		dd.dd_packobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
+		    DMU_OT_NONE, 0, tx);
+		dd.dd_bigobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
+		    DMU_OT_NONE, 0, tx);
+		dd.dd_chunk = (1000 + ztest_random(1000)) * sizeof (uint64_t);
+
+		ztest_set_random_blocksize(os, dd.dd_packobj, tx);
+		ztest_set_random_blocksize(os, dd.dd_bigobj, tx);
+
+		dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd,
+		    tx);
+		dmu_tx_commit(tx);
+	}
+
+	/*
+	 * Prefetch a random chunk of the big object.
+	 * Our aim here is to get some async reads in flight
+	 * for blocks that we may free below; the DMU should
+	 * handle this race correctly.
+	 */
+	n = ztest_random(regions) * stride + ztest_random(width);
+	s = 1 + ztest_random(2 * width - 1);
+	dmu_prefetch(os, dd.dd_bigobj, n * dd.dd_chunk, s * dd.dd_chunk);
+
+	/*
+	 * Pick a random index and compute the offsets into packobj and bigobj.
+	 */
+	n = ztest_random(regions) * stride + ztest_random(width);
+	s = 1 + ztest_random(width - 1);
+
+	packoff = n * sizeof (bufwad_t);
+	packsize = s * sizeof (bufwad_t);
+
+	bigoff = n * dd.dd_chunk;
+	bigsize = s * dd.dd_chunk;
+
+	packbuf = umem_alloc(packsize, UMEM_NOFAIL);
+	bigbuf = umem_alloc(bigsize, UMEM_NOFAIL);
+
+	/*
+	 * free_percent of the time, free a range of bigobj rather than
+	 * overwriting it.
+	 */
+	freeit = (ztest_random(100) < free_percent);
+
+	/*
+	 * Read the current contents of our objects.
+	 */
+	error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf);
+	ASSERT3U(error, ==, 0);
+	error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf);
+	ASSERT3U(error, ==, 0);
+
+	/*
+	 * Get a tx for the mods to both packobj and bigobj.
+	 */
+	tx = dmu_tx_create(os);
+
+	dmu_tx_hold_write(tx, dd.dd_packobj, packoff, packsize);
+
+	if (freeit)
+		dmu_tx_hold_free(tx, dd.dd_bigobj, bigoff, bigsize);
+	else
+		dmu_tx_hold_write(tx, dd.dd_bigobj, bigoff, bigsize);
+
+	error = dmu_tx_assign(tx, TXG_WAIT);
+
+	if (error) {
+		ztest_record_enospc("dmu r/w range");
+		dmu_tx_abort(tx);
+		umem_free(packbuf, packsize);
+		umem_free(bigbuf, bigsize);
+		return;
+	}
+
+	txg = dmu_tx_get_txg(tx);
+
+	/*
+	 * For each index from n to n + s, verify that the existing bufwad
+	 * in packobj matches the bufwads at the head and tail of the
+	 * corresponding chunk in bigobj.  Then update all three bufwads
+	 * with the new values we want to write out.
+	 */
+	for (i = 0; i < s; i++) {
+		/* LINTED */
+		pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
+		/* LINTED */
+		bigH = (bufwad_t *)((char *)bigbuf + i * dd.dd_chunk);
+		/* LINTED */
+		bigT = (bufwad_t *)((char *)bigH + dd.dd_chunk) - 1;
+
+		ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
+		ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
+
+		if (pack->bw_txg > txg)
+			fatal(0, "future leak: got %llx, open txg is %llx",
+			    pack->bw_txg, txg);
+
+		if (pack->bw_data != 0 && pack->bw_index != n + i)
+			fatal(0, "wrong index: got %llx, wanted %llx+%llx",
+			    pack->bw_index, n, i);
+
+		if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0)
+			fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH);
+
+		if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0)
+			fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT);
+
+		if (freeit) {
+			bzero(pack, sizeof (bufwad_t));
+		} else {
+			pack->bw_index = n + i;
+			pack->bw_txg = txg;
+			pack->bw_data = 1 + ztest_random(-2ULL);
+		}
+		*bigH = *pack;
+		*bigT = *pack;
+	}
+
+	/*
+	 * We've verified all the old bufwads, and made new ones.
+	 * Now write them out.
+	 */
+	dmu_write(os, dd.dd_packobj, packoff, packsize, packbuf, tx);
+
+	if (freeit) {
+		if (zopt_verbose >= 6) {
+			(void) printf("freeing offset %llx size %llx"
+			    " txg %llx\n",
+			    (u_longlong_t)bigoff,
+			    (u_longlong_t)bigsize,
+			    (u_longlong_t)txg);
+		}
+		VERIFY(0 == dmu_free_range(os, dd.dd_bigobj, bigoff,
+		    bigsize, tx));
+	} else {
+		if (zopt_verbose >= 6) {
+			(void) printf("writing offset %llx size %llx"
+			    " txg %llx\n",
+			    (u_longlong_t)bigoff,
+			    (u_longlong_t)bigsize,
+			    (u_longlong_t)txg);
+		}
+		dmu_write(os, dd.dd_bigobj, bigoff, bigsize, bigbuf, tx);
+	}
+
+	dmu_tx_commit(tx);
+
+	/*
+	 * Sanity check the stuff we just wrote.
+	 */
+	{
+		void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
+		void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
+
+		VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff,
+		    packsize, packcheck));
+		VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff,
+		    bigsize, bigcheck));
+
+		ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
+		ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
+
+		umem_free(packcheck, packsize);
+		umem_free(bigcheck, bigsize);
+	}
+
+	umem_free(packbuf, packsize);
+	umem_free(bigbuf, bigsize);
+}
+
+void
+ztest_dmu_check_future_leak(objset_t *os, uint64_t txg)
+{
+	dmu_buf_t *db;
+	ztest_block_tag_t rbt;
+
+	if (zopt_verbose >= 3) {
+		char osname[MAXNAMELEN];
+		dmu_objset_name(os, osname);
+		(void) printf("checking %s for future leaks in txg %lld...\n",
+		    osname, (u_longlong_t)txg);
+	}
+
+	/*
+	 * Make sure that, if there is a write record in the bonus buffer
+	 * of the ZTEST_DIROBJ, that the txg for this record is <= the
+	 * last synced txg of the pool.
+	 */
+
+	VERIFY(0 == dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &db));
+	ASSERT3U(db->db_size, ==, sizeof (rbt));
+	bcopy(db->db_data, &rbt, db->db_size);
+	if (rbt.bt_objset != 0) {
+		ASSERT3U(rbt.bt_objset, ==, dmu_objset_id(os));
+		ASSERT3U(rbt.bt_object, ==, ZTEST_DIROBJ);
+		ASSERT3U(rbt.bt_offset, ==, -1ULL);
+		if (rbt.bt_txg > txg) {
+			fatal(0,
+			    "future leak: got %llx, last synced txg is %llx",
+			    rbt.bt_txg, txg);
+		}
+	}
+	dmu_buf_rele(db, FTAG);
+}
+
+void
+ztest_dmu_write_parallel(ztest_args_t *za)
+{
+	objset_t *os = za->za_os;
+	dmu_tx_t *tx;
+	dmu_buf_t *db;
+	int i, b, error, do_free, bs;
+	uint64_t off, txg_how, txg;
+	mutex_t *lp;
+	char osname[MAXNAMELEN];
+	char iobuf[SPA_MAXBLOCKSIZE];
+	ztest_block_tag_t rbt, wbt;
+
+	dmu_objset_name(os, osname);
+	bs = ZTEST_DIROBJ_BLOCKSIZE;
+
+	/*
+	 * Have multiple threads write to large offsets in ZTEST_DIROBJ
+	 * to verify that having multiple threads writing to the same object
+	 * in parallel doesn't cause any trouble.
+	 * Also do parallel writes to the bonus buffer on occasion.
+	 */
+	for (i = 0; i < 50; i++) {
+		b = ztest_random(ZTEST_SYNC_LOCKS);
+		lp = &ztest_shared->zs_sync_lock[b];
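+		/*
+		 * Lock b protects region b of ZTEST_DIROBJ and its
+		 * zs_seq[b] counter.
+		 */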
+
+		do_free = (ztest_random(4) == 0);
+
+		off = za->za_diroff_shared + ((uint64_t)b << SPA_MAXBLOCKSHIFT);
+
+		if (ztest_random(4) == 0) {
+			/*
+			 * Do the bonus buffer instead of a regular block.
+			 */
+			do_free = 0;
+			off = -1ULL;
+		}
+
+		tx = dmu_tx_create(os);
+
+		if (off == -1ULL)
+			dmu_tx_hold_bonus(tx, ZTEST_DIROBJ);
+		else if (do_free)
+			dmu_tx_hold_free(tx, ZTEST_DIROBJ, off, bs);
+		else
+			dmu_tx_hold_write(tx, ZTEST_DIROBJ, off, bs);
+
+		txg_how = ztest_random(2) == 0 ? TXG_WAIT : TXG_NOWAIT;
+		error = dmu_tx_assign(tx, txg_how);
+		if (error) {
+			if (error == ERESTART) {
+				ASSERT(txg_how == TXG_NOWAIT);
+				dmu_tx_wait(tx);
+				dmu_tx_abort(tx);
+				continue;
+			}
+			dmu_tx_abort(tx);
+			ztest_record_enospc("dmu write parallel");
+			return;
+		}
+		txg = dmu_tx_get_txg(tx);
+
+		if (do_free) {
+			(void) mutex_lock(lp);
+			VERIFY(0 == dmu_free_range(os, ZTEST_DIROBJ, off,
+			    bs, tx));
+			(void) mutex_unlock(lp);
+			dmu_tx_commit(tx);
+			continue;
+		}
+
+		wbt.bt_objset = dmu_objset_id(os);
+		wbt.bt_object = ZTEST_DIROBJ;
+		wbt.bt_offset = off;
+		wbt.bt_txg = txg;
+		wbt.bt_thread = za->za_instance;
+
+		if (off == -1ULL) {
+			wbt.bt_seq = 0;
+			VERIFY(0 == dmu_bonus_hold(os, ZTEST_DIROBJ,
+			    FTAG, &db));
+			ASSERT3U(db->db_size, ==, sizeof (wbt));
+			bcopy(db->db_data, &rbt, db->db_size);
+			if (rbt.bt_objset != 0) {
+				ASSERT3U(rbt.bt_objset, ==, wbt.bt_objset);
+				ASSERT3U(rbt.bt_object, ==, wbt.bt_object);
+				ASSERT3U(rbt.bt_offset, ==, wbt.bt_offset);
+				ASSERT3U(rbt.bt_txg, <=, wbt.bt_txg);
+			}
+			dmu_buf_will_dirty(db, tx);
+			bcopy(&wbt, db->db_data, db->db_size);
+			dmu_buf_rele(db, FTAG);
+			dmu_tx_commit(tx);
+			continue;
+		}
+
+		(void) mutex_lock(lp);
+
+		wbt.bt_seq = ztest_shared->zs_seq[b]++;
+
+		dmu_write(os, ZTEST_DIROBJ, off, sizeof (wbt), &wbt, tx);
+
+		(void) mutex_unlock(lp);
+
+		if (ztest_random(100) == 0)
+			(void) poll(NULL, 0, 1); /* open dn_notxholds window */
+
+		dmu_tx_commit(tx);
+
+		if (ztest_random(1000) == 0)
+			txg_wait_synced(dmu_objset_pool(os), txg);
+
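+		/*
+		 * Half the time, exercise dmu_sync() on the block we just
+		 * wrote and verify the block pointer it produces.
+		 */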
+		if (ztest_random(2) == 0) {
+			blkptr_t blk = { 0 };
+			uint64_t blkoff;
+			zbookmark_t zb;
+
+			(void) mutex_lock(lp);
+			blkoff = P2ALIGN_TYPED(off, bs, uint64_t);
+			error = dmu_buf_hold(os,
+			    ZTEST_DIROBJ, blkoff, FTAG, &db);
+			if (error) {
+				dprintf("dmu_buf_hold(%s, %d, %llx) = %d\n",
+				    osname, ZTEST_DIROBJ, blkoff, error);
+				(void) mutex_unlock(lp);
+				continue;
+			}
+			blkoff = off - blkoff;
+			error = dmu_sync(NULL, db, &blk, txg, NULL, NULL);
+			dmu_buf_rele(db, FTAG);
+			(void) mutex_unlock(lp);
+			if (error) {
+				dprintf("dmu_sync(%s, %d, %llx) = %d\n",
+				    osname, ZTEST_DIROBJ, off, error);
+				continue;
+			}
+
+			if (blk.blk_birth == 0)	{	/* concurrent free */
+				continue;
+			}
+			txg_suspend(dmu_objset_pool(os));
+
+			ASSERT(blk.blk_fill == 1);
+			ASSERT3U(BP_GET_TYPE(&blk), ==, DMU_OT_UINT64_OTHER);
+			ASSERT3U(BP_GET_LEVEL(&blk), ==, 0);
+			ASSERT3U(BP_GET_LSIZE(&blk), ==, bs);
+
+			/*
+			 * Read the block that dmu_sync() returned to
+			 * make sure its contents match what we wrote.
+			 * We do this while still txg_suspend()ed to ensure
+			 * that the block can't be reused before we read it.
+			 */
+			zb.zb_objset = dmu_objset_id(os);
+			zb.zb_object = ZTEST_DIROBJ;
+			zb.zb_level = 0;
+			zb.zb_blkid = off / bs;
+			error = zio_wait(zio_read(NULL, dmu_objset_spa(os),
+			    &blk, iobuf, bs, NULL, NULL,
+			    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_MUSTSUCCEED, &zb));
+			ASSERT(error == 0);
+
+			txg_resume(dmu_objset_pool(os));
+
+			bcopy(&iobuf[blkoff], &rbt, sizeof (rbt));
+
+			if (rbt.bt_objset == 0)		/* concurrent free */
+				continue;
+
+			ASSERT3U(rbt.bt_objset, ==, wbt.bt_objset);
+			ASSERT3U(rbt.bt_object, ==, wbt.bt_object);
+			ASSERT3U(rbt.bt_offset, ==, wbt.bt_offset);
+
+			/*
+			 * The semantics of dmu_sync() are that we always
+			 * push the most recent version of the data,
+			 * so in the face of concurrent updates we may
+			 * see a newer version of the block.  That's OK.
+			 */
+			ASSERT3U(rbt.bt_txg, >=, wbt.bt_txg);
+			if (rbt.bt_thread == wbt.bt_thread)
+				ASSERT3U(rbt.bt_seq, ==, wbt.bt_seq);
+			else
+				ASSERT3U(rbt.bt_seq, >, wbt.bt_seq);
+		}
+	}
+}
+
+/*
+ * Verify that zap_{create,destroy,add,remove,update} work as expected.
+ */
+#define	ZTEST_ZAP_MIN_INTS	1
+#define	ZTEST_ZAP_MAX_INTS	4
+#define	ZTEST_ZAP_MAX_PROPS	1000
+
+void
+ztest_zap(ztest_args_t *za)
+{
+	objset_t *os = za->za_os;
+	uint64_t object;
+	uint64_t txg, last_txg;
+	uint64_t value[ZTEST_ZAP_MAX_INTS];
+	uint64_t zl_ints, zl_intsize, prop;
+	int i, ints;
+	int iters = 100;
+	dmu_tx_t *tx;
+	char propname[100], txgname[100];
+	int error;
+	char osname[MAXNAMELEN];
+	char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" };
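+	/* hc[] is a pair of names known to collide in the ZAP hash. */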
+
+	dmu_objset_name(os, osname);
+
+	/*
+	 * Create a new object if necessary, and record it in the directory.
+	 */
+	VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
+	    sizeof (uint64_t), &object));
+
+	if (object == 0) {
+		tx = dmu_tx_create(os);
+		dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
+		    sizeof (uint64_t));
+		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			ztest_record_enospc("create zap test obj");
+			dmu_tx_abort(tx);
+			return;
+		}
+		object = zap_create(os, DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx);
+		if (error) {
+			fatal(0, "zap_create('%s', %llu) = %d",
+			    osname, object, error);
+		}
+		ASSERT(object != 0);
+		dmu_write(os, ZTEST_DIROBJ, za->za_diroff,
+		    sizeof (uint64_t), &object, tx);
+		/*
+		 * Generate a known hash collision, and verify that
+		 * we can lookup and remove both entries.
+		 */
+		for (i = 0; i < 2; i++) {
+			value[i] = i;
+			error = zap_add(os, object, hc[i], sizeof (uint64_t),
+			    1, &value[i], tx);
+			ASSERT3U(error, ==, 0);
+		}
+		for (i = 0; i < 2; i++) {
+			error = zap_add(os, object, hc[i], sizeof (uint64_t),
+			    1, &value[i], tx);
+			ASSERT3U(error, ==, EEXIST);
+			error = zap_length(os, object, hc[i],
+			    &zl_intsize, &zl_ints);
+			ASSERT3U(error, ==, 0);
+			ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
+			ASSERT3U(zl_ints, ==, 1);
+		}
+		for (i = 0; i < 2; i++) {
+			error = zap_remove(os, object, hc[i], tx);
+			ASSERT3U(error, ==, 0);
+		}
+
+		dmu_tx_commit(tx);
+	}
+
+	ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS);
+
+	while (--iters >= 0) {
+		prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
+		(void) sprintf(propname, "prop_%llu", (u_longlong_t)prop);
+		(void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop);
+		bzero(value, sizeof (value));
+		last_txg = 0;
+
+		/*
+		 * If these zap entries already exist, validate their contents.
+		 */
+		error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
+		if (error == 0) {
+			ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
+			ASSERT3U(zl_ints, ==, 1);
+
+			error = zap_lookup(os, object, txgname, zl_intsize,
+			    zl_ints, &last_txg);
+
+			ASSERT3U(error, ==, 0);
+
+			error = zap_length(os, object, propname, &zl_intsize,
+			    &zl_ints);
+
+			ASSERT3U(error, ==, 0);
+			ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
+			ASSERT3U(zl_ints, ==, ints);
+
+			error = zap_lookup(os, object, propname, zl_intsize,
+			    zl_ints, value);
+
+			ASSERT3U(error, ==, 0);
+
+			for (i = 0; i < ints; i++) {
+				ASSERT3U(value[i], ==, last_txg + object + i);
+			}
+		} else {
+			ASSERT3U(error, ==, ENOENT);
+		}
+
+		/*
+		 * Atomically update two entries in our zap object.
+		 * The first is named txg_%llu, and contains the txg
+		 * in which the property was last updated.  The second
+		 * is named prop_%llu, and the nth element of its value
+		 * should be txg + object + n.
+		 */
+		tx = dmu_tx_create(os);
+		dmu_tx_hold_zap(tx, object, TRUE, NULL);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			ztest_record_enospc("create zap entry");
+			dmu_tx_abort(tx);
+			return;
+		}
+		txg = dmu_tx_get_txg(tx);
+
+		if (last_txg > txg)
+			fatal(0, "zap future leak: old %llu new %llu",
+			    last_txg, txg);
+
+		for (i = 0; i < ints; i++)
+			value[i] = txg + object + i;
+
+		error = zap_update(os, object, txgname, sizeof (uint64_t),
+		    1, &txg, tx);
+		if (error)
+			fatal(0, "zap_update('%s', %llu, '%s') = %d",
+			    osname, object, txgname, error);
+
+		error = zap_update(os, object, propname, sizeof (uint64_t),
+		    ints, value, tx);
+		if (error)
+			fatal(0, "zap_update('%s', %llu, '%s') = %d",
+			    osname, object, propname, error);
+
+		dmu_tx_commit(tx);
+
+		/*
+		 * Remove a random pair of entries.
+		 */
+		prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
+		(void) sprintf(propname, "prop_%llu", (u_longlong_t)prop);
+		(void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop);
+
+		error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
+
+		if (error == ENOENT)
+			continue;
+
+		ASSERT3U(error, ==, 0);
+
+		tx = dmu_tx_create(os);
+		dmu_tx_hold_zap(tx, object, TRUE, NULL);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			ztest_record_enospc("remove zap entry");
+			dmu_tx_abort(tx);
+			return;
+		}
+		error = zap_remove(os, object, txgname, tx);
+		if (error)
+			fatal(0, "zap_remove('%s', %llu, '%s') = %d",
+			    osname, object, txgname, error);
+
+		error = zap_remove(os, object, propname, tx);
+		if (error)
+			fatal(0, "zap_remove('%s', %llu, '%s') = %d",
+			    osname, object, propname, error);
+
+		dmu_tx_commit(tx);
+	}
+
+	/*
+	 * Once in a while, destroy the object.
+	 */
+	if (ztest_random(100) != 0)
+		return;
+
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t));
+	dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		ztest_record_enospc("destroy zap object");
+		dmu_tx_abort(tx);
+		return;
+	}
+	error = zap_destroy(os, object, tx);
+	if (error)
+		fatal(0, "zap_destroy('%s', %llu) = %d",
+		    osname, object, error);
+	object = 0;
+	dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t),
+	    &object, tx);
+	dmu_tx_commit(tx);
+}
+
+void
+ztest_zap_parallel(ztest_args_t *za)
+{
+	objset_t *os = za->za_os;
+	uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc;
+	int iters = 100;
+	dmu_tx_t *tx;
+	int i, namelen, error;
+	char name[20], string_value[20];
+	void *data;
+
+	while (--iters >= 0) {
+		/*
+		 * Generate a random name of the form 'xxx.....' where each
+		 * x is a random printable character and the dots are dots.
+		 * There are 94 such characters, and the name length goes from
+		 * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names.
+		 */
+		namelen = ztest_random(sizeof (name) - 5) + 5 + 1;
+
+		for (i = 0; i < 3; i++)
+			name[i] = '!' + ztest_random('~' - '!' + 1);
+		for (; i < namelen - 1; i++)
+			name[i] = '.';
+		name[i] = '\0';
+
+		if (ztest_random(2) == 0)
+			object = ZTEST_MICROZAP_OBJ;
+		else
+			object = ZTEST_FATZAP_OBJ;
+
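+		/*
+		 * Micro-zap entries hold a single 64-bit value, so use the
+		 * txg as the value there (and for odd name lengths);
+		 * otherwise store the name string itself.
+		 */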
+		if ((namelen & 1) || object == ZTEST_MICROZAP_OBJ) {
+			wsize = sizeof (txg);
+			wc = 1;
+			data = &txg;
+		} else {
+			wsize = 1;
+			wc = namelen;
+			data = string_value;
+		}
+
+		count = -1ULL;
+		VERIFY(zap_count(os, object, &count) == 0);
+		ASSERT(count != -1ULL);
+
+		/*
+		 * Select an operation: length, lookup, add, update, remove.
+		 */
+		i = ztest_random(5);
+
+		if (i >= 2) {
+			tx = dmu_tx_create(os);
+			dmu_tx_hold_zap(tx, object, TRUE, NULL);
+			error = dmu_tx_assign(tx, TXG_WAIT);
+			if (error) {
+				ztest_record_enospc("zap parallel");
+				dmu_tx_abort(tx);
+				return;
+			}
+			txg = dmu_tx_get_txg(tx);
+			bcopy(name, string_value, namelen);
+		} else {
+			tx = NULL;
+			txg = 0;
+			bzero(string_value, namelen);
+		}
+
+		switch (i) {
+
+		case 0:
+			error = zap_length(os, object, name, &zl_wsize, &zl_wc);
+			if (error == 0) {
+				ASSERT3U(wsize, ==, zl_wsize);
+				ASSERT3U(wc, ==, zl_wc);
+			} else {
+				ASSERT3U(error, ==, ENOENT);
+			}
+			break;
+
+		case 1:
+			error = zap_lookup(os, object, name, wsize, wc, data);
+			if (error == 0) {
+				if (data == string_value &&
+				    bcmp(name, data, namelen) != 0)
+					fatal(0, "name '%s' != val '%s' len %d",
+					    name, data, namelen);
+			} else {
+				ASSERT3U(error, ==, ENOENT);
+			}
+			break;
+
+		case 2:
+			error = zap_add(os, object, name, wsize, wc, data, tx);
+			ASSERT(error == 0 || error == EEXIST);
+			break;
+
+		case 3:
+			VERIFY(zap_update(os, object, name, wsize, wc,
+			    data, tx) == 0);
+			break;
+
+		case 4:
+			error = zap_remove(os, object, name, tx);
+			ASSERT(error == 0 || error == ENOENT);
+			break;
+		}
+
+		if (tx != NULL)
+			dmu_tx_commit(tx);
+	}
+}
+
+void
+ztest_dsl_prop_get_set(ztest_args_t *za)
+{
+	objset_t *os = za->za_os;
+	int i, inherit;
+	uint64_t value;
+	const char *prop, *valname;
+	char setpoint[MAXPATHLEN];
+	char osname[MAXNAMELEN];
+	int error;
+
+	(void) rw_rdlock(&ztest_shared->zs_name_lock);
+
+	dmu_objset_name(os, osname);
+
+	for (i = 0; i < 2; i++) {
+		if (i == 0) {
+			prop = "checksum";
+			value = ztest_random_checksum();
+			inherit = (value == ZIO_CHECKSUM_INHERIT);
+		} else {
+			prop = "compression";
+			value = ztest_random_compress();
+			inherit = (value == ZIO_COMPRESS_INHERIT);
+		}
+
+		error = dsl_prop_set(osname, prop, sizeof (value),
+		    !inherit, &value);
+
+		if (error == ENOSPC) {
+			ztest_record_enospc("dsl_prop_set");
+			break;
+		}
+
+		ASSERT3U(error, ==, 0);
+
+		VERIFY3U(dsl_prop_get(osname, prop, sizeof (value),
+		    1, &value, setpoint), ==, 0);
+
+		if (i == 0)
+			valname = zio_checksum_table[value].ci_name;
+		else
+			valname = zio_compress_table[value].ci_name;
+
+		if (zopt_verbose >= 6) {
+			(void) printf("%s %s = %s for '%s'\n",
+			    osname, prop, valname, setpoint);
+		}
+	}
+
+	(void) rw_unlock(&ztest_shared->zs_name_lock);
+}
+
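+/*
+ * Recursively arm fault injection on every path-bearing (leaf) vdev
+ * under 'vd'.
+ */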
+static void
+ztest_error_setup(vdev_t *vd, int mode, int mask, uint64_t arg)
+{
+	int c;
+
+	for (c = 0; c < vd->vdev_children; c++)
+		ztest_error_setup(vd->vdev_child[c], mode, mask, arg);
+
+	if (vd->vdev_path != NULL) {
+		vd->vdev_fault_mode = mode;
+		vd->vdev_fault_mask = mask;
+		vd->vdev_fault_arg = arg;
+	}
+}
+
+/*
+ * Inject random faults into the on-disk data.
+ */
+void
+ztest_fault_inject(ztest_args_t *za)
+{
+	int fd;
+	uint64_t offset;
+	uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+	uint64_t bad = 0x1990c0ffeedecadeULL;
+	uint64_t top, leaf;
+	char path0[MAXPATHLEN];
+	char pathrand[MAXPATHLEN];
+	size_t fsize;
+	spa_t *spa = dmu_objset_spa(za->za_os);
+	int bshift = SPA_MAXBLOCKSHIFT + 2;	/* don't scrog all labels */
+	int iters = 1000;
+	vdev_t *vd0;
+	uint64_t guid0 = 0;
+
+	/*
+	 * We can't inject faults when we have no fault tolerance.
+	 */
+	if (zopt_maxfaults == 0)
+		return;
+
+	ASSERT(leaves >= 2);
+
+	/*
+	 * Pick a random top-level vdev.
+	 */
+	spa_config_enter(spa, RW_READER, FTAG);
+	top = ztest_random(spa->spa_root_vdev->vdev_children);
+	spa_config_exit(spa, FTAG);
+
+	/*
+	 * Pick a random leaf.
+	 */
+	leaf = ztest_random(leaves);
+
+	/*
+	 * Generate paths to the first two leaves in this top-level vdev,
+	 * and to the random leaf we selected.  We'll induce transient
+	 * I/O errors and random online/offline activity on leaf 0,
+	 * and we'll write random garbage to the randomly chosen leaf.
+	 */
+	(void) snprintf(path0, sizeof (path0),
+	    ztest_dev_template, zopt_dir, zopt_pool, top * leaves + 0);
+	(void) snprintf(pathrand, sizeof (pathrand),
+	    ztest_dev_template, zopt_dir, zopt_pool, top * leaves + leaf);
+
+	dprintf("damaging %s and %s\n", path0, pathrand);
+
+	spa_config_enter(spa, RW_READER, FTAG);
+
+	/*
+	 * If we can tolerate two or more faults, make vd0 fail randomly.
+	 */
+	vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
+	if (vd0 != NULL && zopt_maxfaults >= 2) {
+		guid0 = vd0->vdev_guid;
+		ztest_error_setup(vd0, VDEV_FAULT_COUNT,
+		    (1U << ZIO_TYPE_READ) | (1U << ZIO_TYPE_WRITE), 100);
+	}
+
+	spa_config_exit(spa, FTAG);
+
+	/*
+	 * If we can tolerate two or more faults, randomly online/offline vd0.
+	 */
+	if (zopt_maxfaults >= 2 && guid0 != 0) {
+		if (ztest_random(10) < 6)
+			(void) vdev_offline(spa, guid0, B_TRUE);
+		else
+			(void) vdev_online(spa, guid0);
+	}
+
+	/*
+	 * We have at least single-fault tolerance, so inject data corruption.
+	 */
+	fd = open(pathrand, O_RDWR);
+
+	if (fd == -1)	/* we hit a gap in the device namespace */
+		return;
+
+	fsize = lseek(fd, 0, SEEK_END);
+
+	while (--iters != 0) {
+		offset = ztest_random(fsize / (leaves << bshift)) *
+		    (leaves << bshift) + (leaf << bshift) +
+		    (ztest_random(1ULL << (bshift - 1)) & -8ULL);
+
+		if (offset >= fsize)
+			continue;
+
+		if (zopt_verbose >= 6)
+			(void) printf("injecting bad word into %s,"
+			    " offset 0x%llx\n", pathrand, (u_longlong_t)offset);
+
+		if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad))
+			fatal(1, "can't inject bad word at 0x%llx in %s",
+			    offset, pathrand);
+	}
+
+	(void) close(fd);
+}
+
+/*
+ * Scrub the pool.
+ */
+void
+ztest_scrub(ztest_args_t *za)
+{
+	spa_t *spa = dmu_objset_spa(za->za_os);
+
+	(void) spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_FALSE);
+	(void) poll(NULL, 0, 1000); /* wait a second, then force a restart */
+	(void) spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_FALSE);
+}
+
+/*
+ * Rename the pool to a different name and then rename it back.
+ */
+void
+ztest_spa_rename(ztest_args_t *za)
+{
+	char *oldname, *newname;
+	int error;
+	spa_t *spa;
+
+	(void) rw_wrlock(&ztest_shared->zs_name_lock);
+
+	oldname = za->za_pool;
+	newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL);
+	(void) strcpy(newname, oldname);
+	(void) strcat(newname, "_tmp");
+
+	/*
+	 * Do the rename
+	 */
+	error = spa_rename(oldname, newname);
+	if (error)
+		fatal(0, "spa_rename('%s', '%s') = %d", oldname,
+		    newname, error);
+
+	/*
+	 * Try to open it under the old name, which shouldn't exist
+	 */
+	error = spa_open(oldname, &spa, FTAG);
+	if (error != ENOENT)
+		fatal(0, "spa_open('%s') = %d", oldname, error);
+
+	/*
+	 * Open it under the new name and make sure it's still the same spa_t.
+	 */
+	error = spa_open(newname, &spa, FTAG);
+	if (error != 0)
+		fatal(0, "spa_open('%s') = %d", newname, error);
+
+	ASSERT(spa == dmu_objset_spa(za->za_os));
+	spa_close(spa, FTAG);
+
+	/*
+	 * Rename it back to the original
+	 */
+	error = spa_rename(newname, oldname);
+	if (error)
+		fatal(0, "spa_rename('%s', '%s') = %d", newname,
+		    oldname, error);
+
+	/*
+	 * Make sure it can still be opened
+	 */
+	error = spa_open(oldname, &spa, FTAG);
+	if (error != 0)
+		fatal(0, "spa_open('%s') = %d", oldname, error);
+
+	ASSERT(spa == dmu_objset_spa(za->za_os));
+	spa_close(spa, FTAG);
+
+	umem_free(newname, strlen(newname) + 1);
+
+	(void) rw_unlock(&ztest_shared->zs_name_lock);
+}
+
+
+/*
+ * Completely obliterate one disk.
+ */
+static void
+ztest_obliterate_one_disk(uint64_t vdev)
+{
+	int fd;
+	char dev_name[MAXPATHLEN], copy_name[MAXPATHLEN];
+	size_t fsize;
+
+	if (zopt_maxfaults < 2)
+		return;
+
+	(void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
+	(void) snprintf(copy_name, MAXPATHLEN, "%s.old", dev_name);
+
+	fd = open(dev_name, O_RDWR);
+
+	if (fd == -1)
+		fatal(1, "can't open %s", dev_name);
+
+	/*
+	 * Determine the size.
+	 */
+	fsize = lseek(fd, 0, SEEK_END);
+
+	(void) close(fd);
+
+	/*
+	 * Rename the old device to dev_name.old (useful for debugging).
+	 */
+	VERIFY(rename(dev_name, copy_name) == 0);
+
+	/*
+	 * Create a new one.
+	 */
+	VERIFY((fd = open(dev_name, O_RDWR | O_CREAT | O_TRUNC, 0666)) >= 0);
+	VERIFY(ftruncate(fd, fsize) == 0);
+	(void) close(fd);
+}
+
+static void
+ztest_replace_one_disk(spa_t *spa, uint64_t vdev)
+{
+	char dev_name[MAXPATHLEN];
+	nvlist_t *file, *root;
+	int error;
+	uint64_t guid;
+	uint64_t ashift = ztest_get_ashift();
+	vdev_t *vd;
+
+	(void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
+
+	/*
+	 * Build the nvlist describing dev_name.
+	 */
+	VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
+	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
+	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, dev_name) == 0);
+	VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
+
+	VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
+	VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
+	VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
+	    &file, 1) == 0);
+
+	spa_config_enter(spa, RW_READER, FTAG);
+	if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, dev_name)) == NULL)
+		guid = 0;
+	else
+		guid = vd->vdev_guid;
+	spa_config_exit(spa, FTAG);
+	error = spa_vdev_attach(spa, guid, root, B_TRUE);
+	if (error != 0 &&
+	    error != EBUSY &&
+	    error != ENOTSUP &&
+	    error != ENODEV &&
+	    error != EDOM)
+		fatal(0, "spa_vdev_attach(in-place) = %d", error);
+
+	nvlist_free(file);
+	nvlist_free(root);
+}
+
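+/*
+ * Run zdb against the pool and treat any failure it reports as fatal.
+ */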
+static void
+ztest_verify_blocks(char *pool)
+{
+	int status;
+	char zdb[MAXPATHLEN + MAXNAMELEN + 20];
+	char zbuf[1024];
+	char *bin;
+	FILE *fp;
+
+	if (realpath(progname, zdb) == NULL)
+		assert(!"realpath() failed");
+
+	/* zdb lives in /usr/sbin, while ztest lives in /usr/bin */
+	bin = strstr(zdb, "/usr/bin/");
+	if (bin == NULL)
+		bin = zdb;
+	/* LINTED */
+	(void) sprintf(bin, "/usr/sbin/zdb -bc%s%s -U -O %s %s",
+	    zopt_verbose >= 3 ? "s" : "",
+	    zopt_verbose >= 4 ? "v" : "",
+	    ztest_random(2) == 0 ? "pre" : "post", pool);
+
+	if (zopt_verbose >= 5)
+		(void) printf("Executing %s\n", strstr(zdb, "zdb "));
+
+	fp = popen(zdb, "r");
+	assert(fp != NULL);
+
+	while (fgets(zbuf, sizeof (zbuf), fp) != NULL)
+		if (zopt_verbose >= 3)
+			(void) printf("%s", zbuf);
+
+	status = pclose(fp);
+
+	if (status == 0)
+		return;
+
+	ztest_dump_core = 0;
+	if (WIFEXITED(status))
+		fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status));
+	else
+		fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status));
+}
+
+static void
+ztest_walk_pool_directory(char *header)
+{
+	spa_t *spa = NULL;
+
+	if (zopt_verbose >= 6)
+		(void) printf("%s\n", header);
+
+	mutex_enter(&spa_namespace_lock);
+	while ((spa = spa_next(spa)) != NULL)
+		if (zopt_verbose >= 6)
+			(void) printf("\t%s\n", spa_name(spa));
+	mutex_exit(&spa_namespace_lock);
+}
+
+static void
+ztest_spa_import_export(char *oldname, char *newname)
+{
+	nvlist_t *config;
+	uint64_t pool_guid;
+	spa_t *spa;
+	int error;
+
+	if (zopt_verbose >= 4) {
+		(void) printf("import/export: old = %s, new = %s\n",
+		    oldname, newname);
+	}
+
+	/*
+	 * Clean up from previous runs.
+	 */
+	(void) spa_destroy(newname);
+
+	/*
+	 * Get the pool's configuration and guid.
+	 */
+	error = spa_open(oldname, &spa, FTAG);
+	if (error)
+		fatal(0, "spa_open('%s') = %d", oldname, error);
+
+	pool_guid = spa_guid(spa);
+	spa_close(spa, FTAG);
+
+	ztest_walk_pool_directory("pools before export");
+
+	/*
+	 * Export it.
+	 */
+	error = spa_export(oldname, &config);
+	if (error)
+		fatal(0, "spa_export('%s') = %d", oldname, error);
+
+	ztest_walk_pool_directory("pools after export");
+
+	/*
+	 * Import it under the new name.
+	 */
+	error = spa_import(newname, config, NULL);
+	if (error)
+		fatal(0, "spa_import('%s') = %d", newname, error);
+
+	ztest_walk_pool_directory("pools after import");
+
+	/*
+	 * Try to import it again -- should fail with EEXIST.
+	 */
+	error = spa_import(newname, config, NULL);
+	if (error != EEXIST)
+		fatal(0, "spa_import('%s') twice", newname);
+
+	/*
+	 * Try to import it under a different name -- should fail with EEXIST.
+	 */
+	error = spa_import(oldname, config, NULL);
+	if (error != EEXIST)
+		fatal(0, "spa_import('%s') under multiple names", newname);
+
+	/*
+	 * Verify that the pool is no longer visible under the old name.
+	 */
+	error = spa_open(oldname, &spa, FTAG);
+	if (error != ENOENT)
+		fatal(0, "spa_open('%s') = %d", oldname, error);
+
+	/*
+	 * Verify that we can open and close the pool using the new name.
+	 */
+	error = spa_open(newname, &spa, FTAG);
+	if (error)
+		fatal(0, "spa_open('%s') = %d", newname, error);
+	ASSERT(pool_guid == spa_guid(spa));
+	spa_close(spa, FTAG);
+
+	nvlist_free(config);
+}
+
+static void *
+ztest_thread(void *arg)
+{
+	ztest_args_t *za = arg;
+	ztest_shared_t *zs = ztest_shared;
+	hrtime_t now, functime;
+	ztest_info_t *zi;
+	int f;
+
+	while ((now = gethrtime()) < za->za_stop) {
+		/*
+		 * See if it's time to force a crash.
+		 */
+		if (now > za->za_kill) {
+			dmu_tx_t *tx;
+			uint64_t txg;
+
+			mutex_enter(&spa_namespace_lock);
+			tx = dmu_tx_create(za->za_os);
+			VERIFY(0 == dmu_tx_assign(tx, TXG_NOWAIT));
+			txg = dmu_tx_get_txg(tx);
+			dmu_tx_commit(tx);
+			zs->zs_txg = txg;
+			if (zopt_verbose >= 3)
+				(void) printf(
+				    "killing process after txg %lld\n",
+				    (u_longlong_t)txg);
+			txg_wait_synced(dmu_objset_pool(za->za_os), txg);
+			zs->zs_alloc = spa_get_alloc(dmu_objset_spa(za->za_os));
+			zs->zs_space = spa_get_space(dmu_objset_spa(za->za_os));
+			(void) kill(getpid(), SIGKILL);
+		}
+
+		/*
+		 * Pick a random function.
+		 */
+		f = ztest_random(ZTEST_FUNCS);
+		zi = &zs->zs_info[f];
+
+		/*
+		 * Decide whether to call it, based on the requested frequency:
+		 * skip it if we're already ahead of its target call rate for
+		 * the elapsed fraction of the run.
+		 */
+		if (zi->zi_call_target == 0 ||
+		    (double)zi->zi_call_total / zi->zi_call_target >
+		    (double)(now - zs->zs_start_time) / (zopt_time * NANOSEC))
+			continue;
+
+		atomic_add_64(&zi->zi_calls, 1);
+		atomic_add_64(&zi->zi_call_total, 1);
+
+		za->za_diroff = (za->za_instance * ZTEST_FUNCS + f) *
+		    ZTEST_DIRSIZE;
+		za->za_diroff_shared = (1ULL << 63);
+
+		ztest_dmu_write_parallel(za);
+
+		zi->zi_func(za);
+
+		functime = gethrtime() - now;
+
+		atomic_add_64(&zi->zi_call_time, functime);
+
+		if (zopt_verbose >= 4) {
+			Dl_info dli;
+			(void) dladdr((void *)zi->zi_func, &dli);
+			(void) printf("%6.2f sec in %s\n",
+			    (double)functime / NANOSEC, dli.dli_sname);
+		}
+
+		/*
+		 * If we're getting ENOSPC with some regularity, stop.
+		 */
+		if (zs->zs_enospc_count > 10)
+			break;
+	}
+
+	return (NULL);
+}
+
+/*
+ * Kick off threads to run tests on all datasets in parallel.
+ */
+static void
+ztest_run(char *pool)
+{
+	int t, d, error;
+	ztest_shared_t *zs = ztest_shared;
+	ztest_args_t *za;
+	spa_t *spa;
+	char name[100];
+
+	(void) _mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL);
+	(void) rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL);
+
+	for (t = 0; t < ZTEST_SYNC_LOCKS; t++)
+		(void) _mutex_init(&zs->zs_sync_lock[t], USYNC_THREAD, NULL);
+
+	/*
+	 * Destroy one disk before we even start.
+	 * It's mirrored, so everything should work just fine.
+	 * This makes us exercise fault handling very early in spa_load().
+	 */
+	ztest_obliterate_one_disk(0);
+
+	/*
+	 * Verify that the sum of the sizes of all blocks in the pool
+	 * equals the SPA's allocated space total.
+	 */
+	ztest_verify_blocks(pool);
+
+	/*
+	 * Kick off a replacement of the disk we just obliterated.
+	 */
+	kernel_init(FREAD | FWRITE);
+	error = spa_open(pool, &spa, FTAG);
+	if (error)
+		fatal(0, "spa_open(%s) = %d", pool, error);
+	ztest_replace_one_disk(spa, 0);
+	if (zopt_verbose >= 5)
+		show_pool_stats(spa);
+	spa_close(spa, FTAG);
+	kernel_fini();
+
+	kernel_init(FREAD | FWRITE);
+
+	/*
+	 * Verify that we can export the pool and reimport it under a
+	 * different name.
+	 */
+	if (ztest_random(2) == 0) {
+		(void) snprintf(name, 100, "%s_import", pool);
+		ztest_spa_import_export(pool, name);
+		ztest_spa_import_export(name, pool);
+	}
+
+	/*
+	 * Verify that we can loop over all pools.
+	 */
+	mutex_enter(&spa_namespace_lock);
+	for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) {
+		if (zopt_verbose > 3) {
+			(void) printf("spa_next: found %s\n", spa_name(spa));
+		}
+	}
+	mutex_exit(&spa_namespace_lock);
+
+	/*
+	 * Open our pool.
+	 */
+	error = spa_open(pool, &spa, FTAG);
+	if (error)
+		fatal(0, "spa_open() = %d", error);
+
+	/*
+	 * Verify that we can safely inquire about any object,
+	 * whether it's allocated or not.  To make it interesting,
+	 * we probe a window of +/- 5 around each power of two.
+	 * This hits all edge cases, including zero and the max.
+	 */
+	for (t = 0; t < 64; t++) {
+		for (d = -5; d <= 5; d++) {
+			error = dmu_object_info(spa->spa_meta_objset,
+			    (1ULL << t) + d, NULL);
+			ASSERT(error == 0 || error == ENOENT ||
+			    error == EINVAL);
+		}
+	}
+
+	/*
+	 * Now kick off all the tests that run in parallel.
+	 */
+	zs->zs_enospc_count = 0;
+
+	za = umem_zalloc(zopt_threads * sizeof (ztest_args_t), UMEM_NOFAIL);
+
+	if (zopt_verbose >= 4)
+		(void) printf("starting main threads...\n");
+
+	za[0].za_start = gethrtime();
+	za[0].za_stop = za[0].za_start + zopt_passtime * NANOSEC;
+	za[0].za_stop = MIN(za[0].za_stop, zs->zs_stop_time);
+	za[0].za_kill = za[0].za_stop;
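+	/*
+	 * With probability zopt_killrate (percent), schedule a simulated
+	 * crash (SIGKILL in ztest_thread()) before the end of this pass.
+	 */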
+	if (ztest_random(100) < zopt_killrate)
+		za[0].za_kill -= ztest_random(zopt_passtime * NANOSEC);
+
+	for (t = 0; t < zopt_threads; t++) {
+		d = t % zopt_datasets;
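+		/*
+		 * The first zopt_datasets threads each create and open a
+		 * dataset; later threads share those objsets and zilogs.
+		 */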
+		if (t < zopt_datasets) {
+			ztest_replay_t zr;
+			int test_future = FALSE;
+			(void) rw_rdlock(&ztest_shared->zs_name_lock);
+			(void) snprintf(name, 100, "%s/%s_%d", pool, pool, d);
+			error = dmu_objset_create(name, DMU_OST_OTHER, NULL,
+			    ztest_create_cb, NULL);
+			if (error == EEXIST) {
+				test_future = TRUE;
+			} else if (error != 0) {
+				if (error == ENOSPC) {
+					zs->zs_enospc_count++;
+					(void) rw_unlock(
+					    &ztest_shared->zs_name_lock);
+					break;
+				}
+				fatal(0, "dmu_objset_create(%s) = %d",
+				    name, error);
+			}
+			error = dmu_objset_open(name, DMU_OST_OTHER,
+			    DS_MODE_STANDARD, &za[d].za_os);
+			if (error)
+				fatal(0, "dmu_objset_open('%s') = %d",
+				    name, error);
+			(void) rw_unlock(&ztest_shared->zs_name_lock);
+			if (test_future && ztest_shared->zs_txg > 0)
+				ztest_dmu_check_future_leak(za[d].za_os,
+				    ztest_shared->zs_txg);
+			zr.zr_os = za[d].za_os;
+			zil_replay(zr.zr_os, &zr, &zr.zr_assign,
+			    ztest_replay_vector);
+			za[d].za_zilog = zil_open(za[d].za_os, NULL);
+		}
+		za[t].za_pool = spa_strdup(pool);
+		za[t].za_os = za[d].za_os;
+		za[t].za_zilog = za[d].za_zilog;
+		za[t].za_instance = t;
+		za[t].za_random = ztest_random(-1ULL);
+		za[t].za_start = za[0].za_start;
+		za[t].za_stop = za[0].za_stop;
+		za[t].za_kill = za[0].za_kill;
+
+		error = thr_create(0, 0, ztest_thread, &za[t], THR_BOUND,
+		    &za[t].za_thread);
+		if (error)
+			fatal(0, "can't create thread %d: error %d",
+			    t, error);
+	}
+	ztest_shared->zs_txg = 0;
+
+	while (--t >= 0) {
+		error = thr_join(za[t].za_thread, NULL, NULL);
+		if (error)
+			fatal(0, "thr_join(%d) = %d", t, error);
+		if (za[t].za_th)
+			traverse_fini(za[t].za_th);
+		if (t < zopt_datasets) {
+			zil_close(za[t].za_zilog);
+			dmu_objset_close(za[t].za_os);
+		}
+		spa_strfree(za[t].za_pool);
+	}
+
+	umem_free(za, zopt_threads * sizeof (ztest_args_t));
+
+	if (zopt_verbose >= 3)
+		show_pool_stats(spa);
+
+	txg_wait_synced(spa_get_dsl(spa), 0);
+
+	zs->zs_alloc = spa_get_alloc(spa);
+	zs->zs_space = spa_get_space(spa);
+
+	/*
+	 * Did we have out-of-space errors?  If so, destroy a random objset.
+	 */
+	if (zs->zs_enospc_count != 0) {
+		(void) rw_rdlock(&ztest_shared->zs_name_lock);
+		(void) snprintf(name, 100, "%s/%s_%d", pool, pool,
+		    (int)ztest_random(zopt_datasets));
+		if (zopt_verbose >= 3)
+			(void) printf("Destroying %s to free up space\n", name);
+		(void) dmu_objset_find(name, ztest_destroy_cb, NULL,
+		    DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
+		(void) rw_unlock(&ztest_shared->zs_name_lock);
+	}
+
+	txg_wait_synced(spa_get_dsl(spa), 0);
+
+	/*
+	 * Right before closing the pool, kick off a bunch of async I/O;
+	 * spa_close() should wait for it to complete.
+	 */
+	for (t = 1; t < 50; t++)
+		dmu_prefetch(spa->spa_meta_objset, t, 0, 1 << 15);
+
+	spa_close(spa, FTAG);
+
+	kernel_fini();
+}
+
+void
+print_time(hrtime_t t, char *timebuf)
+{
+	hrtime_t s = t / NANOSEC;
+	hrtime_t m = s / 60;
+	hrtime_t h = m / 60;
+	hrtime_t d = h / 24;
+
+	s -= m * 60;
+	m -= h * 60;
+	h -= d * 24;
+
+	timebuf[0] = '\0';
+
+	if (d)
+		(void) sprintf(timebuf,
+		    "%llud%02lluh%02llum%02llus", d, h, m, s);
+	else if (h)
+		(void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s);
+	else if (m)
+		(void) sprintf(timebuf, "%llum%02llus", m, s);
+	else
+		(void) sprintf(timebuf, "%llus", s);
+}
+
+/*
+ * Create a storage pool with the given name and initial vdev size.
+ * Then create the specified number of datasets in the pool.
+ */
+static void
+ztest_init(char *pool)
+{
+	spa_t *spa;
+	int error;
+	nvlist_t *nvroot;
+
+	kernel_init(FREAD | FWRITE);
+
+	/*
+	 * Create the storage pool.
+	 */
+	(void) spa_destroy(pool);
+	ztest_shared->zs_vdev_primaries = 0;
+	nvroot = make_vdev_root(zopt_vdev_size, zopt_raidz, zopt_mirrors, 1);
+	error = spa_create(pool, nvroot, NULL);
+	nvlist_free(nvroot);
+
+	if (error)
+		fatal(0, "spa_create() = %d", error);
+	error = spa_open(pool, &spa, FTAG);
+	if (error)
+		fatal(0, "spa_open() = %d", error);
+
+	if (zopt_verbose >= 3)
+		show_pool_stats(spa);
+
+	spa_close(spa, FTAG);
+
+	kernel_fini();
+}
+
+int
+main(int argc, char **argv)
+{
+	int kills = 0;
+	int iters = 0;
+	int i, f;
+	ztest_shared_t *zs;
+	ztest_info_t *zi;
+	char timebuf[100];
+	char numbuf[6];
+
+	(void) setvbuf(stdout, NULL, _IOLBF, 0);
+
+	/* Override location of zpool.cache */
+	spa_config_dir = "/tmp";
+
+	ztest_random_fd = open("/dev/urandom", O_RDONLY);
+
+	process_options(argc, argv);
+
+	argc -= optind;
+	argv += optind;
+
+	dprintf_setup(&argc, argv);
+
+	/*
+	 * Blow away any existing copy of zpool.cache
+	 */
+	if (zopt_init != 0)
+		(void) remove("/tmp/zpool.cache");
+
+	zs = ztest_shared = (void *)mmap(0,
+	    P2ROUNDUP(sizeof (ztest_shared_t), getpagesize()),
+	    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	if (zopt_verbose >= 1) {
+		(void) printf("%llu vdevs, %d datasets, %d threads,"
+		    " %llu seconds...\n",
+		    (u_longlong_t)zopt_vdevs, zopt_datasets, zopt_threads,
+		    (u_longlong_t)zopt_time);
+	}
+
+	/*
+	 * Create and initialize our storage pool.
+	 */
+	for (i = 1; i <= zopt_init; i++) {
+		bzero(zs, sizeof (ztest_shared_t));
+		if (zopt_verbose >= 3 && zopt_init != 1)
+			(void) printf("ztest_init(), pass %d\n", i);
+		ztest_init(zopt_pool);
+	}
+
+	/*
+	 * Initialize the call targets for each function.
+	 */
+	for (f = 0; f < ZTEST_FUNCS; f++) {
+		zi = &zs->zs_info[f];
+
+		*zi = ztest_info[f];
+
+		if (*zi->zi_interval == 0)
+			zi->zi_call_target = UINT64_MAX;
+		else
+			zi->zi_call_target = zopt_time / *zi->zi_interval;
+	}
+
+	zs->zs_start_time = gethrtime();
+	zs->zs_stop_time = zs->zs_start_time + zopt_time * NANOSEC;
+
+	/*
+	 * Run the tests in a loop.  These tests include fault injection
+	 * to verify that self-healing data works, and forced crashes
+	 * to verify that we never lose on-disk consistency.
+	 */
+	while (gethrtime() < zs->zs_stop_time) {
+		int status;
+		pid_t pid;
+		char *tmp;
+
+		/*
+		 * Initialize the workload counters for each function.
+		 */
+		for (f = 0; f < ZTEST_FUNCS; f++) {
+			zi = &zs->zs_info[f];
+			zi->zi_calls = 0;
+			zi->zi_call_time = 0;
+		}
+
+		pid = fork();
+
+		if (pid == -1)
+			fatal(1, "fork failed");
+
+		if (pid == 0) {	/* child */
+			struct rlimit rl = { 1024, 1024 };
+			(void) setrlimit(RLIMIT_NOFILE, &rl);
+			(void) enable_extended_FILE_stdio(-1, -1);
+			ztest_run(zopt_pool);
+			exit(0);
+		}
+
+		while (waitpid(pid, &status, 0) != pid)
+			continue;
+
+		if (WIFEXITED(status)) {
+			if (WEXITSTATUS(status) != 0) {
+				(void) fprintf(stderr,
+				    "child exited with code %d\n",
+				    WEXITSTATUS(status));
+				exit(2);
+			}
+		} else if (WIFSIGNALED(status)) {
+			if (WTERMSIG(status) != SIGKILL) {
+				(void) fprintf(stderr,
+				    "child died with signal %d\n",
+				    WTERMSIG(status));
+				exit(3);
+			}
+			kills++;
+		} else {
+			(void) fprintf(stderr, "something strange happened "
+			    "to child\n");
+			exit(4);
+		}
+
+		iters++;
+
+		if (zopt_verbose >= 1) {
+			hrtime_t now = gethrtime();
+
+			now = MIN(now, zs->zs_stop_time);
+			print_time(zs->zs_stop_time - now, timebuf);
+			nicenum(zs->zs_space, numbuf);
+
+			(void) printf("Pass %3d, %8s, %3llu ENOSPC, "
+			    "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n",
+			    iters,
+			    WIFEXITED(status) ? "Complete" : "SIGKILL",
+			    (u_longlong_t)zs->zs_enospc_count,
+			    100.0 * zs->zs_alloc / zs->zs_space,
+			    numbuf,
+			    100.0 * (now - zs->zs_start_time) /
+			    (zopt_time * NANOSEC), timebuf);
+		}
+
+		if (zopt_verbose >= 2) {
+			(void) printf("\nWorkload summary:\n\n");
+			(void) printf("%7s %9s   %s\n",
+			    "Calls", "Time", "Function");
+			(void) printf("%7s %9s   %s\n",
+			    "-----", "----", "--------");
+			for (f = 0; f < ZTEST_FUNCS; f++) {
+				Dl_info dli;
+
+				zi = &zs->zs_info[f];
+				print_time(zi->zi_call_time, timebuf);
+				(void) dladdr((void *)zi->zi_func, &dli);
+				(void) printf("%7llu %9s   %s\n",
+				    (u_longlong_t)zi->zi_calls, timebuf,
+				    dli.dli_sname);
+			}
+			(void) printf("\n");
+		}
+
+		/*
+		 * It's possible that we killed a child during a rename test, in
+		 * which case we'll have a 'ztest_tmp' pool lying around instead
+		 * of 'ztest'.  Do a blind rename in case this happened.
+		 */
+		tmp = umem_alloc(strlen(zopt_pool) + 5, UMEM_NOFAIL);
+		(void) strcpy(tmp, zopt_pool);
+		(void) strcat(tmp, "_tmp");
+		kernel_init(FREAD | FWRITE);
+		(void) spa_rename(tmp, zopt_pool);
+		kernel_fini();
+		umem_free(tmp, strlen(tmp) + 1);
+	}
+
+	ztest_verify_blocks(zopt_pool);
+
+	if (zopt_verbose >= 1) {
+		(void) printf("%d killed, %d completed, %.0f%% kill rate\n",
+		    kills, iters - kills, (100.0 * kills) / MAX(1, iters));
+	}
+
+	return (0);
+}
--- /dev/null
+++ cddl/contrib/opensolaris/head/thread.h
@@ -0,0 +1,99 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_THREAD_H
+#define	_THREAD_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <pthread.h>
+#include <assert.h>
+
+/*
+ * Compatibility thread stuff needed for Solaris -> Linux port
+ */
+
+typedef pthread_t thread_t;
+typedef pthread_mutex_t mutex_t;
+typedef pthread_cond_t cond_t;
+typedef pthread_rwlock_t rwlock_t;
+
+#define USYNC_THREAD 0
+
+#define	thr_self()		(unsigned long)pthread_self()
+#define	thr_equal(a,b)		pthread_equal(a,b)
+#define	thr_join(t,d,s)		pthread_join(t,s)
+#define	thr_exit(r)		pthread_exit(r)
+#define	_mutex_init(l,f,a)	pthread_mutex_init(l,NULL)
+#define	_mutex_destroy(l)	pthread_mutex_destroy(l)
+#define	mutex_lock(l)		pthread_mutex_lock(l)
+#define	mutex_trylock(l)	pthread_mutex_trylock(l)
+#define	mutex_unlock(l)		pthread_mutex_unlock(l)
+#define	rwlock_init(l,f,a)	pthread_rwlock_init(l,NULL)
+#define	rwlock_destroy(l)	pthread_rwlock_destroy(l)
+#define	rw_rdlock(l)		pthread_rwlock_rdlock(l)
+#define	rw_wrlock(l)		pthread_rwlock_wrlock(l)
+#define	rw_tryrdlock(l)		pthread_rwlock_tryrdlock(l)
+#define	rw_trywrlock(l)		pthread_rwlock_trywrlock(l)
+#define	rw_unlock(l)		pthread_rwlock_unlock(l)
+#define	cond_init(l,f,a)	pthread_cond_init(l,NULL)
+#define	cond_destroy(l)		pthread_cond_destroy(l)
+#define	cond_wait(l,m)		pthread_cond_wait(l,m)
+#define	cond_signal(l)		pthread_cond_signal(l)
+#define	cond_broadcast(l)	pthread_cond_broadcast(l)
+
+#define THR_BOUND     0x00000001  /* = PTHREAD_SCOPE_SYSTEM */
+#define THR_NEW_LWP   0x00000002
+#define THR_DETACHED  0x00000040  /* = PTHREAD_CREATE_DETACHED */
+#define THR_SUSPENDED 0x00000080
+#define THR_DAEMON    0x00000100
+
+static __inline int
+thr_create(void *stack_base, size_t stack_size, void *(*start_func) (void*),
+    void *arg, long flags, thread_t *new_thread_ID)
+{
+	int ret;
+
+	assert(stack_base == NULL);
+	assert(stack_size == 0);
+	assert((flags & ~THR_BOUND & ~THR_DETACHED) == 0);
+
+	pthread_attr_t attr;
+	pthread_attr_init(&attr);
+
+	if (flags & THR_DETACHED)
+		pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+	/*
+	 * This function ignores the THR_BOUND flag, since NPTL doesn't
+	 * seem to support PTHREAD_SCOPE_PROCESS.
+	 */
+
+	ret = pthread_create(new_thread_ID, &attr, start_func, arg);
+
+	pthread_attr_destroy(&attr);
+
+	return (ret);
+}
+
+#endif	/* _THREAD_H */
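
As a rough illustration of what this shim buys, the sketch below (illustrative
only, not part of the commit) shows Solaris-style thread code building
unchanged on top of the pthread macros above:

#include <thread.h>
#include <stdio.h>

static mutex_t lock;			/* really a pthread_mutex_t */

static void *
worker(void *arg)
{
	(void) mutex_lock(&lock);	/* expands to pthread_mutex_lock() */
	(void) printf("hello from thread %lu\n", thr_self());
	(void) mutex_unlock(&lock);
	return (arg);
}

int
main(void)
{
	thread_t tid;

	(void) _mutex_init(&lock, USYNC_THREAD, NULL);
	if (thr_create(NULL, 0, worker, NULL, THR_BOUND, &tid) != 0)
		return (1);
	(void) thr_join(tid, NULL, NULL);	/* becomes pthread_join() */
	return (0);
}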
--- /dev/null
+++ cddl/contrib/opensolaris/head/libintl.h
@@ -0,0 +1,124 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Libintl is a library of advanced internationalization functions. */
+
+#ifndef	_LIBINTL_H
+#define	_LIBINTL_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/isa_defs.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * wchar_t is a built-in type in standard C++ and as such is not
+ * defined here when using standard C++. However, the GNU compiler
+ * fixincludes utility nonetheless creates its own version of this
+ * header for use by gcc and g++. In that version it adds a redundant
+ * guard for __cplusplus. To avoid the creation of a gcc/g++ specific
+ * header we need to include the following magic comment:
+ *
+ * we must use the C++ compiler's type
+ *
+ * The above comment should not be removed or changed until GNU
+ * gcc/fixinc/inclhack.def is updated to bypass this header.
+ */
+#if !defined(__cplusplus) || (__cplusplus < 199711L && !defined(__GNUG__))
+#ifndef _WCHAR_T
+#define	_WCHAR_T
+#if defined(_LP64)
+typedef int	wchar_t;
+#else
+typedef long	wchar_t;
+#endif
+#endif	/* !_WCHAR_T */
+#endif	/* !defined(__cplusplus) ... */
+
+#define	TEXTDOMAINMAX	256
+
+#ifdef __STDC__
+extern char *dcgettext(const char *, const char *, const int);
+extern char *dgettext(const char *, const char *);
+extern char *gettext(const char *);
+extern char *textdomain(const char *);
+extern char *bindtextdomain(const char *, const char *);
+
+/*
+ * LI18NUX 2000 Globalization Specification Version 1.0
+ * with Amendment 2
+ */
+extern char *dcngettext(const char *, const char *,
+	const char *, unsigned long int, int);
+extern char *dngettext(const char *, const char *,
+	const char *, unsigned long int);
+extern char *ngettext(const char *, const char *, unsigned long int);
+extern char *bind_textdomain_codeset(const char *, const char *);
+
+/* Word handling functions --- requires dynamic linking */
+/* Warning: these are experimental and subject to change. */
+extern int wdinit(void);
+extern int wdchkind(wchar_t);
+extern int wdbindf(wchar_t, wchar_t, int);
+extern wchar_t *wddelim(wchar_t, wchar_t, int);
+extern wchar_t mcfiller(void);
+extern int mcwrap(void);
+
+#else
+extern char *dcgettext();
+extern char *dgettext();
+extern char *gettext();
+extern char *textdomain();
+extern char *bindtextdomain();
+
+/*
+ * LI18NUX 2000 Globalization Specification Version 1.0
+ * with Amendment 2
+ */
+extern char *dcngettext();
+extern char *dngettext();
+extern char *ngettext();
+extern char *bind_textdomain_codeset();
+
+/* Word handling functions --- requires dynamic linking */
+/* Warning: these are experimental and subject to change. */
+extern int wdinit();
+extern int wdchkind();
+extern int wdbindf();
+extern wchar_t *wddelim();
+extern wchar_t mcfiller();
+extern int mcwrap();
+
+#endif
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _LIBINTL_H */
--- /dev/null
+++ cddl/contrib/opensolaris/head/atomic.h
@@ -0,0 +1,34 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_ATOMIC_H
+#define	_ATOMIC_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/atomic.h>
+
+#endif	/* _ATOMIC_H */
--- /dev/null
+++ cddl/contrib/opensolaris/head/assert.h
@@ -0,0 +1,82 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*	Copyright (c) 1988 AT&T	*/
+/*	  All Rights Reserved  	*/
+
+
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_ASSERT_H
+#define	_ASSERT_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 1.6.1.4 */
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#if defined(__STDC__)
+#if __STDC_VERSION__ - 0 >= 199901L
+extern void __assert_c99(const char *, const char *, int, const char *);
+#else
+extern void __assert(const char *, const char *, int);
+#endif /* __STDC_VERSION__ - 0 >= 199901L */
+#else
+extern void _assert();
+#endif
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _ASSERT_H */
+
+/*
+ * Note that the ANSI C Standard requires all headers to be idempotent except
+ * <assert.h> which is explicitly required not to be idempotent (section 4.1.2).
+ * Therefore, it is by intent that the header guards (#ifndef _ASSERT_H) do
+ * not span this entire file.
+ */
+
+#undef	assert
+
+#ifdef	NDEBUG
+
+#define	assert(EX) ((void)0)
+
+#else
+
+#if defined(__STDC__)
+#if __STDC_VERSION__ - 0 >= 199901L
+#define	assert(EX) (void)((EX) || \
+	(__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
+#else
+#define	assert(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
+#endif /* __STDC_VERSION__ - 0 >= 199901L */
+#else
+#define	assert(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
+#endif	/* __STDC__ */
+
+#endif	/* NDEBUG */
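
The non-idempotence note above is standard <assert.h> behaviour; a small
sketch (illustrative only, not part of the commit) of what re-inclusion with
NDEBUG allows:

#include <stdio.h>
#include <assert.h>

static void
checked(int x)
{
	assert(x > 0);		/* active: NDEBUG not defined yet */
}

#define	NDEBUG
#include <assert.h>		/* re-include: assert() is now ((void)0) */

static void
unchecked(int x)
{
	assert(x > 0);		/* compiled out; never aborts */
}

int
main(void)
{
	checked(1);
	unchecked(-1);
	(void) printf("unchecked(-1) did not abort\n");
	return (0);
}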
--- /dev/null
+++ cddl/contrib/opensolaris/head/stdio_ext.h
@@ -0,0 +1,32 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _STDIO_EXT_H
+#define _STDIO_EXT_H
+
+#define enable_extended_FILE_stdio(x,y) (0)
+
+#endif
--- /dev/null
+++ cddl/contrib/opensolaris/head/synch.h
@@ -0,0 +1,264 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYNCH_H
+#define	_SYNCH_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * synch.h:
+ * definitions needed to use the thread synchronization interface
+ */
+
+#ifndef _ASM
+#include <sys/machlock.h>
+#include <sys/time_impl.h>
+#include <sys/synch.h>
+#endif /* _ASM */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _ASM
+
+/*
+ * Semaphores
+ */
+typedef struct _sema {
+	/* this structure must be the same as sem_t in <semaphore.h> */
+	uint32_t	count;		/* semaphore count */
+	uint16_t	type;
+	uint16_t	magic;
+	upad64_t	pad1[3];	/* reserved for a mutex_t */
+	upad64_t 	pad2[2];	/* reserved for a cond_t */
+} sema_t;
+
+/*
+ * POSIX.1c Note:
+ * POSIX.1c requires that <pthread.h> define the structures pthread_mutex_t
+ * and pthread_cond_t.  These structures are identical to mutex_t (lwp_mutex_t)
+ * and cond_t (lwp_cond_t) which are defined in <synch.h>.  A nested include
+ * of <synch.h> (to allow a "#typedef mutex_t  pthread_mutex_t") would pull in
+ * non-posix symbols/constants violating the namespace restrictions.  Hence,
+ * pthread_mutex_t/pthread_cond_t have been redefined in <pthread.h> (actually
+ * in <sys/types.h>).  Any modifications done to mutex_t/lwp_mutex_t or
+ * cond_t/lwp_cond_t should also be done to pthread_mutex_t/pthread_cond_t.
+ */
+typedef lwp_mutex_t mutex_t;
+typedef lwp_cond_t cond_t;
+
+/*
+ * Readers/writer locks
+ *
+ * NOTE: The layout of this structure should be kept in sync with the layout
+ * of the corresponding structure of pthread_rwlock_t in sys/types.h.
+ * Also, there is an identical structure for lwp_rwlock_t in <sys/synch.h>.
+ * Because we have to deal with C++, we cannot redefine this one as that one.
+ */
+typedef struct _rwlock {
+	int32_t		readers;	/* -1 == writer else # of readers */
+	uint16_t	type;
+	uint16_t	magic;
+	mutex_t		mutex;		/* used to indicate ownership */
+	cond_t		readercv;	/* unused */
+	cond_t		writercv;	/* unused */
+} rwlock_t;
+
+#ifdef	__STDC__
+int	_lwp_mutex_lock(lwp_mutex_t *);
+int	_lwp_mutex_unlock(lwp_mutex_t *);
+int	_lwp_mutex_trylock(lwp_mutex_t *);
+int	_lwp_cond_wait(lwp_cond_t *, lwp_mutex_t *);
+int	_lwp_cond_timedwait(lwp_cond_t *, lwp_mutex_t *, timespec_t *);
+int	_lwp_cond_reltimedwait(lwp_cond_t *, lwp_mutex_t *, timespec_t *);
+int	_lwp_cond_signal(lwp_cond_t *);
+int	_lwp_cond_broadcast(lwp_cond_t *);
+int	_lwp_sema_init(lwp_sema_t *, int);
+int	_lwp_sema_wait(lwp_sema_t *);
+int	_lwp_sema_trywait(lwp_sema_t *);
+int	_lwp_sema_post(lwp_sema_t *);
+int	cond_init(cond_t *, int, void *);
+int	cond_destroy(cond_t *);
+int	cond_wait(cond_t *, mutex_t *);
+int	cond_timedwait(cond_t *, mutex_t *, const timespec_t *);
+int	cond_reltimedwait(cond_t *, mutex_t *, const timespec_t *);
+int	cond_signal(cond_t *);
+int	cond_broadcast(cond_t *);
+int	mutex_init(mutex_t *, int, void *);
+int	mutex_destroy(mutex_t *);
+int	mutex_lock(mutex_t *);
+int	mutex_trylock(mutex_t *);
+int	mutex_unlock(mutex_t *);
+int	rwlock_init(rwlock_t *, int, void *);
+int	rwlock_destroy(rwlock_t *);
+int	rw_rdlock(rwlock_t *);
+int	rw_wrlock(rwlock_t *);
+int	rw_unlock(rwlock_t *);
+int	rw_tryrdlock(rwlock_t *);
+int	rw_trywrlock(rwlock_t *);
+int	sema_init(sema_t *, unsigned int, int, void *);
+int	sema_destroy(sema_t *);
+int	sema_wait(sema_t *);
+int	sema_timedwait(sema_t *, const timespec_t *);
+int	sema_reltimedwait(sema_t *, const timespec_t *);
+int	sema_post(sema_t *);
+int	sema_trywait(sema_t *);
+
+#else	/* __STDC__ */
+
+int	_lwp_mutex_lock();
+int	_lwp_mutex_unlock();
+int	_lwp_mutex_trylock();
+int	_lwp_cond_wait();
+int	_lwp_cond_timedwait();
+int	_lwp_cond_reltimedwait();
+int	_lwp_cond_signal();
+int	_lwp_cond_broadcast();
+int	_lwp_sema_init();
+int	_lwp_sema_wait();
+int	_lwp_sema_trywait();
+int	_lwp_sema_post();
+int	cond_init();
+int	cond_destroy();
+int	cond_wait();
+int	cond_timedwait();
+int	cond_reltimedwait();
+int	cond_signal();
+int	cond_broadcast();
+int	mutex_init();
+int	mutex_destroy();
+int	mutex_lock();
+int	mutex_trylock();
+int	mutex_unlock();
+int	rwlock_init();
+int	rwlock_destroy();
+int	rw_rdlock();
+int	rw_wrlock();
+int	rw_unlock();
+int	rw_tryrdlock();
+int	rw_trywrlock();
+int	sema_init();
+int	sema_destroy();
+int	sema_wait();
+int	sema_timedwait();
+int	sema_reltimedwait();
+int	sema_post();
+int	sema_trywait();
+
+#endif	/* __STDC__ */
+
+#endif /* _ASM */
+
+/* "Magic numbers" tagging synchronization object types */
+#define	MUTEX_MAGIC	_MUTEX_MAGIC
+#define	SEMA_MAGIC	_SEMA_MAGIC
+#define	COND_MAGIC	_COND_MAGIC
+#define	RWL_MAGIC	_RWL_MAGIC
+
+/*
+ * POSIX.1c Note:
+ * DEFAULTMUTEX is defined same as PTHREAD_MUTEX_INITIALIZER in <pthread.h>.
+ * DEFAULTCV is defined same as PTHREAD_COND_INITIALIZER in <pthread.h>.
+ * DEFAULTRWLOCK is defined same as PTHREAD_RWLOCK_INITIALIZER in <pthread.h>.
+ * Any changes to these macros should be reflected in <pthread.h>
+ */
+#define	DEFAULTMUTEX	\
+	{{0, 0, 0, {USYNC_THREAD}, MUTEX_MAGIC}, \
+	{{{0, 0, 0, 0, 0, 0, 0, 0}}}, 0}
+#define	SHAREDMUTEX	\
+	{{0, 0, 0, {USYNC_PROCESS}, MUTEX_MAGIC}, \
+	{{{0, 0, 0, 0, 0, 0, 0, 0}}}, 0}
+#define	RECURSIVEMUTEX	\
+	{{0, 0, 0, {USYNC_THREAD|LOCK_RECURSIVE}, MUTEX_MAGIC}, \
+	{{{0, 0, 0, 0, 0, 0, 0, 0}}}, 0}
+#define	ERRORCHECKMUTEX	\
+	{{0, 0, 0, {USYNC_THREAD|LOCK_ERRORCHECK}, MUTEX_MAGIC}, \
+	{{{0, 0, 0, 0, 0, 0, 0, 0}}}, 0}
+#define	RECURSIVE_ERRORCHECKMUTEX	\
+	{{0, 0, 0, {USYNC_THREAD|LOCK_RECURSIVE|LOCK_ERRORCHECK}, \
+	MUTEX_MAGIC}, {{{0, 0, 0, 0, 0, 0, 0, 0}}}, 0}
+#define	DEFAULTCV	\
+	{{{0, 0, 0, 0}, USYNC_THREAD, COND_MAGIC}, 0}
+#define	SHAREDCV	\
+	{{{0, 0, 0, 0}, USYNC_PROCESS, COND_MAGIC}, 0}
+#define	DEFAULTSEMA	\
+	{0, USYNC_THREAD, SEMA_MAGIC, {0, 0, 0}, {0, 0}}
+#define	SHAREDSEMA	\
+	{0, USYNC_PROCESS, SEMA_MAGIC, {0, 0, 0}, {0, 0}}
+#define	DEFAULTRWLOCK	\
+	{0, USYNC_THREAD, RWL_MAGIC, DEFAULTMUTEX, DEFAULTCV, DEFAULTCV}
+#define	SHAREDRWLOCK	\
+	{0, USYNC_PROCESS, RWL_MAGIC, SHAREDMUTEX, SHAREDCV, SHAREDCV}
+
+/*
+ * Tests on lock states.
+ */
+#define	SEMA_HELD(x)		_sema_held(x)
+#define	RW_READ_HELD(x)		_rw_read_held(x)
+#define	RW_WRITE_HELD(x)	_rw_write_held(x)
+#define	RW_LOCK_HELD(x)		(RW_READ_HELD(x) || RW_WRITE_HELD(x))
+#define	MUTEX_HELD(x)		_mutex_held(x)
+
+/*
+ * The following definitions are for assertions which can be checked
+ * statically by tools like lock_lint.  You can also define your own
+ * run-time test for each.  If you don't, we define them to 1 so that
+ * such assertions simply pass.
+ */
+#ifndef NO_LOCKS_HELD
+#define	NO_LOCKS_HELD	1
+#endif
+#ifndef NO_COMPETING_THREADS
+#define	NO_COMPETING_THREADS	1
+#endif
+
+#ifndef _ASM
+
+#ifdef	__STDC__
+
+int _sema_held(sema_t *);
+int _rw_read_held(rwlock_t *);
+int _rw_write_held(rwlock_t *);
+int _mutex_held(mutex_t *);
+
+#else	/* __STDC__ */
+
+int _sema_held();
+int _rw_read_held();
+int _rw_write_held();
+int _mutex_held();
+
+#endif	/* __STDC__ */
+
+#endif /* _ASM */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYNCH_H */
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h
@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBNVPAIR_H
+#define	_LIBNVPAIR_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/nvpair.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+void nvlist_print(FILE *, nvlist_t *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBNVPAIR_H */
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c
@@ -0,0 +1,266 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <unistd.h>
+#include <strings.h>
+#include "libnvpair.h"
+
+/*
+ * libnvpair - A tools library for manipulating <name, value> pairs.
+ *
+ *	This library provides routines for packing and unpacking nv pairs
+ *	for transporting data across process boundaries, between kernel
+ *	and userland, and possibly saving onto disk files.
+ */
+
+static void
+indent(FILE *fp, int depth)
+{
+	while (depth-- > 0)
+		(void) fprintf(fp, "\t");
+}
+
+/*
+ * nvlist_print - Prints the elements of an nvlist
+ */
+static
+void
+nvlist_print_with_indent(FILE *fp, nvlist_t *nvl, int depth)
+{
+	int i;
+	char *name;
+	uint_t nelem;
+	nvpair_t *nvp;
+
+	if (nvl == NULL)
+		return;
+
+	indent(fp, depth);
+	(void) fprintf(fp, "nvlist version: %d\n", NVL_VERSION(nvl));
+
+	nvp = nvlist_next_nvpair(nvl, NULL);
+
+	while (nvp) {
+		data_type_t type = nvpair_type(nvp);
+
+		indent(fp, depth);
+		name = nvpair_name(nvp);
+		(void) fprintf(fp, "\t%s =", name);
+		nelem = 0;
+		switch (type) {
+		case DATA_TYPE_BOOLEAN: {
+			(void) fprintf(fp, " 1");
+			break;
+		}
+		case DATA_TYPE_BOOLEAN_VALUE: {
+			boolean_t val;
+			(void) nvpair_value_boolean_value(nvp, &val);
+			(void) fprintf(fp, " %d", val);
+			break;
+		}
+		case DATA_TYPE_BYTE: {
+			uchar_t val;
+			(void) nvpair_value_byte(nvp, &val);
+			(void) fprintf(fp, " 0x%2.2x", val);
+			break;
+		}
+		case DATA_TYPE_INT8: {
+			int8_t val;
+			(void) nvpair_value_int8(nvp, &val);
+			(void) fprintf(fp, " %d", val);
+			break;
+		}
+		case DATA_TYPE_UINT8: {
+			uint8_t val;
+			(void) nvpair_value_uint8(nvp, &val);
+			(void) fprintf(fp, " 0x%x", val);
+			break;
+		}
+		case DATA_TYPE_INT16: {
+			int16_t val;
+			(void) nvpair_value_int16(nvp, &val);
+			(void) fprintf(fp, " %d", val);
+			break;
+		}
+		case DATA_TYPE_UINT16: {
+			uint16_t val;
+			(void) nvpair_value_uint16(nvp, &val);
+			(void) fprintf(fp, " 0x%x", val);
+			break;
+		}
+		case DATA_TYPE_INT32: {
+			int32_t val;
+			(void) nvpair_value_int32(nvp, &val);
+			(void) fprintf(fp, " %d", val);
+			break;
+		}
+		case DATA_TYPE_UINT32: {
+			uint32_t val;
+			(void) nvpair_value_uint32(nvp, &val);
+			(void) fprintf(fp, " 0x%x", val);
+			break;
+		}
+		case DATA_TYPE_INT64: {
+			int64_t val;
+			(void) nvpair_value_int64(nvp, &val);
+			(void) fprintf(fp, " %lld", (longlong_t)val);
+			break;
+		}
+		case DATA_TYPE_UINT64: {
+			uint64_t val;
+			(void) nvpair_value_uint64(nvp, &val);
+			(void) fprintf(fp, " 0x%llx", (u_longlong_t)val);
+			break;
+		}
+		case DATA_TYPE_STRING: {
+			char *val;
+			(void) nvpair_value_string(nvp, &val);
+			(void) fprintf(fp, " %s", val);
+			break;
+		}
+		case DATA_TYPE_BOOLEAN_ARRAY: {
+			boolean_t *val;
+			(void) nvpair_value_boolean_array(nvp, &val, &nelem);
+			for (i = 0; i < nelem; i++)
+				(void) fprintf(fp, " %d", val[i]);
+			break;
+		}
+		case DATA_TYPE_BYTE_ARRAY: {
+			uchar_t *val;
+			(void) nvpair_value_byte_array(nvp, &val, &nelem);
+			for (i = 0; i < nelem; i++)
+				(void) fprintf(fp, " 0x%2.2x", val[i]);
+			break;
+		}
+		case DATA_TYPE_INT8_ARRAY: {
+			int8_t *val;
+			(void) nvpair_value_int8_array(nvp, &val, &nelem);
+			for (i = 0; i < nelem; i++)
+				(void) fprintf(fp, " %d", val[i]);
+			break;
+		}
+		case DATA_TYPE_UINT8_ARRAY: {
+			uint8_t *val;
+			(void) nvpair_value_uint8_array(nvp, &val, &nelem);
+			for (i = 0; i < nelem; i++)
+				(void) fprintf(fp, " 0x%x", val[i]);
+			break;
+		}
+		case DATA_TYPE_INT16_ARRAY: {
+			int16_t *val;
+			(void) nvpair_value_int16_array(nvp, &val, &nelem);
+			for (i = 0; i < nelem; i++)
+				(void) fprintf(fp, " %d", val[i]);
+			break;
+		}
+		case DATA_TYPE_UINT16_ARRAY: {
+			uint16_t *val;
+			(void) nvpair_value_uint16_array(nvp, &val, &nelem);
+			for (i = 0; i < nelem; i++)
+				(void) fprintf(fp, " 0x%x", val[i]);
+			break;
+		}
+		case DATA_TYPE_INT32_ARRAY: {
+			int32_t *val;
+			(void) nvpair_value_int32_array(nvp, &val, &nelem);
+			for (i = 0; i < nelem; i++)
+				(void) fprintf(fp, " %d", val[i]);
+			break;
+		}
+		case DATA_TYPE_UINT32_ARRAY: {
+			uint32_t *val;
+			(void) nvpair_value_uint32_array(nvp, &val, &nelem);
+			for (i = 0; i < nelem; i++)
+				(void) fprintf(fp, " 0x%x", val[i]);
+			break;
+		}
+		case DATA_TYPE_INT64_ARRAY: {
+			int64_t *val;
+			(void) nvpair_value_int64_array(nvp, &val, &nelem);
+			for (i = 0; i < nelem; i++)
+				(void) fprintf(fp, " %lld", (longlong_t)val[i]);
+			break;
+		}
+		case DATA_TYPE_UINT64_ARRAY: {
+			uint64_t *val;
+			(void) nvpair_value_uint64_array(nvp, &val, &nelem);
+			for (i = 0; i < nelem; i++)
+				(void) fprintf(fp, " 0x%llx",
+				    (u_longlong_t)val[i]);
+			break;
+		}
+		case DATA_TYPE_STRING_ARRAY: {
+			char **val;
+			(void) nvpair_value_string_array(nvp, &val, &nelem);
+			for (i = 0; i < nelem; i++)
+				(void) fprintf(fp, " %s", val[i]);
+			break;
+		}
+		case DATA_TYPE_HRTIME: {
+			hrtime_t val;
+			(void) nvpair_value_hrtime(nvp, &val);
+			(void) fprintf(fp, " 0x%llx", val);
+			break;
+		}
+		case DATA_TYPE_NVLIST: {
+			nvlist_t *val;
+			(void) nvpair_value_nvlist(nvp, &val);
+			(void) fprintf(fp, " (embedded nvlist)\n");
+			nvlist_print_with_indent(fp, val, depth + 1);
+			indent(fp, depth + 1);
+			(void) fprintf(fp, "(end %s)\n", name);
+			break;
+		}
+		case DATA_TYPE_NVLIST_ARRAY: {
+			nvlist_t **val;
+			(void) nvpair_value_nvlist_array(nvp, &val, &nelem);
+			(void) fprintf(fp, " (array of embedded nvlists)\n");
+			for (i = 0; i < nelem; i++) {
+				indent(fp, depth + 1);
+				(void) fprintf(fp,
+				    "(start %s[%d])\n", name, i);
+				nvlist_print_with_indent(fp, val[i], depth + 1);
+				indent(fp, depth + 1);
+				(void) fprintf(fp, "(end %s[%d])\n", name, i);
+			}
+			break;
+		}
+		default:
+			(void) fprintf(fp, " unknown data type (%d)", type);
+			break;
+		}
+		(void) fprintf(fp, "\n");
+		nvp = nvlist_next_nvpair(nvl, nvp);
+	}
+}
+
+void
+nvlist_print(FILE *fp, nvlist_t *nvl)
+{
+	nvlist_print_with_indent(fp, nvl, 0);
+}
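
For context, a minimal sketch (illustrative only, not part of the commit) of
building an nvlist and dumping it with nvlist_print():

#include <libnvpair.h>

int
main(void)
{
	nvlist_t *nvl;

	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
		return (1);
	(void) nvlist_add_string(nvl, "pool", "tank");	/* sample pairs */
	(void) nvlist_add_uint64(nvl, "guid", 0x1234ULL);
	nvlist_print(stdout, nvl);			/* indented dump */
	nvlist_free(nvl);
	return (0);
}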
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libnvpair/nvpair_alloc_system.c
@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/nvpair.h>
+#include <stdlib.h>
+
+/*ARGSUSED*/
+static void *
+nv_alloc_sys(nv_alloc_t *nva, size_t size)
+{
+	return (malloc(size));
+}
+
+/*ARGSUSED*/
+static void
+nv_free_sys(nv_alloc_t *nva, void *buf, size_t size)
+{
+	free(buf);
+}
+
+const nv_alloc_ops_t system_ops_def = {
+	NULL,			/* nv_ao_init() */
+	NULL,			/* nv_ao_fini() */
+	nv_alloc_sys,		/* nv_ao_alloc() */
+	nv_free_sys,		/* nv_ao_free() */
+	NULL			/* nv_ao_reset() */
+};
+
+nv_alloc_t nv_alloc_nosleep_def = {
+	&system_ops_def,
+	NULL
+};
+
+nv_alloc_t *nv_alloc_nosleep = &nv_alloc_nosleep_def;
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libuutil/common/uu_pname.c
@@ -0,0 +1,205 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <libintl.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <errno.h>
+#include <wchar.h>
+#include <unistd.h>
+
+static const char PNAME_FMT[] = "%s: ";
+static const char ERRNO_FMT[] = ": %s\n";
+
+static const char *pname;
+
+static void
+uu_die_internal(int status, const char *format, va_list alist) __NORETURN;
+
+int uu_exit_ok_value = EXIT_SUCCESS;
+int uu_exit_fatal_value = EXIT_FAILURE;
+int uu_exit_usage_value = 2;
+
+int *
+uu_exit_ok(void)
+{
+	return (&uu_exit_ok_value);
+}
+
+int *
+uu_exit_fatal(void)
+{
+	return (&uu_exit_fatal_value);
+}
+
+int *
+uu_exit_usage(void)
+{
+	return (&uu_exit_usage_value);
+}
+
+void
+uu_alt_exit(int profile)
+{
+	switch (profile) {
+	case UU_PROFILE_DEFAULT:
+		uu_exit_ok_value = EXIT_SUCCESS;
+		uu_exit_fatal_value = EXIT_FAILURE;
+		uu_exit_usage_value = 2;
+		break;
+	case UU_PROFILE_LAUNCHER:
+		uu_exit_ok_value = EXIT_SUCCESS;
+		uu_exit_fatal_value = 124;
+		uu_exit_usage_value = 125;
+		break;
+	}
+}
+
+static void
+uu_warn_internal(int err, const char *format, va_list alist)
+{
+	if (pname != NULL)
+		(void) fprintf(stderr, PNAME_FMT, pname);
+
+	(void) vfprintf(stderr, format, alist);
+
+	if (strrchr(format, '\n') == NULL)
+		(void) fprintf(stderr, ERRNO_FMT, strerror(err));
+}
+
+void
+uu_vwarn(const char *format, va_list alist)
+{
+	uu_warn_internal(errno, format, alist);
+}
+
+/*PRINTFLIKE1*/
+void
+uu_warn(const char *format, ...)
+{
+	va_list alist;
+	va_start(alist, format);
+	uu_warn_internal(errno, format, alist);
+	va_end(alist);
+}
+
+static void
+uu_die_internal(int status, const char *format, va_list alist)
+{
+	uu_warn_internal(errno, format, alist);
+#ifdef DEBUG
+	{
+		char *cp;
+
+		if (!issetugid()) {
+			cp = getenv("UU_DIE_ABORTS");
+			if (cp != NULL && *cp != '\0')
+				abort();
+		}
+	}
+#endif
+	exit(status);
+}
+
+void
+uu_vdie(const char *format, va_list alist)
+{
+	uu_die_internal(UU_EXIT_FATAL, format, alist);
+}
+
+/*PRINTFLIKE1*/
+void
+uu_die(const char *format, ...)
+{
+	va_list alist;
+	va_start(alist, format);
+	uu_die_internal(UU_EXIT_FATAL, format, alist);
+	va_end(alist);
+}
+
+void
+uu_vxdie(int status, const char *format, va_list alist)
+{
+	uu_die_internal(status, format, alist);
+}
+
+/*PRINTFLIKE2*/
+void
+uu_xdie(int status, const char *format, ...)
+{
+	va_list alist;
+	va_start(alist, format);
+	uu_die_internal(status, format, alist);
+	va_end(alist);
+}
+
+const char *
+uu_setpname(char *arg0)
+{
+	/*
+	 * Having a NULL argv[0], while uncommon, is possible.  It
+	 * makes more sense to handle this event in uu_setpname rather
+	 * than in each of its consumers.
+	 */
+	if (arg0 == NULL) {
+		pname = "unknown_command";
+		return (pname);
+	}
+
+	/*
+	 * Guard against '/' at end of command invocation.
+	 */
+	for (;;) {
+		char *p = strrchr(arg0, '/');
+		if (p == NULL) {
+			pname = arg0;
+			break;
+		} else {
+			if (*(p + 1) == '\0') {
+				*p = '\0';
+				continue;
+			}
+
+			pname = p + 1;
+			break;
+		}
+	}
+
+	return (pname);
+}
+
+const char *
+uu_getpname(void)
+{
+	return (pname);
+}
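
A hypothetical sketch (not part of the commit) of the uu_setpname()/uu_warn()/
uu_die() flow; note that uu_warn() appends strerror(errno) when the format has
no trailing newline:

#include <libuutil.h>
#include <stdio.h>

int
main(int argc, char **argv)
{
	FILE *fp;

	(void) uu_setpname(argv[0]);		/* prefix for warnings */

	if (argc < 2)
		uu_die("usage: %s <file>\n", uu_getpname());

	if ((fp = fopen(argv[1], "r")) == NULL)
		uu_warn("cannot open %s", argv[1]);	/* adds strerror(errno) */
	else
		(void) fclose(fp);

	return (*uu_exit_ok());
}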
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libuutil/common/uu_open.c
@@ -0,0 +1,70 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <sys/time.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#ifdef _LP64
+#define	TMPPATHFMT	"%s/uu%ld"
+#else /* _LP64 */
+#define	TMPPATHFMT	"%s/uu%lld"
+#endif /* _LP64 */
+
+/*ARGSUSED*/
+int
+uu_open_tmp(const char *dir, uint_t uflags)
+{
+	int f;
+	char *fname = uu_zalloc(PATH_MAX);
+
+	if (fname == NULL)
+		return (-1);
+
+	for (;;) {
+		(void) snprintf(fname, PATH_MAX, "%s/uu%lld", dir, gethrtime());
+
+		f = open(fname, O_CREAT | O_EXCL | O_RDWR, 0600);
+
+		if (f >= 0 || errno != EEXIST)
+			break;
+	}
+
+	if (f >= 0)
+		(void) unlink(fname);
+
+	uu_free(fname);
+
+	return (f);
+}
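
Usage sketch (illustrative only, not part of the commit): uu_open_tmp()
returns a descriptor whose name has already been unlinked, so the file
vanishes once the descriptor is closed:

#include <libuutil.h>
#include <unistd.h>

int
main(void)
{
	int fd = uu_open_tmp("/tmp", 0);

	if (fd == -1)
		return (1);
	(void) write(fd, "scratch data\n", 13);
	(void) close(fd);	/* anonymous temp file is reclaimed here */
	return (0);
}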
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libuutil/common/uu_dprintf.c
@@ -0,0 +1,128 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <errno.h>
+#include <libintl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#define	FACILITY_FMT	"%s (%s): "
+
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+static const char *
+strseverity(uu_dprintf_severity_t severity)
+{
+	switch (severity) {
+	case UU_DPRINTF_SILENT:
+		return (dgettext(TEXT_DOMAIN, "silent"));
+	case UU_DPRINTF_FATAL:
+		return (dgettext(TEXT_DOMAIN, "FATAL"));
+	case UU_DPRINTF_WARNING:
+		return (dgettext(TEXT_DOMAIN, "WARNING"));
+	case UU_DPRINTF_NOTICE:
+		return (dgettext(TEXT_DOMAIN, "note"));
+	case UU_DPRINTF_INFO:
+		return (dgettext(TEXT_DOMAIN, "info"));
+	case UU_DPRINTF_DEBUG:
+		return (dgettext(TEXT_DOMAIN, "debug"));
+	default:
+		return (dgettext(TEXT_DOMAIN, "unspecified"));
+	}
+}
+
+uu_dprintf_t *
+uu_dprintf_create(const char *name, uu_dprintf_severity_t severity,
+    uint_t flags)
+{
+	uu_dprintf_t *D;
+
+	if (uu_check_name(name, UU_NAME_DOMAIN) == -1) {
+		uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+		return (NULL);
+	}
+
+	if ((D = uu_zalloc(sizeof (uu_dprintf_t))) == NULL)
+		return (NULL);
+
+	if (name != NULL) {
+		D->uud_name = strdup(name);
+		if (D->uud_name == NULL) {
+			uu_free(D);
+			return (NULL);
+		}
+	} else {
+		D->uud_name = NULL;
+	}
+
+	D->uud_severity = severity;
+	D->uud_flags = flags;
+
+	return (D);
+}
+
+/*PRINTFLIKE3*/
+void
+uu_dprintf(uu_dprintf_t *D, uu_dprintf_severity_t severity,
+    const char *format, ...)
+{
+	va_list alist;
+
+	/* XXX Assert that severity is not UU_DPRINTF_SILENT. */
+
+	if (severity > D->uud_severity)
+		return;
+
+	(void) fprintf(stderr, FACILITY_FMT, D->uud_name,
+	    strseverity(severity));
+
+	va_start(alist, format);
+	(void) vfprintf(stderr, format, alist);
+	va_end(alist);
+}
+
+void
+uu_dprintf_destroy(uu_dprintf_t *D)
+{
+	if (D->uud_name)
+		free(D->uud_name);
+
+	uu_free(D);
+}
+
+const char *
+uu_dprintf_getname(uu_dprintf_t *D)
+{
+	return (D->uud_name);
+}
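
A hypothetical sketch (not part of the commit): messages at or below the
handle's severity are written to stderr with the "name (severity): " prefix,
the rest are dropped:

#include <libuutil.h>

int
main(void)
{
	uu_dprintf_t *log = uu_dprintf_create("ztest", UU_DPRINTF_WARNING, 0);

	if (log == NULL)
		return (1);
	uu_dprintf(log, UU_DPRINTF_FATAL, "printed (%d)\n", 1);
	uu_dprintf(log, UU_DPRINTF_DEBUG, "filtered out\n");
	uu_dprintf_destroy(log);
	return (0);
}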
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libuutil/common/libuutil_common.h
@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBUUTIL_COMMON_H
+#define	_LIBUUTIL_COMMON_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <solaris.h>
+
+/*
+ * We don't bind to the internal libc interfaces if this is a
+ * native build.
+ */
+#ifndef NATIVE_BUILD
+#include "c_synonyms.h"
+#endif
+
+#include <libuutil.h>
+#include <libuutil_impl.h>
+
+#endif	/* _LIBUUTIL_COMMON_H */
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c
@@ -0,0 +1,85 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+void *
+uu_zalloc(size_t n)
+{
+	void *p = malloc(n);
+
+	if (p == NULL) {
+		uu_set_error(UU_ERROR_SYSTEM);
+		return (NULL);
+	}
+
+	(void) memset(p, 0, n);
+
+	return (p);
+}
+
+void
+uu_free(void *p)
+{
+	free(p);
+}
+
+char *
+uu_msprintf(const char *format, ...)
+{
+	va_list args;
+	char attic[1];
+	uint_t M, m;
+	char *b;
+
+	va_start(args, format);
+	M = vsnprintf(attic, 1, format, args);
+	va_end(args);
+
+	for (;;) {
+		m = M;
+		if ((b = uu_zalloc(m + 1)) == NULL)
+			return (NULL);
+
+		va_start(args, format);
+		M = vsnprintf(b, m + 1, format, args);
+		va_end(args);
+
+		if (M == m)
+			break;		/* sizes match */
+
+		uu_free(b);
+	}
+
+	return (b);
+}
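
uu_msprintf() above sizes the result with a first vsnprintf() pass and retries
until the formatted length stops changing; a usage sketch (illustrative only,
not part of the commit):

#include <libuutil.h>
#include <stdio.h>

int
main(void)
{
	char *msg = uu_msprintf("pool %s has %d faulted vdevs", "tank", 2);

	if (msg == NULL)
		return (1);
	(void) printf("%s\n", msg);
	uu_free(msg);		/* uu_msprintf() memory comes from uu_zalloc() */
	return (0);
}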
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c
@@ -0,0 +1,250 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <libintl.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/debug.h>
+#include <thread.h>
+#include <unistd.h>
+
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * All of the old code under !defined(PTHREAD_ONCE_KEY_NP)
+ * is here to enable the building of a native version of
+ * libuutil.so when the build machine has not yet been upgraded
+ * to a version of libc that provides pthread_key_create_once_np().
+ * It should all be deleted when solaris_nevada ships.
+ * The code is not MT-safe in a relaxed memory model.
+ */
+
+#if defined(PTHREAD_ONCE_KEY_NP)
+static pthread_key_t	uu_error_key = PTHREAD_ONCE_KEY_NP;
+#else	/* PTHREAD_ONCE_KEY_NP */
+static pthread_key_t	uu_error_key = 0;
+static pthread_mutex_t	uu_key_lock = PTHREAD_MUTEX_INITIALIZER;
+#endif	/* PTHREAD_ONCE_KEY_NP */
+
+static int		uu_error_key_setup = 0;
+
+static pthread_mutex_t	uu_panic_lock = PTHREAD_MUTEX_INITIALIZER;
+/* LINTED static unused */
+static const char	*uu_panic_format;
+/* LINTED static unused */
+static va_list		uu_panic_args;
+static pthread_t	uu_panic_thread;
+
+static uint32_t		_uu_main_error;
+
+void
+uu_set_error(uint_t code)
+{
+
+#if defined(PTHREAD_ONCE_KEY_NP)
+	if (pthread_key_create_once_np(&uu_error_key, NULL) != 0)
+		uu_error_key_setup = -1;
+	else
+		uu_error_key_setup = 1;
+#else	/* PTHREAD_ONCE_KEY_NP */
+	if (uu_error_key_setup == 0) {
+		(void) pthread_mutex_lock(&uu_key_lock);
+		if (uu_error_key_setup == 0) {
+			if (pthread_key_create(&uu_error_key, NULL) != 0)
+				uu_error_key_setup = -1;
+			else
+				uu_error_key_setup = 1;
+		}
+		(void) pthread_mutex_unlock(&uu_key_lock);
+	}
+#endif	/* PTHREAD_ONCE_KEY_NP */
+	if (uu_error_key_setup > 0)
+		(void) pthread_setspecific(uu_error_key,
+		    (void *)(uintptr_t)code);
+}
+
+uint32_t
+uu_error(void)
+{
+
+	if (uu_error_key_setup < 0)	/* can't happen? */
+		return (UU_ERROR_UNKNOWN);
+
+	/*
+	 * Because UU_ERROR_NONE == 0, if uu_set_error() was
+	 * never called, then this will return UU_ERROR_NONE:
+	 */
+	return ((uint32_t)(uintptr_t)pthread_getspecific(uu_error_key));
+}
+
+const char *
+uu_strerror(uint32_t code)
+{
+	const char *str;
+
+	switch (code) {
+	case UU_ERROR_NONE:
+		str = dgettext(TEXT_DOMAIN, "No error");
+		break;
+
+	case UU_ERROR_INVALID_ARGUMENT:
+		str = dgettext(TEXT_DOMAIN, "Invalid argument");
+		break;
+
+	case UU_ERROR_UNKNOWN_FLAG:
+		str = dgettext(TEXT_DOMAIN, "Unknown flag passed");
+		break;
+
+	case UU_ERROR_NO_MEMORY:
+		str = dgettext(TEXT_DOMAIN, "Out of memory");
+		break;
+
+	case UU_ERROR_CALLBACK_FAILED:
+		str = dgettext(TEXT_DOMAIN, "Callback-initiated failure");
+		break;
+
+	case UU_ERROR_NOT_SUPPORTED:
+		str = dgettext(TEXT_DOMAIN, "Operation not supported");
+		break;
+
+	case UU_ERROR_EMPTY:
+		str = dgettext(TEXT_DOMAIN, "No value provided");
+		break;
+
+	case UU_ERROR_UNDERFLOW:
+		str = dgettext(TEXT_DOMAIN, "Value too small");
+		break;
+
+	case UU_ERROR_OVERFLOW:
+		str = dgettext(TEXT_DOMAIN, "Value too large");
+		break;
+
+	case UU_ERROR_INVALID_CHAR:
+		str = dgettext(TEXT_DOMAIN,
+		    "Value contains unexpected character");
+		break;
+
+	case UU_ERROR_INVALID_DIGIT:
+		str = dgettext(TEXT_DOMAIN,
+		    "Value contains digit not in base");
+		break;
+
+	case UU_ERROR_SYSTEM:
+		str = dgettext(TEXT_DOMAIN, "Underlying system error");
+		break;
+
+	case UU_ERROR_UNKNOWN:
+		str = dgettext(TEXT_DOMAIN, "Error status not known");
+		break;
+
+	default:
+		errno = ESRCH;
+		str = NULL;
+		break;
+	}
+	return (str);
+}
+
+void
+uu_panic(const char *format, ...)
+{
+	va_list args;
+
+	va_start(args, format);
+
+	(void) pthread_mutex_lock(&uu_panic_lock);
+	if (uu_panic_thread == 0) {
+		uu_panic_thread = pthread_self();
+		uu_panic_format = format;
+		va_copy(uu_panic_args, args);
+	}
+	(void) pthread_mutex_unlock(&uu_panic_lock);
+
+	(void) vfprintf(stderr, format, args);
+
+	if (uu_panic_thread == pthread_self())
+		abort();
+	else
+		for (;;)
+			(void) pause();
+}
+
+int
+assfail(const char *astring, const char *file, int line)
+{
+	__assert(astring, file, line);
+	/*NOTREACHED*/
+	return (0);
+}
+
+static void
+uu_lockup(void)
+{
+	(void) pthread_mutex_lock(&uu_panic_lock);
+#if !defined(PTHREAD_ONCE_KEY_NP)
+	(void) pthread_mutex_lock(&uu_key_lock);
+#endif
+	uu_avl_lockup();
+	uu_list_lockup();
+}
+
+static void
+uu_release(void)
+{
+	(void) pthread_mutex_unlock(&uu_panic_lock);
+#if !defined(PTHREAD_ONCE_KEY_NP)
+	(void) pthread_mutex_unlock(&uu_key_lock);
+#endif
+	uu_avl_release();
+	uu_list_release();
+}
+
+static void
+uu_release_child(void)
+{
+	uu_panic_format = NULL;
+	uu_panic_thread = 0;
+
+	uu_release();
+}
+
+#pragma init(uu_init)
+static void
+uu_init(void)
+{
+	(void) pthread_atfork(uu_lockup, uu_release, uu_release_child);
+}
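
The code above keeps the most recent libuutil error in per-thread storage, so callers are expected to read it back with uu_error() and uu_strerror() right after a failing call. A minimal caller-side sketch, assuming a hypothetical program linked against this library (the pool name, object size, and flag value below are illustrative only):

#include <stdio.h>
#include <libuutil.h>

int
main(void)
{
	uu_list_pool_t *pool;

	/*
	 * 0x80 is not a defined pool flag, so uu_list_pool_create() fails
	 * and records UU_ERROR_UNKNOWN_FLAG for this thread.
	 */
	pool = uu_list_pool_create("com.example,demo", 64, 0, NULL, 0x80);
	if (pool == NULL) {
		(void) fprintf(stderr, "pool create failed: %s\n",
		    uu_strerror(uu_error()));
		return (1);
	}
	uu_list_pool_destroy(pool);
	return (0);
}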
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libuutil/common/uu_avl.c
@@ -0,0 +1,567 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/avl.h>
+
+static uu_avl_pool_t	uu_null_apool = { &uu_null_apool, &uu_null_apool };
+static pthread_mutex_t	uu_apool_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * The index marks change on every insert and delete, to catch stale
+ * references.
+ *
+ * We leave the low bit alone, since the avl code uses it.
+ */
+#define	INDEX_MAX		(sizeof (uintptr_t) - 2)
+#define	INDEX_NEXT(m)		(((m) == INDEX_MAX)? 2 : ((m) + 2) & INDEX_MAX)
+
+#define	INDEX_DECODE(i)		((i) & ~INDEX_MAX)
+#define	INDEX_ENCODE(p, n)	(((n) & ~INDEX_MAX) | (p)->ua_index)
+#define	INDEX_VALID(p, i)	(((i) & INDEX_MAX) == (p)->ua_index)
+#define	INDEX_CHECK(i)		(((i) & INDEX_MAX) != 0)
+
+/*
+ * When an element is inactive (not in a tree), we keep a marked pointer to
+ * its containing pool in its first word, and a NULL pointer in its second.
+ *
+ * On insert, we use these to verify that it comes from the correct pool.
+ */
+#define	NODE_ARRAY(p, n)	((uintptr_t *)((uintptr_t)(n) + \
+				    (p)->uap_nodeoffset))
+
+#define	POOL_TO_MARKER(pp) (((uintptr_t)(pp) | 1))
+
+#define	DEAD_MARKER		0xc4
+
+uu_avl_pool_t *
+uu_avl_pool_create(const char *name, size_t objsize, size_t nodeoffset,
+    uu_compare_fn_t *compare_func, uint32_t flags)
+{
+	uu_avl_pool_t *pp, *next, *prev;
+
+	if (name == NULL ||
+	    uu_check_name(name, UU_NAME_DOMAIN) == -1 ||
+	    nodeoffset + sizeof (uu_avl_node_t) > objsize ||
+	    compare_func == NULL) {
+		uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+		return (NULL);
+	}
+
+	if (flags & ~UU_AVL_POOL_DEBUG) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (NULL);
+	}
+
+	pp = uu_zalloc(sizeof (uu_avl_pool_t));
+	if (pp == NULL) {
+		uu_set_error(UU_ERROR_NO_MEMORY);
+		return (NULL);
+	}
+
+	(void) strlcpy(pp->uap_name, name, sizeof (pp->uap_name));
+	pp->uap_nodeoffset = nodeoffset;
+	pp->uap_objsize = objsize;
+	pp->uap_cmp = compare_func;
+	if (flags & UU_AVL_POOL_DEBUG)
+		pp->uap_debug = 1;
+	pp->uap_last_index = 0;
+
+	(void) pthread_mutex_init(&pp->uap_lock, NULL);
+
+	pp->uap_null_avl.ua_next_enc = UU_PTR_ENCODE(&pp->uap_null_avl);
+	pp->uap_null_avl.ua_prev_enc = UU_PTR_ENCODE(&pp->uap_null_avl);
+
+	(void) pthread_mutex_lock(&uu_apool_list_lock);
+	pp->uap_next = next = &uu_null_apool;
+	pp->uap_prev = prev = next->uap_prev;
+	next->uap_prev = pp;
+	prev->uap_next = pp;
+	(void) pthread_mutex_unlock(&uu_apool_list_lock);
+
+	return (pp);
+}
+
+void
+uu_avl_pool_destroy(uu_avl_pool_t *pp)
+{
+	if (pp->uap_debug) {
+		if (pp->uap_null_avl.ua_next_enc !=
+		    UU_PTR_ENCODE(&pp->uap_null_avl) ||
+		    pp->uap_null_avl.ua_prev_enc !=
+		    UU_PTR_ENCODE(&pp->uap_null_avl)) {
+			uu_panic("uu_avl_pool_destroy: Pool \"%.*s\" (%p) has "
+			    "outstanding avls, or is corrupt.\n",
+			    sizeof (pp->uap_name), pp->uap_name, pp);
+		}
+	}
+	(void) pthread_mutex_lock(&uu_apool_list_lock);
+	pp->uap_next->uap_prev = pp->uap_prev;
+	pp->uap_prev->uap_next = pp->uap_next;
+	(void) pthread_mutex_unlock(&uu_apool_list_lock);
+	pp->uap_prev = NULL;
+	pp->uap_next = NULL;
+	uu_free(pp);
+}
+
+void
+uu_avl_node_init(void *base, uu_avl_node_t *np, uu_avl_pool_t *pp)
+{
+	uintptr_t *na = (uintptr_t *)np;
+
+	if (pp->uap_debug) {
+		uintptr_t offset = (uintptr_t)np - (uintptr_t)base;
+		if (offset + sizeof (*np) > pp->uap_objsize) {
+			uu_panic("uu_avl_node_init(%p, %p, %p (\"%s\")): "
+			    "offset %ld doesn't fit in object (size %ld)\n",
+			    base, np, pp, pp->uap_name, offset,
+			    pp->uap_objsize);
+		}
+		if (offset != pp->uap_nodeoffset) {
+			uu_panic("uu_avl_node_init(%p, %p, %p (\"%s\")): "
+			    "offset %ld doesn't match pool's offset (%ld)\n",
+			    base, np, pp, pp->uap_name, offset,
+			    pp->uap_nodeoffset);
+		}
+	}
+
+	na[0] = POOL_TO_MARKER(pp);
+	na[1] = 0;
+}
+
+void
+uu_avl_node_fini(void *base, uu_avl_node_t *np, uu_avl_pool_t *pp)
+{
+	uintptr_t *na = (uintptr_t *)np;
+
+	if (pp->uap_debug) {
+		if (na[0] == DEAD_MARKER && na[1] == DEAD_MARKER) {
+			uu_panic("uu_avl_node_fini(%p, %p, %p (\"%s\")): "
+			    "node already fini'd\n",
+			    base, np, pp, pp->uap_name);
+		}
+		if (na[0] != POOL_TO_MARKER(pp) || na[1] != 0) {
+			uu_panic("uu_avl_node_fini(%p, %p, %p (\"%s\")): "
+			    "node corrupt, in tree, or in different pool\n",
+			    base, np, pp, pp->uap_name);
+		}
+	}
+
+	na[0] = DEAD_MARKER;
+	na[1] = DEAD_MARKER;
+	na[2] = DEAD_MARKER;
+}
+
+struct uu_avl_node_compare_info {
+	uu_compare_fn_t	*ac_compare;
+	void		*ac_private;
+	void		*ac_right;
+	void		*ac_found;
+};
+
+static int
+uu_avl_node_compare(const void *l, const void *r)
+{
+	struct uu_avl_node_compare_info *info =
+	    (struct uu_avl_node_compare_info *)l;
+
+	int res = info->ac_compare(r, info->ac_right, info->ac_private);
+
+	if (res == 0) {
+		if (info->ac_found == NULL)
+			info->ac_found = (void *)r;
+		return (-1);
+	}
+	if (res < 0)
+		return (1);
+	return (-1);
+}
+
+uu_avl_t *
+uu_avl_create(uu_avl_pool_t *pp, void *parent, uint32_t flags)
+{
+	uu_avl_t *ap, *next, *prev;
+
+	if (flags & ~UU_AVL_DEBUG) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (NULL);
+	}
+
+	ap = uu_zalloc(sizeof (*ap));
+	if (ap == NULL) {
+		uu_set_error(UU_ERROR_NO_MEMORY);
+		return (NULL);
+	}
+
+	ap->ua_pool = pp;
+	ap->ua_parent_enc = UU_PTR_ENCODE(parent);
+	ap->ua_debug = pp->uap_debug || (flags & UU_AVL_DEBUG);
+	ap->ua_index = (pp->uap_last_index = INDEX_NEXT(pp->uap_last_index));
+
+	avl_create(&ap->ua_tree, &uu_avl_node_compare, pp->uap_objsize,
+	    pp->uap_nodeoffset);
+
+	ap->ua_null_walk.uaw_next = &ap->ua_null_walk;
+	ap->ua_null_walk.uaw_prev = &ap->ua_null_walk;
+
+	(void) pthread_mutex_lock(&pp->uap_lock);
+	next = &pp->uap_null_avl;
+	prev = UU_PTR_DECODE(next->ua_prev_enc);
+	ap->ua_next_enc = UU_PTR_ENCODE(next);
+	ap->ua_prev_enc = UU_PTR_ENCODE(prev);
+	next->ua_prev_enc = UU_PTR_ENCODE(ap);
+	prev->ua_next_enc = UU_PTR_ENCODE(ap);
+	(void) pthread_mutex_unlock(&pp->uap_lock);
+
+	return (ap);
+}
+
+void
+uu_avl_destroy(uu_avl_t *ap)
+{
+	uu_avl_pool_t *pp = ap->ua_pool;
+
+	if (ap->ua_debug) {
+		if (avl_numnodes(&ap->ua_tree) != 0) {
+			uu_panic("uu_avl_destroy(%p): tree not empty\n", ap);
+		}
+		if (ap->ua_null_walk.uaw_next != &ap->ua_null_walk ||
+		    ap->ua_null_walk.uaw_prev != &ap->ua_null_walk) {
+			uu_panic("uu_avl_destroy(%p):  outstanding walkers\n",
+			    ap);
+		}
+	}
+	(void) pthread_mutex_lock(&pp->uap_lock);
+	UU_AVL_PTR(ap->ua_next_enc)->ua_prev_enc = ap->ua_prev_enc;
+	UU_AVL_PTR(ap->ua_prev_enc)->ua_next_enc = ap->ua_next_enc;
+	(void) pthread_mutex_unlock(&pp->uap_lock);
+	ap->ua_prev_enc = UU_PTR_ENCODE(NULL);
+	ap->ua_next_enc = UU_PTR_ENCODE(NULL);
+
+	ap->ua_pool = NULL;
+	avl_destroy(&ap->ua_tree);
+
+	uu_free(ap);
+}
+
+size_t
+uu_avl_numnodes(uu_avl_t *ap)
+{
+	return (avl_numnodes(&ap->ua_tree));
+}
+
+void *
+uu_avl_first(uu_avl_t *ap)
+{
+	return (avl_first(&ap->ua_tree));
+}
+
+void *
+uu_avl_last(uu_avl_t *ap)
+{
+	return (avl_last(&ap->ua_tree));
+}
+
+void *
+uu_avl_next(uu_avl_t *ap, void *node)
+{
+	return (AVL_NEXT(&ap->ua_tree, node));
+}
+
+void *
+uu_avl_prev(uu_avl_t *ap, void *node)
+{
+	return (AVL_PREV(&ap->ua_tree, node));
+}
+
+static void
+_avl_walk_init(uu_avl_walk_t *wp, uu_avl_t *ap, uint32_t flags)
+{
+	uu_avl_walk_t *next, *prev;
+
+	int robust = (flags & UU_WALK_ROBUST);
+	int direction = (flags & UU_WALK_REVERSE)? -1 : 1;
+
+	(void) memset(wp, 0, sizeof (*wp));
+	wp->uaw_avl = ap;
+	wp->uaw_robust = robust;
+	wp->uaw_dir = direction;
+
+	if (direction > 0)
+		wp->uaw_next_result = avl_first(&ap->ua_tree);
+	else
+		wp->uaw_next_result = avl_last(&ap->ua_tree);
+
+	if (ap->ua_debug || robust) {
+		wp->uaw_next = next = &ap->ua_null_walk;
+		wp->uaw_prev = prev = next->uaw_prev;
+		next->uaw_prev = wp;
+		prev->uaw_next = wp;
+	}
+}
+
+static void *
+_avl_walk_advance(uu_avl_walk_t *wp, uu_avl_t *ap)
+{
+	void *np = wp->uaw_next_result;
+
+	avl_tree_t *t = &ap->ua_tree;
+
+	if (np == NULL)
+		return (NULL);
+
+	wp->uaw_next_result = (wp->uaw_dir > 0)? AVL_NEXT(t, np) :
+	    AVL_PREV(t, np);
+
+	return (np);
+}
+
+static void
+_avl_walk_fini(uu_avl_walk_t *wp)
+{
+	if (wp->uaw_next != NULL) {
+		wp->uaw_next->uaw_prev = wp->uaw_prev;
+		wp->uaw_prev->uaw_next = wp->uaw_next;
+		wp->uaw_next = NULL;
+		wp->uaw_prev = NULL;
+	}
+	wp->uaw_avl = NULL;
+	wp->uaw_next_result = NULL;
+}
+
+uu_avl_walk_t *
+uu_avl_walk_start(uu_avl_t *ap, uint32_t flags)
+{
+	uu_avl_walk_t *wp;
+
+	if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (NULL);
+	}
+
+	wp = uu_zalloc(sizeof (*wp));
+	if (wp == NULL) {
+		uu_set_error(UU_ERROR_NO_MEMORY);
+		return (NULL);
+	}
+
+	_avl_walk_init(wp, ap, flags);
+	return (wp);
+}
+
+void *
+uu_avl_walk_next(uu_avl_walk_t *wp)
+{
+	return (_avl_walk_advance(wp, wp->uaw_avl));
+}
+
+void
+uu_avl_walk_end(uu_avl_walk_t *wp)
+{
+	_avl_walk_fini(wp);
+	uu_free(wp);
+}
+
+int
+uu_avl_walk(uu_avl_t *ap, uu_walk_fn_t *func, void *private, uint32_t flags)
+{
+	void *e;
+	uu_avl_walk_t my_walk;
+
+	int status = UU_WALK_NEXT;
+
+	if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (-1);
+	}
+
+	_avl_walk_init(&my_walk, ap, flags);
+	while (status == UU_WALK_NEXT &&
+	    (e = _avl_walk_advance(&my_walk, ap)) != NULL)
+		status = (*func)(e, private);
+	_avl_walk_fini(&my_walk);
+
+	if (status >= 0)
+		return (0);
+	uu_set_error(UU_ERROR_CALLBACK_FAILED);
+	return (-1);
+}
+
+void
+uu_avl_remove(uu_avl_t *ap, void *elem)
+{
+	uu_avl_walk_t *wp;
+	uu_avl_pool_t *pp = ap->ua_pool;
+	uintptr_t *na = NODE_ARRAY(pp, elem);
+
+	if (ap->ua_debug) {
+		/*
+		 * invalidate outstanding uu_avl_index_ts.
+		 */
+		ap->ua_index = INDEX_NEXT(ap->ua_index);
+	}
+
+	/*
+	 * Robust walkers must be advanced if we are removing the node
+	 * they are currently using.  In debug mode, non-robust walkers
+	 * are also on the walker list.
+	 */
+	for (wp = ap->ua_null_walk.uaw_next; wp != &ap->ua_null_walk;
+	    wp = wp->uaw_next) {
+		if (wp->uaw_robust) {
+			if (elem == wp->uaw_next_result)
+				(void) _avl_walk_advance(wp, ap);
+		} else if (wp->uaw_next_result != NULL) {
+			uu_panic("uu_avl_remove(%p, %p): active non-robust "
+			    "walker\n", ap, elem);
+		}
+	}
+
+	avl_remove(&ap->ua_tree, elem);
+
+	na[0] = POOL_TO_MARKER(pp);
+	na[1] = 0;
+}
+
+void *
+uu_avl_teardown(uu_avl_t *ap, void **cookie)
+{
+	void *elem = avl_destroy_nodes(&ap->ua_tree, cookie);
+
+	if (elem != NULL) {
+		uu_avl_pool_t *pp = ap->ua_pool;
+		uintptr_t *na = NODE_ARRAY(pp, elem);
+
+		na[0] = POOL_TO_MARKER(pp);
+		na[1] = 0;
+	}
+	return (elem);
+}
+
+void *
+uu_avl_find(uu_avl_t *ap, void *elem, void *private, uu_avl_index_t *out)
+{
+	struct uu_avl_node_compare_info info;
+	void *result;
+
+	info.ac_compare = ap->ua_pool->uap_cmp;
+	info.ac_private = private;
+	info.ac_right = elem;
+	info.ac_found = NULL;
+
+	result = avl_find(&ap->ua_tree, &info, out);
+	if (out != NULL)
+		*out = INDEX_ENCODE(ap, *out);
+
+	if (ap->ua_debug && result != NULL)
+		uu_panic("uu_avl_find: internal error: avl_find succeeded\n");
+
+	return (info.ac_found);
+}
+
+void
+uu_avl_insert(uu_avl_t *ap, void *elem, uu_avl_index_t idx)
+{
+	if (ap->ua_debug) {
+		uu_avl_pool_t *pp = ap->ua_pool;
+		uintptr_t *na = NODE_ARRAY(pp, elem);
+
+		if (na[1] != 0)
+			uu_panic("uu_avl_insert(%p, %p, %p): node already "
+			    "in tree, or corrupt\n",
+			    ap, elem, idx);
+		if (na[0] == 0)
+			uu_panic("uu_avl_insert(%p, %p, %p): node not "
+			    "initialized\n",
+			    ap, elem, idx);
+		if (na[0] != POOL_TO_MARKER(pp))
+			uu_panic("uu_avl_insert(%p, %p, %p): node from "
+			    "other pool, or corrupt\n",
+			    ap, elem, idx);
+
+		if (!INDEX_VALID(ap, idx))
+			uu_panic("uu_avl_insert(%p, %p, %p): %s\n",
+			    ap, elem, idx,
+			    INDEX_CHECK(idx)? "outdated index" :
+			    "invalid index");
+
+		/*
+		 * invalidate outstanding uu_avl_index_ts.
+		 */
+		ap->ua_index = INDEX_NEXT(ap->ua_index);
+	}
+	avl_insert(&ap->ua_tree, elem, INDEX_DECODE(idx));
+}
+
+void *
+uu_avl_nearest_next(uu_avl_t *ap, uu_avl_index_t idx)
+{
+	if (ap->ua_debug && !INDEX_VALID(ap, idx))
+		uu_panic("uu_avl_nearest_next(%p, %p): %s\n",
+		    ap, idx, INDEX_CHECK(idx)? "outdated index" :
+		    "invalid index");
+	return (avl_nearest(&ap->ua_tree, INDEX_DECODE(idx), AVL_AFTER));
+}
+
+void *
+uu_avl_nearest_prev(uu_avl_t *ap, uu_avl_index_t idx)
+{
+	if (ap->ua_debug && !INDEX_VALID(ap, idx))
+		uu_panic("uu_avl_nearest_prev(%p, %p): %s\n",
+		    ap, idx, INDEX_CHECK(idx)? "outdated index" :
+		    "invalid index");
+	return (avl_nearest(&ap->ua_tree, INDEX_DECODE(idx), AVL_BEFORE));
+}
+
+/*
+ * called from uu_lockup() and uu_release(), as part of our fork1()-safety.
+ */
+void
+uu_avl_lockup(void)
+{
+	uu_avl_pool_t *pp;
+
+	(void) pthread_mutex_lock(&uu_apool_list_lock);
+	for (pp = uu_null_apool.uap_next; pp != &uu_null_apool;
+	    pp = pp->uap_next)
+		(void) pthread_mutex_lock(&pp->uap_lock);
+}
+
+void
+uu_avl_release(void)
+{
+	uu_avl_pool_t *pp;
+
+	for (pp = uu_null_apool.uap_next; pp != &uu_null_apool;
+	    pp = pp->uap_next)
+		(void) pthread_mutex_unlock(&pp->uap_lock);
+	(void) pthread_mutex_unlock(&uu_apool_list_lock);
+}
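
Taken together, the routines above implement the usual libuutil pattern of embedding a uu_avl_node_t in the caller's structure and pairing uu_avl_find(), which fills in a uu_avl_index_t, with uu_avl_insert(). A minimal sketch of a consumer, assuming a hypothetical item_t type; every name in it is illustrative only:

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <libuutil.h>

typedef struct item {
	int		i_value;
	uu_avl_node_t	i_node;
} item_t;

/* uu_compare_fn_t: return <0, 0, or >0, strcmp-style */
static int
item_compare(const void *l, const void *r, void *private)
{
	const item_t *a = l;
	const item_t *b = r;

	if (a->i_value > b->i_value)
		return (1);
	if (a->i_value < b->i_value)
		return (-1);
	return (0);
}

int
main(void)
{
	uu_avl_pool_t *pool;
	uu_avl_t *avl;
	uu_avl_index_t where;
	void *cookie = NULL;
	item_t *ip;
	int v;

	pool = uu_avl_pool_create("com.example,items", sizeof (item_t),
	    offsetof(item_t, i_node), item_compare, UU_AVL_POOL_DEBUG);
	if (pool == NULL)
		return (1);
	avl = uu_avl_create(pool, NULL, 0);
	if (avl == NULL)
		return (1);

	for (v = 0; v < 4; v++) {
		if ((ip = malloc(sizeof (*ip))) == NULL)
			return (1);
		ip->i_value = v;
		uu_avl_node_init(ip, &ip->i_node, pool);
		/* insert only if an equal element is not already present */
		if (uu_avl_find(avl, ip, NULL, &where) == NULL)
			uu_avl_insert(avl, ip, where);
	}

	for (ip = uu_avl_first(avl); ip != NULL; ip = uu_avl_next(avl, ip))
		(void) printf("%d\n", ip->i_value);

	/* empty the tree before destroying it */
	while ((ip = uu_avl_teardown(avl, &cookie)) != NULL) {
		uu_avl_node_fini(ip, &ip->i_node, pool);
		free(ip);
	}
	uu_avl_destroy(avl);
	uu_avl_pool_destroy(pool);
	return (0);
}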
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libuutil/common/libuutil_impl.h
@@ -0,0 +1,181 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBUUTIL_IMPL_H
+#define	_LIBUUTIL_IMPL_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <libuutil.h>
+#include <pthread.h>
+
+#include <sys/avl_impl.h>
+#include <sys/byteorder.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+void uu_set_error(uint_t);
+#pragma rarely_called(uu_set_error)
+
+/*PRINTFLIKE1*/
+void uu_panic(const char *format, ...);
+#pragma rarely_called(uu_panic)
+
+struct uu_dprintf {
+	char	*uud_name;
+	uu_dprintf_severity_t uud_severity;
+	uint_t	uud_flags;
+};
+
+/*
+ * For debugging purposes, libuutil keeps around linked lists of all uu_lists
+ * and uu_avls, along with pointers to their parents.  These can cause false
+ * negatives when looking for memory leaks, so we encode the pointers by
+ * storing them with swapped endianness;  this is not perfect, but it's about
+ * the best we can do without wasting a lot of space.
+ */
+#ifdef _LP64
+#define	UU_PTR_ENCODE(ptr)		BSWAP_64((uintptr_t)(void *)(ptr))
+#else
+#define	UU_PTR_ENCODE(ptr)		BSWAP_32((uintptr_t)(void *)(ptr))
+#endif
+
+#define	UU_PTR_DECODE(ptr)		((void *)UU_PTR_ENCODE(ptr))
+
+/*
+ * uu_list structures
+ */
+typedef struct uu_list_node_impl {
+	struct uu_list_node_impl *uln_next;
+	struct uu_list_node_impl *uln_prev;
+} uu_list_node_impl_t;
+
+struct uu_list_walk {
+	uu_list_walk_t	*ulw_next;
+	uu_list_walk_t	*ulw_prev;
+
+	uu_list_t	*ulw_list;
+	int8_t		ulw_dir;
+	uint8_t		ulw_robust;
+	uu_list_node_impl_t *ulw_next_result;
+};
+
+struct uu_list {
+	uintptr_t	ul_next_enc;
+	uintptr_t	ul_prev_enc;
+
+	uu_list_pool_t	*ul_pool;
+	uintptr_t	ul_parent_enc;	/* encoded parent pointer */
+	size_t		ul_offset;
+	size_t		ul_numnodes;
+	uint8_t		ul_debug;
+	uint8_t		ul_sorted;
+	uint8_t		ul_index;	/* mark for uu_list_index_ts */
+
+	uu_list_node_impl_t ul_null_node;
+	uu_list_walk_t	ul_null_walk;	/* for robust walkers */
+};
+
+#define	UU_LIST_PTR(ptr)		((uu_list_t *)UU_PTR_DECODE(ptr))
+
+#define	UU_LIST_POOL_MAXNAME	64
+
+struct uu_list_pool {
+	uu_list_pool_t	*ulp_next;
+	uu_list_pool_t	*ulp_prev;
+
+	char		ulp_name[UU_LIST_POOL_MAXNAME];
+	size_t		ulp_nodeoffset;
+	size_t		ulp_objsize;
+	uu_compare_fn_t	*ulp_cmp;
+	uint8_t		ulp_debug;
+	uint8_t		ulp_last_index;
+	pthread_mutex_t	ulp_lock;		/* protects null_list */
+	uu_list_t	ulp_null_list;
+};
+
+/*
+ * uu_avl structures
+ */
+typedef struct avl_node		uu_avl_node_impl_t;
+
+struct uu_avl_walk {
+	uu_avl_walk_t	*uaw_next;
+	uu_avl_walk_t	*uaw_prev;
+
+	uu_avl_t	*uaw_avl;
+	void		*uaw_next_result;
+	int8_t		uaw_dir;
+	uint8_t		uaw_robust;
+};
+
+struct uu_avl {
+	uintptr_t	ua_next_enc;
+	uintptr_t	ua_prev_enc;
+
+	uu_avl_pool_t	*ua_pool;
+	uintptr_t	ua_parent_enc;
+	uint8_t		ua_debug;
+	uint8_t		ua_index;	/* mark for uu_avl_index_ts */
+
+	struct avl_tree	ua_tree;
+	uu_avl_walk_t	ua_null_walk;
+};
+
+#define	UU_AVL_PTR(x)		((uu_avl_t *)UU_PTR_DECODE(x))
+
+#define	UU_AVL_POOL_MAXNAME	64
+
+struct uu_avl_pool {
+	uu_avl_pool_t	*uap_next;
+	uu_avl_pool_t	*uap_prev;
+
+	char		uap_name[UU_AVL_POOL_MAXNAME];
+	size_t		uap_nodeoffset;
+	size_t		uap_objsize;
+	uu_compare_fn_t	*uap_cmp;
+	uint8_t		uap_debug;
+	uint8_t		uap_last_index;
+	pthread_mutex_t	uap_lock;		/* protects null_avl */
+	uu_avl_t	uap_null_avl;
+};
+
+/*
+ * atfork() handlers
+ */
+void uu_avl_lockup(void);
+void uu_avl_release(void);
+
+void uu_list_lockup(void);
+void uu_list_release(void);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBUUTIL_IMPL_H */
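
One property the header relies on: UU_PTR_ENCODE() is a plain byte swap, so applying it twice (which is what UU_PTR_DECODE() does) returns the original pointer. A tiny sketch of that round trip, assuming it is built in-tree where this private header is reachable:

#include <assert.h>
#include "libuutil_impl.h"

static void
check_ptr_encoding(void *p)
{
	uintptr_t enc = UU_PTR_ENCODE(p);

	/*
	 * The stored, byte-swapped form will not look like a live pointer
	 * to a leak checker, but decoding swaps it back to the original.
	 */
	assert(UU_PTR_DECODE(enc) == p);
}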
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libuutil/common/uu_ident.c
@@ -0,0 +1,122 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <string.h>
+
+/*
+ * We require names of the form:
+ *	[provider,]identifier[/[provider,]identifier]...
+ *
+ * Where provider is either a stock symbol (SUNW) or a java-style reversed
+ * domain name (com.sun).
+ *
+ * Both providers and identifiers must start with a letter, and may
+ * only contain alphanumerics, dashes, and underlines.  Providers
+ * may also contain periods.
+ *
+ * Note that we do _not_ use the macros in <ctype.h>, since they are affected
+ * by the current locale settings.
+ */
+
+#define	IS_ALPHA(c) \
+	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
+
+#define	IS_DIGIT(c) \
+	((c) >= '0' && (c) <= '9')
+
+static int
+is_valid_ident(const char *s, const char *e, int allowdot)
+{
+	char c;
+
+	if (s >= e)
+		return (0);		/* name is empty */
+
+	c = *s++;
+	if (!IS_ALPHA(c))
+		return (0);		/* does not start with letter */
+
+	while (s < e && (c = *s++) != 0) {
+		if (IS_ALPHA(c) || IS_DIGIT(c) || c == '-' || c == '_' ||
+		    (allowdot && c == '.'))
+			continue;
+		return (0);		/* invalid character */
+	}
+	return (1);
+}
+
+static int
+is_valid_component(const char *b, const char *e, uint_t flags)
+{
+	char *sp;
+
+	if (flags & UU_NAME_DOMAIN) {
+		sp = strchr(b, ',');
+		if (sp != NULL && sp < e) {
+			if (!is_valid_ident(b, sp, 1))
+				return (0);
+			b = sp + 1;
+		}
+	}
+
+	return (is_valid_ident(b, e, 0));
+}
+
+int
+uu_check_name(const char *name, uint_t flags)
+{
+	const char *end = name + strlen(name);
+	const char *p;
+
+	if (flags & ~(UU_NAME_DOMAIN | UU_NAME_PATH)) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (-1);
+	}
+
+	if (!(flags & UU_NAME_PATH)) {
+		if (!is_valid_component(name, end, flags))
+			goto bad;
+		return (0);
+	}
+
+	while ((p = strchr(name, '/')) != NULL) {
+		if (!is_valid_component(name, p, flags))
+			goto bad;
+		name = p + 1;
+	}
+	if (!is_valid_component(name, end, flags))
+		goto bad;
+
+	return (0);
+
+bad:
+	uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+	return (-1);
+}
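
To make the grammar described at the top of this file concrete, a few illustrative calls follow; the strings are examples only, and the UU_NAME_* flags are the ones declared in libuutil.h:

#include <assert.h>
#include <libuutil.h>

static void
name_examples(void)
{
	/* a plain identifier: a letter, then alphanumerics, '-' and '_' */
	assert(uu_check_name("foo_pool-1", 0) == 0);

	/* "provider,identifier" requires UU_NAME_DOMAIN */
	assert(uu_check_name("com.sun,libuutil", UU_NAME_DOMAIN) == 0);
	assert(uu_check_name("com.sun,libuutil", 0) == -1);

	/* '/'-separated components require UU_NAME_PATH */
	assert(uu_check_name("SUNW,alpha/SUNW,beta",
	    UU_NAME_DOMAIN | UU_NAME_PATH) == 0);

	/* identifiers may not start with a digit */
	assert(uu_check_name("9lives", 0) == -1);
}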
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libuutil/common/uu_list.c
@@ -0,0 +1,711 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#define	ELEM_TO_NODE(lp, e) \
+	((uu_list_node_impl_t *)((uintptr_t)(e) + (lp)->ul_offset))
+
+#define	NODE_TO_ELEM(lp, n) \
+	((void *)((uintptr_t)(n) - (lp)->ul_offset))
+
+/*
+ * uu_list_index_ts define a location for insertion.  They are simply a
+ * pointer to the object after the insertion point.  We store a mark
+ * in the low-bits of the index, to help prevent mistakes.
+ *
+ * When debugging, the index mark changes on every insert and delete, to
+ * catch stale references.
+ */
+#define	INDEX_MAX		(sizeof (uintptr_t) - 1)
+#define	INDEX_NEXT(m)		(((m) == INDEX_MAX)? 1 : ((m) + 1) & INDEX_MAX)
+
+#define	INDEX_TO_NODE(i)	((uu_list_node_impl_t *)((i) & ~INDEX_MAX))
+#define	NODE_TO_INDEX(p, n)	(((uintptr_t)(n) & ~INDEX_MAX) | (p)->ul_index)
+#define	INDEX_VALID(p, i)	(((i) & INDEX_MAX) == (p)->ul_index)
+#define	INDEX_CHECK(i)		(((i) & INDEX_MAX) != 0)
+
+#define	POOL_TO_MARKER(pp) ((void *)((uintptr_t)(pp) | 1))
+
+static uu_list_pool_t	uu_null_lpool = { &uu_null_lpool, &uu_null_lpool };
+static pthread_mutex_t	uu_lpool_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
+uu_list_pool_t *
+uu_list_pool_create(const char *name, size_t objsize,
+    size_t nodeoffset, uu_compare_fn_t *compare_func, uint32_t flags)
+{
+	uu_list_pool_t *pp, *next, *prev;
+
+	if (name == NULL ||
+	    uu_check_name(name, UU_NAME_DOMAIN) == -1 ||
+	    nodeoffset + sizeof (uu_list_node_t) > objsize) {
+		uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+		return (NULL);
+	}
+
+	if (flags & ~UU_LIST_POOL_DEBUG) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (NULL);
+	}
+
+	pp = uu_zalloc(sizeof (uu_list_pool_t));
+	if (pp == NULL) {
+		uu_set_error(UU_ERROR_NO_MEMORY);
+		return (NULL);
+	}
+
+	(void) strlcpy(pp->ulp_name, name, sizeof (pp->ulp_name));
+	pp->ulp_nodeoffset = nodeoffset;
+	pp->ulp_objsize = objsize;
+	pp->ulp_cmp = compare_func;
+	if (flags & UU_LIST_POOL_DEBUG)
+		pp->ulp_debug = 1;
+	pp->ulp_last_index = 0;
+
+	(void) pthread_mutex_init(&pp->ulp_lock, NULL);
+
+	pp->ulp_null_list.ul_next_enc = UU_PTR_ENCODE(&pp->ulp_null_list);
+	pp->ulp_null_list.ul_prev_enc = UU_PTR_ENCODE(&pp->ulp_null_list);
+
+	(void) pthread_mutex_lock(&uu_lpool_list_lock);
+	pp->ulp_next = next = &uu_null_lpool;
+	pp->ulp_prev = prev = next->ulp_prev;
+	next->ulp_prev = pp;
+	prev->ulp_next = pp;
+	(void) pthread_mutex_unlock(&uu_lpool_list_lock);
+
+	return (pp);
+}
+
+void
+uu_list_pool_destroy(uu_list_pool_t *pp)
+{
+	if (pp->ulp_debug) {
+		if (pp->ulp_null_list.ul_next_enc !=
+		    UU_PTR_ENCODE(&pp->ulp_null_list) ||
+		    pp->ulp_null_list.ul_prev_enc !=
+		    UU_PTR_ENCODE(&pp->ulp_null_list)) {
+			uu_panic("uu_list_pool_destroy: Pool \"%.*s\" (%p) has "
+			    "outstanding lists, or is corrupt.\n",
+			    sizeof (pp->ulp_name), pp->ulp_name, pp);
+		}
+	}
+	(void) pthread_mutex_lock(&uu_lpool_list_lock);
+	pp->ulp_next->ulp_prev = pp->ulp_prev;
+	pp->ulp_prev->ulp_next = pp->ulp_next;
+	(void) pthread_mutex_unlock(&uu_lpool_list_lock);
+	pp->ulp_prev = NULL;
+	pp->ulp_next = NULL;
+	uu_free(pp);
+}
+
+void
+uu_list_node_init(void *base, uu_list_node_t *np_arg, uu_list_pool_t *pp)
+{
+	uu_list_node_impl_t *np = (uu_list_node_impl_t *)np_arg;
+
+	if (pp->ulp_debug) {
+		uintptr_t offset = (uintptr_t)np - (uintptr_t)base;
+		if (offset + sizeof (*np) > pp->ulp_objsize) {
+			uu_panic("uu_list_node_init(%p, %p, %p (\"%s\")): "
+			    "offset %ld doesn't fit in object (size %ld)\n",
+			    base, np, pp, pp->ulp_name, offset,
+			    pp->ulp_objsize);
+		}
+		if (offset != pp->ulp_nodeoffset) {
+			uu_panic("uu_list_node_init(%p, %p, %p (\"%s\")): "
+			    "offset %ld doesn't match pool's offset (%ld)\n",
+			    base, np, pp, pp->ulp_name, offset,
+			    pp->ulp_nodeoffset);
+		}
+	}
+	np->uln_next = POOL_TO_MARKER(pp);
+	np->uln_prev = NULL;
+}
+
+void
+uu_list_node_fini(void *base, uu_list_node_t *np_arg, uu_list_pool_t *pp)
+{
+	uu_list_node_impl_t *np = (uu_list_node_impl_t *)np_arg;
+
+	if (pp->ulp_debug) {
+		if (np->uln_next == NULL &&
+		    np->uln_prev == NULL) {
+			uu_panic("uu_list_node_fini(%p, %p, %p (\"%s\")): "
+			    "node already fini'd\n",
+			    base, np_arg, pp, pp->ulp_name);
+		}
+		if (np->uln_next != POOL_TO_MARKER(pp) ||
+		    np->uln_prev != NULL) {
+			uu_panic("uu_list_node_fini(%p, %p, %p (\"%s\")): "
+			    "node corrupt or on list\n",
+			    base, np_arg, pp, pp->ulp_name);
+		}
+	}
+	np->uln_next = NULL;
+	np->uln_prev = NULL;
+}
+
+uu_list_t *
+uu_list_create(uu_list_pool_t *pp, void *parent, uint32_t flags)
+{
+	uu_list_t *lp, *next, *prev;
+
+	if (flags & ~(UU_LIST_DEBUG | UU_LIST_SORTED)) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (NULL);
+	}
+
+	if ((flags & UU_LIST_SORTED) && pp->ulp_cmp == NULL) {
+		if (pp->ulp_debug)
+			uu_panic("uu_list_create(%p, ...): requested "
+			    "UU_LIST_SORTED, but pool has no comparison func\n",
+			    pp);
+		uu_set_error(UU_ERROR_NOT_SUPPORTED);
+		return (NULL);
+	}
+
+	lp = uu_zalloc(sizeof (*lp));
+	if (lp == NULL) {
+		uu_set_error(UU_ERROR_NO_MEMORY);
+		return (NULL);
+	}
+
+	lp->ul_pool = pp;
+	lp->ul_parent_enc = UU_PTR_ENCODE(parent);
+	lp->ul_offset = pp->ulp_nodeoffset;
+	lp->ul_debug = pp->ulp_debug || (flags & UU_LIST_DEBUG);
+	lp->ul_sorted = (flags & UU_LIST_SORTED);
+	lp->ul_numnodes = 0;
+	lp->ul_index = (pp->ulp_last_index = INDEX_NEXT(pp->ulp_last_index));
+
+	lp->ul_null_node.uln_next = &lp->ul_null_node;
+	lp->ul_null_node.uln_prev = &lp->ul_null_node;
+
+	lp->ul_null_walk.ulw_next = &lp->ul_null_walk;
+	lp->ul_null_walk.ulw_prev = &lp->ul_null_walk;
+
+	(void) pthread_mutex_lock(&pp->ulp_lock);
+	next = &pp->ulp_null_list;
+	prev = UU_PTR_DECODE(next->ul_prev_enc);
+	lp->ul_next_enc = UU_PTR_ENCODE(next);
+	lp->ul_prev_enc = UU_PTR_ENCODE(prev);
+	next->ul_prev_enc = UU_PTR_ENCODE(lp);
+	prev->ul_next_enc = UU_PTR_ENCODE(lp);
+	(void) pthread_mutex_unlock(&pp->ulp_lock);
+
+	return (lp);
+}
+
+void
+uu_list_destroy(uu_list_t *lp)
+{
+	uu_list_pool_t *pp = lp->ul_pool;
+
+	if (lp->ul_debug) {
+		if (lp->ul_null_node.uln_next != &lp->ul_null_node ||
+		    lp->ul_null_node.uln_prev != &lp->ul_null_node) {
+			uu_panic("uu_list_destroy(%p):  list not empty\n",
+			    lp);
+		}
+		if (lp->ul_numnodes != 0) {
+			uu_panic("uu_list_destroy(%p):  numnodes is nonzero, "
+			    "but list is empty\n", lp);
+		}
+		if (lp->ul_null_walk.ulw_next != &lp->ul_null_walk ||
+		    lp->ul_null_walk.ulw_prev != &lp->ul_null_walk) {
+			uu_panic("uu_list_destroy(%p):  outstanding walkers\n",
+			    lp);
+		}
+	}
+
+	(void) pthread_mutex_lock(&pp->ulp_lock);
+	UU_LIST_PTR(lp->ul_next_enc)->ul_prev_enc = lp->ul_prev_enc;
+	UU_LIST_PTR(lp->ul_prev_enc)->ul_next_enc = lp->ul_next_enc;
+	(void) pthread_mutex_unlock(&pp->ulp_lock);
+	lp->ul_prev_enc = UU_PTR_ENCODE(NULL);
+	lp->ul_next_enc = UU_PTR_ENCODE(NULL);
+	lp->ul_pool = NULL;
+	uu_free(lp);
+}
+
+static void
+list_insert(uu_list_t *lp, uu_list_node_impl_t *np, uu_list_node_impl_t *prev,
+    uu_list_node_impl_t *next)
+{
+	if (lp->ul_debug) {
+		if (next->uln_prev != prev || prev->uln_next != next)
+			uu_panic("insert(%p): internal error: %p and %p not "
+			    "neighbors\n", lp, next, prev);
+
+		if (np->uln_next != POOL_TO_MARKER(lp->ul_pool) ||
+		    np->uln_prev != NULL) {
+			uu_panic("insert(%p): elem %p node %p corrupt, "
+			    "not initialized, or already in a list.\n",
+			    lp, NODE_TO_ELEM(lp, np), np);
+		}
+		/*
+		 * invalidate outstanding uu_list_index_ts.
+		 */
+		lp->ul_index = INDEX_NEXT(lp->ul_index);
+	}
+	np->uln_next = next;
+	np->uln_prev = prev;
+	next->uln_prev = np;
+	prev->uln_next = np;
+
+	lp->ul_numnodes++;
+}
+
+void
+uu_list_insert(uu_list_t *lp, void *elem, uu_list_index_t idx)
+{
+	uu_list_node_impl_t *np;
+
+	np = INDEX_TO_NODE(idx);
+	if (np == NULL)
+		np = &lp->ul_null_node;
+
+	if (lp->ul_debug) {
+		if (!INDEX_VALID(lp, idx))
+			uu_panic("uu_list_insert(%p, %p, %p): %s\n",
+			    lp, elem, idx,
+			    INDEX_CHECK(idx)? "outdated index" :
+			    "invalid index");
+		if (np->uln_prev == NULL)
+			uu_panic("uu_list_insert(%p, %p, %p): out-of-date "
+			    "index\n", lp, elem, idx);
+	}
+
+	list_insert(lp, ELEM_TO_NODE(lp, elem), np->uln_prev, np);
+}
+
+void *
+uu_list_find(uu_list_t *lp, void *elem, void *private, uu_list_index_t *out)
+{
+	int sorted = lp->ul_sorted;
+	uu_compare_fn_t *func = lp->ul_pool->ulp_cmp;
+	uu_list_node_impl_t *np;
+
+	if (func == NULL) {
+		if (out != NULL)
+			*out = 0;
+		uu_set_error(UU_ERROR_NOT_SUPPORTED);
+		return (NULL);
+	}
+	for (np = lp->ul_null_node.uln_next; np != &lp->ul_null_node;
+	    np = np->uln_next) {
+		void *ep = NODE_TO_ELEM(lp, np);
+		int cmp = func(ep, elem, private);
+		if (cmp == 0) {
+			if (out != NULL)
+				*out = NODE_TO_INDEX(lp, np);
+			return (ep);
+		}
+		if (sorted && cmp > 0) {
+			if (out != NULL)
+				*out = NODE_TO_INDEX(lp, np);
+			return (NULL);
+		}
+	}
+	if (out != NULL)
+		*out = NODE_TO_INDEX(lp, 0);
+	return (NULL);
+}
+
+void *
+uu_list_nearest_next(uu_list_t *lp, uu_list_index_t idx)
+{
+	uu_list_node_impl_t *np = INDEX_TO_NODE(idx);
+
+	if (np == NULL)
+		np = &lp->ul_null_node;
+
+	if (lp->ul_debug) {
+		if (!INDEX_VALID(lp, idx))
+			uu_panic("uu_list_nearest_next(%p, %p): %s\n",
+			    lp, idx, INDEX_CHECK(idx)? "outdated index" :
+			    "invalid index");
+		if (np->uln_prev == NULL)
+			uu_panic("uu_list_nearest_next(%p, %p): out-of-date "
+			    "index\n", lp, idx);
+	}
+
+	if (np == &lp->ul_null_node)
+		return (NULL);
+	else
+		return (NODE_TO_ELEM(lp, np));
+}
+
+void *
+uu_list_nearest_prev(uu_list_t *lp, uu_list_index_t idx)
+{
+	uu_list_node_impl_t *np = INDEX_TO_NODE(idx);
+
+	if (np == NULL)
+		np = &lp->ul_null_node;
+
+	if (lp->ul_debug) {
+		if (!INDEX_VALID(lp, idx))
+			uu_panic("uu_list_nearest_prev(%p, %p): %s\n",
+			    lp, idx, INDEX_CHECK(idx)? "outdated index" :
+			    "invalid index");
+		if (np->uln_prev == NULL)
+			uu_panic("uu_list_nearest_prev(%p, %p): out-of-date "
+			    "index\n", lp, idx);
+	}
+
+	if ((np = np->uln_prev) == &lp->ul_null_node)
+		return (NULL);
+	else
+		return (NODE_TO_ELEM(lp, np));
+}
+
+static void
+list_walk_init(uu_list_walk_t *wp, uu_list_t *lp, uint32_t flags)
+{
+	uu_list_walk_t *next, *prev;
+
+	int robust = (flags & UU_WALK_ROBUST);
+	int direction = (flags & UU_WALK_REVERSE)? -1 : 1;
+
+	(void) memset(wp, 0, sizeof (*wp));
+	wp->ulw_list = lp;
+	wp->ulw_robust = robust;
+	wp->ulw_dir = direction;
+	if (direction > 0)
+		wp->ulw_next_result = lp->ul_null_node.uln_next;
+	else
+		wp->ulw_next_result = lp->ul_null_node.uln_prev;
+
+	if (lp->ul_debug || robust) {
+		wp->ulw_next = next = &lp->ul_null_walk;
+		wp->ulw_prev = prev = next->ulw_prev;
+		next->ulw_prev = wp;
+		prev->ulw_next = wp;
+	}
+}
+
+static uu_list_node_impl_t *
+list_walk_advance(uu_list_walk_t *wp, uu_list_t *lp)
+{
+	uu_list_node_impl_t *np = wp->ulw_next_result;
+	uu_list_node_impl_t *next;
+
+	if (np == &lp->ul_null_node)
+		return (NULL);
+
+	next = (wp->ulw_dir > 0)? np->uln_next : np->uln_prev;
+
+	wp->ulw_next_result = next;
+	return (np);
+}
+
+static void
+list_walk_fini(uu_list_walk_t *wp)
+{
+	/* GLXXX debugging? */
+	if (wp->ulw_next != NULL) {
+		wp->ulw_next->ulw_prev = wp->ulw_prev;
+		wp->ulw_prev->ulw_next = wp->ulw_next;
+		wp->ulw_next = NULL;
+		wp->ulw_prev = NULL;
+	}
+	wp->ulw_list = NULL;
+	wp->ulw_next_result = NULL;
+}
+
+uu_list_walk_t *
+uu_list_walk_start(uu_list_t *lp, uint32_t flags)
+{
+	uu_list_walk_t *wp;
+
+	if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (NULL);
+	}
+
+	wp = uu_zalloc(sizeof (*wp));
+	if (wp == NULL) {
+		uu_set_error(UU_ERROR_NO_MEMORY);
+		return (NULL);
+	}
+
+	list_walk_init(wp, lp, flags);
+	return (wp);
+}
+
+void *
+uu_list_walk_next(uu_list_walk_t *wp)
+{
+	uu_list_t *lp = wp->ulw_list;
+	uu_list_node_impl_t *np = list_walk_advance(wp, lp);
+
+	if (np == NULL)
+		return (NULL);
+
+	return (NODE_TO_ELEM(lp, np));
+}
+
+void
+uu_list_walk_end(uu_list_walk_t *wp)
+{
+	list_walk_fini(wp);
+	uu_free(wp);
+}
+
+int
+uu_list_walk(uu_list_t *lp, uu_walk_fn_t *func, void *private, uint32_t flags)
+{
+	uu_list_node_impl_t *np;
+
+	int status = UU_WALK_NEXT;
+
+	int robust = (flags & UU_WALK_ROBUST);
+	int reverse = (flags & UU_WALK_REVERSE);
+
+	if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (-1);
+	}
+
+	if (lp->ul_debug || robust) {
+		uu_list_walk_t my_walk;
+		void *e;
+
+		list_walk_init(&my_walk, lp, flags);
+		while (status == UU_WALK_NEXT &&
+		    (e = uu_list_walk_next(&my_walk)) != NULL)
+			status = (*func)(e, private);
+		list_walk_fini(&my_walk);
+	} else {
+		if (!reverse) {
+			for (np = lp->ul_null_node.uln_next;
+			    status == UU_WALK_NEXT && np != &lp->ul_null_node;
+			    np = np->uln_next) {
+				status = (*func)(NODE_TO_ELEM(lp, np), private);
+			}
+		} else {
+			for (np = lp->ul_null_node.uln_prev;
+			    status == UU_WALK_NEXT && np != &lp->ul_null_node;
+			    np = np->uln_prev) {
+				status = (*func)(NODE_TO_ELEM(lp, np), private);
+			}
+		}
+	}
+	if (status >= 0)
+		return (0);
+	uu_set_error(UU_ERROR_CALLBACK_FAILED);
+	return (-1);
+}
+
+void
+uu_list_remove(uu_list_t *lp, void *elem)
+{
+	uu_list_node_impl_t *np = ELEM_TO_NODE(lp, elem);
+	uu_list_walk_t *wp;
+
+	if (lp->ul_debug) {
+		if (np->uln_prev == NULL)
+			uu_panic("uu_list_remove(%p, %p): elem not on list\n",
+			    lp, elem);
+		/*
+		 * invalidate outstanding uu_list_index_ts.
+		 */
+		lp->ul_index = INDEX_NEXT(lp->ul_index);
+	}
+
+	/*
+	 * Robust walkers must be advanced.  In debug mode, non-robust
+	 * walkers are also on the list; any that is still active is an error.
+	 */
+	for (wp = lp->ul_null_walk.ulw_next; wp != &lp->ul_null_walk;
+	    wp = wp->ulw_next) {
+		if (wp->ulw_robust) {
+			if (np == wp->ulw_next_result)
+				(void) list_walk_advance(wp, lp);
+		} else if (wp->ulw_next_result != NULL) {
+			uu_panic("uu_list_remove(%p, %p): active non-robust "
+			    "walker\n", lp, elem);
+		}
+	}
+
+	np->uln_next->uln_prev = np->uln_prev;
+	np->uln_prev->uln_next = np->uln_next;
+
+	lp->ul_numnodes--;
+
+	np->uln_next = POOL_TO_MARKER(lp->ul_pool);
+	np->uln_prev = NULL;
+}
+
+void *
+uu_list_teardown(uu_list_t *lp, void **cookie)
+{
+	void *ep;
+
+	/*
+	 * XXX: disable list modification until list is empty
+	 */
+	if (lp->ul_debug && *cookie != NULL)
+		uu_panic("uu_list_teardown(%p, %p): unexpected cookie\n", lp,
+		    cookie);
+
+	ep = uu_list_first(lp);
+	if (ep)
+		uu_list_remove(lp, ep);
+	return (ep);
+}
+
+int
+uu_list_insert_before(uu_list_t *lp, void *target, void *elem)
+{
+	uu_list_node_impl_t *np = ELEM_TO_NODE(lp, target);
+
+	if (target == NULL)
+		np = &lp->ul_null_node;
+
+	if (lp->ul_debug) {
+		if (np->uln_prev == NULL)
+			uu_panic("uu_list_insert_before(%p, %p, %p): %p is "
+			    "not currently on a list\n",
+			    lp, target, elem, target);
+	}
+	if (lp->ul_sorted) {
+		if (lp->ul_debug)
+			uu_panic("uu_list_insert_before(%p, ...): list is "
+			    "UU_LIST_SORTED\n", lp);
+		uu_set_error(UU_ERROR_NOT_SUPPORTED);
+		return (-1);
+	}
+
+	list_insert(lp, ELEM_TO_NODE(lp, elem), np->uln_prev, np);
+	return (0);
+}
+
+int
+uu_list_insert_after(uu_list_t *lp, void *target, void *elem)
+{
+	uu_list_node_impl_t *np = ELEM_TO_NODE(lp, target);
+
+	if (target == NULL)
+		np = &lp->ul_null_node;
+
+	if (lp->ul_debug) {
+		if (np->uln_prev == NULL)
+			uu_panic("uu_list_insert_after(%p, %p, %p): %p is "
+			    "not currently on a list\n",
+			    lp, target, elem, target);
+	}
+	if (lp->ul_sorted) {
+		if (lp->ul_debug)
+			uu_panic("uu_list_insert_after(%p, ...): list is "
+			    "UU_LIST_SORTED\n", lp);
+		uu_set_error(UU_ERROR_NOT_SUPPORTED);
+		return (-1);
+	}
+
+	list_insert(lp, ELEM_TO_NODE(lp, elem), np, np->uln_next);
+	return (0);
+}
+
+size_t
+uu_list_numnodes(uu_list_t *lp)
+{
+	return (lp->ul_numnodes);
+}
+
+void *
+uu_list_first(uu_list_t *lp)
+{
+	uu_list_node_impl_t *n = lp->ul_null_node.uln_next;
+	if (n == &lp->ul_null_node)
+		return (NULL);
+	return (NODE_TO_ELEM(lp, n));
+}
+
+void *
+uu_list_last(uu_list_t *lp)
+{
+	uu_list_node_impl_t *n = lp->ul_null_node.uln_prev;
+	if (n == &lp->ul_null_node)
+		return (NULL);
+	return (NODE_TO_ELEM(lp, n));
+}
+
+void *
+uu_list_next(uu_list_t *lp, void *elem)
+{
+	uu_list_node_impl_t *n = ELEM_TO_NODE(lp, elem);
+
+	n = n->uln_next;
+	if (n == &lp->ul_null_node)
+		return (NULL);
+	return (NODE_TO_ELEM(lp, n));
+}
+
+void *
+uu_list_prev(uu_list_t *lp, void *elem)
+{
+	uu_list_node_impl_t *n = ELEM_TO_NODE(lp, elem);
+
+	n = n->uln_prev;
+	if (n == &lp->ul_null_node)
+		return (NULL);
+	return (NODE_TO_ELEM(lp, n));
+}
+
+/*
+ * called from uu_lockup() and uu_release(), as part of our fork1()-safety.
+ */
+void
+uu_list_lockup(void)
+{
+	uu_list_pool_t *pp;
+
+	(void) pthread_mutex_lock(&uu_lpool_list_lock);
+	for (pp = uu_null_lpool.ulp_next; pp != &uu_null_lpool;
+	    pp = pp->ulp_next)
+		(void) pthread_mutex_lock(&pp->ulp_lock);
+}
+
+void
+uu_list_release(void)
+{
+	uu_list_pool_t *pp;
+
+	for (pp = uu_null_lpool.ulp_next; pp != &uu_null_lpool;
+	    pp = pp->ulp_next)
+		(void) pthread_mutex_unlock(&pp->ulp_lock);
+	(void) pthread_mutex_unlock(&uu_lpool_list_lock);
+}
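
The list variant mirrors the AVL pattern: on a UU_LIST_SORTED list, uu_list_find() hands back the insertion index that uu_list_insert() consumes, while unsorted lists use uu_list_insert_before()/uu_list_insert_after() instead. A condensed sketch with a hypothetical entry_t type (names and values are illustrative; cleanup is omitted for brevity):

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <libuutil.h>

typedef struct entry {
	int		e_key;
	uu_list_node_t	e_node;
} entry_t;

static int
entry_compare(const void *l, const void *r, void *private)
{
	const entry_t *a = l;
	const entry_t *b = r;

	if (a->e_key > b->e_key)
		return (1);
	if (a->e_key < b->e_key)
		return (-1);
	return (0);
}

int
main(void)
{
	uu_list_pool_t *pool;
	uu_list_t *list;
	uu_list_index_t where;
	entry_t *ep;
	int keys[] = { 3, 1, 2 };
	int i;

	pool = uu_list_pool_create("com.example,entries", sizeof (entry_t),
	    offsetof(entry_t, e_node), entry_compare, UU_LIST_POOL_DEBUG);
	if (pool == NULL)
		return (1);
	list = uu_list_create(pool, NULL, UU_LIST_SORTED);
	if (list == NULL)
		return (1);

	for (i = 0; i < 3; i++) {
		if ((ep = malloc(sizeof (*ep))) == NULL)
			return (1);
		ep->e_key = keys[i];
		uu_list_node_init(ep, &ep->e_node, pool);
		/* the returned index is the slot that keeps the list sorted */
		if (uu_list_find(list, ep, NULL, &where) == NULL)
			uu_list_insert(list, ep, where);
	}

	for (ep = uu_list_first(list); ep != NULL; ep = uu_list_next(list, ep))
		(void) printf("%d\n", ep->e_key);	/* prints 1, 2, 3 */

	return (0);
}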
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libuutil/common/uu_strtoint.c
@@ -0,0 +1,300 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <limits.h>
+#include <ctype.h>
+
+#define	MAX_BASE	36
+
+#define	IS_DIGIT(x)	((x) >= '0' && (x) <= '9')
+
+#define	CTOI(x) (((x) >= '0' && (x) <= '9') ? (x) - '0' : \
+	    ((x) >= 'a' && (x) <= 'z') ? (x) + 10 - 'a' : (x) + 10 - 'A')
+
+static int
+strtoint(const char *s_arg, uint64_t *out, uint32_t base, int sign)
+{
+	const unsigned char *s = (const unsigned char *)s_arg;
+
+	uint64_t val = 0;
+	uint64_t multmax;
+
+	unsigned c, i;
+
+	int neg = 0;
+
+	int bad_digit = 0;
+	int bad_char = 0;
+	int overflow = 0;
+
+	if (s == NULL || base == 1 || base > MAX_BASE) {
+		uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+		return (-1);
+	}
+
+	while ((c = *s) != 0 && isspace(c))
+		s++;
+
+	switch (c) {
+	case '-':
+		if (!sign)
+			overflow = 1;		/* becomes underflow below */
+		neg = 1;
+		/*FALLTHRU*/
+	case '+':
+		c = *++s;
+		break;
+	default:
+		break;
+	}
+
+	if (c == '\0') {
+		uu_set_error(UU_ERROR_EMPTY);
+		return (-1);
+	}
+
+	if (base == 0) {
+		if (c != '0')
+			base = 10;
+		else if (s[1] == 'x' || s[1] == 'X')
+			base = 16;
+		else
+			base = 8;
+	}
+
+	if (base == 16 && c == '0' && (s[1] == 'x' || s[1] == 'X'))
+		c = *(s += 2);
+
+	if ((val = CTOI(c)) >= base) {
+		if (IS_DIGIT(c))
+			bad_digit = 1;
+		else
+			bad_char = 1;
+		val = 0;
+	}
+
+	multmax = (uint64_t)UINT64_MAX / (uint64_t)base;
+
+	for (c = *++s; c != '\0'; c = *++s) {
+		if ((i = CTOI(c)) >= base) {
+			if (isspace(c))
+				break;
+			if (IS_DIGIT(c))
+				bad_digit = 1;
+			else
+				bad_char = 1;
+			i = 0;
+		}
+
+		if (val > multmax)
+			overflow = 1;
+
+		val *= base;
+		if ((uint64_t)UINT64_MAX - val < (uint64_t)i)
+			overflow = 1;
+
+		val += i;
+	}
+
+	while ((c = *s) != 0) {
+		if (!isspace(c))
+			bad_char = 1;
+		s++;
+	}
+
+	if (sign) {
+		if (neg) {
+			if (val > -(uint64_t)INT64_MIN)
+				overflow = 1;
+		} else {
+			if (val > INT64_MAX)
+				overflow = 1;
+		}
+	}
+
+	if (neg)
+		val = -val;
+
+	if (bad_char | bad_digit | overflow) {
+		if (bad_char)
+			uu_set_error(UU_ERROR_INVALID_CHAR);
+		else if (bad_digit)
+			uu_set_error(UU_ERROR_INVALID_DIGIT);
+		else if (overflow) {
+			if (neg)
+				uu_set_error(UU_ERROR_UNDERFLOW);
+			else
+				uu_set_error(UU_ERROR_OVERFLOW);
+		}
+		return (-1);
+	}
+
+	*out = val;
+	return (0);
+}
+
+int
+uu_strtoint(const char *s, void *v, size_t sz, int base,
+    int64_t min, int64_t max)
+{
+	uint64_t val_u;
+	int64_t val;
+
+	if (min > max)
+		goto bad_argument;
+
+	switch (sz) {
+	case 1:
+		if (max > INT8_MAX || min < INT8_MIN)
+			goto bad_argument;
+		break;
+	case 2:
+		if (max > INT16_MAX || min < INT16_MIN)
+			goto bad_argument;
+		break;
+	case 4:
+		if (max > INT32_MAX || min < INT32_MIN)
+			goto bad_argument;
+		break;
+	case 8:
+		if (max > INT64_MAX || min < INT64_MIN)
+			goto bad_argument;
+		break;
+	default:
+		goto bad_argument;
+	}
+
+	if (min == 0 && max == 0) {
+		min = -(1ULL << (8 * sz - 1));
+		max = (1ULL << (8 * sz - 1)) - 1;
+	}
+
+	if (strtoint(s, &val_u, base, 1) == -1)
+		return (-1);
+
+	val = (int64_t)val_u;
+
+	if (val < min) {
+		uu_set_error(UU_ERROR_UNDERFLOW);
+		return (-1);
+	} else if (val > max) {
+		uu_set_error(UU_ERROR_OVERFLOW);
+		return (-1);
+	}
+
+	switch (sz) {
+	case 1:
+		*(int8_t *)v = val;
+		return (0);
+	case 2:
+		*(int16_t *)v = val;
+		return (0);
+	case 4:
+		*(int32_t *)v = val;
+		return (0);
+	case 8:
+		*(int64_t *)v = val;
+		return (0);
+	default:
+		break;		/* fall through to bad_argument */
+	}
+
+bad_argument:
+	uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+	return (-1);
+}
+
+int
+uu_strtouint(const char *s, void *v, size_t sz, int base,
+    uint64_t min, uint64_t max)
+{
+	uint64_t val;
+
+	if (min > max)
+		goto bad_argument;
+
+	switch (sz) {
+	case 1:
+		if (max > UINT8_MAX)
+			goto bad_argument;
+		break;
+	case 2:
+		if (max > UINT16_MAX)
+			goto bad_argument;
+		break;
+	case 4:
+		if (max > UINT32_MAX)
+			goto bad_argument;
+		break;
+	case 8:
+		if (max > UINT64_MAX)
+			goto bad_argument;
+		break;
+	default:
+		goto bad_argument;
+	}
+
+	if (min == 0 && max == 0) {
+		/* we have to be careful, since << can overflow */
+		max = (1ULL << (8 * sz - 1)) * 2 - 1;
+	}
+
+	if (strtoint(s, &val, base, 0) == -1)
+		return (-1);
+
+	if (val < min) {
+		uu_set_error(UU_ERROR_UNDERFLOW);
+		return (-1);
+	} else if (val > max) {
+		uu_set_error(UU_ERROR_OVERFLOW);
+		return (-1);
+	}
+
+	switch (sz) {
+	case 1:
+		*(uint8_t *)v = val;
+		return (0);
+	case 2:
+		*(uint16_t *)v = val;
+		return (0);
+	case 4:
+		*(uint32_t *)v = val;
+		return (0);
+	case 8:
+		*(uint64_t *)v = val;
+		return (0);
+	default:
+		break;		/* shouldn't happen, fall through */
+	}
+
+bad_argument:
+	uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+	return (-1);
+}
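
Both converters above store their result through a caller-supplied pointer whose width is given by sz, and return -1 with the thread's uu_error() set on syntax or range problems; passing 0 for both min and max selects the full range of the target type. A short sketch (the strings and bounds are illustrative only):

#include <stdio.h>
#include <sys/types.h>
#include <libuutil.h>

int
main(void)
{
	int16_t port;
	uint64_t count;

	/* base 0: a leading "0x" selects hex, a leading "0" octal */
	if (uu_strtoint("0x1f90", &port, sizeof (port), 0, 0, 0) == 0)
		(void) printf("port = %d\n", port);	/* 8080 */

	/* 123456 is outside the explicit 1..1000 bounds */
	if (uu_strtouint("123456", &count, sizeof (count), 10, 1, 1000) == -1)
		(void) printf("rejected: %s\n", uu_strerror(uu_error()));

	return (0);
}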
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h
@@ -0,0 +1,384 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBUUTIL_H
+#define	_LIBUUTIL_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <solaris.h>
+#include <sys/types.h>
+#include <stdarg.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Standard flags codes.
+ */
+#define	UU_DEFAULT		0
+
+/*
+ * Standard error codes.
+ */
+#define	UU_ERROR_NONE		0	/* no error */
+#define	UU_ERROR_INVALID_ARGUMENT 1	/* invalid argument */
+#define	UU_ERROR_UNKNOWN_FLAG	2	/* passed flag invalid */
+#define	UU_ERROR_NO_MEMORY	3	/* out of memory */
+#define	UU_ERROR_CALLBACK_FAILED 4	/* callback-initiated error */
+#define	UU_ERROR_NOT_SUPPORTED	5	/* operation not supported */
+#define	UU_ERROR_EMPTY		6	/* no value provided */
+#define	UU_ERROR_UNDERFLOW	7	/* value is too small */
+#define	UU_ERROR_OVERFLOW	8	/* value is too large */
+#define	UU_ERROR_INVALID_CHAR	9	/* value contains unexpected char */
+#define	UU_ERROR_INVALID_DIGIT	10	/* value contains digit not in base */
+
+#define	UU_ERROR_SYSTEM		99	/* underlying system error */
+#define	UU_ERROR_UNKNOWN	100	/* error status not known */
+
+/*
+ * Standard program exit codes.
+ */
+#define	UU_EXIT_OK	(*(uu_exit_ok()))
+#define	UU_EXIT_FATAL	(*(uu_exit_fatal()))
+#define	UU_EXIT_USAGE	(*(uu_exit_usage()))
+
+/*
+ * Exit status profiles.
+ */
+#define	UU_PROFILE_DEFAULT	0
+#define	UU_PROFILE_LAUNCHER	1
+
+/*
+ * Error reporting functions.
+ */
+uint32_t uu_error(void);
+const char *uu_strerror(uint32_t);
+
+/*
+ * Program notification functions.
+ */
+extern void uu_alt_exit(int);
+extern const char *uu_setpname(char *);
+extern const char *uu_getpname(void);
+/*PRINTFLIKE1*/
+extern void uu_warn(const char *, ...);
+extern void uu_vwarn(const char *, va_list);
+/*PRINTFLIKE1*/
+extern void uu_die(const char *, ...) __NORETURN;
+extern void uu_vdie(const char *, va_list) __NORETURN;
+/*PRINTFLIKE2*/
+extern void uu_xdie(int, const char *, ...) __NORETURN;
+extern void uu_vxdie(int, const char *, va_list) __NORETURN;
+
+/*
+ * Exit status functions (not to be used directly)
+ */
+extern int *uu_exit_ok(void);
+extern int *uu_exit_fatal(void);
+extern int *uu_exit_usage(void);
+
+/*
+ * string->number conversions
+ */
+extern int uu_strtoint(const char *, void *, size_t, int, int64_t, int64_t);
+extern int uu_strtouint(const char *, void *, size_t, int, uint64_t, uint64_t);
+
+/*
+ * Debug print facility functions.
+ */
+typedef struct uu_dprintf uu_dprintf_t;
+
+typedef enum {
+	UU_DPRINTF_SILENT,
+	UU_DPRINTF_FATAL,
+	UU_DPRINTF_WARNING,
+	UU_DPRINTF_NOTICE,
+	UU_DPRINTF_INFO,
+	UU_DPRINTF_DEBUG
+} uu_dprintf_severity_t;
+
+extern uu_dprintf_t *uu_dprintf_create(const char *, uu_dprintf_severity_t,
+    uint_t);
+/*PRINTFLIKE3*/
+extern void uu_dprintf(uu_dprintf_t *, uu_dprintf_severity_t,
+    const char *, ...);
+extern void uu_dprintf_destroy(uu_dprintf_t *);
+extern const char *uu_dprintf_getname(uu_dprintf_t *);
+
+/*
+ * Identifier test flags and function.
+ */
+#define	UU_NAME_DOMAIN		0x1	/* allow SUNW, or com.sun, prefix */
+#define	UU_NAME_PATH		0x2	/* allow '/'-delimited paths */
+
+int uu_check_name(const char *, uint_t);
+
+/*
+ * File creation functions.
+ */
+extern int uu_open_tmp(const char *dir, uint_t uflags);
+
+/*
+ * Convenience functions.
+ */
+/*PRINTFLIKE1*/
+extern char *uu_msprintf(const char *format, ...);
+extern void *uu_zalloc(size_t);
+extern void uu_free(void *);
+
+/*
+ * Comparison function type definition.
+ *   Developers should be careful in their use of the _private argument. If you
+ *   break interface guarantees, you get undefined behavior.
+ */
+typedef int uu_compare_fn_t(const void *__left, const void *__right,
+    void *__private);
+
+/*
+ * Walk variant flags.
+ *   A data structure need not provide support for all variants and
+ *   combinations.  Refer to the appropriate documentation.
+ */
+#define	UU_WALK_ROBUST		0x00000001	/* walk can survive removes */
+#define	UU_WALK_REVERSE		0x00000002	/* reverse walk order */
+
+#define	UU_WALK_PREORDER	0x00000010	/* walk tree in pre-order */
+#define	UU_WALK_POSTORDER	0x00000020	/* walk tree in post-order */
+
+/*
+ * Walk callback function return codes.
+ */
+#define	UU_WALK_ERROR		-1
+#define	UU_WALK_NEXT		0
+#define	UU_WALK_DONE		1
+
+/*
+ * Walk callback function type definition.
+ */
+typedef int uu_walk_fn_t(void *_elem, void *_private);
+
+/*
+ * lists: opaque structures
+ */
+typedef struct uu_list_pool uu_list_pool_t;
+typedef struct uu_list uu_list_t;
+
+typedef struct uu_list_node {
+	uintptr_t uln_opaque[2];
+} uu_list_node_t;
+
+typedef struct uu_list_walk uu_list_walk_t;
+
+typedef uintptr_t uu_list_index_t;
+
+/*
+ * lists: interface
+ *
+ * basic usage:
+ *	typedef struct foo {
+ *		...
+ *		uu_list_node_t foo_node;
+ *		...
+ *	} foo_t;
+ *
+ *	static int
+ *	foo_compare(void *l_arg, void *r_arg, void *private)
+ *	{
+ *		foo_t *l = l_arg;
+ *		foo_t *r = r_arg;
+ *
+ *		if (... l greater than r ...)
+ *			return (1);
+ *		if (... l less than r ...)
+ *			return (-1);
+ *		return (0);
+ *	}
+ *
+ *	...
+ *		// at initialization time
+ *		foo_pool = uu_list_pool_create("foo_pool",
+ *		    sizeof (foo_t), offsetof(foo_t, foo_node), foo_compare,
+ *		    debugging? 0 : UU_LIST_POOL_DEBUG);
+ *	...
+ */
+uu_list_pool_t *uu_list_pool_create(const char *, size_t, size_t,
+    uu_compare_fn_t *, uint32_t);
+#define	UU_LIST_POOL_DEBUG	0x00000001
+
+void uu_list_pool_destroy(uu_list_pool_t *);
+
+/*
+ * usage:
+ *
+ *	foo_t *a;
+ *	a = malloc(sizeof(*a));
+ *	uu_list_node_init(a, &a->foo_list, pool);
+ *	...
+ *	uu_list_node_fini(a, &a->foo_list, pool);
+ *	free(a);
+ */
+void uu_list_node_init(void *, uu_list_node_t *, uu_list_pool_t *);
+void uu_list_node_fini(void *, uu_list_node_t *, uu_list_pool_t *);
+
+uu_list_t *uu_list_create(uu_list_pool_t *, void *_parent, uint32_t);
+#define	UU_LIST_DEBUG	0x00000001
+#define	UU_LIST_SORTED	0x00000002	/* list is sorted */
+
+void uu_list_destroy(uu_list_t *);	/* list must be empty */
+
+size_t uu_list_numnodes(uu_list_t *);
+
+void *uu_list_first(uu_list_t *);
+void *uu_list_last(uu_list_t *);
+
+void *uu_list_next(uu_list_t *, void *);
+void *uu_list_prev(uu_list_t *, void *);
+
+int uu_list_walk(uu_list_t *, uu_walk_fn_t *, void *, uint32_t);
+
+uu_list_walk_t *uu_list_walk_start(uu_list_t *, uint32_t);
+void *uu_list_walk_next(uu_list_walk_t *);
+void uu_list_walk_end(uu_list_walk_t *);
+
+void *uu_list_find(uu_list_t *, void *, void *, uu_list_index_t *);
+void uu_list_insert(uu_list_t *, void *, uu_list_index_t);
+
+void *uu_list_nearest_next(uu_list_t *, uu_list_index_t);
+void *uu_list_nearest_prev(uu_list_t *, uu_list_index_t);
+
+void *uu_list_teardown(uu_list_t *, void **);
+
+void uu_list_remove(uu_list_t *, void *);
+
+/*
+ * lists: interfaces for non-sorted lists only
+ */
+int uu_list_insert_before(uu_list_t *, void *_target, void *_elem);
+int uu_list_insert_after(uu_list_t *, void *_target, void *_elem);
+
+/*
+ * avl trees: opaque structures
+ */
+typedef struct uu_avl_pool uu_avl_pool_t;
+typedef struct uu_avl uu_avl_t;
+
+typedef struct uu_avl_node {
+#ifdef _LP64
+	uintptr_t uan_opaque[3];
+#else
+	uintptr_t uan_opaque[4];
+#endif
+} uu_avl_node_t;
+
+typedef struct uu_avl_walk uu_avl_walk_t;
+
+typedef uintptr_t uu_avl_index_t;
+
+/*
+ * avl trees: interface
+ *
+ * basic usage:
+ *	typedef struct foo {
+ *		...
+ *		uu_avl_node_t foo_node;
+ *		...
+ *	} foo_t;
+ *
+ *	static int
+ *	foo_compare(void *l_arg, void *r_arg, void *private)
+ *	{
+ *		foo_t *l = l_arg;
+ *		foo_t *r = r_arg;
+ *
+ *		if (... l greater than r ...)
+ *			return (1);
+ *		if (... l less than r ...)
+ *			return (-1);
+ *		return (0);
+ *	}
+ *
+ *	...
+ *		// at initialization time
+ *		foo_pool = uu_avl_pool_create("foo_pool",
+ *		    sizeof (foo_t), offsetof(foo_t, foo_node), foo_compare,
+ *		    debugging? 0 : UU_AVL_POOL_DEBUG);
+ *	...
+ */
+uu_avl_pool_t *uu_avl_pool_create(const char *, size_t, size_t,
+    uu_compare_fn_t *, uint32_t);
+#define	UU_AVL_POOL_DEBUG	0x00000001
+
+void uu_avl_pool_destroy(uu_avl_pool_t *);
+
+/*
+ * usage:
+ *
+ *	foo_t *a;
+ *	a = malloc(sizeof(*a));
+ *	uu_avl_node_init(a, &a->foo_avl, pool);
+ *	...
+ *	uu_avl_node_fini(a, &a->foo_avl, pool);
+ *	free(a);
+ */
+void uu_avl_node_init(void *, uu_avl_node_t *, uu_avl_pool_t *);
+void uu_avl_node_fini(void *, uu_avl_node_t *, uu_avl_pool_t *);
+
+uu_avl_t *uu_avl_create(uu_avl_pool_t *, void *_parent, uint32_t);
+#define	UU_AVL_DEBUG	0x00000001
+
+void uu_avl_destroy(uu_avl_t *);	/* tree must be empty */
+
+size_t uu_avl_numnodes(uu_avl_t *);
+
+void *uu_avl_first(uu_avl_t *);
+void *uu_avl_last(uu_avl_t *);
+
+void *uu_avl_next(uu_avl_t *, void *);
+void *uu_avl_prev(uu_avl_t *, void *);
+
+int uu_avl_walk(uu_avl_t *, uu_walk_fn_t *, void *, uint32_t);
+
+uu_avl_walk_t *uu_avl_walk_start(uu_avl_t *, uint32_t);
+void *uu_avl_walk_next(uu_avl_walk_t *);
+void uu_avl_walk_end(uu_avl_walk_t *);
+
+void *uu_avl_find(uu_avl_t *, void *, void *, uu_avl_index_t *);
+void uu_avl_insert(uu_avl_t *, void *, uu_avl_index_t);
+
+void *uu_avl_nearest_next(uu_avl_t *, uu_avl_index_t);
+void *uu_avl_nearest_prev(uu_avl_t *, uu_avl_index_t);
+
+void *uu_avl_teardown(uu_avl_t *, void **);
+
+void uu_avl_remove(uu_avl_t *, void *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBUUTIL_H */
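
Beyond the list and AVL interfaces, this header also declares the program-notification and exit-status helpers that the zfs and zpool commands in this import build on. A minimal sketch of that idiom, assuming a hypothetical command (message text and argument handling are illustrative):

#include <libuutil.h>

int
main(int argc, char *argv[])
{
	(void) uu_setpname(argv[0]);

	if (argc < 2) {
		uu_warn("missing operand\n");
		return (UU_EXIT_USAGE);
	}

	/* uu_die() reports the problem and does not return (__NORETURN) */
	if (argc > 2)
		uu_die("too many operands\n");

	return (UU_EXIT_OK);
}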
--- /dev/null
+++ cddl/contrib/opensolaris/OPENSOLARIS.LICENSE
@@ -0,0 +1,384 @@
+Unless otherwise noted, all files in this distribution are released
+under the Common Development and Distribution License (CDDL).
+Exceptions are noted within the associated source files.
+
+--------------------------------------------------------------------
+
+
+COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0
+
+1. Definitions.
+
+    1.1. "Contributor" means each individual or entity that creates
+         or contributes to the creation of Modifications.
+
+    1.2. "Contributor Version" means the combination of the Original
+         Software, prior Modifications used by a Contributor (if any),
+         and the Modifications made by that particular Contributor.
+
+    1.3. "Covered Software" means (a) the Original Software, or (b)
+         Modifications, or (c) the combination of files containing
+         Original Software with files containing Modifications, in
+         each case including portions thereof.
+
+    1.4. "Executable" means the Covered Software in any form other
+         than Source Code.
+
+    1.5. "Initial Developer" means the individual or entity that first
+         makes Original Software available under this License.
+
+    1.6. "Larger Work" means a work which combines Covered Software or
+         portions thereof with code not governed by the terms of this
+         License.
+
+    1.7. "License" means this document.
+
+    1.8. "Licensable" means having the right to grant, to the maximum
+         extent possible, whether at the time of the initial grant or
+         subsequently acquired, any and all of the rights conveyed
+         herein.
+
+    1.9. "Modifications" means the Source Code and Executable form of
+         any of the following:
+
+        A. Any file that results from an addition to, deletion from or
+           modification of the contents of a file containing Original
+           Software or previous Modifications;
+
+        B. Any new file that contains any part of the Original
+           Software or previous Modifications; or
+
+        C. Any new file that is contributed or otherwise made
+           available under the terms of this License.
+
+    1.10. "Original Software" means the Source Code and Executable
+          form of computer software code that is originally released
+          under this License.
+
+    1.11. "Patent Claims" means any patent claim(s), now owned or
+          hereafter acquired, including without limitation, method,
+          process, and apparatus claims, in any patent Licensable by
+          grantor.
+
+    1.12. "Source Code" means (a) the common form of computer software
+          code in which modifications are made and (b) associated
+          documentation included in or with such code.
+
+    1.13. "You" (or "Your") means an individual or a legal entity
+          exercising rights under, and complying with all of the terms
+          of, this License.  For legal entities, "You" includes any
+          entity which controls, is controlled by, or is under common
+          control with You.  For purposes of this definition,
+          "control" means (a) the power, direct or indirect, to cause
+          the direction or management of such entity, whether by
+          contract or otherwise, or (b) ownership of more than fifty
+          percent (50%) of the outstanding shares or beneficial
+          ownership of such entity.
+
+2. License Grants.
+
+    2.1. The Initial Developer Grant.
+
+    Conditioned upon Your compliance with Section 3.1 below and
+    subject to third party intellectual property claims, the Initial
+    Developer hereby grants You a world-wide, royalty-free,
+    non-exclusive license:
+
+        (a) under intellectual property rights (other than patent or
+            trademark) Licensable by Initial Developer, to use,
+            reproduce, modify, display, perform, sublicense and
+            distribute the Original Software (or portions thereof),
+            with or without Modifications, and/or as part of a Larger
+            Work; and
+
+        (b) under Patent Claims infringed by the making, using or
+            selling of Original Software, to make, have made, use,
+            practice, sell, and offer for sale, and/or otherwise
+            dispose of the Original Software (or portions thereof).
+
+        (c) The licenses granted in Sections 2.1(a) and (b) are
+            effective on the date Initial Developer first distributes
+            or otherwise makes the Original Software available to a
+            third party under the terms of this License.
+
+        (d) Notwithstanding Section 2.1(b) above, no patent license is
+            granted: (1) for code that You delete from the Original
+            Software, or (2) for infringements caused by: (i) the
+            modification of the Original Software, or (ii) the
+            combination of the Original Software with other software
+            or devices.
+
+    2.2. Contributor Grant.
+
+    Conditioned upon Your compliance with Section 3.1 below and
+    subject to third party intellectual property claims, each
+    Contributor hereby grants You a world-wide, royalty-free,
+    non-exclusive license:
+
+        (a) under intellectual property rights (other than patent or
+            trademark) Licensable by Contributor to use, reproduce,
+            modify, display, perform, sublicense and distribute the
+            Modifications created by such Contributor (or portions
+            thereof), either on an unmodified basis, with other
+            Modifications, as Covered Software and/or as part of a
+            Larger Work; and
+
+        (b) under Patent Claims infringed by the making, using, or
+            selling of Modifications made by that Contributor either
+            alone and/or in combination with its Contributor Version
+            (or portions of such combination), to make, use, sell,
+            offer for sale, have made, and/or otherwise dispose of:
+            (1) Modifications made by that Contributor (or portions
+            thereof); and (2) the combination of Modifications made by
+            that Contributor with its Contributor Version (or portions
+            of such combination).
+
+        (c) The licenses granted in Sections 2.2(a) and 2.2(b) are
+            effective on the date Contributor first distributes or
+            otherwise makes the Modifications available to a third
+            party.
+
+        (d) Notwithstanding Section 2.2(b) above, no patent license is
+            granted: (1) for any code that Contributor has deleted
+            from the Contributor Version; (2) for infringements caused
+            by: (i) third party modifications of Contributor Version,
+            or (ii) the combination of Modifications made by that
+            Contributor with other software (except as part of the
+            Contributor Version) or other devices; or (3) under Patent
+            Claims infringed by Covered Software in the absence of
+            Modifications made by that Contributor.
+
+3. Distribution Obligations.
+
+    3.1. Availability of Source Code.
+
+    Any Covered Software that You distribute or otherwise make
+    available in Executable form must also be made available in Source
+    Code form and that Source Code form must be distributed only under
+    the terms of this License.  You must include a copy of this
+    License with every copy of the Source Code form of the Covered
+    Software You distribute or otherwise make available.  You must
+    inform recipients of any such Covered Software in Executable form
+    as to how they can obtain such Covered Software in Source Code
+    form in a reasonable manner on or through a medium customarily
+    used for software exchange.
+
+    3.2. Modifications.
+
+    The Modifications that You create or to which You contribute are
+    governed by the terms of this License.  You represent that You
+    believe Your Modifications are Your original creation(s) and/or
+    You have sufficient rights to grant the rights conveyed by this
+    License.
+
+    3.3. Required Notices.
+
+    You must include a notice in each of Your Modifications that
+    identifies You as the Contributor of the Modification.  You may
+    not remove or alter any copyright, patent or trademark notices
+    contained within the Covered Software, or any notices of licensing
+    or any descriptive text giving attribution to any Contributor or
+    the Initial Developer.
+
+    3.4. Application of Additional Terms.
+
+    You may not offer or impose any terms on any Covered Software in
+    Source Code form that alters or restricts the applicable version
+    of this License or the recipients' rights hereunder.  You may
+    choose to offer, and to charge a fee for, warranty, support,
+    indemnity or liability obligations to one or more recipients of
+    Covered Software.  However, you may do so only on Your own behalf,
+    and not on behalf of the Initial Developer or any Contributor.
+    You must make it absolutely clear that any such warranty, support,
+    indemnity or liability obligation is offered by You alone, and You
+    hereby agree to indemnify the Initial Developer and every
+    Contributor for any liability incurred by the Initial Developer or
+    such Contributor as a result of warranty, support, indemnity or
+    liability terms You offer.
+
+    3.5. Distribution of Executable Versions.
+
+    You may distribute the Executable form of the Covered Software
+    under the terms of this License or under the terms of a license of
+    Your choice, which may contain terms different from this License,
+    provided that You are in compliance with the terms of this License
+    and that the license for the Executable form does not attempt to
+    limit or alter the recipient's rights in the Source Code form from
+    the rights set forth in this License.  If You distribute the
+    Covered Software in Executable form under a different license, You
+    must make it absolutely clear that any terms which differ from
+    this License are offered by You alone, not by the Initial
+    Developer or Contributor.  You hereby agree to indemnify the
+    Initial Developer and every Contributor for any liability incurred
+    by the Initial Developer or such Contributor as a result of any
+    such terms You offer.
+
+    3.6. Larger Works.
+
+    You may create a Larger Work by combining Covered Software with
+    other code not governed by the terms of this License and
+    distribute the Larger Work as a single product.  In such a case,
+    You must make sure the requirements of this License are fulfilled
+    for the Covered Software.
+
+4. Versions of the License.
+
+    4.1. New Versions.
+
+    Sun Microsystems, Inc. is the initial license steward and may
+    publish revised and/or new versions of this License from time to
+    time.  Each version will be given a distinguishing version number.
+    Except as provided in Section 4.3, no one other than the license
+    steward has the right to modify this License.
+
+    4.2. Effect of New Versions.
+
+    You may always continue to use, distribute or otherwise make the
+    Covered Software available under the terms of the version of the
+    License under which You originally received the Covered Software.
+    If the Initial Developer includes a notice in the Original
+    Software prohibiting it from being distributed or otherwise made
+    available under any subsequent version of the License, You must
+    distribute and make the Covered Software available under the terms
+    of the version of the License under which You originally received
+    the Covered Software.  Otherwise, You may also choose to use,
+    distribute or otherwise make the Covered Software available under
+    the terms of any subsequent version of the License published by
+    the license steward.
+
+    4.3. Modified Versions.
+
+    When You are an Initial Developer and You want to create a new
+    license for Your Original Software, You may create and use a
+    modified version of this License if You: (a) rename the license
+    and remove any references to the name of the license steward
+    (except to note that the license differs from this License); and
+    (b) otherwise make it clear that the license contains terms which
+    differ from this License.
+
+5. DISCLAIMER OF WARRANTY.
+
+    COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS"
+    BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
+    INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED
+    SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR
+    PURPOSE OR NON-INFRINGING.  THE ENTIRE RISK AS TO THE QUALITY AND
+    PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU.  SHOULD ANY
+    COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE
+    INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY
+    NECESSARY SERVICING, REPAIR OR CORRECTION.  THIS DISCLAIMER OF
+    WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE.  NO USE OF
+    ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS
+    DISCLAIMER.
+
+6. TERMINATION.
+
+    6.1. This License and the rights granted hereunder will terminate
+    automatically if You fail to comply with terms herein and fail to
+    cure such breach within 30 days of becoming aware of the breach.
+    Provisions which, by their nature, must remain in effect beyond
+    the termination of this License shall survive.
+
+    6.2. If You assert a patent infringement claim (excluding
+    declaratory judgment actions) against Initial Developer or a
+    Contributor (the Initial Developer or Contributor against whom You
+    assert such claim is referred to as "Participant") alleging that
+    the Participant Software (meaning the Contributor Version where
+    the Participant is a Contributor or the Original Software where
+    the Participant is the Initial Developer) directly or indirectly
+    infringes any patent, then any and all rights granted directly or
+    indirectly to You by such Participant, the Initial Developer (if
+    the Initial Developer is not the Participant) and all Contributors
+    under Sections 2.1 and/or 2.2 of this License shall, upon 60 days
+    notice from Participant terminate prospectively and automatically
+    at the expiration of such 60 day notice period, unless if within
+    such 60 day period You withdraw Your claim with respect to the
+    Participant Software against such Participant either unilaterally
+    or pursuant to a written agreement with Participant.
+
+    6.3. In the event of termination under Sections 6.1 or 6.2 above,
+    all end user licenses that have been validly granted by You or any
+    distributor hereunder prior to termination (excluding licenses
+    granted to You by any distributor) shall survive termination.
+
+7. LIMITATION OF LIABILITY.
+
+    UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
+    (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE
+    INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF
+    COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE
+    LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR
+    CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT
+    LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK
+    STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER
+    COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN
+    INFORMED OF THE POSSIBILITY OF SUCH DAMAGES.  THIS LIMITATION OF
+    LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL
+    INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT
+    APPLICABLE LAW PROHIBITS SUCH LIMITATION.  SOME JURISDICTIONS DO
+    NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR
+    CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT
+    APPLY TO YOU.
+
+8. U.S. GOVERNMENT END USERS.
+
+    The Covered Software is a "commercial item," as that term is
+    defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial
+    computer software" (as that term is defined at 48
+    C.F.R. 252.227-7014(a)(1)) and "commercial computer software
+    documentation" as such terms are used in 48 C.F.R. 12.212
+    (Sept. 1995).  Consistent with 48 C.F.R. 12.212 and 48
+    C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all
+    U.S. Government End Users acquire Covered Software with only those
+    rights set forth herein.  This U.S. Government Rights clause is in
+    lieu of, and supersedes, any other FAR, DFAR, or other clause or
+    provision that addresses Government rights in computer software
+    under this License.
+
+9. MISCELLANEOUS.
+
+    This License represents the complete agreement concerning subject
+    matter hereof.  If any provision of this License is held to be
+    unenforceable, such provision shall be reformed only to the extent
+    necessary to make it enforceable.  This License shall be governed
+    by the law of the jurisdiction specified in a notice contained
+    within the Original Software (except to the extent applicable law,
+    if any, provides otherwise), excluding such jurisdiction's
+    conflict-of-law provisions.  Any litigation relating to this
+    License shall be subject to the jurisdiction of the courts located
+    in the jurisdiction and venue specified in a notice contained
+    within the Original Software, with the losing party responsible
+    for costs, including, without limitation, court costs and
+    reasonable attorneys' fees and expenses.  The application of the
+    United Nations Convention on Contracts for the International Sale
+    of Goods is expressly excluded.  Any law or regulation which
+    provides that the language of a contract shall be construed
+    against the drafter shall not apply to this License.  You agree
+    that You alone are responsible for compliance with the United
+    States export administration regulations (and the export control
+    laws and regulation of any other countries) when You use,
+    distribute or otherwise make available any Covered Software.
+
+10. RESPONSIBILITY FOR CLAIMS.
+
+    As between Initial Developer and the Contributors, each party is
+    responsible for claims and damages arising, directly or
+    indirectly, out of its utilization of rights under this License
+    and You agree to work with Initial Developer and Contributors to
+    distribute such responsibility on an equitable basis.  Nothing
+    herein is intended or shall be deemed to constitute any admission
+    of liability.
+
+--------------------------------------------------------------------
+
+NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND
+DISTRIBUTION LICENSE (CDDL)
+
+For Covered Software in this distribution, this License shall
+be governed by the laws of the State of California (excluding
+conflict-of-law provisions).
+
+Any litigation relating to this License shall be subject to the
+jurisdiction of the Federal Courts of the Northern District of
+California and the state courts of the State of California, with
+venue lying in Santa Clara County, California.
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
@@ -0,0 +1,354 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Print intent log header and statistics.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/dmu.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <sys/zil.h>
+#include <sys/zil_impl.h>
+
+extern uint8_t dump_opt[256];
+
+static void
+print_log_bp(const blkptr_t *bp, const char *prefix)
+{
+	char blkbuf[BP_SPRINTF_LEN];
+
+	sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
+	(void) printf("%s%s\n", prefix, blkbuf);
+}
+
+/* ARGSUSED */
+static void
+zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
+{
+	time_t crtime = lr->lr_crtime[0];
+	char *name = (char *)(lr + 1);
+	char *link = name + strlen(name) + 1;
+
+	if (txtype == TX_SYMLINK)
+		(void) printf("\t\t\t%s -> %s\n", name, link);
+	else
+		(void) printf("\t\t\t%s\n", name);
+
+	(void) printf("\t\t\t%s", ctime(&crtime));
+	(void) printf("\t\t\tdoid %llu, foid %llu, mode %llo\n",
+	    (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_foid,
+	    (longlong_t)lr->lr_mode);
+	(void) printf("\t\t\tuid %llu, gid %llu, gen %llu, rdev 0x%llx\n",
+	    (u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid,
+	    (u_longlong_t)lr->lr_gen, (u_longlong_t)lr->lr_rdev);
+}
+
+/* ARGSUSED */
+static void
+zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
+{
+	(void) printf("\t\t\tdoid %llu, name %s\n",
+	    (u_longlong_t)lr->lr_doid, (char *)(lr + 1));
+}
+
+/* ARGSUSED */
+static void
+zil_prt_rec_link(zilog_t *zilog, int txtype, lr_link_t *lr)
+{
+	(void) printf("\t\t\tdoid %llu, link_obj %llu, name %s\n",
+	    (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_link_obj,
+	    (char *)(lr + 1));
+}
+
+/* ARGSUSED */
+static void
+zil_prt_rec_rename(zilog_t *zilog, int txtype, lr_rename_t *lr)
+{
+	char *snm = (char *)(lr + 1);
+	char *tnm = snm + strlen(snm) + 1;
+
+	(void) printf("\t\t\tsdoid %llu, tdoid %llu\n",
+	    (u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid);
+	(void) printf("\t\t\tsrc %s tgt %s\n", snm, tnm);
+}
+
+/* ARGSUSED */
+static void
+zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
+{
+	char *data, *dlimit;
+	blkptr_t *bp = &lr->lr_blkptr;
+	char buf[SPA_MAXBLOCKSIZE];
+	int verbose = MAX(dump_opt['d'], dump_opt['i']);
+	int error;
+
+	(void) printf("\t\t\tfoid %llu, offset 0x%llx,"
+	    " length 0x%llx, blkoff 0x%llx\n",
+	    (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
+	    (u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blkoff);
+
+	if (verbose < 5)
+		return;
+
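+	/*
+	 * A record that is exactly sizeof (lr_write_t) carries no inline
+	 * data: the write was logged by reference and must be read back
+	 * through the embedded block pointer.  Larger records carry their
+	 * data immediately after the lr_write_t itself.
+	 */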
+	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
+		(void) printf("\t\t\thas blkptr, %s\n",
+		    bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
+		    "will claim" : "won't claim");
+		print_log_bp(bp, "\t\t\t");
+		if (bp->blk_birth == 0) {
+			bzero(buf, sizeof (buf));
+		} else {
+			zbookmark_t zb;
+
+			ASSERT3U(bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ==,
+			    dmu_objset_id(zilog->zl_os));
+
+			zb.zb_objset = bp->blk_cksum.zc_word[ZIL_ZC_OBJSET];
+			zb.zb_object = 0;
+			zb.zb_level = -1;
+			zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];
+
+			error = zio_wait(zio_read(NULL, zilog->zl_spa,
+			    bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
+			    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
+			if (error)
+				return;
+		}
+		data = buf + lr->lr_blkoff;
+	} else {
+		data = (char *)(lr + 1);
+	}
+
+	dlimit = data + MIN(lr->lr_length,
+	    (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE));
+
+	(void) printf("\t\t\t");
+	while (data < dlimit) {
+		if (isprint(*data))
+			(void) printf("%c ", *data);
+		else
+			(void) printf("%2X", *data & 0xff);
+		data++;
+	}
+	(void) printf("\n");
+}
+
+/* ARGSUSED */
+static void
+zil_prt_rec_truncate(zilog_t *zilog, int txtype, lr_truncate_t *lr)
+{
+	(void) printf("\t\t\tfoid %llu, offset 0x%llx, length 0x%llx\n",
+	    (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
+	    (u_longlong_t)lr->lr_length);
+}
+
+/* ARGSUSED */
+static void
+zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr)
+{
+	time_t atime = (time_t)lr->lr_atime[0];
+	time_t mtime = (time_t)lr->lr_mtime[0];
+
+	(void) printf("\t\t\tfoid %llu, mask 0x%llx\n",
+	    (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_mask);
+
+	if (lr->lr_mask & AT_MODE) {
+		(void) printf("\t\t\tAT_MODE  %llo\n",
+		    (longlong_t)lr->lr_mode);
+	}
+
+	if (lr->lr_mask & AT_UID) {
+		(void) printf("\t\t\tAT_UID   %llu\n",
+		    (u_longlong_t)lr->lr_uid);
+	}
+
+	if (lr->lr_mask & AT_GID) {
+		(void) printf("\t\t\tAT_GID   %llu\n",
+		    (u_longlong_t)lr->lr_gid);
+	}
+
+	if (lr->lr_mask & AT_SIZE) {
+		(void) printf("\t\t\tAT_SIZE  %llu\n",
+		    (u_longlong_t)lr->lr_size);
+	}
+
+	if (lr->lr_mask & AT_ATIME) {
+		(void) printf("\t\t\tAT_ATIME %llu.%09llu %s",
+		    (u_longlong_t)lr->lr_atime[0],
+		    (u_longlong_t)lr->lr_atime[1],
+		    ctime(&atime));
+	}
+
+	if (lr->lr_mask & AT_MTIME) {
+		(void) printf("\t\t\tAT_MTIME %llu.%09llu %s",
+		    (u_longlong_t)lr->lr_mtime[0],
+		    (u_longlong_t)lr->lr_mtime[1],
+		    ctime(&mtime));
+	}
+}
+
+/* ARGSUSED */
+static void
+zil_prt_rec_acl(zilog_t *zilog, int txtype, lr_acl_t *lr)
+{
+	(void) printf("\t\t\tfoid %llu, aclcnt %llu\n",
+	    (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_aclcnt);
+}
+
+typedef void (*zil_prt_rec_func_t)();
+typedef struct zil_rec_info {
+	zil_prt_rec_func_t	zri_print;
+	char			*zri_name;
+	uint64_t		zri_count;
+} zil_rec_info_t;
+
+static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
+	{	NULL,			"Total      "	},
+	{	zil_prt_rec_create,	"TX_CREATE  "	},
+	{	zil_prt_rec_create,	"TX_MKDIR   "	},
+	{	zil_prt_rec_create,	"TX_MKXATTR "	},
+	{	zil_prt_rec_create,	"TX_SYMLINK "	},
+	{	zil_prt_rec_remove,	"TX_REMOVE  "	},
+	{	zil_prt_rec_remove,	"TX_RMDIR   "	},
+	{	zil_prt_rec_link,	"TX_LINK    "	},
+	{	zil_prt_rec_rename,	"TX_RENAME  "	},
+	{	zil_prt_rec_write,	"TX_WRITE   "	},
+	{	zil_prt_rec_truncate,	"TX_TRUNCATE"	},
+	{	zil_prt_rec_setattr,	"TX_SETATTR "	},
+	{	zil_prt_rec_acl,	"TX_ACL     "	},
+};
+
+/* ARGSUSED */
+static void
+print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
+{
+	int txtype;
+	int verbose = MAX(dump_opt['d'], dump_opt['i']);
+
+	txtype = lr->lrc_txtype;
+
+	ASSERT(txtype != 0 && (uint_t)txtype < TX_MAX_TYPE);
+	ASSERT(lr->lrc_txg);
+
+	(void) printf("\t\t%s len %6llu, txg %llu, seq %llu\n",
+	    zil_rec_info[txtype].zri_name,
+	    (u_longlong_t)lr->lrc_reclen,
+	    (u_longlong_t)lr->lrc_txg,
+	    (u_longlong_t)lr->lrc_seq);
+
+	if (txtype && verbose >= 3)
+		zil_rec_info[txtype].zri_print(zilog, txtype, lr);
+
+	zil_rec_info[txtype].zri_count++;
+	zil_rec_info[0].zri_count++;
+}
+
+/* ARGSUSED */
+static void
+print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
+{
+	char blkbuf[BP_SPRINTF_LEN];
+	int verbose = MAX(dump_opt['d'], dump_opt['i']);
+	char *claim;
+
+	if (verbose <= 3)
+		return;
+
+	if (verbose >= 5) {
+		(void) strcpy(blkbuf, ", ");
+		sprintf_blkptr(blkbuf + strlen(blkbuf),
+		    BP_SPRINTF_LEN - strlen(blkbuf), bp);
+	} else {
+		blkbuf[0] = '\0';
+	}
+
+	if (claim_txg != 0)
+		claim = "already claimed";
+	else if (bp->blk_birth >= spa_first_txg(zilog->zl_spa))
+		claim = "will claim";
+	else
+		claim = "won't claim";
+
+	(void) printf("\tBlock seqno %llu, %s%s\n",
+	    (u_longlong_t)bp->blk_cksum.zc_word[ZIL_ZC_SEQ], claim, blkbuf);
+}
+
+static void
+print_log_stats(int verbose)
+{
+	int i, w, p10;
+
+	if (verbose > 3)
+		(void) printf("\n");
+
+	if (zil_rec_info[0].zri_count == 0)
+		return;
+
+	for (w = 1, p10 = 10; zil_rec_info[0].zri_count >= p10; p10 *= 10)
+		w++;
+
+	for (i = 0; i < TX_MAX_TYPE; i++)
+		if (zil_rec_info[i].zri_count || verbose >= 3)
+			(void) printf("\t\t%s %*llu\n",
+			    zil_rec_info[i].zri_name, w,
+			    (u_longlong_t)zil_rec_info[i].zri_count);
+	(void) printf("\n");
+}
+
+/* ARGSUSED */
+void
+dump_intent_log(zilog_t *zilog)
+{
+	const zil_header_t *zh = zilog->zl_header;
+	int verbose = MAX(dump_opt['d'], dump_opt['i']);
+	int i;
+
+	if (zh->zh_log.blk_birth == 0 || verbose < 2)
+		return;
+
+	(void) printf("\n    ZIL header: claim_txg %llu, seq %llu\n",
+	    (u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_replay_seq);
+
+	if (verbose >= 4)
+		print_log_bp(&zh->zh_log, "\n\tfirst block: ");
+
+	for (i = 0; i < TX_MAX_TYPE; i++)
+		zil_rec_info[i].zri_count = 0;
+
+	if (verbose >= 2) {
+		(void) printf("\n");
+		(void) zil_parse(zilog, print_log_block, print_log_record, NULL,
+		    zh->zh_claim_txg);
+		print_log_stats(verbose);
+	}
+}
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zdb/zdb.c
@@ -0,0 +1,2193 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/dmu.h>
+#include <sys/zap.h>
+#include <sys/fs/zfs.h>
+#include <sys/zfs_znode.h>
+#include <sys/vdev.h>
+#include <sys/vdev_impl.h>
+#include <sys/metaslab_impl.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_pool.h>
+#include <sys/dbuf.h>
+#include <sys/zil.h>
+#include <sys/zil_impl.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <sys/dmu_traverse.h>
+#include <sys/zio_checksum.h>
+#include <sys/zio_compress.h>
+
+const char cmdname[] = "zdb";
+uint8_t dump_opt[256];
+
+typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
+
+extern void dump_intent_log(zilog_t *);
+uint64_t *zopt_object = NULL;
+int zopt_objects = 0;
+int zdb_advance = ADVANCE_PRE;
+zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 };
+
+/*
+ * These libumem hooks provide a reasonable set of defaults for the allocator's
+ * debugging facilities.
+ */
+const char *
+_umem_debug_init(void)
+{
+	return ("default,verbose"); /* $UMEM_DEBUG setting */
+}
+
+const char *
+_umem_logging_init(void)
+{
+	return ("fail,contents"); /* $UMEM_LOGGING setting */
+}
+
+static void
+usage(void)
+{
+	(void) fprintf(stderr,
+	    "Usage: %s [-udibcsvLU] [-O order] [-B os:obj:level:blkid] "
+	    "dataset [object...]\n"
+	    "       %s -C [pool]\n"
+	    "       %s -l dev\n"
+	    "       %s -R vdev:offset:size:flags\n",
+	    cmdname, cmdname, cmdname, cmdname);
+
+	(void) fprintf(stderr, "	-u uberblock\n");
+	(void) fprintf(stderr, "	-d datasets\n");
+	(void) fprintf(stderr, "        -C cached pool configuration\n");
+	(void) fprintf(stderr, "	-i intent logs\n");
+	(void) fprintf(stderr, "	-b block statistics\n");
+	(void) fprintf(stderr, "	-c checksum all data blocks\n");
+	(void) fprintf(stderr, "	-s report stats on zdb's I/O\n");
+	(void) fprintf(stderr, "	-v verbose (applies to all others)\n");
+	(void) fprintf(stderr, "        -l dump label contents\n");
+	(void) fprintf(stderr, "	-L live pool (allows some errors)\n");
+	(void) fprintf(stderr, "	-O [!]<pre|post|prune|data|holes> "
+	    "visitation order\n");
+	(void) fprintf(stderr, "	-U use zpool.cache in /tmp\n");
+	(void) fprintf(stderr, "	-B objset:object:level:blkid -- "
+	    "simulate bad block\n");
+	(void) fprintf(stderr, "        -R read and display block from a "
+	    "device\n");
+	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
+	    "to make only that option verbose\n");
+	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
+	exit(1);
+}
+
+static void
+fatal(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	(void) fprintf(stderr, "%s: ", cmdname);
+	(void) vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	(void) fprintf(stderr, "\n");
+
+	exit(1);
+}
+
+static void
+dump_nvlist(nvlist_t *list, int indent)
+{
+	nvpair_t *elem = NULL;
+
+	while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
+		switch (nvpair_type(elem)) {
+		case DATA_TYPE_STRING:
+			{
+				char *value;
+
+				VERIFY(nvpair_value_string(elem, &value) == 0);
+				(void) printf("%*s%s='%s'\n", indent, "",
+				    nvpair_name(elem), value);
+			}
+			break;
+
+		case DATA_TYPE_UINT64:
+			{
+				uint64_t value;
+
+				VERIFY(nvpair_value_uint64(elem, &value) == 0);
+				(void) printf("%*s%s=%llu\n", indent, "",
+				    nvpair_name(elem), (u_longlong_t)value);
+			}
+			break;
+
+		case DATA_TYPE_NVLIST:
+			{
+				nvlist_t *value;
+
+				VERIFY(nvpair_value_nvlist(elem, &value) == 0);
+				(void) printf("%*s%s\n", indent, "",
+				    nvpair_name(elem));
+				dump_nvlist(value, indent + 4);
+			}
+			break;
+
+		case DATA_TYPE_NVLIST_ARRAY:
+			{
+				nvlist_t **value;
+				uint_t c, count;
+
+				VERIFY(nvpair_value_nvlist_array(elem, &value,
+				    &count) == 0);
+
+				for (c = 0; c < count; c++) {
+					(void) printf("%*s%s[%u]\n", indent, "",
+					    nvpair_name(elem), c);
+					dump_nvlist(value[c], indent + 8);
+				}
+			}
+			break;
+
+		default:
+
+			(void) printf("bad config type %d for %s\n",
+			    nvpair_type(elem), nvpair_name(elem));
+		}
+	}
+}
+
+/* ARGSUSED */
+static void
+dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	nvlist_t *nv;
+	size_t nvsize = *(uint64_t *)data;
+	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
+
+	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed));
+
+	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
+
+	umem_free(packed, nvsize);
+
+	dump_nvlist(nv, 8);
+
+	nvlist_free(nv);
+}
+
+const char dump_zap_stars[] = "****************************************";
+const int dump_zap_width = sizeof (dump_zap_stars) - 1;
+
+static void
+dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
+{
+	int i;
+	int minidx = ZAP_HISTOGRAM_SIZE - 1;
+	int maxidx = 0;
+	uint64_t max = 0;
+
+	for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
+		if (histo[i] > max)
+			max = histo[i];
+		if (histo[i] > 0 && i > maxidx)
+			maxidx = i;
+		if (histo[i] > 0 && i < minidx)
+			minidx = i;
+	}
+
+	if (max < dump_zap_width)
+		max = dump_zap_width;
+
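+	/*
+	 * Print every bucket between the smallest and largest populated
+	 * indices, scaling the bar so the largest count gets dump_zap_width
+	 * stars.
+	 */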
+	for (i = minidx; i <= maxidx; i++)
+		(void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
+		    &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
+}
+
+static void
+dump_zap_stats(objset_t *os, uint64_t object)
+{
+	int error;
+	zap_stats_t zs;
+
+	error = zap_get_stats(os, object, &zs);
+	if (error)
+		return;
+
+	if (zs.zs_ptrtbl_len == 0) {
+		ASSERT(zs.zs_num_blocks == 1);
+		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
+		    (u_longlong_t)zs.zs_blocksize,
+		    (u_longlong_t)zs.zs_num_entries);
+		return;
+	}
+
+	(void) printf("\tFat ZAP stats:\n");
+
+	(void) printf("\t\tPointer table:\n");
+	(void) printf("\t\t\t%llu elements\n",
+	    (u_longlong_t)zs.zs_ptrtbl_len);
+	(void) printf("\t\t\tzt_blk: %llu\n",
+	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
+	(void) printf("\t\t\tzt_numblks: %llu\n",
+	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
+	(void) printf("\t\t\tzt_shift: %llu\n",
+	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
+	(void) printf("\t\t\tzt_blks_copied: %llu\n",
+	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
+	(void) printf("\t\t\tzt_nextblk: %llu\n",
+	    (u_longlong_t)zs.zs_ptrtbl_nextblk);
+
+	(void) printf("\t\tZAP entries: %llu\n",
+	    (u_longlong_t)zs.zs_num_entries);
+	(void) printf("\t\tLeaf blocks: %llu\n",
+	    (u_longlong_t)zs.zs_num_leafs);
+	(void) printf("\t\tTotal blocks: %llu\n",
+	    (u_longlong_t)zs.zs_num_blocks);
+	(void) printf("\t\tzap_block_type: 0x%llx\n",
+	    (u_longlong_t)zs.zs_block_type);
+	(void) printf("\t\tzap_magic: 0x%llx\n",
+	    (u_longlong_t)zs.zs_magic);
+	(void) printf("\t\tzap_salt: 0x%llx\n",
+	    (u_longlong_t)zs.zs_salt);
+
+	(void) printf("\t\tLeafs with 2^n pointers:\n");
+	dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
+
+	(void) printf("\t\tBlocks with n*5 entries:\n");
+	dump_zap_histogram(zs.zs_blocks_with_n5_entries);
+
+	(void) printf("\t\tBlocks n/10 full:\n");
+	dump_zap_histogram(zs.zs_blocks_n_tenths_full);
+
+	(void) printf("\t\tEntries with n chunks:\n");
+	dump_zap_histogram(zs.zs_entries_using_n_chunks);
+
+	(void) printf("\t\tBuckets with n entries:\n");
+	dump_zap_histogram(zs.zs_buckets_with_n_entries);
+}
+
+/*ARGSUSED*/
+static void
+dump_none(objset_t *os, uint64_t object, void *data, size_t size)
+{
+}
+
+/*ARGSUSED*/
+void
+dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
+{
+}
+
+/*ARGSUSED*/
+static void
+dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
+{
+}
+
+/*ARGSUSED*/
+static void
+dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	zap_cursor_t zc;
+	zap_attribute_t attr;
+	void *prop;
+	int i;
+
+	dump_zap_stats(os, object);
+	(void) printf("\n");
+
+	for (zap_cursor_init(&zc, os, object);
+	    zap_cursor_retrieve(&zc, &attr) == 0;
+	    zap_cursor_advance(&zc)) {
+		(void) printf("\t\t%s = ", attr.za_name);
+		if (attr.za_num_integers == 0) {
+			(void) printf("\n");
+			continue;
+		}
+		prop = umem_zalloc(attr.za_num_integers *
+		    attr.za_integer_length, UMEM_NOFAIL);
+		(void) zap_lookup(os, object, attr.za_name,
+		    attr.za_integer_length, attr.za_num_integers, prop);
+		if (attr.za_integer_length == 1) {
+			(void) printf("%s", (char *)prop);
+		} else {
+			for (i = 0; i < attr.za_num_integers; i++) {
+				switch (attr.za_integer_length) {
+				case 2:
+					(void) printf("%u ",
+					    ((uint16_t *)prop)[i]);
+					break;
+				case 4:
+					(void) printf("%u ",
+					    ((uint32_t *)prop)[i]);
+					break;
+				case 8:
+					(void) printf("%lld ",
+					    (u_longlong_t)((int64_t *)prop)[i]);
+					break;
+				}
+			}
+		}
+		(void) printf("\n");
+		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
+	}
+	zap_cursor_fini(&zc);
+}
+
+static void
+dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
+{
+	uint64_t alloc, offset, entry;
+	uint8_t mapshift = sm->sm_shift;
+	uint64_t mapstart = sm->sm_start;
+	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
+			    "INVALID", "INVALID", "INVALID", "INVALID" };
+
+	if (smo->smo_object == 0)
+		return;
+
+	/*
+	 * Print out the freelist entries in both encoded and decoded form.
+	 */
+	alloc = 0;
+	for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
+		VERIFY(0 == dmu_read(os, smo->smo_object, offset,
+		    sizeof (entry), &entry));
+		if (SM_DEBUG_DECODE(entry)) {
+			(void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n",
+			    (u_longlong_t)(offset / sizeof (entry)),
+			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
+			    (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
+			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
+		} else {
+			(void) printf("\t\t[%4llu]    %c  range:"
+			    " %08llx-%08llx  size: %06llx\n",
+			    (u_longlong_t)(offset / sizeof (entry)),
+			    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
+			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
+			    mapshift) + mapstart),
+			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
+			    mapshift) + mapstart + (SM_RUN_DECODE(entry) <<
+			    mapshift)),
+			    (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
+			if (SM_TYPE_DECODE(entry) == SM_ALLOC)
+				alloc += SM_RUN_DECODE(entry) << mapshift;
+			else
+				alloc -= SM_RUN_DECODE(entry) << mapshift;
+		}
+	}
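+	/*
+	 * The ALLOC and FREE runs decoded above should net out to the
+	 * allocated space recorded in the space map object itself.
+	 */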
+	if (alloc != smo->smo_alloc) {
+		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
+		    "with space map summary (%llu)\n",
+		    (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc);
+	}
+}
+
+static void
+dump_metaslab(metaslab_t *msp)
+{
+	char freebuf[5];
+	space_map_obj_t *smo = &msp->ms_smo;
+	vdev_t *vd = msp->ms_group->mg_vd;
+	spa_t *spa = vd->vdev_spa;
+
+	nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf);
+
+	if (dump_opt['d'] <= 5) {
+		(void) printf("\t%10llx   %10llu   %5s\n",
+		    (u_longlong_t)msp->ms_map.sm_start,
+		    (u_longlong_t)smo->smo_object,
+		    freebuf);
+		return;
+	}
+
+	(void) printf(
+	    "\tvdev %llu   offset %08llx   spacemap %4llu   free %5s\n",
+	    (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start,
+	    (u_longlong_t)smo->smo_object, freebuf);
+
+	ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift));
+
+	dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map);
+}
+
+static void
+dump_metaslabs(spa_t *spa)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+	vdev_t *vd;
+	int c, m;
+
+	(void) printf("\nMetaslabs:\n");
+
+	for (c = 0; c < rvd->vdev_children; c++) {
+		vd = rvd->vdev_child[c];
+
+		spa_config_enter(spa, RW_READER, FTAG);
+		(void) printf("\n    vdev %llu = %s\n\n",
+		    (u_longlong_t)vd->vdev_id, vdev_description(vd));
+		spa_config_exit(spa, FTAG);
+
+		if (dump_opt['d'] <= 5) {
+			(void) printf("\t%10s   %10s   %5s\n",
+			    "offset", "spacemap", "free");
+			(void) printf("\t%10s   %10s   %5s\n",
+			    "------", "--------", "----");
+		}
+		for (m = 0; m < vd->vdev_ms_count; m++)
+			dump_metaslab(vd->vdev_ms[m]);
+		(void) printf("\n");
+	}
+}
+
+static void
+dump_dtl(vdev_t *vd, int indent)
+{
+	avl_tree_t *t = &vd->vdev_dtl_map.sm_root;
+	spa_t *spa = vd->vdev_spa;
+	space_seg_t *ss;
+	vdev_t *pvd;
+	int c;
+
+	if (indent == 0)
+		(void) printf("\nDirty time logs:\n\n");
+
+	spa_config_enter(spa, RW_READER, FTAG);
+	(void) printf("\t%*s%s\n", indent, "", vdev_description(vd));
+	spa_config_exit(spa, FTAG);
+
+	for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) {
+		/*
+		 * Everything in this DTL must appear in all parent DTL unions.
+		 */
+		for (pvd = vd; pvd; pvd = pvd->vdev_parent)
+			ASSERT(vdev_dtl_contains(&pvd->vdev_dtl_map,
+			    ss->ss_start, ss->ss_end - ss->ss_start));
+		(void) printf("\t%*soutage [%llu,%llu] length %llu\n",
+		    indent, "",
+		    (u_longlong_t)ss->ss_start,
+		    (u_longlong_t)ss->ss_end - 1,
+		    (u_longlong_t)(ss->ss_end - ss->ss_start));
+	}
+
+	(void) printf("\n");
+
+	if (dump_opt['d'] > 5 && vd->vdev_children == 0) {
+		dump_spacemap(vd->vdev_spa->spa_meta_objset, &vd->vdev_dtl,
+		    &vd->vdev_dtl_map);
+		(void) printf("\n");
+	}
+
+	for (c = 0; c < vd->vdev_children; c++)
+		dump_dtl(vd->vdev_child[c], indent + 4);
+}
+
+/*ARGSUSED*/
+static void
+dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
+{
+}
+
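+/*
+ * Convert a block id at the given indirection level into the byte offset of
+ * the first data block it covers: shift to the equivalent level-0 block id,
+ * then scale by the data block size (dn_datablkszsec is in 512-byte
+ * sectors).
+ */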
+static uint64_t
+blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid)
+{
+	if (level < 0)
+		return (blkid);
+
+	return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
+	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
+}
+
+static void
+sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp, int alldvas)
+{
+	dva_t *dva = bp->blk_dva;
+	int ndvas = alldvas ? BP_GET_NDVAS(bp) : 1;
+	int i;
+
+	blkbuf[0] = '\0';
+
+	for (i = 0; i < ndvas; i++)
+		(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
+		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
+		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
+		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
+
+	(void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu",
+	    (u_longlong_t)BP_GET_LSIZE(bp),
+	    (u_longlong_t)BP_GET_PSIZE(bp),
+	    (u_longlong_t)bp->blk_fill,
+	    (u_longlong_t)bp->blk_birth);
+}
+
+/* ARGSUSED */
+static int
+zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
+{
+	zbookmark_t *zb = &bc->bc_bookmark;
+	blkptr_t *bp = &bc->bc_blkptr;
+	void *data = bc->bc_data;
+	dnode_phys_t *dnp = bc->bc_dnode;
+	char blkbuf[BP_SPRINTF_LEN + 80];
+	int l;
+
+	if (bc->bc_errno) {
+		(void) sprintf(blkbuf,
+		    "Error %d reading <%llu, %llu, %lld, %llu>: ",
+		    bc->bc_errno,
+		    (u_longlong_t)zb->zb_objset,
+		    (u_longlong_t)zb->zb_object,
+		    (u_longlong_t)zb->zb_level,
+		    (u_longlong_t)zb->zb_blkid);
+		goto out;
+	}
+
+	if (zb->zb_level == -1) {
+		ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
+		ASSERT3U(BP_GET_LEVEL(bp), ==, 0);
+	} else {
+		ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
+		ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
+	}
+
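+	/*
+	 * Cross-check blk_fill: an indirect block's fill count must equal
+	 * the sum of its children's, and a dnode block's must equal the
+	 * number of allocated dnodes it holds.
+	 */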
+	if (zb->zb_level > 0) {
+		uint64_t fill = 0;
+		blkptr_t *bpx, *bpend;
+
+		for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx);
+		    bpx < bpend; bpx++) {
+			if (bpx->blk_birth != 0) {
+				fill += bpx->blk_fill;
+			} else {
+				ASSERT(bpx->blk_fill == 0);
+			}
+		}
+		ASSERT3U(fill, ==, bp->blk_fill);
+	}
+
+	if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) {
+		uint64_t fill = 0;
+		dnode_phys_t *dnx, *dnend;
+
+		for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT);
+		    dnx < dnend; dnx++) {
+			if (dnx->dn_type != DMU_OT_NONE)
+				fill++;
+		}
+		ASSERT3U(fill, ==, bp->blk_fill);
+	}
+
+	(void) sprintf(blkbuf, "%16llx ",
+	    (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
+
+	ASSERT(zb->zb_level >= 0);
+
+	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
+		if (l == zb->zb_level) {
+			(void) sprintf(blkbuf + strlen(blkbuf), "L%llx",
+			    (u_longlong_t)zb->zb_level);
+		} else {
+			(void) sprintf(blkbuf + strlen(blkbuf), " ");
+		}
+	}
+
+out:
+	if (bp->blk_birth == 0) {
+		(void) sprintf(blkbuf + strlen(blkbuf), "<hole>");
+		(void) printf("%s\n", blkbuf);
+	} else {
+		sprintf_blkptr_compact(blkbuf + strlen(blkbuf), bp,
+		    dump_opt['d'] > 5 ? 1 : 0);
+		(void) printf("%s\n", blkbuf);
+	}
+
+	return (bc->bc_errno ? ERESTART : 0);
+}
+
+/*ARGSUSED*/
+static void
+dump_indirect(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	traverse_handle_t *th;
+	uint64_t objset = dmu_objset_id(os);
+	int advance = zdb_advance;
+
+	(void) printf("Indirect blocks:\n");
+
+	if (object == 0)
+		advance |= ADVANCE_DATA;
+
+	th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance,
+	    ZIO_FLAG_CANFAIL);
+	th->th_noread = zdb_noread;
+
+	traverse_add_dnode(th, 0, -1ULL, objset, object);
+
+	while (traverse_more(th) == EAGAIN)
+		continue;
+
+	(void) printf("\n");
+
+	traverse_fini(th);
+}
+
+/*ARGSUSED*/
+static void
+dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	dsl_dir_phys_t *dd = data;
+	time_t crtime;
+	char used[6], compressed[6], uncompressed[6], quota[6], resv[6];
+
+	if (dd == NULL)
+		return;
+
+	ASSERT(size == sizeof (*dd));
+
+	crtime = dd->dd_creation_time;
+	nicenum(dd->dd_used_bytes, used);
+	nicenum(dd->dd_compressed_bytes, compressed);
+	nicenum(dd->dd_uncompressed_bytes, uncompressed);
+	nicenum(dd->dd_quota, quota);
+	nicenum(dd->dd_reserved, resv);
+
+	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
+	(void) printf("\t\thead_dataset_obj = %llu\n",
+	    (u_longlong_t)dd->dd_head_dataset_obj);
+	(void) printf("\t\tparent_dir_obj = %llu\n",
+	    (u_longlong_t)dd->dd_parent_obj);
+	(void) printf("\t\tclone_parent_obj = %llu\n",
+	    (u_longlong_t)dd->dd_clone_parent_obj);
+	(void) printf("\t\tchild_dir_zapobj = %llu\n",
+	    (u_longlong_t)dd->dd_child_dir_zapobj);
+	(void) printf("\t\tused_bytes = %s\n", used);
+	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
+	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
+	(void) printf("\t\tquota = %s\n", quota);
+	(void) printf("\t\treserved = %s\n", resv);
+	(void) printf("\t\tprops_zapobj = %llu\n",
+	    (u_longlong_t)dd->dd_props_zapobj);
+}
+
+/*ARGSUSED*/
+static void
+dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	dsl_dataset_phys_t *ds = data;
+	time_t crtime;
+	char used[6], compressed[6], uncompressed[6], unique[6];
+	char blkbuf[BP_SPRINTF_LEN];
+
+	if (ds == NULL)
+		return;
+
+	ASSERT(size == sizeof (*ds));
+	crtime = ds->ds_creation_time;
+	nicenum(ds->ds_used_bytes, used);
+	nicenum(ds->ds_compressed_bytes, compressed);
+	nicenum(ds->ds_uncompressed_bytes, uncompressed);
+	nicenum(ds->ds_unique_bytes, unique);
+	sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp);
+
+	(void) printf("\t\tdataset_obj = %llu\n",
+	    (u_longlong_t)ds->ds_dir_obj);
+	(void) printf("\t\tprev_snap_obj = %llu\n",
+	    (u_longlong_t)ds->ds_prev_snap_obj);
+	(void) printf("\t\tprev_snap_txg = %llu\n",
+	    (u_longlong_t)ds->ds_prev_snap_txg);
+	(void) printf("\t\tnext_snap_obj = %llu\n",
+	    (u_longlong_t)ds->ds_next_snap_obj);
+	(void) printf("\t\tsnapnames_zapobj = %llu\n",
+	    (u_longlong_t)ds->ds_snapnames_zapobj);
+	(void) printf("\t\tnum_children = %llu\n",
+	    (u_longlong_t)ds->ds_num_children);
+	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
+	(void) printf("\t\tcreation_txg = %llu\n",
+	    (u_longlong_t)ds->ds_creation_txg);
+	(void) printf("\t\tdeadlist_obj = %llu\n",
+	    (u_longlong_t)ds->ds_deadlist_obj);
+	(void) printf("\t\tused_bytes = %s\n", used);
+	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
+	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
+	(void) printf("\t\tunique = %s\n", unique);
+	(void) printf("\t\tfsid_guid = %llu\n",
+	    (u_longlong_t)ds->ds_fsid_guid);
+	(void) printf("\t\tguid = %llu\n",
+	    (u_longlong_t)ds->ds_guid);
+	(void) printf("\t\tflags = %llx\n",
+	    (u_longlong_t)ds->ds_flags);
+	(void) printf("\t\tbp = %s\n", blkbuf);
+}
+
+static void
+dump_bplist(objset_t *mos, uint64_t object, char *name)
+{
+	bplist_t bpl = { 0 };
+	blkptr_t blk, *bp = &blk;
+	uint64_t itor = 0;
+	char bytes[6];
+	char comp[6];
+	char uncomp[6];
+
+	if (dump_opt['d'] < 3)
+		return;
+
+	VERIFY(0 == bplist_open(&bpl, mos, object));
+	if (bplist_empty(&bpl)) {
+		bplist_close(&bpl);
+		return;
+	}
+
+	nicenum(bpl.bpl_phys->bpl_bytes, bytes);
+	if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
+		nicenum(bpl.bpl_phys->bpl_comp, comp);
+		nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
+		(void) printf("\n    %s: %llu entries, %s (%s/%s comp)\n",
+		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
+		    bytes, comp, uncomp);
+	} else {
+		(void) printf("\n    %s: %llu entries, %s\n",
+		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
+	}
+
+	if (dump_opt['d'] < 5) {
+		bplist_close(&bpl);
+		return;
+	}
+
+	(void) printf("\n");
+
+	while (bplist_iterate(&bpl, &itor, bp) == 0) {
+		char blkbuf[BP_SPRINTF_LEN];
+
+		ASSERT(bp->blk_birth != 0);
+		sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
+		(void) printf("\tItem %3llu: %s\n",
+		    (u_longlong_t)itor - 1, blkbuf);
+	}
+
+	bplist_close(&bpl);
+}
+
+/*ARGSUSED*/
+static void
+dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	znode_phys_t *zp = data;
+	time_t z_crtime, z_atime, z_mtime, z_ctime;
+	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
+	int error;
+
+	ASSERT(size >= sizeof (znode_phys_t));
+
+	error = zfs_obj_to_path(os, object, path, sizeof (path));
+	if (error != 0) {
+		(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
+		    (u_longlong_t)object);
+	}
+
+	if (dump_opt['d'] < 3) {
+		(void) printf("\t%s\n", path);
+		return;
+	}
+
+	z_crtime = (time_t)zp->zp_crtime[0];
+	z_atime = (time_t)zp->zp_atime[0];
+	z_mtime = (time_t)zp->zp_mtime[0];
+	z_ctime = (time_t)zp->zp_ctime[0];
+
+	(void) printf("\tpath	%s\n", path);
+	(void) printf("\tatime	%s", ctime(&z_atime));
+	(void) printf("\tmtime	%s", ctime(&z_mtime));
+	(void) printf("\tctime	%s", ctime(&z_ctime));
+	(void) printf("\tcrtime	%s", ctime(&z_crtime));
+	(void) printf("\tgen	%llu\n", (u_longlong_t)zp->zp_gen);
+	(void) printf("\tmode	%llo\n", (u_longlong_t)zp->zp_mode);
+	(void) printf("\tsize	%llu\n", (u_longlong_t)zp->zp_size);
+	(void) printf("\tparent	%llu\n", (u_longlong_t)zp->zp_parent);
+	(void) printf("\tlinks	%llu\n", (u_longlong_t)zp->zp_links);
+	(void) printf("\txattr	%llu\n", (u_longlong_t)zp->zp_xattr);
+	(void) printf("\trdev	0x%016llx\n", (u_longlong_t)zp->zp_rdev);
+}
+
+/*ARGSUSED*/
+static void
+dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
+{
+}
+
+/*ARGSUSED*/
+static void
+dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
+{
+}
+
+static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
+	dump_none,		/* unallocated			*/
+	dump_zap,		/* object directory		*/
+	dump_uint64,		/* object array			*/
+	dump_none,		/* packed nvlist		*/
+	dump_packed_nvlist,	/* packed nvlist size		*/
+	dump_none,		/* bplist			*/
+	dump_none,		/* bplist header		*/
+	dump_none,		/* SPA space map header		*/
+	dump_none,		/* SPA space map		*/
+	dump_none,		/* ZIL intent log		*/
+	dump_dnode,		/* DMU dnode			*/
+	dump_dmu_objset,	/* DMU objset			*/
+	dump_dsl_dir,		/* DSL directory		*/
+	dump_zap,		/* DSL directory child map	*/
+	dump_zap,		/* DSL dataset snap map		*/
+	dump_zap,		/* DSL props			*/
+	dump_dsl_dataset,	/* DSL dataset			*/
+	dump_znode,		/* ZFS znode			*/
+	dump_acl,		/* ZFS ACL			*/
+	dump_uint8,		/* ZFS plain file		*/
+	dump_zap,		/* ZFS directory		*/
+	dump_zap,		/* ZFS master node		*/
+	dump_zap,		/* ZFS delete queue		*/
+	dump_uint8,		/* zvol object			*/
+	dump_zap,		/* zvol prop			*/
+	dump_uint8,		/* other uint8[]		*/
+	dump_uint64,		/* other uint64[]		*/
+	dump_zap,		/* other ZAP			*/
+	dump_zap,		/* persistent error log		*/
+	dump_uint8,		/* SPA history			*/
+	dump_uint64,		/* SPA history offsets		*/
+	dump_zap,		/* Pool properties		*/
+};
+
+static void
+dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
+{
+	dmu_buf_t *db = NULL;
+	dmu_object_info_t doi;
+	dnode_t *dn;
+	void *bonus = NULL;
+	size_t bsize = 0;
+	char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], segsize[6];
+	char aux[50];
+	int error;
+
+	if (*print_header) {
+		(void) printf("\n    Object  lvl   iblk   dblk  lsize"
+		    "  asize  type\n");
+		*print_header = 0;
+	}
+
+	if (object == 0) {
+		dn = os->os->os_meta_dnode;
+	} else {
+		error = dmu_bonus_hold(os, object, FTAG, &db);
+		if (error)
+			fatal("dmu_bonus_hold(%llu) failed, errno %u",
+			    object, error);
+		bonus = db->db_data;
+		bsize = db->db_size;
+		dn = ((dmu_buf_impl_t *)db)->db_dnode;
+	}
+	dmu_object_info_from_dnode(dn, &doi);
+
+	nicenum(doi.doi_metadata_block_size, iblk);
+	nicenum(doi.doi_data_block_size, dblk);
+	nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1),
+	    lsize);
+	nicenum(doi.doi_physical_blks << 9, asize);
+	nicenum(doi.doi_bonus_size, bonus_size);
+
+	aux[0] = '\0';
+
+	if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6)
+		(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
+		zio_checksum_table[doi.doi_checksum].ci_name);
+
+	if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6)
+		(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
+		zio_compress_table[doi.doi_compress].ci_name);
+
+	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %s%s\n",
+	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize,
+	    asize, dmu_ot[doi.doi_type].ot_name, aux);
+
+	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
+		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %s\n",
+		    "", "", "", "", bonus_size, "bonus",
+		    dmu_ot[doi.doi_bonus_type].ot_name);
+	}
+
+	if (verbosity >= 4) {
+		object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
+		object_viewer[doi.doi_type](os, object, NULL, 0);
+		*print_header = 1;
+	}
+
+	if (verbosity >= 5)
+		dump_indirect(os, object, NULL, 0);
+
+	if (verbosity >= 5) {
+		/*
+		 * Report the list of segments that comprise the object.
+		 */
+		uint64_t start = 0;
+		uint64_t end;
+		uint64_t blkfill = 1;
+		int minlvl = 1;
+
+		if (dn->dn_type == DMU_OT_DNODE) {
+			minlvl = 0;
+			blkfill = DNODES_PER_BLOCK;
+		}
+
+		for (;;) {
+			error = dnode_next_offset(dn, B_FALSE, &start, minlvl,
+			    blkfill, 0);
+			if (error)
+				break;
+			end = start;
+			error = dnode_next_offset(dn, B_TRUE, &end, minlvl,
+			    blkfill, 0);
+			nicenum(end - start, segsize);
+			(void) printf("\t\tsegment [%016llx, %016llx)"
+			    " size %5s\n", (u_longlong_t)start,
+			    (u_longlong_t)end, segsize);
+			if (error)
+				break;
+			start = end;
+		}
+	}
+
+	if (db != NULL)
+		dmu_buf_rele(db, FTAG);
+}
+
+static char *objset_types[DMU_OST_NUMTYPES] = {
+	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
+
+/*ARGSUSED*/
+static void
+dump_dir(objset_t *os)
+{
+	dmu_objset_stats_t dds;
+	uint64_t object, object_count;
+	uint64_t refdbytes, usedobjs, scratch;
+	char numbuf[8];
+	char blkbuf[BP_SPRINTF_LEN];
+	char osname[MAXNAMELEN];
+	char *type = "UNKNOWN";
+	int verbosity = dump_opt['d'];
+	int print_header = 1;
+	int i, error;
+
+	dmu_objset_fast_stat(os, &dds);
+
+	if (dds.dds_type < DMU_OST_NUMTYPES)
+		type = objset_types[dds.dds_type];
+
+	if (dds.dds_type == DMU_OST_META) {
+		dds.dds_creation_txg = TXG_INITIAL;
+		usedobjs = os->os->os_rootbp->blk_fill;
+		refdbytes =
+		    os->os->os_spa->spa_dsl_pool->dp_mos_dir->dd_used_bytes;
+	} else {
+		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
+	}
+
+	ASSERT3U(usedobjs, ==, os->os->os_rootbp->blk_fill);
+
+	nicenum(refdbytes, numbuf);
+
+	if (verbosity >= 4) {
+		(void) strcpy(blkbuf, ", rootbp ");
+		sprintf_blkptr(blkbuf + strlen(blkbuf),
+		    BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
+	} else {
+		blkbuf[0] = '\0';
+	}
+
+	dmu_objset_name(os, osname);
+
+	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
+	    "%s, %llu objects%s\n",
+	    osname, type, (u_longlong_t)dmu_objset_id(os),
+	    (u_longlong_t)dds.dds_creation_txg,
+	    numbuf, (u_longlong_t)usedobjs, blkbuf);
+
+	dump_intent_log(dmu_objset_zil(os));
+
+	if (dmu_objset_ds(os) != NULL)
+		dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
+		    dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist");
+
+	if (verbosity < 2)
+		return;
+
+	if (zopt_objects != 0) {
+		for (i = 0; i < zopt_objects; i++)
+			dump_object(os, zopt_object[i], verbosity,
+			    &print_header);
+		(void) printf("\n");
+		return;
+	}
+
+	dump_object(os, 0, verbosity, &print_header);
+	object_count = 1;
+
+	object = 0;
+	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
+		dump_object(os, object, verbosity, &print_header);
+		object_count++;
+	}
+
+	ASSERT3U(object_count, ==, usedobjs);
+
+	(void) printf("\n");
+
+	if (error != ESRCH)
+		fatal("dmu_object_next() = %d", error);
+}
+
+static void
+dump_uberblock(uberblock_t *ub)
+{
+	time_t timestamp = ub->ub_timestamp;
+
+	(void) printf("Uberblock\n\n");
+	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
+	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
+	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
+	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
+	(void) printf("\ttimestamp = %llu UTC = %s",
+	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
+	if (dump_opt['u'] >= 3) {
+		char blkbuf[BP_SPRINTF_LEN];
+		sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp);
+		(void) printf("\trootbp = %s\n", blkbuf);
+	}
+	(void) printf("\n");
+}
+
+static void
+dump_config(const char *pool)
+{
+	spa_t *spa = NULL;
+
+	mutex_enter(&spa_namespace_lock);
+	while ((spa = spa_next(spa)) != NULL) {
+		if (pool == NULL)
+			(void) printf("%s\n", spa_name(spa));
+		if (pool == NULL || strcmp(pool, spa_name(spa)) == 0)
+			dump_nvlist(spa->spa_config, 4);
+	}
+	mutex_exit(&spa_namespace_lock);
+}
+
+static void
+dump_label(const char *dev)
+{
+	int fd;
+	vdev_label_t label;
+	char *buf = label.vl_vdev_phys.vp_nvlist;
+	size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
+	struct stat64 statbuf;
+	uint64_t psize;
+	int l;
+
+	if ((fd = open64(dev, O_RDONLY)) < 0) {
+		(void) printf("cannot open '%s': %s\n", dev, strerror(errno));
+		exit(1);
+	}
+
+	if (fstat64(fd, &statbuf) != 0) {
+		(void) printf("failed to stat '%s': %s\n", dev,
+		    strerror(errno));
+		exit(1);
+	}
+
+	if (S_ISCHR(statbuf.st_mode)) {
+		if (ioctl(fd, DIOCGMEDIASIZE, &psize) != 0) {
+			(void) printf("failed to get size '%s': %s\n", dev,
+			    strerror(errno));
+			exit(1);
+		}
+	} else
+		psize = statbuf.st_size;
+
+	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
+
+	for (l = 0; l < VDEV_LABELS; l++) {
+
+		nvlist_t *config = NULL;
+
+		(void) printf("--------------------------------------------\n");
+		(void) printf("LABEL %d\n", l);
+		(void) printf("--------------------------------------------\n");
+
+		if (pread64(fd, &label, sizeof (label),
+		    vdev_label_offset(psize, l, 0)) != sizeof (label)) {
+			(void) printf("failed to read label %d\n", l);
+			continue;
+		}
+
+		if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
+			(void) printf("failed to unpack label %d\n", l);
+			continue;
+		}
+		dump_nvlist(config, 4);
+		nvlist_free(config);
+	}
+}
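
For orientation, a rough sketch (not part of the imported file, and assuming the same headers zdb.c already pulls in for vdev_label_t and VDEV_LABELS): ZFS keeps four copies of each vdev label, two at the front of the device and two at the end, which is why psize is first truncated to a label-sized boundary before the loop above asks vdev_label_offset() for each copy. The arithmetic is approximately:

    /*
     * Illustrative sketch only; the authoritative computation lives in
     * the vdev label code.  Labels 0 and 1 sit at the start of the
     * device, labels 2 and 3 at the end.
     */
    static uint64_t
    approx_label_offset(uint64_t psize, int l)
    {
        return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
            0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
    }
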
+
+/*ARGSUSED*/
+static int
+dump_one_dir(char *dsname, void *arg)
+{
+	int error;
+	objset_t *os;
+
+	error = dmu_objset_open(dsname, DMU_OST_ANY,
+	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
+	if (error) {
+		(void) printf("Could not open %s\n", dsname);
+		return (0);
+	}
+	dump_dir(os);
+	dmu_objset_close(os);
+	return (0);
+}
+
+static void
+zdb_space_map_load(spa_t *spa)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+	vdev_t *vd;
+	int c, m, error;
+
+	for (c = 0; c < rvd->vdev_children; c++) {
+		vd = rvd->vdev_child[c];
+		for (m = 0; m < vd->vdev_ms_count; m++) {
+			metaslab_t *msp = vd->vdev_ms[m];
+			mutex_enter(&msp->ms_lock);
+			error = space_map_load(&msp->ms_allocmap[0], NULL,
+			    SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset);
+			mutex_exit(&msp->ms_lock);
+			if (error)
+				fatal("%s bad space map #%d, error %d",
+				    spa->spa_name, c, error);
+		}
+	}
+}
+
+static int
+zdb_space_map_claim(spa_t *spa, blkptr_t *bp, zbookmark_t *zb)
+{
+	dva_t *dva = bp->blk_dva;
+	vdev_t *vd;
+	metaslab_t *msp;
+	space_map_t *allocmap, *freemap;
+	int error;
+	int d;
+	blkptr_t blk = *bp;
+
+	for (d = 0; d < BP_GET_NDVAS(bp); d++) {
+		uint64_t vdev = DVA_GET_VDEV(&dva[d]);
+		uint64_t offset = DVA_GET_OFFSET(&dva[d]);
+		uint64_t size = DVA_GET_ASIZE(&dva[d]);
+
+		if ((vd = vdev_lookup_top(spa, vdev)) == NULL)
+			return (ENXIO);
+
+		if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count)
+			return (ENXIO);
+
+		msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
+		allocmap = &msp->ms_allocmap[0];
+		freemap = &msp->ms_freemap[0];
+
+		/* Prepare our copy of the bp in case we need to read GBHs */
+		if (DVA_GET_GANG(&dva[d])) {
+			size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
+			DVA_SET_ASIZE(&blk.blk_dva[d], size);
+			DVA_SET_GANG(&blk.blk_dva[d], 0);
+		}
+
+		mutex_enter(&msp->ms_lock);
+		if (space_map_contains(freemap, offset, size)) {
+			mutex_exit(&msp->ms_lock);
+			return (EAGAIN);	/* allocated more than once */
+		}
+
+		if (!space_map_contains(allocmap, offset, size)) {
+			mutex_exit(&msp->ms_lock);
+			return (ESTALE);	/* not allocated at all */
+		}
+
+		space_map_remove(allocmap, offset, size);
+		space_map_add(freemap, offset, size);
+
+		mutex_exit(&msp->ms_lock);
+	}
+
+	if (BP_IS_GANG(bp)) {
+		zio_gbh_phys_t gbh;
+		int g;
+
+		/* LINTED - compile time assert */
+		ASSERT(sizeof (zio_gbh_phys_t) == SPA_GANGBLOCKSIZE);
+
+		BP_SET_CHECKSUM(&blk, ZIO_CHECKSUM_GANG_HEADER);
+		BP_SET_PSIZE(&blk, SPA_GANGBLOCKSIZE);
+		BP_SET_LSIZE(&blk, SPA_GANGBLOCKSIZE);
+		BP_SET_COMPRESS(&blk, ZIO_COMPRESS_OFF);
+		error = zio_wait(zio_read(NULL, spa, &blk, &gbh,
+		    SPA_GANGBLOCKSIZE, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
+		    ZIO_FLAG_CANFAIL | ZIO_FLAG_CONFIG_HELD, zb));
+		if (error)
+			return (error);
+		if (BP_SHOULD_BYTESWAP(&blk))
+			byteswap_uint64_array(&gbh, SPA_GANGBLOCKSIZE);
+		for (g = 0; g < SPA_GBH_NBLKPTRS; g++) {
+			if (BP_IS_HOLE(&gbh.zg_blkptr[g]))
+				break;
+			error = zdb_space_map_claim(spa, &gbh.zg_blkptr[g], zb);
+			if (error)
+				return (error);
+		}
+	}
+
+	return (0);
+}
+
+static void
+zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
+{
+	metaslab_t *msp;
+
+	/* LINTED */
+	msp = (metaslab_t *)((char *)sm - offsetof(metaslab_t, ms_allocmap[0]));
+
+	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
+	    (u_longlong_t)msp->ms_group->mg_vd->vdev_id,
+	    (u_longlong_t)start,
+	    (u_longlong_t)size);
+}
+
+static void
+zdb_space_map_unload(spa_t *spa)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+	vdev_t *vd;
+	int c, m;
+
+	for (c = 0; c < rvd->vdev_children; c++) {
+		vd = rvd->vdev_child[c];
+		for (m = 0; m < vd->vdev_ms_count; m++) {
+			metaslab_t *msp = vd->vdev_ms[m];
+			mutex_enter(&msp->ms_lock);
+			space_map_vacate(&msp->ms_allocmap[0], zdb_leak,
+			    &msp->ms_allocmap[0]);
+			space_map_unload(&msp->ms_allocmap[0]);
+			space_map_vacate(&msp->ms_freemap[0], NULL, NULL);
+			mutex_exit(&msp->ms_lock);
+		}
+	}
+}
+
+static void
+zdb_refresh_ubsync(spa_t *spa)
+{
+	uberblock_t ub = { 0 };
+	vdev_t *rvd = spa->spa_root_vdev;
+	zio_t *zio;
+
+	/*
+	 * Reload the uberblock.
+	 */
+	zio = zio_root(spa, NULL, NULL,
+	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
+	vdev_uberblock_load(zio, rvd, &ub);
+	(void) zio_wait(zio);
+
+	if (ub.ub_txg != 0)
+		spa->spa_ubsync = ub;
+}
+
+/*
+ * Verify that the sum of the sizes of all blocks in the pool adds up
+ * to the SPA's sa_alloc total.
+ */
+typedef struct zdb_blkstats {
+	uint64_t	zb_asize;
+	uint64_t	zb_lsize;
+	uint64_t	zb_psize;
+	uint64_t	zb_count;
+} zdb_blkstats_t;
+
+#define	DMU_OT_DEFERRED	DMU_OT_NONE
+#define	DMU_OT_TOTAL	DMU_OT_NUMTYPES
+
+#define	ZB_TOTAL	ZB_MAXLEVEL
+
+typedef struct zdb_cb {
+	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
+	uint64_t	zcb_errors[256];
+	traverse_blk_cache_t *zcb_cache;
+	int		zcb_readfails;
+	int		zcb_haderrors;
+} zdb_cb_t;
+
+static void
+zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
+{
+	int i, error;
+
+	for (i = 0; i < 4; i++) {
+		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
+		int t = (i & 1) ? type : DMU_OT_TOTAL;
+		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
+
+		zb->zb_asize += BP_GET_ASIZE(bp);
+		zb->zb_lsize += BP_GET_LSIZE(bp);
+		zb->zb_psize += BP_GET_PSIZE(bp);
+		zb->zb_count++;
+	}
+
+	if (dump_opt['L'])
+		return;
+
+	error = zdb_space_map_claim(spa, bp, &zcb->zcb_cache->bc_bookmark);
+
+	if (error == 0)
+		return;
+
+	if (error == EAGAIN)
+		(void) fatal("double-allocation, bp=%p", bp);
+
+	if (error == ESTALE)
+		(void) fatal("reference to freed block, bp=%p", bp);
+
+	(void) fatal("fatal error %d in bp %p", error, bp);
+}
+
+static int
+zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
+{
+	zbookmark_t *zb = &bc->bc_bookmark;
+	zdb_cb_t *zcb = arg;
+	blkptr_t *bp = &bc->bc_blkptr;
+	dmu_object_type_t type = BP_GET_TYPE(bp);
+	char blkbuf[BP_SPRINTF_LEN];
+	int error = 0;
+
+	if (bc->bc_errno) {
+		if (zcb->zcb_readfails++ < 10 && dump_opt['L']) {
+			zdb_refresh_ubsync(spa);
+			error = EAGAIN;
+		} else {
+			zcb->zcb_haderrors = 1;
+			zcb->zcb_errors[bc->bc_errno]++;
+			error = ERESTART;
+		}
+
+		if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno))
+			sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
+		else
+			blkbuf[0] = '\0';
+
+		(void) printf("zdb_blkptr_cb: Got error %d reading "
+		    "<%llu, %llu, %lld, %llx> %s -- %s\n",
+		    bc->bc_errno,
+		    (u_longlong_t)zb->zb_objset,
+		    (u_longlong_t)zb->zb_object,
+		    (u_longlong_t)zb->zb_level,
+		    (u_longlong_t)zb->zb_blkid,
+		    blkbuf,
+		    error == EAGAIN ? "retrying" : "skipping");
+
+		return (error);
+	}
+
+	zcb->zcb_readfails = 0;
+
+	ASSERT(!BP_IS_HOLE(bp));
+
+	if (dump_opt['b'] >= 4) {
+		sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
+		(void) printf("objset %llu object %llu offset 0x%llx %s\n",
+		    (u_longlong_t)zb->zb_objset,
+		    (u_longlong_t)zb->zb_object,
+		    (u_longlong_t)blkid2offset(bc->bc_dnode,
+			zb->zb_level, zb->zb_blkid),
+		    blkbuf);
+	}
+
+	zdb_count_block(spa, zcb, bp, type);
+
+	return (0);
+}
+
+static int
+dump_block_stats(spa_t *spa)
+{
+	traverse_handle_t *th;
+	zdb_cb_t zcb = { 0 };
+	traverse_blk_cache_t dummy_cache = { 0 };
+	zdb_blkstats_t *zb, *tzb;
+	uint64_t alloc, space;
+	int leaks = 0;
+	int advance = zdb_advance;
+	int flags;
+	int e;
+
+	zcb.zcb_cache = &dummy_cache;
+
+	if (dump_opt['c'])
+		advance |= ADVANCE_DATA;
+
+	advance |= ADVANCE_PRUNE | ADVANCE_ZIL;
+
+	(void) printf("\nTraversing all blocks to %sverify"
+	    " nothing leaked ...\n",
+	    dump_opt['c'] ? "verify checksums and " : "");
+
+	/*
+	 * Load all space maps.  As we traverse the pool, if we find a block
+	 * that's not in its space map, that indicates a double-allocation,
+	 * reference to a freed block, or an unclaimed block.  Otherwise we
+	 * remove the block from the space map.  If the space maps are not
+	 * empty when we're done, that indicates leaked blocks.
+	 */
+	if (!dump_opt['L'])
+		zdb_space_map_load(spa);
+
+	/*
+	 * If there's a deferred-free bplist, process that first.
+	 */
+	if (spa->spa_sync_bplist_obj != 0) {
+		bplist_t *bpl = &spa->spa_sync_bplist;
+		blkptr_t blk;
+		uint64_t itor = 0;
+
+		VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
+		    spa->spa_sync_bplist_obj));
+
+		while (bplist_iterate(bpl, &itor, &blk) == 0) {
+			if (dump_opt['b'] >= 4) {
+				char blkbuf[BP_SPRINTF_LEN];
+				sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk);
+				(void) printf("[%s] %s\n",
+				    "deferred free", blkbuf);
+			}
+			zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
+		}
+
+		bplist_close(bpl);
+	}
+
+	/*
+	 * Now traverse the pool.  If we're reading all data to verify
+	 * checksums, do a scrubbing read so that we validate all copies.
+	 */
+	flags = ZIO_FLAG_CANFAIL;
+	if (advance & ADVANCE_DATA)
+		flags |= ZIO_FLAG_SCRUB;
+	th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags);
+	th->th_noread = zdb_noread;
+
+	traverse_add_pool(th, 0, spa_first_txg(spa) + TXG_CONCURRENT_STATES);
+
+	while (traverse_more(th) == EAGAIN)
+		continue;
+
+	traverse_fini(th);
+
+	if (zcb.zcb_haderrors) {
+		(void) printf("\nError counts:\n\n");
+		(void) printf("\t%5s  %s\n", "errno", "count");
+		for (e = 0; e < 256; e++) {
+			if (zcb.zcb_errors[e] != 0) {
+				(void) printf("\t%5d  %llu\n",
+				    e, (u_longlong_t)zcb.zcb_errors[e]);
+			}
+		}
+	}
+
+	/*
+	 * Report any leaked segments.
+	 */
+	if (!dump_opt['L'])
+		zdb_space_map_unload(spa);
+
+	if (dump_opt['L'])
+		(void) printf("\n\n *** Live pool traversal; "
+		    "block counts are only approximate ***\n\n");
+
+	alloc = spa_get_alloc(spa);
+	space = spa_get_space(spa);
+
+	tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL];
+
+	if (tzb->zb_asize == alloc) {
+		(void) printf("\n\tNo leaks (block sum matches space"
+		    " maps exactly)\n");
+	} else {
+		(void) printf("block traversal size %llu != alloc %llu "
+		    "(leaked %lld)\n",
+		    (u_longlong_t)tzb->zb_asize,
+		    (u_longlong_t)alloc,
+		    (u_longlong_t)(alloc - tzb->zb_asize));
+		leaks = 1;
+	}
+
+	if (tzb->zb_count == 0)
+		return (2);
+
+	(void) printf("\n");
+	(void) printf("\tbp count:      %10llu\n",
+	    (u_longlong_t)tzb->zb_count);
+	(void) printf("\tbp logical:    %10llu\t avg: %6llu\n",
+	    (u_longlong_t)tzb->zb_lsize,
+	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
+	(void) printf("\tbp physical:   %10llu\t avg:"
+	    " %6llu\tcompression: %6.2f\n",
+	    (u_longlong_t)tzb->zb_psize,
+	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
+	    (double)tzb->zb_lsize / tzb->zb_psize);
+	(void) printf("\tbp allocated:  %10llu\t avg:"
+	    " %6llu\tcompression: %6.2f\n",
+	    (u_longlong_t)tzb->zb_asize,
+	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
+	    (double)tzb->zb_lsize / tzb->zb_asize);
+	(void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n",
+	    (u_longlong_t)alloc, 100.0 * alloc / space);
+
+	if (dump_opt['b'] >= 2) {
+		int l, t, level;
+		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
+		    "\t  avg\t comp\t%%Total\tType\n");
+
+		for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
+			char csize[6], lsize[6], psize[6], asize[6], avg[6];
+			char *typename;
+
+			typename = t == DMU_OT_DEFERRED ? "deferred free" :
+			    t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name;
+
+			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
+				(void) printf("%6s\t%5s\t%5s\t%5s"
+				    "\t%5s\t%5s\t%6s\t%s\n",
+				    "-",
+				    "-",
+				    "-",
+				    "-",
+				    "-",
+				    "-",
+				    "-",
+				    typename);
+				continue;
+			}
+
+			for (l = ZB_TOTAL - 1; l >= -1; l--) {
+				level = (l == -1 ? ZB_TOTAL : l);
+				zb = &zcb.zcb_type[level][t];
+
+				if (zb->zb_asize == 0)
+					continue;
+
+				if (dump_opt['b'] < 3 && level != ZB_TOTAL)
+					continue;
+
+				if (level == 0 && zb->zb_asize ==
+				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
+					continue;
+
+				nicenum(zb->zb_count, csize);
+				nicenum(zb->zb_lsize, lsize);
+				nicenum(zb->zb_psize, psize);
+				nicenum(zb->zb_asize, asize);
+				nicenum(zb->zb_asize / zb->zb_count, avg);
+
+				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
+				    "\t%5.2f\t%6.2f\t",
+				    csize, lsize, psize, asize, avg,
+				    (double)zb->zb_lsize / zb->zb_psize,
+				    100.0 * zb->zb_asize / tzb->zb_asize);
+
+				if (level == ZB_TOTAL)
+					(void) printf("%s\n", typename);
+				else
+					(void) printf("    L%d %s\n",
+					    level, typename);
+			}
+		}
+	}
+
+	(void) printf("\n");
+
+	if (leaks)
+		return (2);
+
+	if (zcb.zcb_haderrors)
+		return (3);
+
+	return (0);
+}
+
+static void
+dump_zpool(spa_t *spa)
+{
+	dsl_pool_t *dp = spa_get_dsl(spa);
+	int rc = 0;
+
+	if (dump_opt['u'])
+		dump_uberblock(&spa->spa_uberblock);
+
+	if (dump_opt['d'] || dump_opt['i']) {
+		dump_dir(dp->dp_meta_objset);
+		if (dump_opt['d'] >= 3) {
+			dump_bplist(dp->dp_meta_objset,
+			    spa->spa_sync_bplist_obj, "Deferred frees");
+			dump_dtl(spa->spa_root_vdev, 0);
+			dump_metaslabs(spa);
+		}
+		(void) dmu_objset_find(spa->spa_name, dump_one_dir, NULL,
+		    DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
+	}
+
+	if (dump_opt['b'] || dump_opt['c'])
+		rc = dump_block_stats(spa);
+
+	if (dump_opt['s'])
+		show_pool_stats(spa);
+
+	if (rc != 0)
+		exit(rc);
+}
+
+#define	ZDB_FLAG_CHECKSUM	0x0001
+#define	ZDB_FLAG_DECOMPRESS	0x0002
+#define	ZDB_FLAG_BSWAP		0x0004
+#define	ZDB_FLAG_GBH		0x0008
+#define	ZDB_FLAG_INDIRECT	0x0010
+#define	ZDB_FLAG_PHYS		0x0020
+#define	ZDB_FLAG_RAW		0x0040
+#define	ZDB_FLAG_PRINT_BLKPTR	0x0080
+
+int flagbits[256];
+
+static void
+zdb_print_blkptr(blkptr_t *bp, int flags)
+{
+	dva_t *dva = bp->blk_dva;
+	int d;
+
+	if (flags & ZDB_FLAG_BSWAP)
+		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
+	/*
+	 * Super-ick warning:  This code is also duplicated in
+	 * cmd/mdb/common/modules/zfs/zfs.c .  Yeah, I hate code
+	 * replication, too.
+	 */
+	for (d = 0; d < BP_GET_NDVAS(bp); d++) {
+		(void) printf("\tDVA[%d]: vdev_id %lld / %llx\n", d,
+		    (longlong_t)DVA_GET_VDEV(&dva[d]),
+		    (longlong_t)DVA_GET_OFFSET(&dva[d]));
+		(void) printf("\tDVA[%d]:       GANG: %-5s  GRID:  %04llx\t"
+		    "ASIZE: %llx\n", d,
+		    DVA_GET_GANG(&dva[d]) ? "TRUE" : "FALSE",
+		    (longlong_t)DVA_GET_GRID(&dva[d]),
+		    (longlong_t)DVA_GET_ASIZE(&dva[d]));
+		(void) printf("\tDVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", d,
+		    (u_longlong_t)DVA_GET_VDEV(&dva[d]),
+		    (longlong_t)DVA_GET_OFFSET(&dva[d]),
+		    (longlong_t)BP_GET_PSIZE(bp),
+		    BP_SHOULD_BYTESWAP(bp) ? "e" : "",
+		    !DVA_GET_GANG(&dva[d]) && BP_GET_LEVEL(bp) != 0 ?
+		    "d" : "",
+		    DVA_GET_GANG(&dva[d]) ? "g" : "",
+		    BP_GET_COMPRESS(bp) != 0 ? "d" : "");
+	}
+	(void) printf("\tLSIZE:  %-16llx\t\tPSIZE: %llx\n",
+	    (longlong_t)BP_GET_LSIZE(bp), (longlong_t)BP_GET_PSIZE(bp));
+	(void) printf("\tENDIAN: %6s\t\t\t\t\tTYPE:  %s\n",
+	    BP_GET_BYTEORDER(bp) ? "LITTLE" : "BIG",
+	    dmu_ot[BP_GET_TYPE(bp)].ot_name);
+	(void) printf("\tBIRTH:  %-16llx   LEVEL: %-2llu\tFILL:  %llx\n",
+	    (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_GET_LEVEL(bp),
+	    (u_longlong_t)bp->blk_fill);
+	(void) printf("\tCKFUNC: %-16s\t\tCOMP:  %s\n",
+	    zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
+	    zio_compress_table[BP_GET_COMPRESS(bp)].ci_name);
+	(void) printf("\tCKSUM:  %llx:%llx:%llx:%llx\n",
+	    (u_longlong_t)bp->blk_cksum.zc_word[0],
+	    (u_longlong_t)bp->blk_cksum.zc_word[1],
+	    (u_longlong_t)bp->blk_cksum.zc_word[2],
+	    (u_longlong_t)bp->blk_cksum.zc_word[3]);
+}
+
+static void
+zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
+{
+	int i;
+
+	for (i = 0; i < nbps; i++)
+		zdb_print_blkptr(&bp[i], flags);
+}
+
+static void
+zdb_dump_gbh(void *buf, int flags)
+{
+	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
+}
+
+static void
+zdb_dump_block_raw(void *buf, uint64_t size, int flags)
+{
+	if (flags & ZDB_FLAG_BSWAP)
+		byteswap_uint64_array(buf, size);
+	(void) write(2, buf, size);
+}
+
+static void
+zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
+{
+	uint64_t *d = (uint64_t *)buf;
+	int nwords = size / sizeof (uint64_t);
+	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
+	int i, j;
+	char *hdr, *c;
+
+	if (do_bswap)
+		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
+	else
+		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
+
+	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
+
+	for (i = 0; i < nwords; i += 2) {
+		(void) printf("%06llx:  %016llx  %016llx  ",
+		    (u_longlong_t)(i * sizeof (uint64_t)),
+		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
+		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
+
+		c = (char *)&d[i];
+		for (j = 0; j < 2 * sizeof (uint64_t); j++)
+			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
+		(void) printf("\n");
+	}
+}
+
+/*
+ * There are two acceptable formats:
+ *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
+ *	child[.child]*    - For example: 0.1.1
+ *
+ * The second form can be used to specify arbitrary vdevs anywhere
+ * in the hierarchy.  For example, in a pool with a mirror of
+ * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
+ */
+static vdev_t *
+zdb_vdev_lookup(vdev_t *vdev, char *path)
+{
+	char *s, *p, *q;
+	int i;
+
+	if (vdev == NULL)
+		return (NULL);
+
+	/* First, assume the x.x.x.x format */
+	i = (int)strtoul(path, &s, 10);
+	if (s == path || (s && *s != '.' && *s != '\0'))
+		goto name;
+	if (i < 0 || i >= vdev->vdev_children)
+		return (NULL);
+
+	vdev = vdev->vdev_child[i];
+	if (*s == '\0')
+		return (vdev);
+	return (zdb_vdev_lookup(vdev, s+1));
+
+name:
+	for (i = 0; i < vdev->vdev_children; i++) {
+		vdev_t *vc = vdev->vdev_child[i];
+
+		if (vc->vdev_path == NULL) {
+			vc = zdb_vdev_lookup(vc, path);
+			if (vc == NULL)
+				continue;
+			else
+				return (vc);
+		}
+
+		p = strrchr(vc->vdev_path, '/');
+		p = p ? p + 1 : vc->vdev_path;
+		q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
+
+		if (strcmp(vc->vdev_path, path) == 0)
+			return (vc);
+		if (strcmp(p, path) == 0)
+			return (vc);
+		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
+			return (vc);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Read a block from a pool and print it out.  The syntax of the
+ * block descriptor is:
+ *
+ *	pool:vdev_specifier:offset:size[:flags]
+ *
+ *	pool           - The name of the pool you wish to read from
+ *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
+ *	offset         - offset, in hex, in bytes
+ *	size           - Amount of data to read, in hex, in bytes
+ *	flags          - A string of characters specifying options
+ *		 b: Decode a blkptr at given offset within block
+ *		*c: Calculate and display checksums
+ *		*d: Decompress data before dumping
+ *		 e: Byteswap data before dumping
+ *		*g: Display data as a gang block header
+ *		*i: Display as an indirect block
+ *		 p: Do I/O to physical offset
+ *		 r: Dump raw data to stdout
+ *
+ *              * = not yet implemented
+ */
+static void
+zdb_read_block(char *thing, spa_t **spap)
+{
+	spa_t *spa = *spap;
+	int flags = 0;
+	uint64_t offset = 0, size = 0, blkptr_offset = 0;
+	zio_t *zio;
+	vdev_t *vd;
+	void *buf;
+	char *s, *p, *dup, *spa_name, *vdev, *flagstr;
+	int i, error, zio_flags;
+
+	dup = strdup(thing);
+	s = strtok(dup, ":");
+	spa_name = s ? s : "";
+	s = strtok(NULL, ":");
+	vdev = s ? s : "";
+	s = strtok(NULL, ":");
+	offset = strtoull(s ? s : "", NULL, 16);
+	s = strtok(NULL, ":");
+	size = strtoull(s ? s : "", NULL, 16);
+	s = strtok(NULL, ":");
+	flagstr = s ? s : "";
+
+	s = NULL;
+	if (size == 0)
+		s = "size must not be zero";
+	if (!IS_P2ALIGNED(size, DEV_BSIZE))
+		s = "size must be a multiple of sector size";
+	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
+		s = "offset must be a multiple of sector size";
+	if (s) {
+		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
+		free(dup);
+		return;
+	}
+
+	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
+		for (i = 0; flagstr[i]; i++) {
+			int bit = flagbits[(uchar_t)flagstr[i]];
+
+			if (bit == 0) {
+				(void) printf("***Invalid flag: %c\n",
+				    flagstr[i]);
+				continue;
+			}
+			flags |= bit;
+
+			/* If it's not something with an argument, keep going */
+			if ((bit & (ZDB_FLAG_CHECKSUM | ZDB_FLAG_DECOMPRESS |
+			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
+				continue;
+
+			p = &flagstr[i + 1];
+			if (bit == ZDB_FLAG_PRINT_BLKPTR)
+				blkptr_offset = strtoull(p, &p, 16);
+			if (*p != ':' && *p != '\0') {
+				(void) printf("***Invalid flag arg: '%s'\n", s);
+				free(dup);
+				return;
+			}
+		}
+	}
+
+	if (spa == NULL || spa->spa_name == NULL ||
+	    strcmp(spa->spa_name, spa_name)) {
+		if (spa && spa->spa_name)
+			spa_close(spa, (void *)zdb_read_block);
+		error = spa_open(spa_name, spap, (void *)zdb_read_block);
+		if (error)
+			fatal("Failed to open pool '%s': %s",
+			    spa_name, strerror(error));
+		spa = *spap;
+	}
+
+	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
+	if (vd == NULL) {
+		(void) printf("***Invalid vdev: %s\n", vdev);
+		free(dup);
+		return;
+	} else {
+		if (vd->vdev_path)
+			(void) printf("Found vdev: %s\n", vd->vdev_path);
+		else
+			(void) printf("Found vdev type: %s\n",
+			    vd->vdev_ops->vdev_op_type);
+	}
+
+	buf = umem_alloc(size, UMEM_NOFAIL);
+
+	zio_flags = ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
+	    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_NOBOOKMARK;
+
+	if (flags & ZDB_FLAG_PHYS)
+		zio_flags |= ZIO_FLAG_PHYSICAL;
+
+	zio = zio_root(spa, NULL, NULL, 0);
+	/* XXX todo - cons up a BP so RAID-Z will be happy */
+	zio_nowait(zio_vdev_child_io(zio, NULL, vd, offset, buf, size,
+	    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, zio_flags, NULL, NULL));
+	error = zio_wait(zio);
+
+	if (error) {
+		(void) printf("Read of %s failed, error: %d\n", thing, error);
+		goto out;
+	}
+
+	if (flags & ZDB_FLAG_PRINT_BLKPTR)
+		zdb_print_blkptr((blkptr_t *)(void *)
+		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
+	else if (flags & ZDB_FLAG_RAW)
+		zdb_dump_block_raw(buf, size, flags);
+	else if (flags & ZDB_FLAG_INDIRECT)
+		zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
+		    flags);
+	else if (flags & ZDB_FLAG_GBH)
+		zdb_dump_gbh(buf, flags);
+	else
+		zdb_dump_block(thing, buf, size, flags);
+
+out:
+	umem_free(buf, size);
+	free(dup);
+}
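
To make the block-descriptor syntax above concrete, a hypothetical invocation (pool name and vdev layout invented for illustration) could be:

    # zdb -R tank:0.0:400000:200:r

which reads 0x200 bytes at hex offset 0x400000 from child 0 of top-level vdev 0 in pool "tank" and dumps them raw. Both offset and size must be sector-aligned, and the flags marked with '*' in the comment above are not yet implemented in this version.
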
+
+int
+main(int argc, char **argv)
+{
+	int i, c;
+	struct rlimit rl = { 1024, 1024 };
+	spa_t *spa;
+	objset_t *os = NULL;
+	char *endstr;
+	int dump_all = 1;
+	int verbose = 0;
+	int error;
+	int flag, set;
+
+	(void) setrlimit(RLIMIT_NOFILE, &rl);
+	(void) enable_extended_FILE_stdio(-1, -1);
+
+	dprintf_setup(&argc, argv);
+
+	while ((c = getopt(argc, argv, "udibcsvCLO:B:UlR")) != -1) {
+		switch (c) {
+		case 'u':
+		case 'd':
+		case 'i':
+		case 'b':
+		case 'c':
+		case 's':
+		case 'C':
+		case 'l':
+		case 'R':
+			dump_opt[c]++;
+			dump_all = 0;
+			break;
+		case 'L':
+			dump_opt[c]++;
+			break;
+		case 'O':
+			endstr = optarg;
+			if (endstr[0] == '!') {
+				endstr++;
+				set = 0;
+			} else {
+				set = 1;
+			}
+			if (strcmp(endstr, "post") == 0) {
+				flag = ADVANCE_PRE;
+				set = !set;
+			} else if (strcmp(endstr, "pre") == 0) {
+				flag = ADVANCE_PRE;
+			} else if (strcmp(endstr, "prune") == 0) {
+				flag = ADVANCE_PRUNE;
+			} else if (strcmp(endstr, "data") == 0) {
+				flag = ADVANCE_DATA;
+			} else if (strcmp(endstr, "holes") == 0) {
+				flag = ADVANCE_HOLES;
+			} else {
+				usage();
+			}
+			if (set)
+				zdb_advance |= flag;
+			else
+				zdb_advance &= ~flag;
+			break;
+		case 'B':
+			endstr = optarg - 1;
+			zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0);
+			zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0);
+			zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0);
+			zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16);
+			(void) printf("simulating bad block "
+			    "<%llu, %llu, %lld, %llx>\n",
+			    (u_longlong_t)zdb_noread.zb_objset,
+			    (u_longlong_t)zdb_noread.zb_object,
+			    (u_longlong_t)zdb_noread.zb_level,
+			    (u_longlong_t)zdb_noread.zb_blkid);
+			break;
+		case 'v':
+			verbose++;
+			break;
+		case 'U':
+			spa_config_dir = "/tmp";
+			break;
+		default:
+			usage();
+			break;
+		}
+	}
+
+	kernel_init(FREAD);
+
+	/*
+	 * Disable vdev caching.  If we don't do this, live pool traversal
+	 * won't make progress because it will never see disk updates.
+	 */
+	zfs_vdev_cache_size = 0;
+
+	for (c = 0; c < 256; c++) {
+		if (dump_all && c != 'L' && c != 'l' && c != 'R')
+			dump_opt[c] = 1;
+		if (dump_opt[c])
+			dump_opt[c] += verbose;
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 1) {
+		if (dump_opt['C']) {
+			dump_config(NULL);
+			return (0);
+		}
+		usage();
+	}
+
+	if (dump_opt['l']) {
+		dump_label(argv[0]);
+		return (0);
+	}
+
+	if (dump_opt['R']) {
+		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
+		flagbits['c'] = ZDB_FLAG_CHECKSUM;
+		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
+		flagbits['e'] = ZDB_FLAG_BSWAP;
+		flagbits['g'] = ZDB_FLAG_GBH;
+		flagbits['i'] = ZDB_FLAG_INDIRECT;
+		flagbits['p'] = ZDB_FLAG_PHYS;
+		flagbits['r'] = ZDB_FLAG_RAW;
+
+		spa = NULL;
+		while (argv[0]) {
+			zdb_read_block(argv[0], &spa);
+			argv++;
+			argc--;
+		}
+		if (spa)
+			spa_close(spa, (void *)zdb_read_block);
+		return (0);
+	}
+
+	if (dump_opt['C'])
+		dump_config(argv[0]);
+
+	if (strchr(argv[0], '/') != NULL) {
+		error = dmu_objset_open(argv[0], DMU_OST_ANY,
+		    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
+	} else {
+		error = spa_open(argv[0], &spa, FTAG);
+	}
+
+	if (error)
+		fatal("can't open %s: %s", argv[0], strerror(error));
+
+	argv++;
+	if (--argc > 0) {
+		zopt_objects = argc;
+		zopt_object = calloc(zopt_objects, sizeof (uint64_t));
+		for (i = 0; i < zopt_objects; i++) {
+			errno = 0;
+			zopt_object[i] = strtoull(argv[i], NULL, 0);
+			if (zopt_object[i] == 0 && errno != 0)
+				fatal("bad object number %s: %s",
+				    argv[i], strerror(errno));
+		}
+	}
+
+	if (os != NULL) {
+		dump_dir(os);
+		dmu_objset_close(os);
+	} else {
+		dump_zpool(spa);
+		spa_close(spa, FTAG);
+	}
+
+	kernel_fini();
+
+	return (0);
+}
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zdb/zdb.8
@@ -0,0 +1,93 @@
+'\" te
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").  
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\" Copyright (c) 2004, Sun Microsystems, Inc. All Rights Reserved.
+.TH zdb 1M "31 Oct 2005" "SunOS 5.11" "System Administration Commands"
+.SH NAME
+zdb \- ZFS debugger
+.SH SYNOPSIS
+.LP
+.nf
+\fBzdb\fR \fIpool\fR
+.fi
+
+.SH DESCRIPTION
+.LP
+The \fBzdb\fR command is used by support engineers to diagnose failures and gather statistics. Since the \fBZFS\fR file system is always consistent on disk and is self-repairing, \fBzdb\fR should only be run under the direction of a support engineer.
+.LP
+If no arguments are specified, \fBzdb\fR performs basic consistency checks on the pool and associated datasets, and reports any problems detected.
+.LP
+Any options supported by this command are internal to Sun and subject to change at any time.
+.SH EXIT STATUS
+.LP
+The following exit values are returned:
+.sp
+.ne 2
+.mk
+.na
+\fB\fB0\fR\fR
+.ad
+.RS 5n
+.rt  
+The pool is consistent.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB1\fR\fR
+.ad
+.RS 5n
+.rt  
+An error was detected.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB2\fR\fR
+.ad
+.RS 5n
+.rt  
+Invalid command line options were specified.
+.RE
+
+.SH ATTRIBUTES
+.LP
+See \fBattributes\fR(5) for descriptions of the following attributes:
+.sp
+
+.sp
+.TS
+tab(	) box;
+cw(2.75i) |cw(2.75i) 
+lw(2.75i) |lw(2.75i) 
+.
+ATTRIBUTE TYPE	ATTRIBUTE VALUE
+_
+Availability	SUNWzfsu
+_
+Interface Stability	Unstable
+.TE
+
+.SH SEE ALSO
+.LP
+\fBzfs\fR(1M), \fBzpool\fR(1M), \fBattributes\fR(5)
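
To illustrate the synopsis and exit codes documented above with a hypothetical pool name:

    # zdb tank
    # echo $?
    0

A non-zero status would indicate either a detected inconsistency (1) or invalid command line options (2).
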
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zfs/zfs_util.h
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_ZFS_UTIL_H
+#define	_ZFS_UTIL_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <libzfs.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+void * safe_malloc(size_t size);
+libzfs_handle_t *g_zfs;
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _ZFS_UTIL_H */
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
@@ -0,0 +1,3253 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <libgen.h>
+#include <libintl.h>
+#include <libuutil.h>
+#include <locale.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <zone.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+
+#include <libzfs.h>
+
+#include "zfs_iter.h"
+#include "zfs_util.h"
+
+libzfs_handle_t *g_zfs;
+
+static FILE *mnttab_file;
+
+static int zfs_do_clone(int argc, char **argv);
+static int zfs_do_create(int argc, char **argv);
+static int zfs_do_destroy(int argc, char **argv);
+static int zfs_do_get(int argc, char **argv);
+static int zfs_do_inherit(int argc, char **argv);
+static int zfs_do_list(int argc, char **argv);
+static int zfs_do_mount(int argc, char **argv);
+static int zfs_do_rename(int argc, char **argv);
+static int zfs_do_rollback(int argc, char **argv);
+static int zfs_do_set(int argc, char **argv);
+static int zfs_do_snapshot(int argc, char **argv);
+static int zfs_do_unmount(int argc, char **argv);
+static int zfs_do_share(int argc, char **argv);
+static int zfs_do_unshare(int argc, char **argv);
+static int zfs_do_send(int argc, char **argv);
+static int zfs_do_receive(int argc, char **argv);
+static int zfs_do_promote(int argc, char **argv);
+static int zfs_do_jail(int argc, char **argv);
+static int zfs_do_unjail(int argc, char **argv);
+
+/*
+ * These libumem hooks provide a reasonable set of defaults for the allocator's
+ * debugging facilities.
+ */
+const char *
+_umem_debug_init(void)
+{
+	return ("default,verbose"); /* $UMEM_DEBUG setting */
+}
+
+const char *
+_umem_logging_init(void)
+{
+	return ("fail,contents"); /* $UMEM_LOGGING setting */
+}
+
+typedef enum {
+	HELP_CLONE,
+	HELP_CREATE,
+	HELP_DESTROY,
+	HELP_GET,
+	HELP_INHERIT,
+	HELP_JAIL,
+	HELP_UNJAIL,
+	HELP_LIST,
+	HELP_MOUNT,
+	HELP_PROMOTE,
+	HELP_RECEIVE,
+	HELP_RENAME,
+	HELP_ROLLBACK,
+	HELP_SEND,
+	HELP_SET,
+	HELP_SHARE,
+	HELP_SNAPSHOT,
+	HELP_UNMOUNT,
+	HELP_UNSHARE
+} zfs_help_t;
+
+typedef struct zfs_command {
+	const char	*name;
+	int		(*func)(int argc, char **argv);
+	zfs_help_t	usage;
+} zfs_command_t;
+
+/*
+ * Master command table.  Each ZFS command has a name, associated function, and
+ * usage message.  The usage messages need to be internationalized, so we have
+ * to have a function to return the usage message based on a command index.
+ *
+ * These commands are organized according to how they are displayed in the usage
+ * message.  An empty command (one with a NULL name) indicates an empty line in
+ * the generic usage message.
+ */
+static zfs_command_t command_table[] = {
+	{ "create",	zfs_do_create,		HELP_CREATE		},
+	{ "destroy",	zfs_do_destroy,		HELP_DESTROY		},
+	{ NULL },
+	{ "snapshot",	zfs_do_snapshot,	HELP_SNAPSHOT		},
+	{ "rollback",	zfs_do_rollback,	HELP_ROLLBACK		},
+	{ "clone",	zfs_do_clone,		HELP_CLONE		},
+	{ "promote",	zfs_do_promote,		HELP_PROMOTE		},
+	{ "rename",	zfs_do_rename,		HELP_RENAME		},
+	{ NULL },
+	{ "list",	zfs_do_list,		HELP_LIST		},
+	{ NULL },
+	{ "set",	zfs_do_set,		HELP_SET		},
+	{ "get", 	zfs_do_get,		HELP_GET		},
+	{ "inherit",	zfs_do_inherit,		HELP_INHERIT		},
+	{ NULL },
+	{ "mount",	zfs_do_mount,		HELP_MOUNT		},
+	{ NULL },
+	{ "unmount",	zfs_do_unmount,		HELP_UNMOUNT		},
+	{ NULL },
+	{ "share",	zfs_do_share,		HELP_SHARE		},
+	{ NULL },
+	{ "unshare",	zfs_do_unshare,		HELP_UNSHARE		},
+	{ NULL },
+	{ "send",	zfs_do_send,		HELP_SEND		},
+	{ "receive",	zfs_do_receive,		HELP_RECEIVE		},
+	{ NULL },
+	{ "jail",	zfs_do_jail,		HELP_JAIL		},
+	{ "unjail",	zfs_do_unjail,		HELP_UNJAIL		},
+};
+
+#define	NCOMMAND	(sizeof (command_table) / sizeof (command_table[0]))
+
+zfs_command_t *current_command;
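
For orientation, a minimal sketch of the dispatch that a table like this supports. The real lookup is performed in main(), further down in this file; the helper below is illustrative only, though the table, NCOMMAND, current_command, and usage() names are the ones declared here.

    /*
     * Illustrative sketch only: walk command_table[], match argv[1]
     * against each entry's name, record current_command so usage()
     * can print command-specific help, and hand the remaining
     * arguments to the entry's handler.
     */
    static int
    dispatch_command(int argc, char **argv)
    {
        int i;

        for (i = 0; i < NCOMMAND; i++) {
            if (command_table[i].name != NULL &&
                strcmp(argv[1], command_table[i].name) == 0) {
                current_command = &command_table[i];
                return (command_table[i].func(argc - 1, argv + 1));
            }
        }
        (void) fprintf(stderr, gettext("unrecognized command '%s'\n"),
            argv[1]);
        usage(B_FALSE);
        return (2);
    }
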
+
+static const char *
+get_usage(zfs_help_t idx)
+{
+	switch (idx) {
+	case HELP_CLONE:
+		return (gettext("\tclone <snapshot> <filesystem|volume>\n"));
+	case HELP_CREATE:
+		return (gettext("\tcreate [[-o property=value] ... ] "
+		    "<filesystem>\n"
+		    "\tcreate [-s] [-b blocksize] [[-o property=value] ...]\n"
+		    "\t    -V <size> <volume>\n"));
+	case HELP_DESTROY:
+		return (gettext("\tdestroy [-rRf] "
+		    "<filesystem|volume|snapshot>\n"));
+	case HELP_GET:
+		return (gettext("\tget [-rHp] [-o field[,field]...] "
+		    "[-s source[,source]...]\n"
+		    "\t    <all | property[,property]...> "
+		    "[filesystem|volume|snapshot] ...\n"));
+	case HELP_INHERIT:
+		return (gettext("\tinherit [-r] <property> "
+		    "<filesystem|volume> ...\n"));
+	case HELP_JAIL:
+		return (gettext("\tjail <jailid> <filesystem>\n"));
+	case HELP_UNJAIL:
+		return (gettext("\tunjail <jailid> <filesystem>\n"));
+	case HELP_LIST:
+		return (gettext("\tlist [-rH] [-o property[,property]...] "
+		    "[-t type[,type]...]\n"
+		    "\t    [-s property [-s property]...]"
+		    " [-S property [-S property]...]\n"
+		    "\t    [filesystem|volume|snapshot] ...\n"));
+	case HELP_MOUNT:
+		return (gettext("\tmount\n"
+		    "\tmount [-o opts] [-O] -a\n"
+		    "\tmount [-o opts] [-O] <filesystem>\n"));
+	case HELP_PROMOTE:
+		return (gettext("\tpromote <clone filesystem>\n"));
+	case HELP_RECEIVE:
+		return (gettext("\treceive [-vnF] <filesystem|volume|"
+		    "snapshot>\n"
+		    "\treceive [-vnF] -d <filesystem>\n"));
+	case HELP_RENAME:
+		return (gettext("\trename <filesystem|volume|snapshot> "
+		    "<filesystem|volume|snapshot>\n"
+		    "\trename -r <snapshot> <snapshot>"));
+	case HELP_ROLLBACK:
+		return (gettext("\trollback [-rRf] <snapshot>\n"));
+	case HELP_SEND:
+		return (gettext("\tsend [-i <snapshot>] <snapshot>\n"));
+	case HELP_SET:
+		return (gettext("\tset <property=value> "
+		    "<filesystem|volume> ...\n"));
+	case HELP_SHARE:
+		return (gettext("\tshare -a\n"
+		    "\tshare <filesystem>\n"));
+	case HELP_SNAPSHOT:
+		return (gettext("\tsnapshot [-r] "
+		    "<filesystem@name|volume@name>\n"));
+	case HELP_UNMOUNT:
+		return (gettext("\tunmount [-f] -a\n"
+		    "\tunmount [-f] <filesystem|mountpoint>\n"));
+	case HELP_UNSHARE:
+		return (gettext("\tunshare [-f] -a\n"
+		    "\tunshare [-f] <filesystem|mountpoint>\n"));
+	}
+
+	abort();
+	/* NOTREACHED */
+}
+
+/*
+ * Utility function to guarantee malloc() success.
+ */
+void *
+safe_malloc(size_t size)
+{
+	void *data;
+
+	if ((data = calloc(1, size)) == NULL) {
+		(void) fprintf(stderr, "internal error: out of memory\n");
+		exit(1);
+	}
+
+	return (data);
+}
+
+/*
+ * Callback routine that will print out information for each of the
+ * properties.
+ */
+static zfs_prop_t
+usage_prop_cb(zfs_prop_t prop, void *cb)
+{
+	FILE *fp = cb;
+
+	(void) fprintf(fp, "\t%-13s  ", zfs_prop_to_name(prop));
+
+	if (zfs_prop_readonly(prop))
+		(void) fprintf(fp, "  NO    ");
+	else
+		(void) fprintf(fp, " YES    ");
+
+	if (zfs_prop_inheritable(prop))
+		(void) fprintf(fp, "  YES   ");
+	else
+		(void) fprintf(fp, "   NO   ");
+
+	if (zfs_prop_values(prop) == NULL)
+		(void) fprintf(fp, "-\n");
+	else
+		(void) fprintf(fp, "%s\n", zfs_prop_values(prop));
+
+	return (ZFS_PROP_CONT);
+}
+
+/*
+ * Display usage message.  If we're inside a command, display only the usage for
+ * that command.  Otherwise, iterate over the entire command table and display
+ * a complete usage message.
+ */
+static void
+usage(boolean_t requested)
+{
+	int i;
+	boolean_t show_properties = B_FALSE;
+	FILE *fp = requested ? stdout : stderr;
+
+	if (current_command == NULL) {
+
+		(void) fprintf(fp, gettext("usage: zfs command args ...\n"));
+		(void) fprintf(fp,
+		    gettext("where 'command' is one of the following:\n\n"));
+
+		for (i = 0; i < NCOMMAND; i++) {
+			if (command_table[i].name == NULL)
+				(void) fprintf(fp, "\n");
+			else
+				(void) fprintf(fp, "%s",
+				    get_usage(command_table[i].usage));
+		}
+
+		(void) fprintf(fp, gettext("\nEach dataset is of the form: "
+		    "pool/[dataset/]*dataset[@name]\n"));
+	} else {
+		(void) fprintf(fp, gettext("usage:\n"));
+		(void) fprintf(fp, "%s", get_usage(current_command->usage));
+	}
+
+	if (current_command != NULL &&
+	    (strcmp(current_command->name, "set") == 0 ||
+	    strcmp(current_command->name, "get") == 0 ||
+	    strcmp(current_command->name, "inherit") == 0 ||
+	    strcmp(current_command->name, "list") == 0))
+		show_properties = B_TRUE;
+
+	if (show_properties) {
+
+		(void) fprintf(fp,
+		    gettext("\nThe following properties are supported:\n"));
+
+		(void) fprintf(fp, "\n\t%-13s  %s  %s   %s\n\n",
+		    "PROPERTY", "EDIT", "INHERIT", "VALUES");
+
+		/* Iterate over all properties */
+		(void) zfs_prop_iter(usage_prop_cb, fp, B_FALSE);
+
+		(void) fprintf(fp, gettext("\nSizes are specified in bytes "
+		    "with standard units such as K, M, G, etc.\n"));
+		(void) fprintf(fp, gettext("\n\nUser-defined properties can "
+		    "be specified by using a name containing a colon (:).\n"));
+	} else {
+		/*
+		 * TRANSLATION NOTE:
+		 * "zfs set|get" must not be localised; this is the
+		 * command name and arguments.
+		 */
+		(void) fprintf(fp,
+		    gettext("\nFor the property list, run: zfs set|get\n"));
+	}
+
+	/*
+	 * See comments at end of main().
+	 */
+	if (getenv("ZFS_ABORT") != NULL) {
+		(void) printf("dumping core by request\n");
+		abort();
+	}
+
+	exit(requested ? 0 : 2);
+}
+
+/*
+ * zfs clone <fs, snap, vol> fs
+ *
+ * Given an existing dataset, create a writable copy whose initial contents
+ * are the same as the source.  The newly created dataset maintains a
+ * dependency on the original; the original cannot be destroyed so long as
+ * the clone exists.
+ */
+static int
+zfs_do_clone(int argc, char **argv)
+{
+	zfs_handle_t *zhp;
+	int ret;
+
+	/* check options */
+	if (argc > 1 && argv[1][0] == '-') {
+		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+		    argv[1][1]);
+		usage(B_FALSE);
+	}
+
+	/* check number of arguments */
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing source dataset "
+		    "argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc < 3) {
+		(void) fprintf(stderr, gettext("missing target dataset "
+		    "argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 3) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	/* open the source dataset */
+	if ((zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_SNAPSHOT)) == NULL)
+		return (1);
+
+	/* pass to libzfs */
+	ret = zfs_clone(zhp, argv[2], NULL);
+
+	/* create the mountpoint if necessary */
+	if (ret == 0) {
+		zfs_handle_t *clone = zfs_open(g_zfs, argv[2], ZFS_TYPE_ANY);
+		if (clone != NULL) {
+			if ((ret = zfs_mount(clone, NULL, 0)) == 0)
+				ret = zfs_share(clone);
+			zfs_close(clone);
+		}
+		zpool_log_history(g_zfs, argc, argv, argv[2], B_FALSE, B_FALSE);
+	}
+
+	zfs_close(zhp);
+
+	return (ret == 0 ? 0 : 1);
+}
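
A hypothetical session exercising the clone workflow described above (dataset names invented):

    # zfs snapshot tank/home@tuesday
    # zfs clone tank/home@tuesday tank/scratch

On success the new clone is mounted and shared, and tank/home@tuesday cannot be destroyed while tank/scratch still depends on it.
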
+
+/*
+ * zfs create [-o prop=value] ... fs
+ * zfs create [-s] [-b blocksize] [-o prop=value] ... -V vol size
+ *
+ * Create a new dataset.  This command can be used to create filesystems
+ * and volumes.  Snapshot creation is handled by 'zfs snapshot'.
+ * For volumes, the user must specify a size to be used.
+ *
+ * The '-s' flag applies only to volumes, and indicates that we should not try
+ * to set the reservation for this volume.  By default we set a reservation
+ * equal to the size for any volume.
+ */
+static int
+zfs_do_create(int argc, char **argv)
+{
+	zfs_type_t type = ZFS_TYPE_FILESYSTEM;
+	zfs_handle_t *zhp = NULL;
+	uint64_t volsize;
+	int c;
+	boolean_t noreserve = B_FALSE;
+	int ret = 1;
+	nvlist_t *props = NULL;
+	uint64_t intval;
+	char *propname;
+	char *propval = NULL;
+	char *strval;
+
+	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
+		(void) fprintf(stderr, gettext("internal error: "
+		    "out of memory\n"));
+		return (1);
+	}
+
+	/* check options */
+	while ((c = getopt(argc, argv, ":V:b:so:")) != -1) {
+		switch (c) {
+		case 'V':
+			type = ZFS_TYPE_VOLUME;
+			if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) {
+				(void) fprintf(stderr, gettext("bad volume "
+				    "size '%s': %s\n"), optarg,
+				    libzfs_error_description(g_zfs));
+				goto error;
+			}
+
+			if (nvlist_add_uint64(props,
+			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
+			    intval) != 0) {
+				(void) fprintf(stderr, gettext("internal "
+				    "error: out of memory\n"));
+				goto error;
+			}
+			volsize = intval;
+			break;
+		case 'b':
+			if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) {
+				(void) fprintf(stderr, gettext("bad volume "
+				    "block size '%s': %s\n"), optarg,
+				    libzfs_error_description(g_zfs));
+				goto error;
+			}
+
+			if (nvlist_add_uint64(props,
+			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
+			    intval) != 0) {
+				(void) fprintf(stderr, gettext("internal "
+				    "error: out of memory\n"));
+				goto error;
+			}
+			break;
+		case 'o':
+			propname = optarg;
+			if ((propval = strchr(propname, '=')) == NULL) {
+				(void) fprintf(stderr, gettext("missing "
+				    "'=' for -o option\n"));
+				goto error;
+			}
+			*propval = '\0';
+			propval++;
+			if (nvlist_lookup_string(props, propname,
+			    &strval) == 0) {
+				(void) fprintf(stderr, gettext("property '%s' "
+				    "specified multiple times\n"), propname);
+				goto error;
+			}
+			if (nvlist_add_string(props, propname, propval) != 0) {
+				(void) fprintf(stderr, gettext("internal "
+				    "error: out of memory\n"));
+				goto error;
+			}
+			break;
+		case 's':
+			noreserve = B_TRUE;
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing size "
+			    "argument\n"));
+			goto badusage;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			goto badusage;
+		}
+	}
+
+	if (noreserve && type != ZFS_TYPE_VOLUME) {
+		(void) fprintf(stderr, gettext("'-s' can only be used when "
+		    "creating a volume\n"));
+		goto badusage;
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check number of arguments */
+	if (argc == 0) {
+		(void) fprintf(stderr, gettext("missing %s argument\n"),
+		    zfs_type_to_name(type));
+		goto badusage;
+	}
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		goto badusage;
+	}
+
+	if (type == ZFS_TYPE_VOLUME && !noreserve &&
+	    nvlist_lookup_string(props, zfs_prop_to_name(ZFS_PROP_RESERVATION),
+	    &strval) != 0) {
+		if (nvlist_add_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_RESERVATION),
+		    volsize) != 0) {
+			(void) fprintf(stderr, gettext("internal "
+			    "error: out of memory\n"));
+			nvlist_free(props);
+			return (1);
+		}
+	}
+
+	/* pass to libzfs */
+	if (zfs_create(g_zfs, argv[0], type, props) != 0)
+		goto error;
+
+	if (propval != NULL)
+		*(propval - 1) = '=';
+	zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
+	    B_FALSE, B_FALSE);
+
+	if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
+		goto error;
+
+	/*
+	 * Mount and/or share the new filesystem as appropriate.  We provide a
+	 * verbose error message to let the user know that their filesystem was
+	 * in fact created, even if we failed to mount or share it.
+	 */
+	if (zfs_mount(zhp, NULL, 0) != 0) {
+		(void) fprintf(stderr, gettext("filesystem successfully "
+		    "created, but not mounted\n"));
+		ret = 1;
+	} else if (zfs_share(zhp) != 0) {
+		(void) fprintf(stderr, gettext("filesystem successfully "
+		    "created, but not shared\n"));
+		ret = 1;
+	} else {
+		ret = 0;
+	}
+
+error:
+	if (zhp)
+		zfs_close(zhp);
+	nvlist_free(props);
+	return (ret);
+badusage:
+	nvlist_free(props);
+	usage(B_FALSE);
+	return (2);
+}
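
Two hypothetical invocations covering the creation paths handled above (names and sizes invented):

    # zfs create -o compression=on tank/builds
    # zfs create -s -V 10G tank/vol0

The first creates a filesystem with a property applied at creation time; the second creates a 10 GB volume and, because of -s, skips the reservation that would otherwise be set equal to the volume size.
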
+
+/*
+ * zfs destroy [-rf] <fs, snap, vol>
+ *
+ * 	-r	Recursively destroy all children
+ * 	-R	Recursively destroy all dependents, including clones
+ * 	-f	Force unmounting of any dependents
+ *
+ * Destroys the given dataset.  By default, it will unmount any filesystems,
+ * and refuse to destroy a dataset that has any dependents.  A dependent can
+ * either be a child, or a clone of a child.
+ */
+typedef struct destroy_cbdata {
+	boolean_t	cb_first;
+	int		cb_force;
+	int		cb_recurse;
+	int		cb_error;
+	int		cb_needforce;
+	int		cb_doclones;
+	boolean_t	cb_closezhp;
+	zfs_handle_t	*cb_target;
+	char		*cb_snapname;
+} destroy_cbdata_t;
+
+/*
+ * Check for any dependents based on the '-r' or '-R' flags.
+ */
+static int
+destroy_check_dependent(zfs_handle_t *zhp, void *data)
+{
+	destroy_cbdata_t *cbp = data;
+	const char *tname = zfs_get_name(cbp->cb_target);
+	const char *name = zfs_get_name(zhp);
+
+	if (strncmp(tname, name, strlen(tname)) == 0 &&
+	    (name[strlen(tname)] == '/' || name[strlen(tname)] == '@')) {
+		/*
+		 * This is a direct descendant, not a clone somewhere else in
+		 * the hierarchy.
+		 */
+		if (cbp->cb_recurse)
+			goto out;
+
+		if (cbp->cb_first) {
+			(void) fprintf(stderr, gettext("cannot destroy '%s': "
+			    "%s has children\n"),
+			    zfs_get_name(cbp->cb_target),
+			    zfs_type_to_name(zfs_get_type(cbp->cb_target)));
+			(void) fprintf(stderr, gettext("use '-r' to destroy "
+			    "the following datasets:\n"));
+			cbp->cb_first = B_FALSE;
+			cbp->cb_error = 1;
+		}
+
+		(void) fprintf(stderr, "%s\n", zfs_get_name(zhp));
+	} else {
+		/*
+		 * This is a clone.  We only want to report this if the '-r'
+		 * wasn't specified, or the target is a snapshot.
+		 */
+		if (!cbp->cb_recurse &&
+		    zfs_get_type(cbp->cb_target) != ZFS_TYPE_SNAPSHOT)
+			goto out;
+
+		if (cbp->cb_first) {
+			(void) fprintf(stderr, gettext("cannot destroy '%s': "
+			    "%s has dependent clones\n"),
+			    zfs_get_name(cbp->cb_target),
+			    zfs_type_to_name(zfs_get_type(cbp->cb_target)));
+			(void) fprintf(stderr, gettext("use '-R' to destroy "
+			    "the following datasets:\n"));
+			cbp->cb_first = B_FALSE;
+			cbp->cb_error = 1;
+		}
+
+		(void) fprintf(stderr, "%s\n", zfs_get_name(zhp));
+	}
+
+out:
+	zfs_close(zhp);
+	return (0);
+}
+
+static int
+destroy_callback(zfs_handle_t *zhp, void *data)
+{
+	destroy_cbdata_t *cbp = data;
+
+	/*
+	 * Ignore pools (which we've already flagged as an error before getting
+	 * here).
+	 */
+	if (strchr(zfs_get_name(zhp), '/') == NULL &&
+	    zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	/*
+	 * Bail out on the first error.
+	 */
+	if (zfs_unmount(zhp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0 ||
+	    zfs_destroy(zhp) != 0) {
+		zfs_close(zhp);
+		return (-1);
+	}
+
+	zfs_close(zhp);
+	return (0);
+}
+
+static int
+destroy_snap_clones(zfs_handle_t *zhp, void *arg)
+{
+	destroy_cbdata_t *cbp = arg;
+	char thissnap[MAXPATHLEN];
+	zfs_handle_t *szhp;
+	boolean_t closezhp = cbp->cb_closezhp;
+	int rv;
+
+	(void) snprintf(thissnap, sizeof (thissnap),
+	    "%s@%s", zfs_get_name(zhp), cbp->cb_snapname);
+
+	libzfs_print_on_error(g_zfs, B_FALSE);
+	szhp = zfs_open(g_zfs, thissnap, ZFS_TYPE_SNAPSHOT);
+	libzfs_print_on_error(g_zfs, B_TRUE);
+	if (szhp) {
+		/*
+		 * Destroy any clones of this snapshot
+		 */
+		if (zfs_iter_dependents(szhp, B_FALSE, destroy_callback,
+		    cbp) != 0) {
+			zfs_close(szhp);
+			if (closezhp)
+				zfs_close(zhp);
+			return (-1);
+		}
+		zfs_close(szhp);
+	}
+
+	cbp->cb_closezhp = B_TRUE;
+	rv = zfs_iter_filesystems(zhp, destroy_snap_clones, arg);
+	if (closezhp)
+		zfs_close(zhp);
+	return (rv);
+}
+
+static int
+zfs_do_destroy(int argc, char **argv)
+{
+	destroy_cbdata_t cb = { 0 };
+	int c;
+	zfs_handle_t *zhp;
+	char *cp;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "frR")) != -1) {
+		switch (c) {
+		case 'f':
+			cb.cb_force = 1;
+			break;
+		case 'r':
+			cb.cb_recurse = 1;
+			break;
+		case 'R':
+			cb.cb_recurse = 1;
+			cb.cb_doclones = 1;
+			break;
+		case '?':
+		default:
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check number of arguments */
+	if (argc == 0) {
+		(void) fprintf(stderr, gettext("missing path argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	/*
+	 * If we are doing recursive destroy of a snapshot, then the
+	 * named snapshot may not exist.  Go straight to libzfs.
+	 */
+	if (cb.cb_recurse && (cp = strchr(argv[0], '@'))) {
+		int ret;
+
+		*cp = '\0';
+		if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
+			return (1);
+		*cp = '@';
+		cp++;
+
+		if (cb.cb_doclones) {
+			cb.cb_snapname = cp;
+			if (destroy_snap_clones(zhp, &cb) != 0) {
+				zfs_close(zhp);
+				return (1);
+			}
+		}
+
+		ret = zfs_destroy_snaps(zhp, cp);
+		zfs_close(zhp);
+		if (ret) {
+			(void) fprintf(stderr,
+			    gettext("no snapshots destroyed\n"));
+		} else {
+			zpool_log_history(g_zfs, argc + optind, argv - optind,
+			    argv[0], B_FALSE, B_FALSE);
+		}
+		return (ret != 0);
+	}
+
+	/* Open the given dataset */
+	if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
+		return (1);
+
+	cb.cb_target = zhp;
+
+	/*
+	 * Perform an explicit check for pools before going any further.
+	 */
+	if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL &&
+	    zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
+		(void) fprintf(stderr, gettext("cannot destroy '%s': "
+		    "operation does not apply to pools\n"),
+		    zfs_get_name(zhp));
+		(void) fprintf(stderr, gettext("use 'zfs destroy -r "
+		    "%s' to destroy all datasets in the pool\n"),
+		    zfs_get_name(zhp));
+		(void) fprintf(stderr, gettext("use 'zpool destroy %s' "
+		    "to destroy the pool itself\n"), zfs_get_name(zhp));
+		zfs_close(zhp);
+		return (1);
+	}
+
+	/*
+	 * Check for any dependents and/or clones.
+	 */
+	cb.cb_first = B_TRUE;
+	if (!cb.cb_doclones &&
+	    zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent,
+	    &cb) != 0) {
+		zfs_close(zhp);
+		return (1);
+	}
+
+	if (cb.cb_error ||
+	    zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0) {
+		zfs_close(zhp);
+		return (1);
+	}
+
+	/*
+	 * Do the real thing.  The callback will close the handle regardless of
+	 * whether it succeeds or not.
+	 */
+	if (destroy_callback(zhp, &cb) != 0)
+		return (1);
+
+	zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
+	    B_FALSE, B_FALSE);
+
+	return (0);
+}
+
+/*
+ * zfs get [-rHp] [-o field[,field]...] [-s source[,source]...]
+ * 	< all | property[,property]... > < fs | snap | vol > ...
+ *
+ *	-r	recurse over any child datasets
+ *	-H	scripted mode.  Headers are stripped, and fields are separated
+ *		by tabs instead of spaces.
+ *	-o	Set of fields to display.  One of "name,property,value,source".
+ *		Default is all four.
+ *	-s	Set of sources to allow.  One of
+ *		"local,default,inherited,temporary,none".  Default is all
+ *		five.
+ *	-p	Display values in parsable (literal) format.
+ *
+ *  Prints properties for the given datasets.  The user can control which
+ *  columns to display as well as which property types to allow.
+ */
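+/*
+ * Illustrative example (dataset names are placeholders, not from this file):
+ * 'zfs get -r -s local -o name,property,value compression tank/home' would
+ * print the locally set 'compression' value for tank/home and each of its
+ * descendents, omitting the SOURCE column.
+ */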
+
+/*
+ * Invoked to display the properties for a single dataset.
+ */
+static int
+get_callback(zfs_handle_t *zhp, void *data)
+{
+	char buf[ZFS_MAXPROPLEN];
+	zfs_source_t sourcetype;
+	char source[ZFS_MAXNAMELEN];
+	libzfs_get_cbdata_t *cbp = data;
+	nvlist_t *userprop = zfs_get_user_props(zhp);
+	zfs_proplist_t *pl = cbp->cb_proplist;
+	nvlist_t *propval;
+	char *strval;
+	char *sourceval;
+
+	for (; pl != NULL; pl = pl->pl_next) {
+		/*
+		 * Skip the special fake placeholder.  This will also skip over
+		 * the name property when 'all' is specified.
+		 */
+		if (pl->pl_prop == ZFS_PROP_NAME &&
+		    pl == cbp->cb_proplist)
+			continue;
+
+		if (pl->pl_prop != ZFS_PROP_INVAL) {
+			if (zfs_prop_get(zhp, pl->pl_prop, buf,
+			    sizeof (buf), &sourcetype, source,
+			    sizeof (source),
+			    cbp->cb_literal) != 0) {
+				if (pl->pl_all)
+					continue;
+				if (!zfs_prop_valid_for_type(pl->pl_prop,
+				    ZFS_TYPE_ANY)) {
+					(void) fprintf(stderr,
+					    gettext("No such property '%s'\n"),
+					    zfs_prop_to_name(pl->pl_prop));
+					continue;
+				}
+				sourcetype = ZFS_SRC_NONE;
+				(void) strlcpy(buf, "-", sizeof (buf));
+			}
+
+			libzfs_print_one_property(zfs_get_name(zhp), cbp,
+			    zfs_prop_to_name(pl->pl_prop),
+			    buf, sourcetype, source);
+		} else {
+			if (nvlist_lookup_nvlist(userprop,
+			    pl->pl_user_prop, &propval) != 0) {
+				if (pl->pl_all)
+					continue;
+				sourcetype = ZFS_SRC_NONE;
+				strval = "-";
+			} else {
+				verify(nvlist_lookup_string(propval,
+				    ZFS_PROP_VALUE, &strval) == 0);
+				verify(nvlist_lookup_string(propval,
+				    ZFS_PROP_SOURCE, &sourceval) == 0);
+
+				if (strcmp(sourceval,
+				    zfs_get_name(zhp)) == 0) {
+					sourcetype = ZFS_SRC_LOCAL;
+				} else {
+					sourcetype = ZFS_SRC_INHERITED;
+					(void) strlcpy(source,
+					    sourceval, sizeof (source));
+				}
+			}
+
+			libzfs_print_one_property(zfs_get_name(zhp), cbp,
+			    pl->pl_user_prop, strval, sourcetype,
+			    source);
+		}
+	}
+
+	return (0);
+}
+
+static int
+zfs_do_get(int argc, char **argv)
+{
+	libzfs_get_cbdata_t cb = { 0 };
+	boolean_t recurse = B_FALSE;
+	int i, c;
+	char *value, *fields;
+	int ret;
+	zfs_proplist_t fake_name = { 0 };
+
+	/*
+	 * Set up default columns and sources.
+	 */
+	cb.cb_sources = ZFS_SRC_ALL;
+	cb.cb_columns[0] = GET_COL_NAME;
+	cb.cb_columns[1] = GET_COL_PROPERTY;
+	cb.cb_columns[2] = GET_COL_VALUE;
+	cb.cb_columns[3] = GET_COL_SOURCE;
+
+	/* check options */
+	while ((c = getopt(argc, argv, ":o:s:rHp")) != -1) {
+		switch (c) {
+		case 'p':
+			cb.cb_literal = B_TRUE;
+			break;
+		case 'r':
+			recurse = B_TRUE;
+			break;
+		case 'H':
+			cb.cb_scripted = B_TRUE;
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case 'o':
+			/*
+			 * Process the set of columns to display.  We zero out
+			 * the structure to give us a blank slate.
+			 */
+			bzero(&cb.cb_columns, sizeof (cb.cb_columns));
+			i = 0;
+			while (*optarg != '\0') {
+				static char *col_subopts[] =
+				    { "name", "property", "value", "source",
+				    NULL };
+
+				if (i == 4) {
+					(void) fprintf(stderr, gettext("too "
+					    "many fields given to -o "
+					    "option\n"));
+					usage(B_FALSE);
+				}
+
+				switch (getsubopt(&optarg, col_subopts,
+				    &value)) {
+				case 0:
+					cb.cb_columns[i++] = GET_COL_NAME;
+					break;
+				case 1:
+					cb.cb_columns[i++] = GET_COL_PROPERTY;
+					break;
+				case 2:
+					cb.cb_columns[i++] = GET_COL_VALUE;
+					break;
+				case 3:
+					cb.cb_columns[i++] = GET_COL_SOURCE;
+					break;
+				default:
+					(void) fprintf(stderr,
+					    gettext("invalid column name "
+					    "'%s'\n"), value);
+					usage(B_FALSE);
+				}
+			}
+			break;
+
+		case 's':
+			cb.cb_sources = 0;
+			while (*optarg != '\0') {
+				static char *source_subopts[] = {
+					"local", "default", "inherited",
+					"temporary", "none", NULL };
+
+				switch (getsubopt(&optarg, source_subopts,
+				    &value)) {
+				case 0:
+					cb.cb_sources |= ZFS_SRC_LOCAL;
+					break;
+				case 1:
+					cb.cb_sources |= ZFS_SRC_DEFAULT;
+					break;
+				case 2:
+					cb.cb_sources |= ZFS_SRC_INHERITED;
+					break;
+				case 3:
+					cb.cb_sources |= ZFS_SRC_TEMPORARY;
+					break;
+				case 4:
+					cb.cb_sources |= ZFS_SRC_NONE;
+					break;
+				default:
+					(void) fprintf(stderr,
+					    gettext("invalid source "
+					    "'%s'\n"), value);
+					usage(B_FALSE);
+				}
+			}
+			break;
+
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing property "
+		    "argument\n"));
+		usage(B_FALSE);
+	}
+
+	fields = argv[0];
+
+	if (zfs_get_proplist(g_zfs, fields, &cb.cb_proplist) != 0)
+		usage(B_FALSE);
+
+	argc--;
+	argv++;
+
+	/*
+	 * As part of zfs_expand_proplist(), we keep track of the maximum column
+	 * width for each property.  For the 'NAME' (and 'SOURCE') columns, we
+	 * need to know the maximum name length.  However, the user likely did
+	 * not specify 'name' as one of the properties to fetch, so we need to
+	 * make sure we always include at least this property for
+	 * print_get_headers() to work properly.
+	 */
+	if (cb.cb_proplist != NULL) {
+		fake_name.pl_prop = ZFS_PROP_NAME;
+		fake_name.pl_width = strlen(gettext("NAME"));
+		fake_name.pl_next = cb.cb_proplist;
+		cb.cb_proplist = &fake_name;
+	}
+
+	cb.cb_first = B_TRUE;
+
+	/* run for each object */
+	ret = zfs_for_each(argc, argv, recurse, ZFS_TYPE_ANY, NULL,
+	    &cb.cb_proplist, get_callback, &cb, B_FALSE);
+
+	if (cb.cb_proplist == &fake_name)
+		zfs_free_proplist(fake_name.pl_next);
+	else
+		zfs_free_proplist(cb.cb_proplist);
+
+	return (ret);
+}
+
+/*
+ * inherit [-r] <property> <fs|vol> ...
+ *
+ * 	-r	Recurse over all children
+ *
+ * For each dataset specified on the command line, inherit the given property
+ * from its parent.  Inheriting a property at the pool level will cause it to
+ * use the default value.  The '-r' flag will recurse over all children, and is
+ * useful for setting a property on a hierarchy-wide basis, regardless of any
+ * local modifications for each dataset.
+ */
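+/*
+ * Illustrative example (dataset name is a placeholder): 'zfs inherit -r
+ * compression tank/home' clears any local 'compression' setting on tank/home
+ * and all of its descendents, reverting them to the inherited or default
+ * value.
+ */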
+typedef struct inherit_cbdata {
+	char		*cb_propname;
+	boolean_t	cb_any_successful;
+} inherit_cbdata_t;
+
+static int
+inherit_callback(zfs_handle_t *zhp, void *data)
+{
+	inherit_cbdata_t *cbp = data;
+	int ret;
+
+	ret = zfs_prop_inherit(zhp, cbp->cb_propname);
+	if (ret == 0)
+		cbp->cb_any_successful = B_TRUE;
+	return (ret != 0);
+}
+
+static int
+zfs_do_inherit(int argc, char **argv)
+{
+	boolean_t recurse = B_FALSE;
+	int c;
+	zfs_prop_t prop;
+	inherit_cbdata_t cb;
+	int ret;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "r")) != -1) {
+		switch (c) {
+		case 'r':
+			recurse = B_TRUE;
+			break;
+		case '?':
+		default:
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing property argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing dataset argument\n"));
+		usage(B_FALSE);
+	}
+
+	cb.cb_propname = argv[0];
+	argc--;
+	argv++;
+
+	if ((prop = zfs_name_to_prop(cb.cb_propname)) != ZFS_PROP_INVAL) {
+		if (zfs_prop_readonly(prop)) {
+			(void) fprintf(stderr, gettext(
+			    "%s property is read-only\n"),
+			    cb.cb_propname);
+			return (1);
+		}
+		if (!zfs_prop_inheritable(prop)) {
+			(void) fprintf(stderr, gettext("'%s' property cannot "
+			    "be inherited\n"), cb.cb_propname);
+			if (prop == ZFS_PROP_QUOTA ||
+			    prop == ZFS_PROP_RESERVATION)
+				(void) fprintf(stderr, gettext("use 'zfs set "
+				    "%s=none' to clear\n"), cb.cb_propname);
+			return (1);
+		}
+	} else if (!zfs_prop_user(cb.cb_propname)) {
+		(void) fprintf(stderr, gettext(
+		    "invalid property '%s'\n"),
+		    cb.cb_propname);
+		usage(B_FALSE);
+	}
+
+	cb.cb_any_successful = B_FALSE;
+
+	ret = zfs_for_each(argc, argv, recurse,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, NULL, NULL,
+	    inherit_callback, &cb, B_FALSE);
+
+	if (cb.cb_any_successful) {
+		zpool_log_history(g_zfs, argc + optind + 1, argv - optind - 1,
+		    argv[0], B_FALSE, B_FALSE);
+	}
+
+	return (ret);
+}
+
+/*
+ * list [-rH] [-o property[,property]...] [-t type[,type]...]
+ *      [-s property [-s property]...] [-S property [-S property]...]
+ *      <dataset> ...
+ *
+ * 	-r	Recurse over all children
+ * 	-H	Scripted mode; elide headers and separate columns by tabs
+ * 	-o	Control which fields to display.
+ * 	-t	Control which object types to display.
+ *	-s	Specify sort columns, ascending order.
+ *	-S	Specify sort columns, descending order.
+ *
+ * When given no arguments, lists all filesystems in the system.
+ * Otherwise, list the specified datasets, optionally recursing down them if
+ * '-r' is specified.
+ */
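+/*
+ * Illustrative example (pool name is a placeholder): 'zfs list -r -t snapshot
+ * -o name,used -s name tank' lists the snapshots under 'tank', showing only
+ * the NAME and USED columns, sorted by name in ascending order.
+ */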
+typedef struct list_cbdata {
+	boolean_t	cb_first;
+	boolean_t	cb_scripted;
+	zfs_proplist_t	*cb_proplist;
+} list_cbdata_t;
+
+/*
+ * Given a list of columns to display, output appropriate headers for each one.
+ */
+static void
+print_header(zfs_proplist_t *pl)
+{
+	char headerbuf[ZFS_MAXPROPLEN];
+	const char *header;
+	int i;
+	boolean_t first = B_TRUE;
+	boolean_t right_justify;
+
+	for (; pl != NULL; pl = pl->pl_next) {
+		if (!first) {
+			(void) printf("  ");
+		} else {
+			first = B_FALSE;
+		}
+
+		right_justify = B_FALSE;
+		if (pl->pl_prop != ZFS_PROP_INVAL) {
+			header = zfs_prop_column_name(pl->pl_prop);
+			right_justify = zfs_prop_align_right(pl->pl_prop);
+		} else {
+			for (i = 0; pl->pl_user_prop[i] != '\0'; i++)
+				headerbuf[i] = toupper(pl->pl_user_prop[i]);
+			headerbuf[i] = '\0';
+			header = headerbuf;
+		}
+
+		if (pl->pl_next == NULL && !right_justify)
+			(void) printf("%s", header);
+		else if (right_justify)
+			(void) printf("%*s", pl->pl_width, header);
+		else
+			(void) printf("%-*s", pl->pl_width, header);
+	}
+
+	(void) printf("\n");
+}
+
+/*
+ * Given a dataset and a list of fields, print out all the properties according
+ * to the described layout.
+ */
+static void
+print_dataset(zfs_handle_t *zhp, zfs_proplist_t *pl, int scripted)
+{
+	boolean_t first = B_TRUE;
+	char property[ZFS_MAXPROPLEN];
+	nvlist_t *userprops = zfs_get_user_props(zhp);
+	nvlist_t *propval;
+	char *propstr;
+	boolean_t right_justify;
+	int width;
+
+	for (; pl != NULL; pl = pl->pl_next) {
+		if (!first) {
+			if (scripted)
+				(void) printf("\t");
+			else
+				(void) printf("  ");
+		} else {
+			first = B_FALSE;
+		}
+
+		right_justify = B_FALSE;
+		if (pl->pl_prop != ZFS_PROP_INVAL) {
+			if (zfs_prop_get(zhp, pl->pl_prop, property,
+			    sizeof (property), NULL, NULL, 0, B_FALSE) != 0)
+				propstr = "-";
+			else
+				propstr = property;
+
+			right_justify = zfs_prop_align_right(pl->pl_prop);
+		} else {
+			if (nvlist_lookup_nvlist(userprops,
+			    pl->pl_user_prop, &propval) != 0)
+				propstr = "-";
+			else
+				verify(nvlist_lookup_string(propval,
+				    ZFS_PROP_VALUE, &propstr) == 0);
+		}
+
+		width = pl->pl_width;
+
+		/*
+		 * If this is being called in scripted mode, or if this is the
+		 * last column and it is left-justified, don't include a width
+		 * format specifier.
+		 */
+		if (scripted || (pl->pl_next == NULL && !right_justify))
+			(void) printf("%s", propstr);
+		else if (right_justify)
+			(void) printf("%*s", width, propstr);
+		else
+			(void) printf("%-*s", width, propstr);
+	}
+
+	(void) printf("\n");
+}
+
+/*
+ * Generic callback function to list a dataset or snapshot.
+ */
+static int
+list_callback(zfs_handle_t *zhp, void *data)
+{
+	list_cbdata_t *cbp = data;
+
+	if (cbp->cb_first) {
+		if (!cbp->cb_scripted)
+			print_header(cbp->cb_proplist);
+		cbp->cb_first = B_FALSE;
+	}
+
+	print_dataset(zhp, cbp->cb_proplist, cbp->cb_scripted);
+
+	return (0);
+}
+
+static int
+zfs_do_list(int argc, char **argv)
+{
+	int c;
+	boolean_t recurse = B_FALSE;
+	boolean_t scripted = B_FALSE;
+	static char default_fields[] =
+	    "name,used,available,referenced,mountpoint";
+	int types = ZFS_TYPE_ANY;
+	char *fields = NULL;
+	char *basic_fields = default_fields;
+	list_cbdata_t cb = { 0 };
+	char *value;
+	int ret;
+	char *type_subopts[] = { "filesystem", "volume", "snapshot", NULL };
+	zfs_sort_column_t *sortcol = NULL;
+
+	/* check options */
+	while ((c = getopt(argc, argv, ":o:rt:Hs:S:")) != -1) {
+		switch (c) {
+		case 'o':
+			fields = optarg;
+			break;
+		case 'r':
+			recurse = B_TRUE;
+			break;
+		case 'H':
+			scripted = B_TRUE;
+			break;
+		case 's':
+			if (zfs_add_sort_column(&sortcol, optarg,
+			    B_FALSE) != 0) {
+				(void) fprintf(stderr,
+				    gettext("invalid property '%s'\n"), optarg);
+				usage(B_FALSE);
+			}
+			break;
+		case 'S':
+			if (zfs_add_sort_column(&sortcol, optarg,
+			    B_TRUE) != 0) {
+				(void) fprintf(stderr,
+				    gettext("invalid property '%s'\n"), optarg);
+				usage(B_FALSE);
+			}
+			break;
+		case 't':
+			types = 0;
+			while (*optarg != '\0') {
+				switch (getsubopt(&optarg, type_subopts,
+				    &value)) {
+				case 0:
+					types |= ZFS_TYPE_FILESYSTEM;
+					break;
+				case 1:
+					types |= ZFS_TYPE_VOLUME;
+					break;
+				case 2:
+					types |= ZFS_TYPE_SNAPSHOT;
+					break;
+				default:
+					(void) fprintf(stderr,
+					    gettext("invalid type '%s'\n"),
+					    value);
+					usage(B_FALSE);
+				}
+			}
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (fields == NULL)
+		fields = basic_fields;
+
+	/*
+	 * If the user specifies '-o all', zfs_get_proplist() doesn't
+	 * normally include the name of the dataset.  For 'zfs list', we always
+	 * want this property to be first.
+	 */
+	if (zfs_get_proplist(g_zfs, fields, &cb.cb_proplist) != 0)
+		usage(B_FALSE);
+
+	cb.cb_scripted = scripted;
+	cb.cb_first = B_TRUE;
+
+	ret = zfs_for_each(argc, argv, recurse, types, sortcol, &cb.cb_proplist,
+	    list_callback, &cb, B_TRUE);
+
+	zfs_free_proplist(cb.cb_proplist);
+	zfs_free_sort_columns(sortcol);
+
+	if (ret == 0 && cb.cb_first)
+		(void) printf(gettext("no datasets available\n"));
+
+	return (ret);
+}
+
+/*
+ * zfs rename [-r] <fs | snap | vol> <fs | snap | vol>
+ *
+ * Renames the given dataset to another of the same type.
+ */
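+/*
+ * Illustrative examples (names are placeholders): 'zfs rename tank/home/joe
+ * tank/home/joseph' renames a single filesystem, while 'zfs rename -r
+ * tank@today tank@yesterday' renames that snapshot for tank and every
+ * dataset beneath it.
+ */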
+/* ARGSUSED */
+static int
+zfs_do_rename(int argc, char **argv)
+{
+	zfs_handle_t *zhp;
+	int c;
+	int ret;
+	int recurse = 0;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "r")) != -1) {
+		switch (c) {
+		case 'r':
+			recurse = 1;
+			break;
+		case '?':
+		default:
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing source dataset "
+		    "argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing target dataset "
+		    "argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 2) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	if (recurse && strchr(argv[0], '@') == 0) {
+		(void) fprintf(stderr, gettext("source dataset for recursive "
+		    "rename must be a snapshot\n"));
+		usage(B_FALSE);
+	}
+
+	if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
+		return (1);
+
+	ret = (zfs_rename(zhp, argv[1], recurse) != 0);
+
+	if (!ret)
+		zpool_log_history(g_zfs, argc + optind, argv - optind, argv[1],
+		    B_FALSE, B_FALSE);
+
+	zfs_close(zhp);
+	return (ret);
+}
+
+/*
+ * zfs promote <fs>
+ *
+ * Promotes the given clone fs to be the parent
+ */
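+/*
+ * Illustrative example (names are placeholders): after 'zfs clone
+ * tank/prod@snap tank/test', running 'zfs promote tank/test' reverses the
+ * clone relationship so that tank/prod can subsequently be destroyed.
+ */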
+/* ARGSUSED */
+static int
+zfs_do_promote(int argc, char **argv)
+{
+	zfs_handle_t *zhp;
+	int ret;
+
+	/* check options */
+	if (argc > 1 && argv[1][0] == '-') {
+		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+		    argv[1][1]);
+		usage(B_FALSE);
+	}
+
+	/* check number of arguments */
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing clone filesystem"
+		    " argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 2) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+	if (zhp == NULL)
+		return (1);
+
+	ret = (zfs_promote(zhp) != 0);
+
+	if (!ret)
+		zpool_log_history(g_zfs, argc, argv, argv[1], B_FALSE, B_FALSE);
+
+	zfs_close(zhp);
+	return (ret);
+}
+
+/*
+ * zfs rollback [-rfR] <snapshot>
+ *
+ * 	-r	Delete any intervening snapshots before doing rollback
+ * 	-R	Delete any snapshots and their clones
+ * 	-f	Force unmount filesystems, even if they are in use.
+ *
+ * Given a filesystem, rollback to a specific snapshot, discarding any changes
+ * since then and making it the active dataset.  If more recent snapshots exist,
+ * the command will complain unless the '-r' flag is given.
+ */
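+/*
+ * Illustrative example (names are placeholders): 'zfs rollback -r
+ * tank/home@monday' discards every change made to tank/home since the
+ * 'monday' snapshot, destroying any snapshots taken after it.
+ */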
+typedef struct rollback_cbdata {
+	uint64_t	cb_create;
+	boolean_t	cb_first;
+	int		cb_doclones;
+	char		*cb_target;
+	int		cb_error;
+	boolean_t	cb_recurse;
+	boolean_t	cb_dependent;
+} rollback_cbdata_t;
+
+/*
+ * Report any snapshots more recent than the one specified.  Used when '-r' is
+ * not specified.  We reuse this same callback for the snapshot dependents - if
+ * 'cb_dependent' is set, then this is a dependent and we should report it
+ * without checking the transaction group.
+ */
+static int
+rollback_check(zfs_handle_t *zhp, void *data)
+{
+	rollback_cbdata_t *cbp = data;
+
+	if (cbp->cb_doclones) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	if (!cbp->cb_dependent) {
+		if (strcmp(zfs_get_name(zhp), cbp->cb_target) != 0 &&
+		    zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
+		    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) >
+		    cbp->cb_create) {
+
+			if (cbp->cb_first && !cbp->cb_recurse) {
+				(void) fprintf(stderr, gettext("cannot "
+				    "rollback to '%s': more recent snapshots "
+				    "exist\n"),
+				    cbp->cb_target);
+				(void) fprintf(stderr, gettext("use '-r' to "
+				    "force deletion of the following "
+				    "snapshots:\n"));
+				cbp->cb_first = 0;
+				cbp->cb_error = 1;
+			}
+
+			if (cbp->cb_recurse) {
+				cbp->cb_dependent = B_TRUE;
+				if (zfs_iter_dependents(zhp, B_TRUE,
+				    rollback_check, cbp) != 0) {
+					zfs_close(zhp);
+					return (-1);
+				}
+				cbp->cb_dependent = B_FALSE;
+			} else {
+				(void) fprintf(stderr, "%s\n",
+				    zfs_get_name(zhp));
+			}
+		}
+	} else {
+		if (cbp->cb_first && cbp->cb_recurse) {
+			(void) fprintf(stderr, gettext("cannot rollback to "
+			    "'%s': clones of previous snapshots exist\n"),
+			    cbp->cb_target);
+			(void) fprintf(stderr, gettext("use '-R' to "
+			    "force deletion of the following clones and "
+			    "dependents:\n"));
+			cbp->cb_first = 0;
+			cbp->cb_error = 1;
+		}
+
+		(void) fprintf(stderr, "%s\n", zfs_get_name(zhp));
+	}
+
+	zfs_close(zhp);
+	return (0);
+}
+
+static int
+zfs_do_rollback(int argc, char **argv)
+{
+	int ret;
+	int c;
+	rollback_cbdata_t cb = { 0 };
+	zfs_handle_t *zhp, *snap;
+	char parentname[ZFS_MAXNAMELEN];
+	char *delim;
+	int force = 0;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "rfR")) != -1) {
+		switch (c) {
+		case 'f':
+			force = 1;
+			break;
+		case 'r':
+			cb.cb_recurse = 1;
+			break;
+		case 'R':
+			cb.cb_recurse = 1;
+			cb.cb_doclones = 1;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing dataset argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	/* open the snapshot */
+	if ((snap = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
+		return (1);
+
+	/* open the parent dataset */
+	(void) strlcpy(parentname, argv[0], sizeof (parentname));
+	verify((delim = strrchr(parentname, '@')) != NULL);
+	*delim = '\0';
+	if ((zhp = zfs_open(g_zfs, parentname, ZFS_TYPE_ANY)) == NULL) {
+		zfs_close(snap);
+		return (1);
+	}
+
+	/*
+	 * Check for more recent snapshots and/or clones based on the presence
+	 * of '-r' and '-R'.
+	 */
+	cb.cb_target = argv[0];
+	cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
+	cb.cb_first = B_TRUE;
+	cb.cb_error = 0;
+	if ((ret = zfs_iter_children(zhp, rollback_check, &cb)) != 0)
+		goto out;
+
+	if ((ret = cb.cb_error) != 0)
+		goto out;
+
+	/*
+	 * Rollback parent to the given snapshot.
+	 */
+	ret = zfs_rollback(zhp, snap, force);
+
+	if (!ret) {
+		zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
+		    B_FALSE, B_FALSE);
+	}
+
+out:
+	zfs_close(snap);
+	zfs_close(zhp);
+
+	if (ret == 0)
+		return (0);
+	else
+		return (1);
+}
+
+/*
+ * zfs set property=value { fs | snap | vol } ...
+ *
+ * Sets the given property for all datasets specified on the command line.
+ */
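+/*
+ * Illustrative example (names are placeholders): 'zfs set compression=on
+ * tank/home tank/ports' enables compression on both datasets; every name
+ * after the property=value pair is processed in turn.
+ */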
+typedef struct set_cbdata {
+	char		*cb_propname;
+	char		*cb_value;
+	boolean_t	cb_any_successful;
+} set_cbdata_t;
+
+static int
+set_callback(zfs_handle_t *zhp, void *data)
+{
+	set_cbdata_t *cbp = data;
+
+	if (zfs_prop_set(zhp, cbp->cb_propname, cbp->cb_value) != 0) {
+		switch (libzfs_errno(g_zfs)) {
+		case EZFS_MOUNTFAILED:
+			(void) fprintf(stderr, gettext("property may be set "
+			    "but unable to remount filesystem\n"));
+			break;
+		case EZFS_SHARENFSFAILED:
+			(void) fprintf(stderr, gettext("property may be set "
+			    "but unable to reshare filesystem\n"));
+			break;
+		}
+		return (1);
+	}
+	cbp->cb_any_successful = B_TRUE;
+	return (0);
+}
+
+static int
+zfs_do_set(int argc, char **argv)
+{
+	set_cbdata_t cb;
+	int ret;
+
+	/* check for options */
+	if (argc > 1 && argv[1][0] == '-') {
+		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+		    argv[1][1]);
+		usage(B_FALSE);
+	}
+
+	/* check number of arguments */
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing property=value "
+		    "argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc < 3) {
+		(void) fprintf(stderr, gettext("missing dataset name\n"));
+		usage(B_FALSE);
+	}
+
+	/* validate property=value argument */
+	cb.cb_propname = argv[1];
+	if ((cb.cb_value = strchr(cb.cb_propname, '=')) == NULL) {
+		(void) fprintf(stderr, gettext("missing value in "
+		    "property=value argument\n"));
+		usage(B_FALSE);
+	}
+
+	*cb.cb_value = '\0';
+	cb.cb_value++;
+	cb.cb_any_successful = B_FALSE;
+
+	if (*cb.cb_propname == '\0') {
+		(void) fprintf(stderr,
+		    gettext("missing property in property=value argument\n"));
+		usage(B_FALSE);
+	}
+
+	ret = zfs_for_each(argc - 2, argv + 2, B_FALSE,
+	    ZFS_TYPE_ANY, NULL, NULL, set_callback, &cb, B_FALSE);
+
+	if (cb.cb_any_successful) {
+		*(cb.cb_value - 1) = '=';
+		zpool_log_history(g_zfs, argc, argv, argv[2], B_FALSE, B_FALSE);
+	}
+
+	return (ret);
+}
+
+/*
+ * zfs snapshot [-r] <fs@snap>
+ *
+ * Creates a snapshot with the given name.  While functionally equivalent to
+ * 'zfs create', it is a separate command to differentiate intent.
+ */
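+/*
+ * Illustrative example (names are placeholders): 'zfs snapshot -r
+ * tank@backup' creates a snapshot named 'backup' of tank and of every
+ * dataset beneath it.
+ */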
+static int
+zfs_do_snapshot(int argc, char **argv)
+{
+	int recursive = B_FALSE;
+	int ret;
+	int c;
+
+	/* check options */
+	while ((c = getopt(argc, argv, ":r")) != -1) {
+		switch (c) {
+		case 'r':
+			recursive = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing snapshot argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	ret = zfs_snapshot(g_zfs, argv[0], recursive);
+	if (ret && recursive)
+		(void) fprintf(stderr, gettext("no snapshots were created\n"));
+	if (!ret) {
+		zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
+		    B_FALSE, B_FALSE);
+	}
+	return (ret != 0);
+}
+
+/*
+ * zfs send [-i <@snap>] <fs@snap>
+ *
+ * Send a backup stream to stdout.
+ */
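+/*
+ * Illustrative example (names are placeholders): 'zfs send -i tank/fs@mon
+ * tank/fs@tue > /backup/fs.incr' writes the incremental changes between the
+ * two snapshots to a file; as the code below enforces, stdout must not be a
+ * terminal.
+ */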
+static int
+zfs_do_send(int argc, char **argv)
+{
+	char *fromname = NULL;
+	char *cp;
+	zfs_handle_t *zhp;
+	int c, err;
+
+	/* check options */
+	while ((c = getopt(argc, argv, ":i:")) != -1) {
+		switch (c) {
+		case 'i':
+			if (fromname)
+				usage(B_FALSE);
+			fromname = optarg;
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing snapshot argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	if (isatty(STDOUT_FILENO)) {
+		(void) fprintf(stderr,
+		    gettext("Error: Stream can not be written to a terminal.\n"
+		    "You must redirect standard output.\n"));
+		return (1);
+	}
+
+	if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
+		return (1);
+
+	/*
+	 * If they specified the full path to the snapshot, chop off
+	 * everything except the short name of the snapshot.
+	 */
+	if (fromname && (cp = strchr(fromname, '@')) != NULL) {
+		if (cp != fromname &&
+		    strncmp(argv[0], fromname, cp - fromname + 1)) {
+			(void) fprintf(stderr,
+			    gettext("incremental source must be "
+			    "in same filesystem\n"));
+			usage(B_FALSE);
+		}
+		fromname = cp + 1;
+		if (strchr(fromname, '@') || strchr(fromname, '/')) {
+			(void) fprintf(stderr,
+			    gettext("invalid incremental source\n"));
+			usage(B_FALSE);
+		}
+	}
+
+	err = zfs_send(zhp, fromname, STDOUT_FILENO);
+	zfs_close(zhp);
+
+	return (err != 0);
+}
+
+/*
+ * zfs receive <fs@snap>
+ *
+ * Restore a backup stream from stdin.
+ */
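+/*
+ * Illustrative example (names are placeholders): 'zfs send tank/fs@tue |
+ * zfs receive -d backup' recreates the sent snapshot under the 'backup'
+ * pool, with -d deriving the target name from the sent dataset's path;
+ * stdin must not be a terminal.
+ */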
+static int
+zfs_do_receive(int argc, char **argv)
+{
+	int c, err;
+	boolean_t isprefix = B_FALSE;
+	boolean_t dryrun = B_FALSE;
+	boolean_t verbose = B_FALSE;
+	boolean_t force = B_FALSE;
+
+	/* check options */
+	while ((c = getopt(argc, argv, ":dnvF")) != -1) {
+		switch (c) {
+		case 'd':
+			isprefix = B_TRUE;
+			break;
+		case 'n':
+			dryrun = B_TRUE;
+			break;
+		case 'v':
+			verbose = B_TRUE;
+			break;
+		case 'F':
+			force = B_TRUE;
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing snapshot argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	if (isatty(STDIN_FILENO)) {
+		(void) fprintf(stderr,
+		    gettext("Error: Backup stream can not be read "
+		    "from a terminal.\n"
+		    "You must redirect standard input.\n"));
+		return (1);
+	}
+
+	err = zfs_receive(g_zfs, argv[0], isprefix, verbose, dryrun, force,
+	    STDIN_FILENO);
+
+	if (!err) {
+		zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
+		    B_FALSE, B_FALSE);
+	}
+
+	return (err != 0);
+}
+
+typedef struct get_all_cbdata {
+	zfs_handle_t	**cb_handles;
+	size_t		cb_alloc;
+	size_t		cb_used;
+	uint_t		cb_types;
+} get_all_cbdata_t;
+
+static int
+get_one_dataset(zfs_handle_t *zhp, void *data)
+{
+	get_all_cbdata_t *cbp = data;
+	zfs_type_t type = zfs_get_type(zhp);
+
+	/*
+	 * Iterate over any nested datasets.
+	 */
+	if (type == ZFS_TYPE_FILESYSTEM &&
+	    zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) {
+		zfs_close(zhp);
+		return (1);
+	}
+
+	/*
+	 * Skip any datasets whose type does not match.
+	 */
+	if ((type & cbp->cb_types) == 0) {
+		zfs_close(zhp);
+		return (0);
+	}
+
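+	/*
+	 * Grow the handle array as needed: start with 64 slots and double
+	 * thereafter, so the copy-and-reallocate cost stays amortized.
+	 */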
+	if (cbp->cb_alloc == cbp->cb_used) {
+		zfs_handle_t **handles;
+
+		if (cbp->cb_alloc == 0)
+			cbp->cb_alloc = 64;
+		else
+			cbp->cb_alloc *= 2;
+
+		handles = safe_malloc(cbp->cb_alloc * sizeof (void *));
+
+		if (cbp->cb_handles) {
+			bcopy(cbp->cb_handles, handles,
+			    cbp->cb_used * sizeof (void *));
+			free(cbp->cb_handles);
+		}
+
+		cbp->cb_handles = handles;
+	}
+
+	cbp->cb_handles[cbp->cb_used++] = zhp;
+
+	return (0);
+}
+
+static void
+get_all_datasets(uint_t types, zfs_handle_t ***dslist, size_t *count)
+{
+	get_all_cbdata_t cb = { 0 };
+	cb.cb_types = types;
+
+	(void) zfs_iter_root(g_zfs, get_one_dataset, &cb);
+
+	*dslist = cb.cb_handles;
+	*count = cb.cb_used;
+}
+
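+/*
+ * qsort(3) comparator: order datasets by mountpoint so that parent
+ * filesystems sort before their children; datasets without a mountpoint
+ * (e.g. volumes) sort after filesystems, ordered by name.
+ */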
+static int
+dataset_cmp(const void *a, const void *b)
+{
+	zfs_handle_t **za = (zfs_handle_t **)a;
+	zfs_handle_t **zb = (zfs_handle_t **)b;
+	char mounta[MAXPATHLEN];
+	char mountb[MAXPATHLEN];
+	boolean_t gota, gotb;
+
+	if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0)
+		verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
+		    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
+	if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0)
+		verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
+		    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
+
+	if (gota && gotb)
+		return (strcmp(mounta, mountb));
+
+	if (gota)
+		return (-1);
+	if (gotb)
+		return (1);
+
+	return (strcmp(zfs_get_name(*za), zfs_get_name(*zb)));
+}
+
+/*
+ * Generic callback for sharing or mounting filesystems.  Because the code is so
+ * similar, we have a common function with an extra parameter to determine which
+ * mode we are using.
+ */
+#define	OP_SHARE	0x1
+#define	OP_MOUNT	0x2
+
+/*
+ * Share or mount a dataset.
+ */
+static int
+share_mount_one(zfs_handle_t *zhp, int op, int flags, boolean_t explicit,
+    const char *options)
+{
+	char mountpoint[ZFS_MAXPROPLEN];
+	char shareopts[ZFS_MAXPROPLEN];
+	const char *cmdname = op == OP_SHARE ? "share" : "mount";
+	struct mnttab mnt;
+	uint64_t zoned, canmount;
+	zfs_type_t type = zfs_get_type(zhp);
+
+	assert(type & (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME));
+
+	if (type == ZFS_TYPE_FILESYSTEM) {
+		/*
+		 * Check to make sure we can mount/share this dataset.  If we
+		 * are in the global zone and the filesystem is exported to a
+		 * local zone, or if we are in a local zone and the
+		 * filesystem is not exported, then it is an error.
+		 */
+		zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+
+		if (zoned && getzoneid() == GLOBAL_ZONEID) {
+			if (!explicit)
+				return (0);
+
+			(void) fprintf(stderr, gettext("cannot %s '%s': "
+			    "dataset is exported to a local zone\n"), cmdname,
+			    zfs_get_name(zhp));
+			return (1);
+
+		} else if (!zoned && getzoneid() != GLOBAL_ZONEID) {
+			if (!explicit)
+				return (0);
+
+			(void) fprintf(stderr, gettext("cannot %s '%s': "
+			    "permission denied\n"), cmdname,
+			    zfs_get_name(zhp));
+			return (1);
+		}
+
+		/*
+		 * Ignore any filesystems which don't apply to us. This
+		 * includes those with a legacy mountpoint, or those with
+		 * legacy share options.
+		 */
+		verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
+		    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
+		verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts,
+		    sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
+		canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
+
+		if (op == OP_SHARE && strcmp(shareopts, "off") == 0) {
+			if (!explicit)
+				return (0);
+
+			(void) fprintf(stderr, gettext("cannot share '%s': "
+			    "legacy share\n"), zfs_get_name(zhp));
+			(void) fprintf(stderr, gettext("use share(1M) to "
+			    "share this filesystem\n"));
+			return (1);
+		}
+
+		/*
+		 * We cannot share or mount legacy filesystems. If the
+		 * shareopts is non-legacy but the mountpoint is legacy, we
+		 * treat it as a legacy share.
+		 */
+		if (strcmp(mountpoint, "legacy") == 0) {
+			if (!explicit)
+				return (0);
+
+			(void) fprintf(stderr, gettext("cannot %s '%s': "
+			    "legacy mountpoint\n"), cmdname, zfs_get_name(zhp));
+			(void) fprintf(stderr, gettext("use %s to "
+			    "%s this filesystem\n"), op == OP_SHARE ?
+			    "share(1M)" : "mount(1M)", cmdname);
+			return (1);
+		}
+
+		if (strcmp(mountpoint, "none") == 0) {
+			if (!explicit)
+				return (0);
+
+			(void) fprintf(stderr, gettext("cannot %s '%s': no "
+			    "mountpoint set\n"), cmdname, zfs_get_name(zhp));
+			return (1);
+		}
+
+		if (!canmount) {
+			if (!explicit)
+				return (0);
+
+			(void) fprintf(stderr, gettext("cannot %s '%s': "
+			    "'canmount' property is set to 'off'\n"), cmdname,
+			    zfs_get_name(zhp));
+			return (1);
+		}
+
+		/*
+		 * At this point, we have verified that the mountpoint and/or
+		 * shareopts are appropriate for auto management. If the
+		 * filesystem is already mounted or shared, return (failing
+		 * for explicit requests); otherwise mount or share the
+		 * filesystem.
+		 */
+		switch (op) {
+		case OP_SHARE:
+			if (zfs_is_shared_nfs(zhp, NULL)) {
+				if (!explicit)
+					return (0);
+
+				(void) fprintf(stderr, gettext("cannot share "
+				    "'%s': filesystem already shared\n"),
+				    zfs_get_name(zhp));
+				return (1);
+			}
+
+			if (!zfs_is_mounted(zhp, NULL) &&
+			    zfs_mount(zhp, NULL, 0) != 0)
+				return (1);
+
+			if (zfs_share_nfs(zhp) != 0)
+				return (1);
+			break;
+
+		case OP_MOUNT:
+			if (options == NULL)
+				mnt.mnt_mntopts = "";
+			else
+				mnt.mnt_mntopts = (char *)options;
+
+			if (!hasmntopt(&mnt, MNTOPT_REMOUNT) &&
+			    zfs_is_mounted(zhp, NULL)) {
+				if (!explicit)
+					return (0);
+
+				(void) fprintf(stderr, gettext("cannot mount "
+				    "'%s': filesystem already mounted\n"),
+				    zfs_get_name(zhp));
+				return (1);
+			}
+
+			if (zfs_mount(zhp, options, flags) != 0)
+				return (1);
+			break;
+		}
+	} else {
+		assert(op == OP_SHARE);
+
+		/*
+		 * Ignore any volumes that aren't shared.
+		 */
+		verify(zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts,
+		    sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
+
+		if (strcmp(shareopts, "off") == 0) {
+			if (!explicit)
+				return (0);
+
+			(void) fprintf(stderr, gettext("cannot share '%s': "
+			    "'shareiscsi' property not set\n"),
+			    zfs_get_name(zhp));
+			(void) fprintf(stderr, gettext("set 'shareiscsi' "
+			    "property or use iscsitadm(1M) to share this "
+			    "volume\n"));
+			return (1);
+		}
+
+		if (zfs_is_shared_iscsi(zhp)) {
+			if (!explicit)
+				return (0);
+
+			(void) fprintf(stderr, gettext("cannot share "
+			    "'%s': volume already shared\n"),
+			    zfs_get_name(zhp));
+			return (1);
+		}
+
+		if (zfs_share_iscsi(zhp) != 0)
+			return (1);
+	}
+
+	return (0);
+}
+
+static int
+share_mount(int op, int argc, char **argv)
+{
+	int do_all = 0;
+	int c, ret = 0;
+	const char *options = NULL;
+	int types, flags = 0;
+
+	/* check options */
+	while ((c = getopt(argc, argv, op == OP_MOUNT ? ":ao:O" : "a"))
+	    != -1) {
+		switch (c) {
+		case 'a':
+			do_all = 1;
+			break;
+		case 'o':
+			options = optarg;
+			break;
+		case 'O':
+			warnx("overlay mounts are not supported on FreeBSD, ignoring");
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check number of arguments */
+	if (do_all) {
+		zfs_handle_t **dslist = NULL;
+		size_t i, count = 0;
+
+		if (op == OP_MOUNT) {
+			types = ZFS_TYPE_FILESYSTEM;
+		} else if (argc > 0) {
+			if (strcmp(argv[0], "nfs") == 0) {
+				types = ZFS_TYPE_FILESYSTEM;
+			} else if (strcmp(argv[0], "iscsi") == 0) {
+				types = ZFS_TYPE_VOLUME;
+			} else {
+				(void) fprintf(stderr, gettext("share type "
+				    "must be 'nfs' or 'iscsi'\n"));
+				usage(B_FALSE);
+			}
+
+			argc--;
+			argv++;
+		} else {
+			types = ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME;
+		}
+
+		if (argc != 0) {
+			(void) fprintf(stderr, gettext("too many arguments\n"));
+			usage(B_FALSE);
+		}
+
+		get_all_datasets(types, &dslist, &count);
+
+		if (count == 0)
+			return (0);
+
+		qsort(dslist, count, sizeof (void *), dataset_cmp);
+
+		for (i = 0; i < count; i++) {
+			if (share_mount_one(dslist[i], op, flags, B_FALSE,
+			    options) != 0)
+				ret = 1;
+			zfs_close(dslist[i]);
+		}
+
+		free(dslist);
+	} else if (argc == 0) {
+		struct statfs *sfs;
+		int i, n;
+
+		if (op == OP_SHARE) {
+			(void) fprintf(stderr, gettext("missing filesystem "
+			    "argument\n"));
+			usage(B_FALSE);
+		}
+
+		/*
+		 * When mount is given no arguments, go through the list of
+		 * mounted filesystems (via getmntinfo(3)) and display any
+		 * active ZFS mounts.  We hide any snapshots, since they are
+		 * controlled automatically.
+		 */
+		if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) {
+			fprintf(stderr, "getmntinfo(): %s\n", strerror(errno));
+			return (0);
+		}
+		for (i = 0; i < n; i++) {
+			if (strcmp(sfs[i].f_fstypename, MNTTYPE_ZFS) != 0 ||
+			    strchr(sfs[i].f_mntfromname, '@') != NULL)
+				continue;
+
+			(void) printf("%-30s  %s\n", sfs[i].f_mntfromname,
+			    sfs[i].f_mntonname);
+		}
+
+	} else {
+		zfs_handle_t *zhp;
+
+		types = ZFS_TYPE_FILESYSTEM;
+		if (op == OP_SHARE)
+			types |= ZFS_TYPE_VOLUME;
+
+		if (argc > 1) {
+			(void) fprintf(stderr,
+			    gettext("too many arguments\n"));
+			usage(B_FALSE);
+		}
+
+		if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL) {
+			ret = 1;
+		} else {
+			ret = share_mount_one(zhp, op, flags, B_TRUE,
+			    options);
+			zfs_close(zhp);
+		}
+	}
+
+	return (ret);
+}
+
+/*
+ * zfs mount -a [nfs | iscsi]
+ * zfs mount filesystem
+ *
+ * Mount all filesystems, or mount the given filesystem.
+ */
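+/*
+ * Illustrative examples (names are placeholders): 'zfs mount -a' mounts every
+ * filesystem with an automatically managed mountpoint, while
+ * 'zfs mount tank/home' mounts only that filesystem.
+ */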
+static int
+zfs_do_mount(int argc, char **argv)
+{
+	return (share_mount(OP_MOUNT, argc, argv));
+}
+
+/*
+ * zfs share -a [nfs | iscsi]
+ * zfs share filesystem
+ *
+ * Share all filesystems, or share the given filesystem.
+ */
+static int
+zfs_do_share(int argc, char **argv)
+{
+	return (share_mount(OP_SHARE, argc, argv));
+}
+
+typedef struct unshare_unmount_node {
+	zfs_handle_t	*un_zhp;
+	char		*un_mountp;
+	uu_avl_node_t	un_avlnode;
+} unshare_unmount_node_t;
+
+/* ARGSUSED */
+static int
+unshare_unmount_compare(const void *larg, const void *rarg, void *unused)
+{
+	const unshare_unmount_node_t *l = larg;
+	const unshare_unmount_node_t *r = rarg;
+
+	return (strcmp(l->un_mountp, r->un_mountp));
+}
+
+/*
+ * Convenience routine used by unshare_unmount() and manual_unmount().  Given
+ * an absolute path, find the entry in the mount table, verify that it's a ZFS
+ * filesystem, and unshare or unmount it appropriately.
+ */
+static int
+unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual)
+{
+	zfs_handle_t *zhp;
+	int ret;
+	struct mnttab search = { 0 }, entry;
+	const char *cmdname = (op == OP_SHARE) ? "unshare" : "unmount";
+	char property[ZFS_MAXPROPLEN];
+
+	/*
+	 * Search for the given mount point in the mount table.
+	 */
+	search.mnt_mountp = path;
+	rewind(mnttab_file);
+	if (getmntany(mnttab_file, &entry, &search) != 0) {
+		(void) fprintf(stderr, gettext("cannot %s '%s': not "
+		    "currently mounted\n"), cmdname, path);
+		return (1);
+	}
+
+	if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) {
+		(void) fprintf(stderr, gettext("cannot %s '%s': not a ZFS "
+		    "filesystem\n"), cmdname, path);
+		return (1);
+	}
+
+	if ((zhp = zfs_open(g_zfs, entry.mnt_special,
+	    ZFS_TYPE_FILESYSTEM)) == NULL)
+		return (1);
+
+	verify(zfs_prop_get(zhp, op == OP_SHARE ?
+	    ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property,
+	    sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
+
+	if (op == OP_SHARE) {
+		if (strcmp(property, "off") == 0) {
+			(void) fprintf(stderr, gettext("cannot unshare "
+			    "'%s': legacy share\n"), path);
+			(void) fprintf(stderr, gettext("use "
+			    "unshare(1M) to unshare this filesystem\n"));
+			ret = 1;
+		} else if (!zfs_is_shared_nfs(zhp, NULL)) {
+			(void) fprintf(stderr, gettext("cannot unshare '%s': "
+			    "not currently shared\n"), path);
+			ret = 1;
+		} else {
+			ret = zfs_unshareall_nfs(zhp);
+		}
+	} else {
+		if (is_manual) {
+			ret = zfs_unmount(zhp, NULL, flags);
+		} else if (strcmp(property, "legacy") == 0) {
+			(void) fprintf(stderr, gettext("cannot unmount "
+			    "'%s': legacy mountpoint\n"),
+			    zfs_get_name(zhp));
+			(void) fprintf(stderr, gettext("use umount(1M) "
+			    "to unmount this filesystem\n"));
+			ret = 1;
+		} else {
+			ret = zfs_unmountall(zhp, flags);
+		}
+	}
+
+	zfs_close(zhp);
+
+	return (ret != 0);
+}
+
+/*
+ * Generic callback for unsharing or unmounting a filesystem.
+ */
+static int
+unshare_unmount(int op, int argc, char **argv)
+{
+	int do_all = 0;
+	int flags = 0;
+	int ret = 0;
+	int types, c;
+	zfs_handle_t *zhp;
+	char property[ZFS_MAXPROPLEN];
+
+	/* check options */
+	while ((c = getopt(argc, argv, op == OP_SHARE ? "a" : "af")) != -1) {
+		switch (c) {
+		case 'a':
+			do_all = 1;
+			break;
+		case 'f':
+			flags = MS_FORCE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (do_all) {
+		/*
+		 * We could make use of zfs_for_each() to walk all datasets in
+		 * the system, but this would be very inefficient, especially
+		 * since we would have to linearly search the mount list for
+		 * each one.  Instead, do one pass through the mounted
+		 * filesystems (via getmntinfo(3)) looking for ZFS entries and
+		 * call zfs_unmount() or zfs_unshare_nfs() for each one.
+		 *
+		 * Things get a little tricky if the administrator has created
+		 * mountpoints beneath other ZFS filesystems.  In this case, we
+		 * have to unmount the deepest filesystems first.  To accomplish
+		 * this, we place all the mountpoints in an AVL tree sorted by
+		 * mountpoint, and walk the result in reverse so that child
+		 * filesystems are handled before their parents.
+		 */
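+		/*
+		 * For example (paths are illustrative): /tank/home/user is
+		 * unmounted before /tank/home, which in turn is unmounted
+		 * before /tank.
+		 */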
+		uu_avl_pool_t *pool;
+		uu_avl_t *tree;
+		unshare_unmount_node_t *node;
+		uu_avl_index_t idx;
+		uu_avl_walk_t *walk;
+		struct statfs *sfs;
+		int i, n;
+
+		if (argc != 0) {
+			(void) fprintf(stderr, gettext("too many arguments\n"));
+			usage(B_FALSE);
+		}
+
+		if ((pool = uu_avl_pool_create("unmount_pool",
+		    sizeof (unshare_unmount_node_t),
+		    offsetof(unshare_unmount_node_t, un_avlnode),
+		    unshare_unmount_compare,
+		    UU_DEFAULT)) == NULL) {
+			(void) fprintf(stderr, gettext("internal error: "
+			    "out of memory\n"));
+			exit(1);
+		}
+
+		if ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL) {
+			(void) fprintf(stderr, gettext("internal error: "
+			    "out of memory\n"));
+			exit(1);
+		}
+
+		if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) {
+			(void) fprintf(stderr, gettext("internal error: "
+			    "getmntinfo() failed\n"));
+			exit(1);
+		}
+		for (i = 0; i < n; i++) {
+
+			/* ignore non-ZFS entries */
+			if (strcmp(sfs[i].f_fstypename, MNTTYPE_ZFS) != 0)
+				continue;
+
+			/* ignore snapshots */
+			if (strchr(sfs[i].f_mntfromname, '@') != NULL)
+				continue;
+
+			if ((zhp = zfs_open(g_zfs, sfs[i].f_mntfromname,
+			    ZFS_TYPE_FILESYSTEM)) == NULL) {
+				ret = 1;
+				continue;
+			}
+
+			verify(zfs_prop_get(zhp, op == OP_SHARE ?
+			    ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT,
+			    property, sizeof (property), NULL, NULL,
+			    0, B_FALSE) == 0);
+
+			/* Ignore legacy mounts and shares */
+			if ((op == OP_SHARE &&
+			    strcmp(property, "off") == 0) ||
+			    (op == OP_MOUNT &&
+			    strcmp(property, "legacy") == 0)) {
+				zfs_close(zhp);
+				continue;
+			}
+
+			node = safe_malloc(sizeof (unshare_unmount_node_t));
+			node->un_zhp = zhp;
+
+			if ((node->un_mountp = strdup(sfs[i].f_mntonname)) ==
+			    NULL) {
+				(void) fprintf(stderr, gettext("internal error:"
+				    " out of memory\n"));
+				exit(1);
+			}
+
+			uu_avl_node_init(node, &node->un_avlnode, pool);
+
+			if (uu_avl_find(tree, node, NULL, &idx) == NULL) {
+				uu_avl_insert(tree, node, idx);
+			} else {
+				zfs_close(node->un_zhp);
+				free(node->un_mountp);
+				free(node);
+			}
+		}
+
+		/*
+		 * Walk the AVL tree in reverse, unmounting each filesystem and
+		 * removing it from the AVL tree in the process.
+		 */
+		if ((walk = uu_avl_walk_start(tree,
+		    UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL) {
+			(void) fprintf(stderr,
+			    gettext("internal error: out of memory"));
+			exit(1);
+		}
+
+		while ((node = uu_avl_walk_next(walk)) != NULL) {
+			uu_avl_remove(tree, node);
+
+			switch (op) {
+			case OP_SHARE:
+				if (zfs_unshare_nfs(node->un_zhp,
+				    node->un_mountp) != 0)
+					ret = 1;
+				break;
+
+			case OP_MOUNT:
+				if (zfs_unmount(node->un_zhp,
+				    node->un_mountp, flags) != 0)
+					ret = 1;
+				break;
+			}
+
+			zfs_close(node->un_zhp);
+			free(node->un_mountp);
+			free(node);
+		}
+
+		uu_avl_walk_end(walk);
+		uu_avl_destroy(tree);
+		uu_avl_pool_destroy(pool);
+
+		if (op == OP_SHARE) {
+			/*
+			 * Finally, unshare any volumes shared via iSCSI.
+			 */
+			zfs_handle_t **dslist = NULL;
+			size_t i, count = 0;
+
+			get_all_datasets(ZFS_TYPE_VOLUME, &dslist, &count);
+
+			if (count != 0) {
+				qsort(dslist, count, sizeof (void *),
+				    dataset_cmp);
+
+				for (i = 0; i < count; i++) {
+					if (zfs_unshare_iscsi(dslist[i]) != 0)
+						ret = 1;
+					zfs_close(dslist[i]);
+				}
+
+				free(dslist);
+			}
+		}
+	} else {
+		if (argc != 1) {
+			if (argc == 0)
+				(void) fprintf(stderr,
+				    gettext("missing filesystem argument\n"));
+			else
+				(void) fprintf(stderr,
+				    gettext("too many arguments\n"));
+			usage(B_FALSE);
+		}
+
+		/*
+		 * We have an argument, but it may be a full path or a ZFS
+		 * filesystem.  Pass full paths off to unshare_unmount_path()
+		 * (shared with manual_unmount()), otherwise open the dataset
+		 * and unshare or unmount it directly.
+		 */
+		if (argv[0][0] == '/')
+			return (unshare_unmount_path(op, argv[0],
+			    flags, B_FALSE));
+
+		types = ZFS_TYPE_FILESYSTEM;
+		if (op == OP_SHARE)
+			types |= ZFS_TYPE_VOLUME;
+
+		if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL)
+			return (1);
+
+		if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
+			verify(zfs_prop_get(zhp, op == OP_SHARE ?
+			    ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property,
+			    sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
+
+			switch (op) {
+			case OP_SHARE:
+				if (strcmp(property, "off") == 0) {
+					(void) fprintf(stderr, gettext("cannot "
+					    "unshare '%s': legacy share\n"),
+					    zfs_get_name(zhp));
+					(void) fprintf(stderr, gettext("use "
+					    "unshare(1M) to unshare this "
+					    "filesystem\n"));
+					ret = 1;
+				} else if (!zfs_is_shared_nfs(zhp, NULL)) {
+					(void) fprintf(stderr, gettext("cannot "
+					    "unshare '%s': not currently "
+					    "shared\n"), zfs_get_name(zhp));
+					ret = 1;
+				} else if (zfs_unshareall_nfs(zhp) != 0) {
+					ret = 1;
+				}
+				break;
+
+			case OP_MOUNT:
+				if (strcmp(property, "legacy") == 0) {
+					(void) fprintf(stderr, gettext("cannot "
+					    "unmount '%s': legacy "
+					    "mountpoint\n"), zfs_get_name(zhp));
+					(void) fprintf(stderr, gettext("use "
+					    "umount(1M) to unmount this "
+					    "filesystem\n"));
+					ret = 1;
+				} else if (!zfs_is_mounted(zhp, NULL)) {
+					(void) fprintf(stderr, gettext("cannot "
+					    "unmount '%s': not currently "
+					    "mounted\n"),
+					    zfs_get_name(zhp));
+					ret = 1;
+				} else if (zfs_unmountall(zhp, flags) != 0) {
+					ret = 1;
+				}
+				break;
+			}
+		} else {
+			assert(op == OP_SHARE);
+
+			verify(zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, property,
+			    sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
+
+			if (strcmp(property, "off") == 0) {
+				(void) fprintf(stderr, gettext("cannot unshare "
+				    "'%s': 'shareiscsi' property not set\n"),
+				    zfs_get_name(zhp));
+				(void) fprintf(stderr, gettext("set "
+				    "'shareiscsi' property or use "
+				    "iscsitadm(1M) to share this volume\n"));
+				ret = 1;
+			} else if (!zfs_is_shared_iscsi(zhp)) {
+				(void) fprintf(stderr, gettext("cannot "
+				    "unshare '%s': not currently shared\n"),
+				    zfs_get_name(zhp));
+				ret = 1;
+			} else if (zfs_unshare_iscsi(zhp) != 0) {
+				ret = 1;
+			}
+		}
+
+		zfs_close(zhp);
+	}
+
+	return (ret);
+}
+
+/*
+ * zfs unmount -a
+ * zfs unmount filesystem
+ *
+ * Unmount all filesystems, or a specific ZFS filesystem.
+ */
+static int
+zfs_do_unmount(int argc, char **argv)
+{
+	return (unshare_unmount(OP_MOUNT, argc, argv));
+}
+
+/*
+ * zfs unshare -a
+ * zfs unshare filesystem
+ *
+ * Unshare all filesystems, or a specific ZFS filesystem.
+ */
+static int
+zfs_do_unshare(int argc, char **argv)
+{
+	return (unshare_unmount(OP_SHARE, argc, argv));
+}
+
+/*
+ * Attach/detach the given dataset to/from the given jail
+ */
+/* ARGSUSED */
+static int
+do_jail(int argc, char **argv, int attach)
+{
+	zfs_handle_t *zhp;
+	int jailid, ret;
+
+	/* check number of arguments */
+	if (argc < 3) {
+		(void) fprintf(stderr, gettext("missing argument(s)\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 3) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	jailid = atoi(argv[1]);
+	if (jailid == 0) {
+		(void) fprintf(stderr, gettext("invalid jailid\n"));
+		usage(B_FALSE);
+	}
+
+	zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM);
+	if (zhp == NULL)
+		return (1);
+
+	ret = (zfs_jail(zhp, jailid, attach) != 0);
+
+	if (!ret)
+		zpool_log_history(g_zfs, argc, argv, argv[2], B_FALSE, B_FALSE);
+
+	zfs_close(zhp);
+	return (ret);
+}
+
+/*
+ * zfs jail jailid filesystem
+ *
+ * Attach the given dataset to the given jail
+ */
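+/*
+ * Illustrative example (jail ID and dataset are placeholders): 'zfs jail 1
+ * tank/jails/www' attaches tank/jails/www to jail 1 so it can be
+ * administered from inside that jail; 'zfs unjail 1 tank/jails/www'
+ * detaches it again.
+ */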
+/* ARGSUSED */
+static int
+zfs_do_jail(int argc, char **argv)
+{
+
+	return (do_jail(argc, argv, 1));
+}
+
+/*
+ * zfs unjail jailid filesystem
+ *
+ * Detach the given dataset from the given jail
+ */
+/* ARGSUSED */
+static int
+zfs_do_unjail(int argc, char **argv)
+{
+
+	return (do_jail(argc, argv, 0));
+}
+
+/*
+ * Called when invoked as /etc/fs/zfs/mount.  Do the mount if the mountpoint is
+ * 'legacy'.  Otherwise, complain that the user should be using 'zfs mount'.
+ */
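+/*
+ * Illustrative example (names are placeholders): after
+ * 'zfs set mountpoint=legacy tank/legacy', the filesystem can be mounted by
+ * hand, e.g. 'mount -F zfs tank/legacy /mnt' (the syntax echoed by the error
+ * messages below), which is what ends up calling this function.
+ */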
+static int
+manual_mount(int argc, char **argv)
+{
+	zfs_handle_t *zhp;
+	char mountpoint[ZFS_MAXPROPLEN];
+	char mntopts[MNT_LINE_MAX] = { '\0' };
+	int ret;
+	int c;
+	int flags = 0;
+	char *dataset, *path;
+
+	/* check options */
+	while ((c = getopt(argc, argv, ":mo:O")) != -1) {
+		switch (c) {
+		case 'o':
+			(void) strlcpy(mntopts, optarg, sizeof (mntopts));
+			break;
+		case 'O':
+#if 0	/* FreeBSD: No support for MS_OVERLAY. */
+			flags |= MS_OVERLAY;
+#endif
+			break;
+		case 'm':
+#if 0	/* FreeBSD: No support for MS_NOMNTTAB. */
+			flags |= MS_NOMNTTAB;
+#endif
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			(void) fprintf(stderr, gettext("usage: mount [-o opts] "
+			    "<path>\n"));
+			return (2);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check that we only have two arguments */
+	if (argc != 2) {
+		if (argc == 0)
+			(void) fprintf(stderr, gettext("missing dataset "
+			    "argument\n"));
+		else if (argc == 1)
+			(void) fprintf(stderr,
+			    gettext("missing mountpoint argument\n"));
+		else
+			(void) fprintf(stderr, gettext("too many arguments\n"));
+		(void) fprintf(stderr, "usage: mount <dataset> <mountpoint>\n");
+		return (2);
+	}
+
+	dataset = argv[0];
+	path = argv[1];
+
+	/* try to open the dataset */
+	if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_FILESYSTEM)) == NULL)
+		return (1);
+
+	(void) zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
+	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE);
+
+	/* check for legacy mountpoint and complain appropriately */
+	ret = 0;
+	if (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) == 0) {
+		if (zmount(dataset, path, flags, MNTTYPE_ZFS,
+		    NULL, 0, mntopts, sizeof (mntopts)) != 0) {
+			(void) fprintf(stderr, gettext("mount failed: %s\n"),
+			    strerror(errno));
+			ret = 1;
+		}
+	} else {
+		(void) fprintf(stderr, gettext("filesystem '%s' cannot be "
+		    "mounted using 'mount -F zfs'\n"), dataset);
+		(void) fprintf(stderr, gettext("Use 'zfs set mountpoint=%s' "
+		    "instead.\n"), path);
+		(void) fprintf(stderr, gettext("If you must use 'mount -F zfs' "
+		    "or /etc/vfstab, use 'zfs set mountpoint=legacy'.\n"));
+		(void) fprintf(stderr, gettext("See zfs(1M) for more "
+		    "information.\n"));
+		ret = 1;
+	}
+
+	return (ret);
+}
+
+/*
+ * Called when invoked as /etc/fs/zfs/umount.  Unlike a manual mount, we allow
+ * unmounts of non-legacy filesystems, as this is the dominant administrative
+ * interface.
+ */
+static int
+manual_unmount(int argc, char **argv)
+{
+	int flags = 0;
+	int c;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "f")) != -1) {
+		switch (c) {
+		case 'f':
+			flags = MS_FORCE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			(void) fprintf(stderr, gettext("usage: unmount [-f] "
+			    "<path>\n"));
+			return (2);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check arguments */
+	if (argc != 1) {
+		if (argc == 0)
+			(void) fprintf(stderr, gettext("missing path "
+			    "argument\n"));
+		else
+			(void) fprintf(stderr, gettext("too many arguments\n"));
+		(void) fprintf(stderr, gettext("usage: unmount [-f] <path>\n"));
+		return (2);
+	}
+
+	return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE));
+}
+
+static int
+volcheck(zpool_handle_t *zhp, void *data)
+{
+	boolean_t isinit = *((boolean_t *)data);
+
+	if (isinit)
+		return (zpool_create_zvol_links(zhp));
+	else
+		return (zpool_remove_zvol_links(zhp));
+}
+
+/*
+ * Iterate over all pools in the system and either create or destroy /dev/zvol
+ * links, depending on the value of 'isinit'.
+ */
+static int
+do_volcheck(boolean_t isinit)
+{
+	return (zpool_iter(g_zfs, volcheck, &isinit) ? 1 : 0);
+}
+
+int
+main(int argc, char **argv)
+{
+	int ret;
+	int i;
+	char *progname;
+	char *cmdname;
+
+	(void) setlocale(LC_ALL, "");
+	(void) textdomain(TEXT_DOMAIN);
+
+	opterr = 0;
+
+	if ((g_zfs = libzfs_init()) == NULL) {
+		(void) fprintf(stderr, gettext("internal error: failed to "
+		    "initialize ZFS library\n"));
+		return (1);
+	}
+
+	libzfs_print_on_error(g_zfs, B_TRUE);
+
+	if ((mnttab_file = fopen(MNTTAB, "r")) == NULL) {
+		(void) fprintf(stderr, gettext("internal error: unable to "
+		    "open %s\n"), MNTTAB);
+		return (1);
+	}
+
+	/*
+	 * This command also doubles as the /etc/fs mount and unmount program.
+	 * Determine if we should take this behavior based on argv[0].
+	 */
+	progname = basename(argv[0]);
+	if (strcmp(progname, "mount") == 0) {
+		ret = manual_mount(argc, argv);
+	} else if (strcmp(progname, "umount") == 0) {
+		ret = manual_unmount(argc, argv);
+	} else {
+		/*
+		 * Make sure the user has specified some command.
+		 */
+		if (argc < 2) {
+			(void) fprintf(stderr, gettext("missing command\n"));
+			usage(B_FALSE);
+		}
+
+		cmdname = argv[1];
+
+		/*
+		 * The 'umount' command is an alias for 'unmount'
+		 */
+		if (strcmp(cmdname, "umount") == 0)
+			cmdname = "unmount";
+
+		/*
+		 * The 'recv' command is an alias for 'receive'
+		 */
+		if (strcmp(cmdname, "recv") == 0)
+			cmdname = "receive";
+
+		/*
+		 * Special case '-?'
+		 */
+		if (strcmp(cmdname, "-?") == 0)
+			usage(B_TRUE);
+
+		/*
+		 * 'volinit' and 'volfini' do not appear in the usage message,
+		 * so we have to special case them here.
+		 */
+		if (strcmp(cmdname, "volinit") == 0)
+			return (do_volcheck(B_TRUE));
+		else if (strcmp(cmdname, "volfini") == 0)
+			return (do_volcheck(B_FALSE));
+
+		/*
+		 * Run the appropriate command.
+		 */
+		for (i = 0; i < NCOMMAND; i++) {
+			if (command_table[i].name == NULL)
+				continue;
+
+			if (strcmp(cmdname, command_table[i].name) == 0) {
+				current_command = &command_table[i];
+				ret = command_table[i].func(argc - 1, argv + 1);
+				break;
+			}
+		}
+
+		if (i == NCOMMAND) {
+			(void) fprintf(stderr, gettext("unrecognized "
+			    "command '%s'\n"), cmdname);
+			usage(B_FALSE);
+		}
+	}
+
+	(void) fclose(mnttab_file);
+
+	libzfs_fini(g_zfs);
+
+	/*
+	 * The 'ZFS_ABORT' environment variable causes us to dump core on exit
+	 * for the purposes of running ::findleaks.
+	 */
+	if (getenv("ZFS_ABORT") != NULL) {
+		(void) printf("dumping core by request\n");
+		abort();
+	}
+
+	return (ret);
+}
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h
@@ -0,0 +1,52 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	ZFS_ITER_H
+#define	ZFS_ITER_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+typedef struct zfs_sort_column {
+	struct zfs_sort_column	*sc_next;
+	struct zfs_sort_column	*sc_last;
+	zfs_prop_t		sc_prop;
+	char			*sc_user_prop;
+	boolean_t		sc_reverse;
+} zfs_sort_column_t;
+
+int zfs_for_each(int, char **, boolean_t, zfs_type_t, zfs_sort_column_t *,
+    zfs_proplist_t **, zfs_iter_f, void *, boolean_t);
+int zfs_add_sort_column(zfs_sort_column_t **, const char *, boolean_t);
+void zfs_free_sort_columns(zfs_sort_column_t *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* ZFS_ITER_H */
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c
@@ -0,0 +1,405 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <libintl.h>
+#include <libuutil.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include <libzfs.h>
+
+#include "zfs_util.h"
+#include "zfs_iter.h"
+
+/*
+ * This is a private interface used to gather up all the datasets specified on
+ * the command line so that we can iterate over them in order.
+ *
+ * First, we iterate over all filesystems, gathering them together into an
+ * AVL tree.  We report errors for any explicitly specified datasets
+ * that we couldn't open.
+ *
+ * When finished, we have an AVL tree of ZFS handles.  We go through and execute
+ * the provided callback for each one, passing whatever data the user supplied.
+ */
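+
+/*
+ * A sketch of typical use (illustrative only; the callback and cookie names
+ * are hypothetical): a subcommand hands its remaining arguments to
+ * zfs_for_each(), which builds the AVL tree and then invokes the callback
+ * once per handle in sorted order:
+ *
+ *     ret = zfs_for_each(argc, argv, recurse, ZFS_TYPE_FILESYSTEM, sortcol,
+ *         &cb.cb_proplist, list_callback, &cb, B_TRUE);
+ */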
+
+typedef struct zfs_node {
+	zfs_handle_t	*zn_handle;
+	uu_avl_node_t	zn_avlnode;
+} zfs_node_t;
+
+typedef struct callback_data {
+	uu_avl_t	*cb_avl;
+	int		cb_recurse;
+	zfs_type_t	cb_types;
+	zfs_sort_column_t *cb_sortcol;
+	zfs_proplist_t	**cb_proplist;
+} callback_data_t;
+
+uu_avl_pool_t *avl_pool;
+
+/*
+ * Called for each dataset.  If the object is of an appropriate type,
+ * add it to the avl tree and recurse over any children as necessary.
+ */
+int
+zfs_callback(zfs_handle_t *zhp, void *data)
+{
+	callback_data_t *cb = data;
+	int dontclose = 0;
+
+	/*
+	 * If this object is of the appropriate type, add it to the AVL tree.
+	 */
+	if (zfs_get_type(zhp) & cb->cb_types) {
+		uu_avl_index_t idx;
+		zfs_node_t *node = safe_malloc(sizeof (zfs_node_t));
+
+		node->zn_handle = zhp;
+		uu_avl_node_init(node, &node->zn_avlnode, avl_pool);
+		if (uu_avl_find(cb->cb_avl, node, cb->cb_sortcol,
+		    &idx) == NULL) {
+			if (cb->cb_proplist &&
+			    zfs_expand_proplist(zhp, cb->cb_proplist) != 0) {
+				free(node);
+				return (-1);
+			}
+			uu_avl_insert(cb->cb_avl, node, idx);
+			dontclose = 1;
+		} else {
+			free(node);
+		}
+	}
+
+	/*
+	 * Recurse if necessary.
+	 */
+	if (cb->cb_recurse && (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM ||
+	    (zfs_get_type(zhp) == ZFS_TYPE_VOLUME && (cb->cb_types &
+	    ZFS_TYPE_SNAPSHOT))))
+		(void) zfs_iter_children(zhp, zfs_callback, data);
+
+	if (!dontclose)
+		zfs_close(zhp);
+
+	return (0);
+}
+
+int
+zfs_add_sort_column(zfs_sort_column_t **sc, const char *name,
+    boolean_t reverse)
+{
+	zfs_sort_column_t *col;
+	zfs_prop_t prop;
+
+	if ((prop = zfs_name_to_prop(name)) == ZFS_PROP_INVAL &&
+	    !zfs_prop_user(name))
+		return (-1);
+
+	col = safe_malloc(sizeof (zfs_sort_column_t));
+
+	col->sc_prop = prop;
+	col->sc_reverse = reverse;
+	if (prop == ZFS_PROP_INVAL) {
+		col->sc_user_prop = safe_malloc(strlen(name) + 1);
+		(void) strcpy(col->sc_user_prop, name);
+	}
+
+	if (*sc == NULL) {
+		col->sc_last = col;
+		*sc = col;
+	} else {
+		(*sc)->sc_last->sc_next = col;
+		(*sc)->sc_last = col;
+	}
+
+	return (0);
+}
+
+void
+zfs_free_sort_columns(zfs_sort_column_t *sc)
+{
+	zfs_sort_column_t *col;
+
+	while (sc != NULL) {
+		col = sc->sc_next;
+		free(sc->sc_user_prop);
+		free(sc);
+		sc = col;
+	}
+}
+
+/* ARGSUSED */
+static int
+zfs_compare(const void *larg, const void *rarg, void *unused)
+{
+	zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
+	zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
+	const char *lname = zfs_get_name(l);
+	const char *rname = zfs_get_name(r);
+	char *lat, *rat;
+	uint64_t lcreate, rcreate;
+	int ret;
+
+	lat = (char *)strchr(lname, '@');
+	rat = (char *)strchr(rname, '@');
+
+	if (lat != NULL)
+		*lat = '\0';
+	if (rat != NULL)
+		*rat = '\0';
+
+	ret = strcmp(lname, rname);
+	if (ret == 0) {
+		/*
+		 * If we're comparing a dataset to one of its snapshots, we
+		 * always sort the full dataset first.
+		 */
+		if (lat == NULL) {
+			ret = -1;
+		} else if (rat == NULL) {
+			ret = 1;
+		} else {
+			/*
+			 * If we have two snapshots from the same dataset, then
+			 * we want to sort them according to creation time.  We
+			 * use the hidden CREATETXG property to get an absolute
+			 * ordering of snapshots.
+			 */
+			lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
+			rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
+
+			if (lcreate < rcreate)
+				ret = -1;
+			else if (lcreate > rcreate)
+				ret = 1;
+		}
+	}
+
+	if (lat != NULL)
+		*lat = '@';
+	if (rat != NULL)
+		*rat = '@';
+
+	return (ret);
+}
+
+/*
+ * Sort datasets by specified columns.
+ *
+ * o  Numeric types sort in ascending order.
+ * o  String types sort in alphabetical order.
+ * o  Rows for which the property is inappropriate sort to the
+ *    bottom, regardless of the specified ordering.
+ *
+ * If no sort columns are specified, or two datasets compare equally
+ * across all specified columns, they are sorted alphabetically by name
+ * with snapshots grouped under their parents.
+ */
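+/*
+ * Illustrative note (not part of the original comment): the column list
+ * walked below is built up by zfs_add_sort_column(), so a command line such
+ * as "zfs list -s used -S name" yields two columns, the first ascending on
+ * "used" and the second descending on "name".
+ */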
+static int
+zfs_sort(const void *larg, const void *rarg, void *data)
+{
+	zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
+	zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
+	zfs_sort_column_t *sc = (zfs_sort_column_t *)data;
+	zfs_sort_column_t *psc;
+
+	for (psc = sc; psc != NULL; psc = psc->sc_next) {
+		char lbuf[ZFS_MAXPROPLEN], rbuf[ZFS_MAXPROPLEN];
+		char *lstr, *rstr;
+		uint64_t lnum, rnum;
+		boolean_t lvalid, rvalid;
+		int ret = 0;
+
+		/*
+		 * We group the checks below the generic code.  If 'lstr' and
+		 * 'rstr' are non-NULL, then we do a string based comparison.
+		 * Otherwise, we compare 'lnum' and 'rnum'.
+		 */
+		lstr = rstr = NULL;
+		if (psc->sc_prop == ZFS_PROP_INVAL) {
+			nvlist_t *luser, *ruser;
+			nvlist_t *lval, *rval;
+
+			luser = zfs_get_user_props(l);
+			ruser = zfs_get_user_props(r);
+
+			lvalid = (nvlist_lookup_nvlist(luser,
+			    psc->sc_user_prop, &lval) == 0);
+			rvalid = (nvlist_lookup_nvlist(ruser,
+			    psc->sc_user_prop, &rval) == 0);
+
+			if (lvalid)
+				verify(nvlist_lookup_string(lval,
+				    ZFS_PROP_VALUE, &lstr) == 0);
+			if (rvalid)
+				verify(nvlist_lookup_string(rval,
+				    ZFS_PROP_VALUE, &rstr) == 0);
+
+		} else if (zfs_prop_is_string(psc->sc_prop)) {
+			lvalid = (zfs_prop_get(l, psc->sc_prop, lbuf,
+			    sizeof (lbuf), NULL, NULL, 0, B_TRUE) == 0);
+			rvalid = (zfs_prop_get(r, psc->sc_prop, rbuf,
+			    sizeof (rbuf), NULL, NULL, 0, B_TRUE) == 0);
+
+			lstr = lbuf;
+			rstr = rbuf;
+		} else {
+			lvalid = zfs_prop_valid_for_type(psc->sc_prop,
+			    zfs_get_type(l));
+			rvalid = zfs_prop_valid_for_type(psc->sc_prop,
+			    zfs_get_type(r));
+
+			if (lvalid)
+				(void) zfs_prop_get_numeric(l, psc->sc_prop,
+				    &lnum, NULL, NULL, 0);
+			if (rvalid)
+				(void) zfs_prop_get_numeric(r, psc->sc_prop,
+				    &rnum, NULL, NULL, 0);
+		}
+
+		if (!lvalid && !rvalid)
+			continue;
+		else if (!lvalid)
+			return (1);
+		else if (!rvalid)
+			return (-1);
+
+		if (lstr)
+			ret = strcmp(lstr, rstr);
+		else if (lnum < rnum)
+			ret = -1;
+		else if (lnum > rnum)
+			ret = 1;
+
+		if (ret != 0) {
+			if (psc->sc_reverse == B_TRUE)
+				ret = (ret < 0) ? 1 : -1;
+			return (ret);
+		}
+	}
+
+	return (zfs_compare(larg, rarg, NULL));
+}
+
+int
+zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types,
+    zfs_sort_column_t *sortcol, zfs_proplist_t **proplist, zfs_iter_f callback,
+    void *data, boolean_t args_can_be_paths)
+{
+	callback_data_t cb;
+	int ret = 0;
+	zfs_node_t *node;
+	uu_avl_walk_t *walk;
+
+	avl_pool = uu_avl_pool_create("zfs_pool", sizeof (zfs_node_t),
+	    offsetof(zfs_node_t, zn_avlnode), zfs_sort, UU_DEFAULT);
+
+	if (avl_pool == NULL) {
+		(void) fprintf(stderr,
+		    gettext("internal error: out of memory\n"));
+		exit(1);
+	}
+
+	cb.cb_sortcol = sortcol;
+	cb.cb_recurse = recurse;
+	cb.cb_proplist = proplist;
+	cb.cb_types = types;
+	if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) {
+		(void) fprintf(stderr,
+		    gettext("internal error: out of memory\n"));
+		exit(1);
+	}
+
+	if (argc == 0) {
+		/*
+		 * If given no arguments, iterate over all datasets.
+		 */
+		cb.cb_recurse = 1;
+		ret = zfs_iter_root(g_zfs, zfs_callback, &cb);
+	} else {
+		int i;
+		zfs_handle_t *zhp;
+		zfs_type_t argtype;
+
+		/*
+		 * If we're recursive, then we always allow filesystems as
+		 * arguments.  If we also are interested in snapshots, then we
+		 * can take volumes as well.
+		 */
+		argtype = types;
+		if (recurse) {
+			argtype |= ZFS_TYPE_FILESYSTEM;
+			if (types & ZFS_TYPE_SNAPSHOT)
+				argtype |= ZFS_TYPE_VOLUME;
+		}
+
+		for (i = 0; i < argc; i++) {
+			if (args_can_be_paths) {
+				zhp = zfs_path_to_zhandle(g_zfs, argv[i],
+				    argtype);
+			} else {
+				zhp = zfs_open(g_zfs, argv[i], argtype);
+			}
+			if (zhp != NULL)
+				ret |= zfs_callback(zhp, &cb);
+			else
+				ret = 1;
+		}
+	}
+
+	/*
+	 * At this point we've got our AVL tree full of zfs handles, so iterate
+	 * over each one and execute the real user callback.
+	 */
+	for (node = uu_avl_first(cb.cb_avl); node != NULL;
+	    node = uu_avl_next(cb.cb_avl, node))
+		ret |= callback(node->zn_handle, data);
+
+	/*
+	 * Finally, clean up the AVL tree.
+	 */
+	if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) {
+		(void) fprintf(stderr,
+		    gettext("internal error: out of memory"));
+		exit(1);
+	}
+
+	while ((node = uu_avl_walk_next(walk)) != NULL) {
+		uu_avl_remove(cb.cb_avl, node);
+		zfs_close(node->zn_handle);
+		free(node);
+	}
+
+	uu_avl_walk_end(walk);
+	uu_avl_destroy(cb.cb_avl);
+	uu_avl_pool_destroy(avl_pool);
+
+	return (ret);
+}
--- /dev/null
+++ cddl/contrib/opensolaris/cmd/zfs/zfs.8
@@ -0,0 +1,1843 @@
+'\" te
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").  
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\" Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
+.TH zfs 1M "16 Mar 2007" "SunOS 5.11" "System Administration Commands"
+.SH NAME
+zfs \- configures ZFS file systems
+.SH SYNOPSIS
+.LP
+.nf
+\fBzfs\fR [\fB-?\fR]
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBcreate\fR [[\fB-o\fR property=\fIvalue\fR]]... \fIfilesystem\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBcreate\fR [\fB-s\fR] [\fB-b\fR \fIblocksize\fR] [[\fB-o\fR property=\fIvalue\fR]]... \fB-V\fR \fIsize\fR \fIvolume\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBdestroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBclone\fR \fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBpromote\fR \fIfilesystem\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBrename\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR 
+    [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR]
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBsnapshot\fR [\fB-r\fR] \fIfilesystem@name\fR|\fIvolume@name\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBrollback\fR [\fB-rRf\fR] \fIsnapshot\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBlist\fR [\fB-rH\fR] [\fB-o\fR \fIprop\fR[,\fIprop\fR] ]... [ \fB-t\fR \fItype\fR[,\fItype\fR]...]
+    [\fB-s\fR \fIprop\fR]... [\fB-S\fR \fIprop\fR]...
+    [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR|\fI/pathname\fR|\fI./pathname\fR] ...
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBset\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR ...
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBget\fR [\fB-rHp\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]...] 
+    [\fB-s\fR \fIsource\fR[,\fIsource\fR]...] \fIall\fR | \fIproperty\fR[,\fIproperty\fR]...
+     \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBinherit\fR [\fB-r\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume\fR ...
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBmount\fR 
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBmount\fR [\fB-o \fIoptions\fR\fR] [\fB-O\fR] \fB-a\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBmount\fR [\fB-o \fIoptions\fR\fR] [\fB-O\fR] \fIfilesystem\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBunmount\fR [\fB-f\fR] \fB-a\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBunmount\fR [\fB-f\fR] \fB\fIfilesystem\fR|\fImountpoint\fR\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBshare\fR \fB-a\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBshare\fR \fIfilesystem\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBunshare\fR [\fB-f\fR] \fB-a\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBunshare\fR [\fB-f\fR] \fB\fIfilesystem\fR|\fImountpoint\fR\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBsend\fR [\fB-i\fR \fIsnapshot1\fR] \fB\fIsnapshot2\fR\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBreceive\fR [\fB-vnF\fR ] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBreceive\fR [\fB-vnF\fR ] \fB-d\fR \fB\fIfilesystem\fR\fR
+.fi
+.LP
+.nf
+\fBzfs\fR \fBjail\fR \fBjailid\fR \fB\fIfilesystem\fR\fR
+.fi
+.LP
+.nf
+\fBzfs\fR \fBunjail\fR \fBjailid\fR \fB\fIfilesystem\fR\fR
+.fi
+
+.SH DESCRIPTION
+.LP
+The \fBzfs\fR command configures \fBZFS\fR datasets within a \fBZFS\fR storage pool, as described in \fBzpool\fR(1M). A
+dataset is identified by a unique path within the \fBZFS\fR namespace. For example:
+.sp
+.in +2
+.nf
+pool/{filesystem,volume,snapshot}
+.fi
+.in -2
+.sp
+
+.LP
+where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes).
+.LP
+A dataset can be one of the following:
+.sp
+.ne 2
+.mk
+.na
+\fB\fIfile system\fR\fR
+.ad
+.RS 15n
+.rt  
+A standard \fBPOSIX\fR file system. \fBZFS\fR file systems can be mounted within the standard file system namespace and behave like any other file system.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fIvolume\fR\fR
+.ad
+.RS 15n
+.rt  
+A logical volume exported as a raw or block device. This type of dataset should only be used under special circumstances. File systems are typically used in most environments. Volumes cannot be used in a non-global zone.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fIsnapshot\fR\fR
+.ad
+.RS 15n
+.rt  
+A read-only version of a file system or volume at a given point in time. It is specified as \fIfilesystem@name\fR or \fIvolume@name\fR.
+.RE
+
+.SS "ZFS File System Hierarchy"
+.LP
+A \fBZFS\fR storage pool is a logical collection of devices that provide space for datasets. A storage pool is also the root of the \fBZFS\fR file system hierarchy.
+.LP
+The root of the pool can be accessed as a file system, such as mounting and unmounting, taking snapshots, and setting properties. The physical storage characteristics, however, are managed by the \fBzpool\fR(1M) command.
+.LP
+See \fBzpool\fR(1M) for more information on creating and administering pools.
+.SS "Snapshots"
+.LP
+A snapshot is a read-only copy of a file system or volume. Snapshots can be created extremely quickly, and initially consume no additional space within the pool. As data within the active dataset changes, the snapshot consumes more data than would otherwise be shared with the active dataset.
+.LP
+Snapshots can have arbitrary names. Snapshots of volumes can be cloned or rolled back, but cannot be accessed independently.
+.LP
+File system snapshots can be accessed under the ".zfs/snapshot" directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the ".zfs" directory can be controlled by the "snapdir"
+property.
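+.LP
+For example (the dataset name "pool/home" and snapshot name "monday" are purely illustrative), the snapshots of a file system can be listed either with the "\fBzfs list\fR" command or through the hidden directory:
+.sp
+.in +2
+.nf
+# zfs snapshot pool/home@monday
+# zfs list -t snapshot -r pool/home
+# ls /pool/home/.zfs/snapshot
+.fi
+.in -2
+.sp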
+.SS "Clones"
+.LP
+A clone is a writable volume or file system whose initial contents are the same as another dataset. As with snapshots, creating a clone is nearly instantaneous, and initially consumes no additional space.
+.LP
+Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The "origin"
+property exposes this dependency, and the \fBdestroy\fR command lists any such dependencies, if they exist.
+.LP
+The clone parent-child dependency relationship can be reversed by using the "\fBpromote\fR" subcommand. This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone
+was created from.
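+.LP
+For example (dataset and snapshot names are illustrative only), a clone is created from a snapshot and can later be promoted so that the original file system can be destroyed:
+.sp
+.in +2
+.nf
+# zfs clone pool/home/user@today pool/newuser
+# zfs promote pool/newuser
+.fi
+.in -2
+.sp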
+.SS "Mount Points"
+.LP
+Creating a \fBZFS\fR file system is a simple operation, so the number of file systems per system will likely be numerous. To cope with this, \fBZFS\fR automatically manages mounting and unmounting file systems without the need to edit the \fB/etc/vfstab\fR file.
+All automatically managed file systems are mounted by \fBZFS\fR at boot time.
+.LP
+By default, file systems are mounted under \fB/\fIpath\fR\fR, where \fIpath\fR is the name of the file system in the \fBZFS\fR namespace. Directories are created and destroyed as needed.
+.LP
+A file system can also have a mount point set in the "mountpoint" property. This directory is created as needed, and \fBZFS\fR automatically mounts the file system when the "\fBzfs mount -a\fR" command is invoked (without editing \fB/etc/vfstab\fR). The mountpoint property can be inherited, so if \fBpool/home\fR has a mount point of \fB/export/stuff\fR, then \fBpool/home/user\fR automatically inherits a mount point of \fB/export/stuff/user\fR.
+.LP
+A file system mountpoint property of "none" prevents the file system from being mounted.
+.LP
+If needed, \fBZFS\fR file systems can also be managed with traditional tools (\fBmount\fR, \fBumount\fR, \fB/etc/vfstab\fR). If a file system's mount point is set to "legacy", \fBZFS\fR makes no attempt to manage
+the file system, and the administrator is responsible for mounting and unmounting the file system.
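+.LP
+For example (dataset names and paths are illustrative only), a mount point set on a parent file system is inherited by its children unless they override it, and "\fBzfs get -r\fR" can be used to inspect the result:
+.sp
+.in +2
+.nf
+# zfs set mountpoint=/export/stuff pool/home
+# zfs get -r mountpoint pool/home
+.fi
+.in -2
+.sp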
+.SS "Zones"
+.LP
+A \fBZFS\fR file system can be added to a non-global zone by using zonecfg's "\fBadd fs\fR" subcommand. A \fBZFS\fR file system that is added to a non-global zone must have its mountpoint property set to legacy.
+.LP
+The physical properties of an added file system are controlled by the global administrator. However, the zone administrator can create, modify, or destroy files within the added file system, depending on how the file system is mounted.
+.LP
+A dataset can also be delegated to a non-global zone by using zonecfg's "\fBadd dataset\fR" subcommand. You cannot delegate a dataset to one zone and the children of the same dataset to another zone. The zone administrator can change properties of the dataset or
+any of its children. However, the "quota" property is controlled by the global administrator.
+.LP
+A \fBZFS\fR volume can be added as a device to a non-global zone by using zonecfg's "\fBadd device\fR" subcommand. However, its physical properties can only be modified by the global administrator.
+.LP
+For more information about \fBzonecfg\fR syntax, see \fBzonecfg\fR(1M).
+.LP
+After a dataset is delegated to a non-global zone, the "zoned" property is automatically set. A zoned file system cannot be mounted in the global zone, since the zone administrator might have to set the mount point to an unacceptable value.
+.LP
+The global administrator can forcibly clear the "zoned" property, though this should be done with extreme care. The global administrator should verify that all the mount points are acceptable before clearing the property.
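+.LP
+For example (zone and dataset names are illustrative only), a dataset can be delegated to a zone with \fBzonecfg\fR(1M):
+.sp
+.in +2
+.nf
+# zonecfg -z zion
+zonecfg:zion> add dataset
+zonecfg:zion:dataset> set name=pool/zone/data
+zonecfg:zion:dataset> end
+.fi
+.in -2
+.sp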
+.SS "Native Properties"
+.LP
+Properties are divided into two types, native properties and user defined properties. Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior,
+but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section.
+.LP
+Every dataset has a set of properties that export statistics about the dataset as well as control various behavior. Properties are inherited from the parent unless overridden by the child. Snapshot properties can not be edited; they always inherit their inheritable properties. Properties
+that are not applicable to snapshots are not displayed.
+.LP
+The values of numeric properties can be specified using the following human-readable suffixes (for example, "k", "KB", "M", "Gb", and so on, up to "Z" for zettabyte). The following are all valid (and equal) specifications:
+.sp
+.in +2
+.nf
+"1536M", "1.5g", "1.50GB".
+.fi
+.in -2
+.sp
+
+.LP
+The values of non-numeric properties are case sensitive and must be lowercase, except for "mountpoint" and "sharenfs".
+.LP
+The first set of properties consist of read-only statistics about the dataset. These properties cannot be set, nor are they inherited. Native properties apply to all dataset types unless otherwise noted.
+.sp
+.ne 2
+.mk
+.na
+\fBtype\fR
+.ad
+.RS 17n
+.rt  
+The type of dataset: "filesystem", "volume", "snapshot", or "clone".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBcreation\fR
+.ad
+.RS 17n
+.rt  
+The time this dataset was created.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBused\fR
+.ad
+.RS 17n
+.rt  
+The amount of space consumed by this dataset and all its descendants. This is the value that is checked against this dataset's quota and reservation. The space used does not include this dataset's reservation, but does take into account the reservations of any descendant datasets.
+The amount of space that a dataset consumes from its parent, as well as the amount of space that will be freed if this dataset is recursively destroyed, is the greater of its space used and its reservation.
+.sp
+When snapshots (see the "Snapshots" section) are created, their space is initially shared between the snapshot and the file system, and possibly with previous snapshots. As the file system changes, space that was previously shared becomes unique to the snapshot, and counted in
+the snapshot's space used. Additionally, deleting snapshots can increase the amount of space unique to (and used by) other snapshots.
+.sp
+The amount of space used, available, or referenced does not take into account pending changes. Pending changes are generally accounted for within a few seconds. Committing a change to a disk using \fBfsync\fR(3c) or \fBO_SYNC\fR does not necessarily guarantee that the space usage information is updated immediately.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBavailable\fR
+.ad
+.RS 17n
+.rt  
+The amount of space available to the dataset and all its children, assuming that there is no other activity in the pool. Because space is shared within a pool, availability can be limited by any number of factors, including physical pool size, quotas, reservations, or other datasets
+within the pool.
+.sp
+This property can also be referred to by its shortened column name, "avail".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBreferenced\fR
+.ad
+.RS 17n
+.rt  
+The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool. When a snapshot or clone is created, it initially references the same amount of space as the file system or snapshot it was created from, since its contents are
+identical.
+.sp
+This property can also be referred to by its shortened column name, "refer".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBcompressratio\fR
+.ad
+.RS 17n
+.rt  
+The compression ratio achieved for this dataset, expressed as a multiplier. Compression can be turned on by running "zfs set compression=on \fIdataset\fR". The default value is "off".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBmounted\fR
+.ad
+.RS 17n
+.rt  
+For file systems, indicates whether the file system is currently mounted. This property can be either "yes" or "no".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBorigin\fR
+.ad
+.RS 17n
+.rt  
+For cloned file systems or volumes, the snapshot from which the clone was created. The origin cannot be destroyed (even with the \fB-r\fR or \fB-f\fR options) so long as a clone exists.
+.RE
+
+.LP
+The following two properties can be set to control the way space is allocated between datasets. These properties are not inherited, but do affect their descendants.
+.sp
+.ne 2
+.mk
+.na
+\fBquota=\fIsize\fR | \fInone\fR\fR
+.ad
+.sp .6
+.RS 4n
+Limits the amount of space a dataset and its descendants can consume. This property enforces a hard limit on the amount of space used. This includes all space consumed by descendants, including file systems and snapshots. Setting a quota on a descendant of a dataset that already
+has a quota does not override the ancestor's quota, but rather imposes an additional limit.
+.sp
+Quotas cannot be set on volumes, as the "volsize" property acts as an implicit quota.
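+.sp
+For example (dataset name and size are illustrative), a 10 Gbyte hard limit can be placed on a dataset and all of its descendants:
+.sp
+.in +2
+.nf
+# zfs set quota=10G pool/home/user
+.fi
+.in -2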
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBreservation=\fIsize\fR | \fInone\fR\fR
+.ad
+.sp .6
+.RS 4n
+The minimum amount of space guaranteed to a dataset and its descendants. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by its reservation. Reservations are accounted for in the parent datasets' space
+used, and count against the parent datasets' quotas and reservations.
+.sp
+This property can also be referred to by its shortened column name, "reserv".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBvolsize=\fIsize\fR\fR
+.ad
+.sp .6
+.RS 4n
+For volumes, specifies the logical size of the volume. By default, creating a volume establishes a reservation of equal size. Any changes to \fBvolsize\fR are reflected in an equivalent change to the reservation. The \fBvolsize\fR can only be set to a
+multiple of \fBvolblocksize\fR, and cannot be zero.
+.sp
+The reservation is kept equal to the volume's logical size to prevent unexpected behavior for consumers. Without the reservation, the volume could run out of space, resulting in undefined behavior or data corruption, depending on how the volume is used. These effects can also occur when
+the volume size is changed while it is in use (particularly when shrinking the size). Extreme care should be used when adjusting the volume size.
+.sp
+Though not recommended, a "sparse volume" (also known as "thin provisioning") can be created by specifying the \fB-s\fR option to the "\fBzfs create -V\fR" command, or by changing the reservation after the volume has been created.
+A "sparse volume" is a volume where the reservation is less then the volume size. Consequently, writes to a sparse volume can fail with \fBENOSPC\fR when the pool is low on space. For a sparse volume, changes to \fBvolsize\fR are not reflected in the reservation.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBvolblocksize=\fIblocksize\fR\fR
+.ad
+.sp .6
+.RS 4n
+For volumes, specifies the block size of the volume. The \fBblocksize\fR cannot be changed once the volume has been written, so it should be set at volume creation time. The default \fBblocksize\fR for volumes is 8 Kbytes. Any power of 2 from 512 bytes
+to 128 Kbytes is valid.
+.sp
+This property can also be referred to by its shortened column name, "volblock".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBrecordsize=\fIsize\fR\fR
+.ad
+.sp .6
+.RS 4n
+Specifies a suggested block size for files in the file system. This property is designed solely for use with database workloads that access files in fixed-size records. \fBZFS\fR automatically tunes block sizes according to internal algorithms optimized for typical
+access patterns. 
+.sp
+For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a "recordsize" greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general
+purpose file systems is strongly discouraged, and may adversely affect performance.
+.sp
+The size specified must be a power of two greater than or equal to 512 and less than or equal to 128 Kbytes.
+.sp
+Changing the file system's \fBrecordsize\fR only affects files created afterward; existing files are unaffected.
+.sp
+This property can also be referred to by its shortened column name, "recsize".
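+.sp
+For example (dataset name is illustrative, assuming a database that uses fixed 8 Kbyte records):
+.sp
+.in +2
+.nf
+# zfs set recordsize=8K pool/db
+.fi
+.in -2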
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBmountpoint=\fIpath\fR | \fInone\fR | \fIlegacy\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls the mount point used for this file system. See the "Mount Points" section for more information on how this property is used. 
+.sp
+When the mountpoint property is changed for a file system, the file system and any children that inherit the mount point are unmounted. If the new value is "legacy", then they remain unmounted. Otherwise, they are automatically remounted in the new location if the property was
+previously "legacy" or "none", or if they were mounted before the property was changed. In addition, any shared file systems are unshared and shared in the new location.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBsharenfs=\fIon\fR | \fIoff\fR | \fIopts\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether the file system is shared via \fBNFS\fR, and what options are used. A file system with a sharenfs property of "off" is managed through traditional tools such as \fBshare\fR(1M), \fBunshare\fR(1M), and \fBdfstab\fR(4). Otherwise, the file system is automatically shared and unshared with the "\fBzfs share\fR" and "\fBzfs unshare\fR" commands. If the property is set to "on", the \fBshare\fR(1M) command is invoked with no options. Otherwise, the \fBshare\fR(1M) command is invoked with options equivalent to the contents of this property.
+.sp
+When the "sharenfs" property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously "off", or if they were shared before the property was changed. If the new property is "off",
+the file systems are unshared.
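+.sp
+For example (dataset name is illustrative), a file system can be shared with default options by turning the property on, or unshared again by setting it to "off":
+.sp
+.in +2
+.nf
+# zfs set sharenfs=on pool/export/home
+# zfs set sharenfs=off pool/export/home
+.fi
+.in -2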
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBshareiscsi=\fIon\fR | \fIoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Like the "sharenfs" property, "shareiscsi" indicates whether a \fBZFS\fR volume is exported as an \fBiSCSI\fR target. The acceptable values for this property are "on", "off", and "type=disk".
+The default value is "off". In the future, other target types might be supported. For example, "tape".
+.sp
+You might want to set "shareiscsi=on" for a file system so that all \fBZFS\fR volumes within the file system are shared by default. Setting this property on a file system has no direct effect, however.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBchecksum=\fIon\fR | \fIoff\fR | \fIfletcher2\fR, | \fIfletcher4\fR | \fIsha256\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls the checksum used to verify data integrity. The default value is "on", which automatically selects an appropriate algorithm (currently, \fIfletcher2\fR, but this may change in future releases). The value "off" disables integrity
+checking on user data. Disabling checksums is NOT a recommended practice.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBcompression=\fIon\fR | \fIoff\fR | \fIlzjb\fR | \fIgzip\fR | \fIgzip-N\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls the compression algorithm used for this dataset. The "lzjb" compression algorithm is optimized for performance while providing decent data compression. Setting compression to "on" uses the "lzjb" compression algorithm. The "gzip"
+compression algorithm uses the same compression as the \fBgzip\fR(1) command.  You can specify the "gzip" level by using the value "gzip-\fIN\fR",
+where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, "gzip" is equivalent to "gzip-6" (which is also the default for \fBgzip\fR(1)).
+.sp
+This property can also be referred to by its shortened column name "compress".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBatime=\fIon\fR | \fIoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value
+is "on".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBdevices=\fIon\fR | \fIoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether device nodes can be opened on this file system. The default value is "on".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBexec=\fIon\fR | \fIoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether processes can be executed from within this file system. The default value is "on".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBsetuid=\fIon\fR | \fIoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether the set-\fBUID\fR bit is respected for the file system. The default value is "on".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBreadonly=\fIon\fR | \fIoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether this dataset can be modified. The default value is "off".
+.sp
+This property can also be referred to by its shortened column name, "rdonly".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBzoned=\fIon\fR | \fIoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether the dataset is managed from a non-global zone. See the "Zones" section for more information. The default value is "off".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBsnapdir=\fIhidden\fR | \fIvisible\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether the ".zfs" directory is hidden or visible in the root of the file system as discussed in the "Snapshots" section. The default value is "hidden".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBaclmode=\fBdiscard\fR | \fBgroupmask\fR | \fBpassthrough\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls how an \fBACL\fR is modified during \fBchmod\fR(2). A file system with an "aclmode" property of "\fBdiscard\fR"
+deletes all \fBACL\fR entries that do not represent the mode of the file. An "aclmode" property of "\fBgroupmask\fR" (the default) reduces user or group permissions. The permissions are reduced, such that they are no greater than the group permission
+bits, unless it is a user entry that has the same \fBUID\fR as the owner of the file or directory. In this case, the \fBACL\fR permissions are reduced so that they are no greater than owner permission bits. A file system with an "aclmode" property of "\fBpassthrough\fR" indicates that no changes will be made to the \fBACL\fR other than generating the necessary \fBACL\fR entries to represent the new mode of the file or directory.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBaclinherit=\fBdiscard\fR | \fBnoallow\fR | \fBsecure\fR | \fBpassthrough\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls how \fBACL\fR entries are inherited when files and directories are created. A file system with an "aclinherit" property of "\fBdiscard\fR" does not inherit any \fBACL\fR entries. A file system with an "aclinherit"
+property value of "\fBnoallow\fR" only inherits inheritable \fBACL\fR entries that specify "deny" permissions. The property value "\fBsecure\fR" (the default) removes the "\fBwrite_acl\fR" and "\fBwrite_owner\fR" permissions when the \fBACL\fR entry is inherited. A file system with an "aclinherit" property value of "\fBpassthrough\fR" inherits all inheritable \fBACL\fR entries without any modifications made to the \fBACL\fR entries when they are inherited.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBcanmount=\fBon\fR | \fBoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+If this property is set to "\fBoff\fR", the file system cannot be mounted, and is ignored by "\fBzfs mount -a\fR". This is similar to setting the "mountpoint" property to "\fBnone\fR", except
+that the dataset still has a normal "mountpoint" property which can be inherited. This allows datasets to be used solely as a mechanism to inherit properties. One use case is to have two logically separate datasets have the same mountpoint, so that the children of both datasets appear
+in the same directory, but may have different inherited characteristics. The default value is "\fBon\fR". 
+.sp
+This property is not inherited.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBxattr=\fBon\fR | \fBoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether extended attributes are enabled for this file system. The default value is "\fBon\fR".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBcopies=\fB1\fR | \fB2\fR | \fB3\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or raid-z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated
+file and dataset, changing the "used" property and counting against quotas and reservations.
+.sp
+Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the "\fB-o\fR copies=" option.
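+.sp
+For example (dataset name is illustrative), two copies of all data can be requested at creation time:
+.sp
+.in +2
+.nf
+# zfs create -o copies=2 pool/data
+.fi
+.in -2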
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBjailed=\fIon\fR | \fIoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether the dataset is managed from within a jail. The default value is "off".
+.RE
+
+.SS "iscsioptions"
+.LP
+This read-only property, which is hidden, is used by the \fBiSCSI\fR target daemon to store persistent information, such as the \fBIQN\fR. It cannot be viewed or modified using the \fBzfs\fR command. The contents are not intended for external consumers.
+.SS "Temporary Mount Point Properties"
+.LP
+When a file system is mounted, either through \fBmount\fR(1M) for legacy mounts or the "\fBzfs mount\fR" command for normal file systems,
+its mount options are set according to its properties. The correlation between properties and mount options is as follows:
+.sp
+.in +2
+.nf
+    PROPERTY                MOUNT OPTION
+    devices                 devices/nodevices
+    exec                    exec/noexec
+    readonly                ro/rw
+    setuid                  setuid/nosetuid
+    xattr                   xattr/noxattr
+.fi
+.in -2
+.sp
+
+.LP
+In addition, these options can be set on a per-mount basis using the \fB-o\fR option, without affecting the property that is stored on disk. The values specified on the command line override the values stored in the dataset. The \fB-nosuid\fR option is an alias for "nodevices,nosetuid".
+These properties are reported as "temporary" by the "\fBzfs get\fR" command. If the properties are changed while the dataset is mounted, the new setting overrides any temporary settings.
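+.LP
+For example (dataset name is illustrative), a file system can be mounted read-only for the duration of a single mount without changing its "readonly" property:
+.sp
+.in +2
+.nf
+# zfs mount -o ro pool/home
+.fi
+.in -2
+.sp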
+.SS "User Properties"
+.LP
+In addition to the standard native properties, \fBZFS\fR supports arbitrary user properties. User properties have no effect on \fBZFS\fR behavior, but applications or administrators can use them to annotate datasets.
+.LP
+User property names must contain a colon (":") character, to distinguish them from native properties. They might contain lowercase letters, numbers, and the following punctuation characters: colon (":"), dash ("-"), period ("."), and underscore
+("_"). The expected convention is that the property name is divided into two portions such as "\fImodule\fR:\fIproperty\fR", but this namespace is not enforced by \fBZFS\fR. User property names can be at most 256 characters,
+and cannot begin with a dash ("-").
+.LP
+When making programmatic use of user properties, it is strongly suggested to use a reversed \fBDNS\fR domain name for the \fImodule\fR component of property names to reduce the chance that two independently-developed packages use the same property name for
+different purposes. Property names beginning with "com.sun." are reserved for use by Sun Microsystems.
+.LP
+The values of user properties are arbitrary strings, are always inherited, and are never validated. All of the commands that operate on properties ("zfs list", "zfs get", "zfs set", etc.) can be used to manipulate both native properties and user properties.
+Use the "\fBzfs inherit\fR" command to clear a user property . If the property is not defined in any parent dataset, it is removed entirely. Property values are limited to 1024 characters.
+.SS "Volumes as Swap or Dump Devices"
+.LP
+To set up a swap area, create a \fBZFS\fR volume of a specific size and then enable swap on that device. For more information, see the EXAMPLES section.
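+.LP
+For example (pool, volume name, and size are illustrative), a 2 Gbyte volume can be created and then added as a swap device with \fBswap\fR(1M):
+.sp
+.in +2
+.nf
+# zfs create -V 2G pool/swap
+# swap -a /dev/zvol/dsk/pool/swap
+.fi
+.in -2
+.sp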
+.LP
+Do not swap to a file on a \fBZFS\fR file system. A \fBZFS\fR swap file configuration is not supported.
+.LP
+Using a \fBZFS\fR volume as a dump device is not supported.
+.SH SUBCOMMANDS
+.LP
+All subcommands that modify state are logged persistently to the pool in their original form.
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs ?\fR\fR
+.ad
+.sp .6
+.RS 4n
+Displays a help message.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs create\fR [[\fB-o\fR property=value]...] \fIfilesystem\fR\fR
+.ad
+.sp .6
+.RS 4n
+Creates a new \fBZFS\fR file system. The file system is automatically mounted according to the "mountpoint" property inherited from the parent.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR property=value\fR
+.ad
+.RS 21n
+.rt  
+Sets the specified property as if "\fBzfs set property=value\fR" was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An
+error results if the same property is specified in multiple \fB-o\fR options.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs create\fR [\fB-s\fR] [\fB-b\fR \fIblocksize\fR] [[\fB-o\fR property=value]...] \fB-V\fR \fIsize\fR \fIvolume\fR\fR
+.ad
+.sp .6
+.RS 4n
+Creates a volume of the given size. The volume is exported as a block device in \fB/dev/zvol/{dsk,rdsk}/\fIpath\fR\fR, where \fIpath\fR is the name of the volume in the \fBZFS\fR namespace. The size represents
+the logical size as exported by the device. By default, a reservation of equal size is created.
+.sp
+\fIsize\fR is automatically rounded up to the nearest 128 Kbytes to ensure that the volume has an integral number of blocks regardless of \fIblocksize\fR.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-s\fR\fR
+.ad
+.RS 21n
+.rt  
+Creates a sparse volume with no reservation. See "volsize" in the Native Properties section for more information about sparse volumes.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR property=value\fR
+.ad
+.RS 21n
+.rt  
+Sets the specified property as if "\fBzfs set property=value\fR" was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An
+error results if the same property is specified in multiple \fB-o\fR options.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-b\fR \fIblocksize\fR\fR
+.ad
+.RS 21n
+.rt  
+Equivalent to "\fB\fR\fB-o\fR \fBvolblocksize=\fIblocksize\fR\fR". If this option is specified in conjunction with "\fB\fR\fB-o\fR \fBvolblocksize\fR", the resulting
+behavior is undefined.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs destroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
+.ad
+.sp .6
+.RS 4n
+Destroys the given dataset. By default, the command unshares any file systems that are currently shared, unmounts any file systems that are currently mounted, and refuses to destroy a dataset that has active dependents (children, snapshots, clones).
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.RS 6n
+.rt  
+Recursively destroy all children. If a snapshot is specified, destroy all snapshots with this name in descendant file systems.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-R\fR\fR
+.ad
+.RS 6n
+.rt  
+Recursively destroy all dependents, including cloned file systems outside the target hierarchy. If a snapshot is specified, destroy all snapshots with this name in descendant file systems.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 6n
+.rt  
+Force an unmount of any file systems using the "\fBunmount -f\fR" command. This option has no effect on non-file systems or unmounted file systems.
+.RE
+
+Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use. 
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs clone\fR \fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR\fR
+.ad
+.sp .6
+.RS 4n
+Creates a clone of the given snapshot. See the "Clones" section for details. The target dataset can be located anywhere in the \fBZFS\fR hierarchy, and is created as the same type as the original.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs promote\fR \fIfilesystem\fR\fR
+.ad
+.sp .6
+.RS 4n
+Promotes a clone file system to no longer be dependent on its "origin" snapshot. This makes it possible to destroy the file system that the clone was created from. The clone parent-child dependency relationship is reversed, so that the "origin" file system
+becomes a clone of the specified file system. 
+.sp
+The snapshot that was cloned, and any snapshots previous to this snapshot, are now owned by the promoted clone. The space they use moves from the "origin" file system to the promoted clone, so enough space must be available to accommodate these snapshots. No new space is consumed
+by this operation, but the space accounting is adjusted. The promoted clone must not have any conflicting snapshot names of its own. The "\fBrename\fR" subcommand can be used to rename any conflicting snapshots.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs rename\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
+.ad
+.sp .6
+.RS 4n
+Renames the given dataset. The new target can be located anywhere in the \fBZFS\fR hierarchy, with the exception of snapshots. Snapshots can only be renamed within the parent file system or volume. When renaming a snapshot, the parent file system of the snapshot does
+not need to be specified as part of the second argument. Renamed file systems can inherit new mount points, in which case they are unmounted and remounted at the new mount point.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs snapshot\fR [\fB-r\fR] \fIfilesystem@name\fR|\fIvolume@name\fR\fR
+.ad
+.sp .6
+.RS 4n
+Creates a snapshot with the given name. See the "Snapshots" section for details.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.RS 6n
+.rt  
+Recursively create snapshots of all descendant datasets. Snapshots are taken atomically, so that all recursive snapshots correspond to the same moment in time.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs rollback\fR [\fB-rRf\fR] \fIsnapshot\fR\fR
+.ad
+.sp .6
+.RS 4n
+Roll back the given dataset to a previous snapshot. When a dataset is rolled back, all data that has changed since the snapshot is discarded, and the dataset reverts to the state at the time of the snapshot. By default, the command refuses to roll back to a snapshot other than
+the most recent one. In order to do so, all intermediate snapshots must be destroyed by specifying the \fB-r\fR option. The file system is unmounted and remounted, if necessary.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.RS 6n
+.rt  
+Recursively destroy any snapshots more recent than the one specified.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-R\fR\fR
+.ad
+.RS 6n
+.rt  
+Recursively destroy any more recent snapshots, as well as any clones of those snapshots.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 6n
+.rt  
+Force an unmount of any file systems using the "\fBunmount -f\fR" command. 
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs\fR \fBlist\fR [\fB-rH\fR] [\fB-o\fR \fIprop\fR[,\fIprop\fR]]... [\fB-t\fR \fItype\fR[,\fItype\fR]...] [\fB-s\fR \fIprop\fR]... [\fB-S\fR \fIprop\fR]... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR|\fI/pathname\fR|\fI./pathname\fR] ...\fR
+.ad
+.sp .6
+.RS 4n
+Lists the property information for the given datasets in tabular form. If specified, you can list property information by the absolute pathname or the relative pathname. By default, all datasets are displayed and contain the following fields:
+.sp
+.in +2
+.nf
+name,used,available,referenced,mountpoint
+.fi
+.in -2
+.sp
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-H\fR\fR
+.ad
+.RS 11n
+.rt  
+Used for scripting mode. Do not print headers, and separate fields by a single tab instead of arbitrary white space.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.RS 11n
+.rt  
+Recursively display any children of the dataset on the command line. 
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fIprop\fR\fR
+.ad
+.RS 11n
+.rt  
+A comma-separated list of properties to display. The property must be one of the properties described in the "Native Properties" section, or the special value "name" to display the dataset name.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-s\fR \fIprop\fR\fR
+.ad
+.RS 11n
+.rt  
+A property to use for sorting the output by column in ascending order based on the value of the property. The property must be one of the properties described in the "Properties" section, or the special value "name" to sort by the dataset name. Multiple
+properties can be specified at one time using multiple \fB-s\fR property options. Multiple \fB-s\fR options are evaluated from left to right in decreasing order of importance. 
+.sp
+The following is a list of sorting criteria:
+.RS +4
+.TP
+.ie t \(bu
+.el o
+Numeric types sort in numeric order.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+String types sort in alphabetical order.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+Datasets for which the property is inappropriate are sorted to the bottom, regardless of the specified ordering.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+If no sorting options are specified the existing behavior of "\fBzfs list\fR" is preserved.
+.RE
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-S\fR \fIprop\fR\fR
+.ad
+.RS 11n
+.rt  
+Same as the \fB-s\fR option, but sorts by property in descending order. 
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-t\fR \fItype\fR\fR
+.ad
+.RS 11n
+.rt  
+A comma-separated list of types to display, where "type" is one of "filesystem", "snapshot" or "volume". For example, specifying "\fB-t snapshot\fR" displays only snapshots.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs set\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR ...\fR
+.ad
+.sp .6
+.RS 4n
+Sets the property to the given value for each dataset. Only some properties can be edited. See the "Properties" section for more information on what properties can be set and acceptable values. Numeric values can be specified as exact values, or in a human-readable
+form with a suffix of "B", "K", "M", "G", "T", "P", "E", "Z" (for bytes, Kbytes, Mbytes, gigabytes, terabytes, petabytes, exabytes, or zettabytes, respectively). Properties cannot be set on snapshots.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs get\fR [\fB-rHp\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]...] [\fB-s\fR \fIsource\fR[,\fIsource\fR]...] \fIall\fR | \fIproperty\fR[,\fIproperty\fR]... \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...\fR
+.ad
+.sp .6
+.RS 4n
+Displays properties for the given datasets. If no datasets are specified, then the command displays properties for all datasets on the system. For each property, the following columns are displayed:
+.sp
+.in +2
+.nf
+    name      Dataset name
+    property  Property name
+    value     Property value
+    source    Property source. Can either be local, default,
+              temporary, inherited, or none (-).
+.fi
+.in -2
+.sp
+
+All columns are displayed by default, though this can be controlled by using the \fB-o\fR option. This command takes a comma-separated list of properties as described in the "Native Properties" and "User Properties" sections.
+.sp
+The special value "all" can be used to display all properties for the given dataset.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.RS 13n
+.rt  
+Recursively display properties for any children.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-H\fR\fR
+.ad
+.RS 13n
+.rt  
+Display output in a form more easily parsed by scripts. Any headers are omitted, and fields are explicitly separated by a single tab instead of an arbitrary amount of space.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fIfield\fR\fR
+.ad
+.RS 13n
+.rt  
+A comma-separated list of columns to display. "name,property,value,source" is the default value. 
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-s\fR \fIsource\fR\fR
+.ad
+.RS 13n
+.rt  
+A comma-separated list of sources to display. Those properties coming from a source other than those in this list are ignored. Each source must be one of the following: "local,default,inherited,temporary,none". The default value is all sources.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-p\fR\fR
+.ad
+.RS 13n
+.rt  
+Display numbers in parsable (exact) values.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs inherit\fR [\fB-r\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume\fR ...\fR
+.ad
+.sp .6
+.RS 4n
+Clears the specified property, causing it to be inherited from an ancestor. If no ancestor has the property set, then the default value is used. See the "Properties" section for a listing of default values, and details on which properties can be inherited.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.RS 6n
+.rt  
+Recursively inherit the given property for all children.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs mount\fR\fR
+.ad
+.sp .6
+.RS 4n
+Displays all \fBZFS\fR file systems currently mounted.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs mount\fR [\fB-o\fR \fIopts\fR] [\fB-O\fR] \fB-a\fR\fR
+.ad
+.sp .6
+.RS 4n
+Mounts all available \fBZFS\fR file systems. Invoked automatically as part of the boot process.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fIopts\fR\fR
+.ad
+.RS 11n
+.rt  
+An optional comma-separated list of mount options to use temporarily for the duration of the mount. See the "Temporary Mount Point Properties" section for details.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-O\fR\fR
+.ad
+.RS 11n
+.rt  
+Perform an overlay mount. See \fBmount\fR(1M) for more information.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs mount\fR [\fB-o\fR \fIopts\fR] [\fB-O\fR] \fIfilesystem\fR\fR
+.ad
+.sp .6
+.RS 4n
+Mounts a specific \fBZFS\fR file system. This is typically not necessary, as file systems are automatically mounted when they are created or the mountpoint property has changed. See the "Mount Points" section for details.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fIopts\fR\fR
+.ad
+.RS 11n
+.rt  
+An optional comma-separated list of mount options to use temporarily for the duration of the mount. See the "Temporary Mount Point Properties" section for details.
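+.sp
+For example, a file system could be mounted read-only for the duration of the mount with an illustrative command such as:
+.sp
+.in +2
+.nf
+# zfs mount -o ro pool/home
+.fi
+.in -2
+.sp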
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-O\fR\fR
+.ad
+.RS 11n
+.rt  
+Perform an overlay mount. See \fBmount\fR(1M) for more information.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs unmount\fR \fB-a\fR\fR
+.ad
+.sp .6
+.RS 4n
+Unmounts all currently mounted \fBZFS\fR file systems. Invoked automatically as part of the shutdown process.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs unmount\fR [\fB-f\fR] \fIfilesystem\fR|\fImountpoint\fR\fR
+.ad
+.sp .6
+.RS 4n
+Unmounts the given file system. The command can also be given a path to a \fBZFS\fR file system mount point on the system.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 6n
+.rt  
+Forcefully unmount the file system, even if it is currently in use.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs share\fR \fB-a\fR\fR
+.ad
+.sp .6
+.RS 4n
+Shares all available \fBZFS\fR file systems. This is invoked automatically as part of the boot process.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs share\fR \fIfilesystem\fR\fR
+.ad
+.sp .6
+.RS 4n
+Shares a specific \fBZFS\fR file system according to the "sharenfs" property. File systems are shared when the "sharenfs" property is set.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs unshare\fR \fB-a\fR\fR
+.ad
+.sp .6
+.RS 4n
+Unshares all currently shared \fBZFS\fR file systems. This is invoked automatically as part of the shutdown process.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs unshare\fR [\fB-F\fR] \fIfilesystem\fR|\fImountpoint\fR\fR
+.ad
+.sp .6
+.RS 4n
+Unshares the given file system. The command can also be given a path to a \fBZFS\fR file system shared on the system.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-F\fR\fR
+.ad
+.RS 6n
+.rt  
+Forcefully unshare the file system, even if it is currently in use.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs send\fR [\fB-i\fR \fIsnapshot1\fR] \fIsnapshot2\fR\fR
+.ad
+.sp .6
+.RS 4n
+Creates a stream representation of \fIsnapshot2\fR, which is written to standard output. The output can be redirected to a file or to a different system (for example, using \fBssh\fR(1)). By default, a full stream is generated.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-i\fR \fIsnapshot1\fR\fR
+.ad
+.RS 16n
+.rt  
+Generate an incremental stream from \fIsnapshot1\fR to \fIsnapshot2\fR. The incremental source \fIsnapshot1\fR can be specified as the last component of the snapshot name (for example, the part after the "@"),
+and it is assumed to be from the same file system as \fIsnapshot2\fR.
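+.sp
+For example, an incremental stream between two snapshots of the same file system could be generated with an illustrative command such as (the snapshot names and output file are hypothetical):
+.sp
+.in +2
+.nf
+# zfs send -i yesterday pool/home/bob@today > /backup/bob.incr
+.fi
+.in -2
+.sp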
+.RE
+
+.RE
+
+.LP
+The format of the stream is evolving. No backwards compatibility is guaranteed. You may not be able to receive your streams on future versions of \fBZFS\fR.
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs receive\fR [\fB-vnF\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
+.ad
+.br
+.na
+\fB\fBzfs receive\fR [\fB-vnF\fR] \fB-d\fR \fIfilesystem\fR\fR
+.ad
+.sp .6
+.RS 4n
+Creates a snapshot whose contents are as specified in the stream provided on standard input. If a full stream is received, then a new file system is created as well. Streams are created using the "\fBzfs send\fR" subcommand, which by default creates a full
+stream. "\fBzfs recv\fR" can be used as an alias for "\fBzfs receive\fR".
+.sp
+If an incremental stream is received, then the destination file system must already exist, and its most recent snapshot must match the incremental stream's source. The destination file system and all of its child file systems are unmounted and cannot be accessed during the receive operation.
+.sp
+The name of the snapshot (and file system, if a full stream is received) that this subcommand creates depends on the argument type and the \fB-d\fR option.
+.sp
+If the argument is a snapshot name, the specified \fIsnapshot\fR is created. If the argument is a file system or volume name, a snapshot with the same name as the sent snapshot is created within the specified \fIfilesystem\fR or \fIvolume\fR.
+If the \fB-d\fR option is specified, the snapshot name is determined by appending the sent snapshot's name to the specified \fIfilesystem\fR, and any file systems required within the specified one are created.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-d\fR\fR
+.ad
+.RS 6n
+.rt  
+Use the name of the sent snapshot to determine the name of the new snapshot as described in the paragraph above.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-v\fR\fR
+.ad
+.RS 6n
+.rt  
+Print verbose information about the stream and the time required to perform the receive operation.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-n\fR\fR
+.ad
+.RS 6n
+.rt  
+Do not actually receive the stream. This can be useful in conjunction with the \fB-v\fR option to determine what name the receive operation would use.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-F\fR\fR
+.ad
+.RS 6n
+.rt  
+Force a rollback of the \fIfilesystem\fR to the most recent snapshot before performing the receive operation.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs jail\fR \fIjailid\fR \fIfilesystem\fR\fR
+.ad
+.sp .6
+.RS 4n
+Attaches the given file system to the given jail. Once attached, the file system tree can be managed from within the jail, provided its "\fBjailed\fR" property has been set.
+To use this functionality, sysctl \fBsecurity.jail.enforce_statfs\fR should be set to 0 and sysctl \fBsecurity.jail.mount_allowed\fR should be set to 1.
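+.sp
+An illustrative sequence, assuming a jail with ID 1 already exists and a hypothetical dataset "tank/jails/j1", might look like:
+.sp
+.in +2
+.nf
+# sysctl security.jail.enforce_statfs=0
+# sysctl security.jail.mount_allowed=1
+# zfs set jailed=on tank/jails/j1
+# zfs jail 1 tank/jails/j1
+.fi
+.in -2
+.sp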
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs unjail\fR \fIjailid\fR \fIfilesystem\fR\fR
+.ad
+.sp .6
+.RS 4n
+Detaches the given file system from the given jail.
+.RE
+
+.SH EXAMPLES
+.LP
+\fBExample 1 \fRCreating a ZFS File System Hierarchy
+.LP
+The following commands create a file system named "\fBpool/home\fR" and a file system named "\fBpool/home/bob\fR". The mount point "\fB/export/home\fR" is set for the parent file system, and automatically inherited
+by the child file system.
+
+.sp
+.in +2
+.nf
+# zfs create pool/home
+# zfs set mountpoint=/export/home pool/home
+# zfs create pool/home/bob
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 2 \fRCreating a ZFS Snapshot
+.LP
+The following command creates a snapshot named "yesterday". This snapshot is mounted on demand in the ".zfs/snapshot" directory at the root of the "\fBpool/home/bob\fR" file system.
+
+.sp
+.in +2
+.nf
+# zfs snapshot pool/home/bob@yesterday
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 3 \fRTaking and destroying multiple snapshots
+.LP
+The following command creates snapshots named "\fByesterday\fR" of "\fBpool/home\fR" and all of its descendant file systems. Each snapshot is mounted on demand in the ".zfs/snapshot" directory at the root of its file system. The
+second command destroys the newly created snapshots.
+
+.sp
+.in +2
+.nf
+# \fBzfs snapshot -r pool/home@yesterday\fR
+\fB# zfs destroy -r pool/home@yesterday\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 4 \fRTurning Off Compression
+.LP
+The following commands turn compression off for all file systems under "\fBpool/home\fR", but explicitly turn it on for "\fBpool/home/anne\fR".
+
+.sp
+.in +2
+.nf
+\fB# zfs set compression=off pool/home
+# zfs set compression=on pool/home/anne\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 5 \fRListing ZFS Datasets
+.LP
+The following command lists all active file systems and volumes in the system.
+
+.sp
+.in +2
+.nf
+\fB# zfs list\fR
+
+
+  NAME                      USED  AVAIL  REFER  MOUNTPOINT
+  pool                      100G   60G       -  /pool
+  pool/home                 100G   60G       -  /export/home
+  pool/home/bob              40G   60G     40G  /export/home/bob
+  pool/home/bob@yesterday     3M     -     40G  -
+  pool/home/anne             60G   60G     40G  /export/home/anne
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 6 \fRSetting a Quota on a ZFS File System
+.LP
+The following command sets a quota of 50 Gbytes for "\fBpool/home/bob\fR".
+
+.sp
+.in +2
+.nf
+\fB# zfs set quota=50G pool/home/bob\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 7 \fRListing ZFS Properties
+.LP
+The following command lists all properties for "\fBpool/home/bob\fR".
+
+.sp
+.in +2
+.nf
+\fB# zfs get all pool/home/bob\fR
+
+
+  NAME           PROPERTY       VALUE                  SOURCE
+  pool/home/bob  type           filesystem             -
+  pool/home/bob  creation       Fri Feb 23 14:20 2007  -
+  pool/home/bob  used           24.5K                  -
+  pool/home/bob  available      50.0G                  -
+  pool/home/bob  referenced     24.5K                  -
+  pool/home/bob  compressratio  1.00x                  -
+  pool/home/bob  mounted        yes                    -
+  pool/home/bob  quota          50G                    local
+  pool/home/bob  reservation    none                   default
+  pool/home/bob  recordsize     128K                   default
+  pool/home/bob  mountpoint     /pool/home/bob         default
+  pool/home/bob  sharenfs       off                    default
+  pool/home/bob  shareiscsi     off                    default
+  pool/home/bob  checksum       on                     default
+  pool/home/bob  compression    off                    default
+  pool/home/bob  atime          on                     default
+  pool/home/bob  devices        on                     default
+  pool/home/bob  exec           on                     default
+  pool/home/bob  setuid         on                     default
+  pool/home/bob  readonly       off                    default
+  pool/home/bob  zoned          off                    default
+  pool/home/bob  snapdir        hidden                 default
+  pool/home/bob  aclmode        groupmask              default
+  pool/home/bob  aclinherit     secure                 default
+  pool/home/bob  canmount       on                     default
+  pool/home/bob  xattr          on                     default
+
+   
+.fi
+.in -2
+.sp
+
+.LP
+The following command gets a single property value.
+
+.sp
+.in +2
+.nf
+\fB# zfs get -H -o value compression pool/home/bob\fR
+on
+.fi
+.in -2
+.sp
+
+.LP
+The following command lists all properties with local settings for "\fBpool/home/bob\fR".
+
+.sp
+.in +2
+.nf
+\fB# zfs get -r -s local -o name,property,value all pool/home/bob\fR
+
+  NAME             PROPERTY      VALUE
+  pool             compression   on
+  pool/home        checksum      off
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 8 \fRRolling Back a ZFS File System
+.LP
+The following command reverts the contents of "\fBpool/home/anne\fR" to the snapshot named "\fByesterday\fR", deleting all intermediate snapshots.
+
+.sp
+.in +2
+.nf
+\fB# zfs rollback -r pool/home/anne@yesterday\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 9 \fRCreating a ZFS Clone
+.LP
+The following command creates a writable file system whose initial contents are the same as "\fBpool/home/bob@yesterday\fR".
+
+.sp
+.in +2
+.nf
+\fB# zfs clone pool/home/bob@yesterday pool/clone\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 10 \fRPromoting a ZFS Clone
+.LP
+The following commands illustrate how to test out changes to a file system, and then replace the original file system with the changed one, using clones, clone promotion, and renaming:
+
+.sp
+.in +2
+.nf
+\fB# zfs create pool/project/production\fR
+ populate /pool/project/production with data
+\fB# zfs snapshot pool/project/production@today
+# zfs clone pool/project/production@today pool/project/beta\fR
+ make changes to /pool/project/beta and test them
+\fB# zfs promote pool/project/beta
+# zfs rename pool/project/production pool/project/legacy
+# zfs rename pool/project/beta pool/project/production\fR
+ once the legacy version is no longer needed, it can be
+ destroyed
+\fB# zfs destroy pool/project/legacy\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 11 \fRInheriting ZFS Properties
+.LP
+The following command causes "\fBpool/home/bob\fR" and "\fBpool/home/anne\fR" to inherit the "checksum" property from their parent.
+
+.sp
+.in +2
+.nf
+\fB# zfs inherit checksum pool/home/bob pool/home/anne\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 12 \fRRemotely Replicating ZFS Data
+.LP
+The following commands send a full stream and then an incremental stream to a remote machine, restoring them into "\fBpoolB/received/fs@a\fR" and "\fBpoolB/received/fs@b\fR", respectively. "\fBpoolB\fR" must contain
+the file system "\fBpoolB/received\fR", and must not initially contain "\fBpoolB/received/fs\fR".
+
+.sp
+.in +2
+.nf
+# zfs send pool/fs@a | \e
+  ssh host zfs receive poolB/received/fs@a
+# zfs send -i a pool/fs@b | ssh host \e
+  zfs receive poolB/received/fs
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 13 \fRUsing the zfs receive -d Option
+.LP
+The following command sends a full stream of "\fBpoolA/fsA/fsB@snap\fR" to a remote machine, receiving it into "\fBpoolB/received/fsA/fsB@snap\fR". The "\fBfsA/fsB@snap\fR" portion of the received snapshot's name
+is determined from the name of the sent snapshot. "\fBpoolB\fR" must contain the file system "\fBpoolB/received\fR". If "\fBpoolB/received/fsA\fR" does not exist, it will be created as an empty file system.
+
+.sp
+.in +2
+.nf
+\fB# zfs send poolA/fsA/fsB@snap | \e
+  ssh host zfs receive -d poolB/received
+   \fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 14 \fRCreating a ZFS Volume as a Swap Device
+.LP
+The following example shows how to create a 5-Gbyte ZFS volume and then add the volume as a swap device.
+
+.sp
+.in +2
+.nf
+\fB# zfs create  -V 5gb tank/vol
+# swap -a /dev/zvol/dsk/tank/vol\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 15 \fRSetting User Properties
+.LP
+The following example sets the user-defined "com.example:department" property for a dataset.
+
+.sp
+.in +2
+.nf
+\fB# zfs set com.example:department=12345 tank/accounting\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 16 \fRCreating a ZFS Volume as an iSCSI Target Device
+.LP
+The following example shows how to create a \fBZFS\fR volume as an \fBiSCSI\fR target. 
+
+.sp
+.in +2
+.nf
+\fB# zfs create -V 2g pool/volumes/vol1
+# zfs set shareiscsi=on pool/volumes/vol1
+# iscsitadm list target\fR
+Target: pool/volumes/vol1
+iSCSI Name: 
+iqn.1986-03.com.sun:02:7b4b02a6-3277-eb1b-e686-a24762c52a8c
+Connections: 0
+.fi
+.in -2
+.sp
+
+.LP
+After the \fBiSCSI\fR target is created, set up the \fBiSCSI\fR initiator. For more information about the Solaris \fBiSCSI\fR initiator, see the Solaris Administration Guide: Devices and File Systems.
+.SH EXIT STATUS
+.LP
+The following exit values are returned:
+.sp
+.ne 2
+.mk
+.na
+\fB\fB0\fR\fR
+.ad
+.RS 5n
+.rt  
+Successful completion. 
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB1\fR\fR
+.ad
+.RS 5n
+.rt  
+An error occurred.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB2\fR\fR
+.ad
+.RS 5n
+.rt  
+Invalid command line options were specified.
+.RE
+
+.SH ATTRIBUTES
+.LP
+See \fBattributes\fR(5) for descriptions of the following attributes:
+.sp
+
+.sp
+.TS
+tab(:) box;
+cw(2.75i) |cw(2.75i)
+lw(2.75i) |lw(2.75i)
+.
+ATTRIBUTE TYPE:ATTRIBUTE VALUE
+_
+Availability:SUNWzfsu
+_
+Interface Stability:Evolving
+.TE
+
+.SH SEE ALSO
+.LP
+\fBgzip\fR(1), \fBssh\fR(1), \fBmount\fR(1M), \fBshare\fR(1M), \fBunshare\fR(1M), \fBzonecfg\fR(1M), \fBzpool\fR(1M), \fBchmod\fR(2), \fBstat\fR(2), \fBfsync\fR(3c), \fBdfstab\fR(4), \fBattributes\fR(5)
--- /dev/null
+++ cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
@@ -0,0 +1,511 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_ZFS_CONTEXT_H
+#define	_SYS_ZFS_CONTEXT_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#define	_SYS_MUTEX_H
+#define	_SYS_RWLOCK_H
+#define	_SYS_CONDVAR_H
+#define	_SYS_SYSTM_H
+#define	_SYS_DEBUG_H
+#define	_SYS_T_LOCK_H
+#define	_SYS_VNODE_H
+#define	_SYS_VFS_H
+#define	_SYS_SUNDDI_H
+#define	_SYS_CALLB_H
+#define	_SYS_SCHED_H_
+
+#include <solaris.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <strings.h>
+#include <thread.h>
+#include <assert.h>
+#include <limits.h>
+#include <dirent.h>
+#include <time.h>
+#include <math.h>
+#include <umem.h>
+#include <vmem.h>
+#include <fsshare.h>
+#include <sys/note.h>
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/sysmacros.h>
+#include <sys/bitmap.h>
+#include <sys/resource.h>
+#include <sys/byteorder.h>
+#include <sys/list.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/zfs_debug.h>
+#include <sys/debug.h>
+#include <sys/sdt.h>
+#include <sys/kstat.h>
+#include <sys/kernel.h>
+#include <sys/disk.h>
+#include <machine/atomic.h>
+
+#define	ZFS_EXPORTS_PATH	"/etc/zfs/exports"
+
+/*
+ * Debugging
+ */
+
+/*
+ * Note that we are not using the debugging levels.
+ */
+
+#define	CE_CONT		0	/* continuation		*/
+#define	CE_NOTE		1	/* notice		*/
+#define	CE_WARN		2	/* warning		*/
+#define	CE_PANIC	3	/* panic		*/
+#define	CE_IGNORE	4	/* print nothing	*/
+
+/*
+ * ZFS debugging
+ */
+
+#define	ZFS_LOG(...)	do {  } while (0)
+
+typedef u_longlong_t      rlim64_t;
+#define	RLIM64_INFINITY	((rlim64_t)-3)
+
+#ifdef ZFS_DEBUG
+extern void dprintf_setup(int *argc, char **argv);
+#endif /* ZFS_DEBUG */
+
+extern void cmn_err(int, const char *, ...);
+extern void vcmn_err(int, const char *, __va_list);
+extern void panic(const char *, ...);
+extern void vpanic(const char *, __va_list);
+
+/* This definition is copied from assert.h. */
+#if defined(__STDC__)
+#if __STDC_VERSION__ - 0 >= 199901L
+#define	verify(EX) (void)((EX) || \
+	(__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
+#else
+#define	verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
+#endif /* __STDC_VERSION__ - 0 >= 199901L */
+#else
+#define	verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
+#endif	/* __STDC__ */
+
+
+#define	VERIFY	verify
+#define	ASSERT	assert
+
+extern void __assert(const char *, const char *, int);
+
+#ifdef lint
+#define	VERIFY3_IMPL(x, y, z, t)	if (x == z) ((void)0)
+#else
+/* BEGIN CSTYLED */
+#define	VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \
+	const TYPE __left = (TYPE)(LEFT); \
+	const TYPE __right = (TYPE)(RIGHT); \
+	if (!(__left OP __right)) { \
+		char *__buf = alloca(256); \
+		(void) snprintf(__buf, 256, "%s %s %s (0x%llx %s 0x%llx)", \
+			#LEFT, #OP, #RIGHT, \
+			(u_longlong_t)__left, #OP, (u_longlong_t)__right); \
+		__assert(__buf, __FILE__, __LINE__); \
+	} \
+_NOTE(CONSTCOND) } while (0)
+/* END CSTYLED */
+#endif /* lint */
+
+#define	VERIFY3S(x, y, z)	VERIFY3_IMPL(x, y, z, int64_t)
+#define	VERIFY3U(x, y, z)	VERIFY3_IMPL(x, y, z, uint64_t)
+#define	VERIFY3P(x, y, z)	VERIFY3_IMPL(x, y, z, uintptr_t)
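+/*
+ * Illustrative (hypothetical) use: VERIFY3U(size, <=, bufsize) expands
+ * through VERIFY3_IMPL() and, when the comparison fails, calls __assert()
+ * with a message of the form "size <= bufsize (0x... <= 0x...)".
+ */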
+
+#ifdef NDEBUG
+#define	ASSERT3S(x, y, z)	((void)0)
+#define	ASSERT3U(x, y, z)	((void)0)
+#define	ASSERT3P(x, y, z)	((void)0)
+#else
+#define	ASSERT3S(x, y, z)	VERIFY3S(x, y, z)
+#define	ASSERT3U(x, y, z)	VERIFY3U(x, y, z)
+#define	ASSERT3P(x, y, z)	VERIFY3P(x, y, z)
+#endif
+
+/*
+ * DTrace SDT probes have different signatures in userland than they do in
+ * the kernel.  If kernel code that uses them is compiled into libzpool,
+ * re-define them out of existence in favor of their userland counterparts.
+ */
+
+#ifdef DTRACE_PROBE1
+#undef	DTRACE_PROBE1
+#define	DTRACE_PROBE1(a, b, c)	((void)0)
+#endif	/* DTRACE_PROBE1 */
+
+#ifdef DTRACE_PROBE2
+#undef	DTRACE_PROBE2
+#define	DTRACE_PROBE2(a, b, c, d, e)	((void)0)
+#endif	/* DTRACE_PROBE2 */
+
+#ifdef DTRACE_PROBE3
+#undef	DTRACE_PROBE3
+#define	DTRACE_PROBE3(a, b, c, d, e, f, g)	((void)0)
+#endif	/* DTRACE_PROBE3 */
+
+#ifdef DTRACE_PROBE4
+#undef	DTRACE_PROBE4
+#define	DTRACE_PROBE4(a, b, c, d, e, f, g, h, i)	((void)0)
+#endif	/* DTRACE_PROBE4 */
+
+/*
+ * Threads
+ */
+#define	curthread	((void *)(uintptr_t)thr_self())
+
+typedef struct kthread kthread_t;
+
+#define	thread_create(stk, stksize, func, arg, len, pp, state, pri)	\
+	zk_thread_create(func, arg)
+#define	thread_exit() thr_exit(NULL)
+
+extern kthread_t *zk_thread_create(void (*func)(), void *arg);
+
+#define	issig(why)	(FALSE)
+#define	ISSIG(thr, why)	(FALSE)
+
+/*
+ * Mutexes
+ */
+typedef struct kmutex {
+	void	*m_owner;
+	mutex_t	m_lock;
+} kmutex_t;
+
+#define	MUTEX_DEFAULT	USYNC_THREAD
+#undef MUTEX_HELD
+#define	MUTEX_HELD(m)	((m)->m_owner == curthread)
+
+/*
+ * Argh -- we have to get cheesy here because the kernel and userland
+ * have different signatures for the same routine.
+ */
+//extern int _mutex_init(mutex_t *mp, int type, void *arg);
+//extern int _mutex_destroy(mutex_t *mp);
+
+#define	mutex_init(mp, b, c, d)		zmutex_init((kmutex_t *)(mp))
+#define	mutex_destroy(mp)		zmutex_destroy((kmutex_t *)(mp))
+
+extern void zmutex_init(kmutex_t *mp);
+extern void zmutex_destroy(kmutex_t *mp);
+extern void mutex_enter(kmutex_t *mp);
+extern void mutex_exit(kmutex_t *mp);
+extern int mutex_tryenter(kmutex_t *mp);
+extern void *mutex_owner(kmutex_t *mp);
+
+/*
+ * RW locks
+ */
+typedef struct krwlock {
+	int		rw_count;
+	void		*rw_owner;
+	rwlock_t	rw_lock;
+} krwlock_t;
+
+typedef int krw_t;
+
+#define	RW_READER	0
+#define	RW_WRITER	1
+#define	RW_DEFAULT	USYNC_THREAD
+
+#undef RW_READ_HELD
+
+#undef RW_WRITE_HELD
+#define	RW_WRITE_HELD(x)	((x)->rw_owner == curthread)
+#define	RW_LOCK_HELD(x)		rw_lock_held(x)
+
+extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg);
+extern void rw_destroy(krwlock_t *rwlp);
+extern void rw_enter(krwlock_t *rwlp, krw_t rw);
+extern int rw_tryenter(krwlock_t *rwlp, krw_t rw);
+extern int rw_tryupgrade(krwlock_t *rwlp);
+extern void rw_exit(krwlock_t *rwlp);
+extern int rw_lock_held(krwlock_t *rwlp);
+#define	rw_downgrade(rwlp) do { } while (0)
+
+/*
+ * Condition variables
+ */
+typedef cond_t kcondvar_t;
+
+#define	CV_DEFAULT	USYNC_THREAD
+
+extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg);
+extern void cv_destroy(kcondvar_t *cv);
+extern void cv_wait(kcondvar_t *cv, kmutex_t *mp);
+extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
+extern void cv_signal(kcondvar_t *cv);
+extern void cv_broadcast(kcondvar_t *cv);
+
+/*
+ * Kernel memory
+ */
+#define	KM_SLEEP		UMEM_NOFAIL
+#define	KM_NOSLEEP		UMEM_DEFAULT
+#define	KMC_NODEBUG		UMC_NODEBUG
+#define	kmem_alloc(_s, _f)	umem_alloc(_s, _f)
+#define	kmem_zalloc(_s, _f)	umem_zalloc(_s, _f)
+#define	kmem_free(_b, _s)	umem_free(_b, _s)
+#define	kmem_size()		(physmem * PAGESIZE)
+#define	kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \
+	umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i)
+#define	kmem_cache_destroy(_c)	umem_cache_destroy(_c)
+#define	kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f)
+#define	kmem_cache_free(_c, _b)	umem_cache_free(_c, _b)
+#define	kmem_debugging()	0
+#define	kmem_cache_reap_now(c)
+
+typedef umem_cache_t kmem_cache_t;
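+/*
+ * Illustrative note: with the mapping above, a kernel-style call such as
+ * kmem_zalloc(len, KM_SLEEP) made by ZFS code compiled into libzpool becomes
+ * umem_zalloc(len, UMEM_NOFAIL) and therefore never returns NULL, while
+ * KM_NOSLEEP allocations map to UMEM_DEFAULT and may fail.
+ */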
+
+/*
+ * Task queues
+ */
+typedef struct taskq taskq_t;
+typedef uintptr_t taskqid_t;
+typedef void (task_func_t)(void *);
+
+#define	TASKQ_PREPOPULATE	0x0001
+#define	TASKQ_CPR_SAFE		0x0002	/* Use CPR safe protocol */
+#define	TASKQ_DYNAMIC		0x0004	/* Use dynamic thread scheduling */
+
+#define	TQ_SLEEP	KM_SLEEP	/* Can block for memory */
+#define	TQ_NOSLEEP	KM_NOSLEEP	/* cannot block for memory; may fail */
+#define	TQ_NOQUEUE	0x02	/* Do not enqueue if can't dispatch */
+
+extern taskq_t	*taskq_create(const char *, int, pri_t, int, int, uint_t);
+extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
+extern void	taskq_destroy(taskq_t *);
+extern void	taskq_wait(taskq_t *);
+extern int	taskq_member(taskq_t *, void *);
+
+/*
+ * vnodes
+ */
+typedef struct vnode {
+	uint64_t	v_size;
+	int		v_fd;
+	char		*v_path;
+} vnode_t;
+
+typedef struct vattr {
+	uint_t		va_mask;	/* bit-mask of attributes */
+	u_offset_t	va_size;	/* file size in bytes */
+} vattr_t;
+
+#define	AT_TYPE		0x0001
+#define	AT_MODE		0x0002
+#define	AT_UID		0x0004
+#define	AT_GID		0x0008
+#define	AT_FSID		0x0010
+#define	AT_NODEID	0x0020
+#define	AT_NLINK	0x0040
+#define	AT_SIZE		0x0080
+#define	AT_ATIME	0x0100
+#define	AT_MTIME	0x0200
+#define	AT_CTIME	0x0400
+#define	AT_RDEV		0x0800
+#define	AT_BLKSIZE	0x1000
+#define	AT_NBLOCKS	0x2000
+#define	AT_SEQ		0x8000
+
+#define	CRCREAT		0
+
+#define	VOP_CLOSE(vp, f, c, o, cr)	0
+#define	VOP_PUTPAGE(vp, of, sz, fl, cr)	0
+#define	VOP_GETATTR(vp, vap, fl, cr)	((vap)->va_size = (vp)->v_size, 0)
+
+#define	VOP_FSYNC(vp, f, cr)	fsync((vp)->v_fd)
+
+#define	VN_RELE(vp)	vn_close(vp)
+
+extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp,
+    int x2, int x3);
+extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp,
+    int x2, int x3, vnode_t *vp);
+extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len,
+    offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp);
+extern void vn_close(vnode_t *vp);
+
+#define	vn_remove(path, x1, x2)		remove(path)
+#define	vn_rename(from, to, seg)	rename((from), (to))
+#define	vn_is_readonly(vp)		B_FALSE
+
+extern vnode_t *rootdir;
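+/*
+ * Illustrative note: in this userland emulation a vnode_t simply wraps an
+ * open file descriptor, so VOP_FSYNC(vp, f, cr) above is just
+ * fsync(vp->v_fd) and VN_RELE() closes the descriptor via vn_close().
+ */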
+
+#include <sys/file.h>		/* for FREAD, FWRITE, etc */
+#define	FTRUNC	O_TRUNC
+
+/*
+ * Random stuff
+ */
+#define	lbolt	(gethrtime() >> 23)
+#define	lbolt64	(gethrtime() >> 23)
+//#define	hz	119	/* frequency when using gethrtime() >> 23 for lbolt */
+
+extern void delay(clock_t ticks);
+
+#define	gethrestime_sec() time(NULL)
+
+#define	max_ncpus	64
+
+#define	minclsyspri	60
+#define	maxclsyspri	99
+
+#define	CPU_SEQID	(thr_self() & (max_ncpus - 1))
+
+#define	kcred		NULL
+#define	CRED()		NULL
+
+extern uint64_t physmem;
+
+extern int highbit(ulong_t i);
+extern int random_get_bytes(uint8_t *ptr, size_t len);
+extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
+
+extern void kernel_init(int);
+extern void kernel_fini(void);
+
+struct spa;
+extern void nicenum(uint64_t num, char *buf);
+extern void show_pool_stats(struct spa *);
+
+typedef struct callb_cpr {
+	kmutex_t	*cc_lockp;
+} callb_cpr_t;
+
+#define	CALLB_CPR_INIT(cp, lockp, func, name)	{		\
+	(cp)->cc_lockp = lockp;					\
+}
+
+#define	CALLB_CPR_SAFE_BEGIN(cp) {				\
+	ASSERT(MUTEX_HELD((cp)->cc_lockp));			\
+}
+
+#define	CALLB_CPR_SAFE_END(cp, lockp) {				\
+	ASSERT(MUTEX_HELD((cp)->cc_lockp));			\
+}
+
+#define	CALLB_CPR_EXIT(cp) {					\
+	ASSERT(MUTEX_HELD((cp)->cc_lockp));			\
+	mutex_exit((cp)->cc_lockp);				\
+}
+
+#define	zone_dataset_visible(x, y)	(1)
+#define	INGLOBALZONE(z)			(1)
+
+/*
+ * Hostname information
+ */
+extern struct utsname utsname;
+extern char hw_serial[];
+extern int ddi_strtoul(const char *str, char **nptr, int base,
+    unsigned long *result);
+
+/* ZFS Boot Related stuff. */
+
+struct _buf {
+	intptr_t	_fd;
+};
+
+struct bootstat {
+	uint64_t st_size;
+};
+
+extern struct _buf *kobj_open_file(char *name);
+extern int kobj_read_file(struct _buf *file, char *buf, unsigned size,
+    unsigned off);
+extern void kobj_close_file(struct _buf *file);
+extern int kobj_get_filesize(struct _buf *file, uint64_t *size);
+/* Random compatibility stuff. */
+#define	lbolt	(gethrtime() >> 23)
+#define	lbolt64	(gethrtime() >> 23)
+
+extern int hz;
+extern uint64_t physmem;
+
+#define	gethrestime_sec()	time(NULL)
+
+#define	open64(...)		open(__VA_ARGS__)
+#define	pread64(d, p, n, o)	pread(d, p, n, o)
+#define	pwrite64(d, p, n, o)	pwrite(d, p, n, o)
+#define	readdir64(d)		readdir(d)
+#define	SIGPENDING(td)		(0)
+#define	root_mount_wait()	do { } while (0)
+#define	root_mounted()		(1)
+
+struct file {
+	void *dummy;
+};
+
+#define	FCREAT	O_CREAT
+#define	FOFFMAX	0x0
+
+#define	SX_SYSINIT(name, lock, desc)
+
+#define	SYSCTL_DECL(...)
+#define	SYSCTL_NODE(...)
+#define	SYSCTL_INT(...)
+#define	SYSCTL_ULONG(...)
+#ifdef TUNABLE_INT
+#undef TUNABLE_INT
+#undef TUNABLE_ULONG
+#endif
+#define	TUNABLE_INT(...)
+#define	TUNABLE_ULONG(...)
+
+/* Errors */
+
+#ifndef	ERESTART
+#define	ERESTART	(-1)
+#endif
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_ZFS_CONTEXT_H */

