[Midnightbsd-cvs] src [10719] trunk/usr.sbin/bhyve: add bhyve

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Sat Jun 9 17:54:02 EDT 2018


Revision: 10719
          http://svnweb.midnightbsd.org/src/?rev=10719
Author:   laffer1
Date:     2018-06-09 17:54:00 -0400 (Sat, 09 Jun 2018)
Log Message:
-----------
add bhyve

Added Paths:
-----------
    trunk/usr.sbin/bhyve/
    trunk/usr.sbin/bhyve/Makefile
    trunk/usr.sbin/bhyve/acpi.c
    trunk/usr.sbin/bhyve/acpi.h
    trunk/usr.sbin/bhyve/ahci.h
    trunk/usr.sbin/bhyve/atkbdc.c
    trunk/usr.sbin/bhyve/bhyve.8
    trunk/usr.sbin/bhyve/bhyverun.c
    trunk/usr.sbin/bhyve/bhyverun.h
    trunk/usr.sbin/bhyve/block_if.c
    trunk/usr.sbin/bhyve/block_if.h
    trunk/usr.sbin/bhyve/bootrom.c
    trunk/usr.sbin/bhyve/bootrom.h
    trunk/usr.sbin/bhyve/consport.c
    trunk/usr.sbin/bhyve/dbgport.c
    trunk/usr.sbin/bhyve/dbgport.h
    trunk/usr.sbin/bhyve/fwctl.c
    trunk/usr.sbin/bhyve/fwctl.h
    trunk/usr.sbin/bhyve/inout.c
    trunk/usr.sbin/bhyve/inout.h
    trunk/usr.sbin/bhyve/ioapic.c
    trunk/usr.sbin/bhyve/ioapic.h
    trunk/usr.sbin/bhyve/mem.c
    trunk/usr.sbin/bhyve/mem.h
    trunk/usr.sbin/bhyve/mevent.c
    trunk/usr.sbin/bhyve/mevent.h
    trunk/usr.sbin/bhyve/mevent_test.c
    trunk/usr.sbin/bhyve/mptbl.c
    trunk/usr.sbin/bhyve/mptbl.h
    trunk/usr.sbin/bhyve/pci_ahci.c
    trunk/usr.sbin/bhyve/pci_e82545.c
    trunk/usr.sbin/bhyve/pci_emul.c
    trunk/usr.sbin/bhyve/pci_emul.h
    trunk/usr.sbin/bhyve/pci_hostbridge.c
    trunk/usr.sbin/bhyve/pci_irq.c
    trunk/usr.sbin/bhyve/pci_irq.h
    trunk/usr.sbin/bhyve/pci_lpc.c
    trunk/usr.sbin/bhyve/pci_lpc.h
    trunk/usr.sbin/bhyve/pci_passthru.c
    trunk/usr.sbin/bhyve/pci_uart.c
    trunk/usr.sbin/bhyve/pci_virtio_block.c
    trunk/usr.sbin/bhyve/pci_virtio_net.c
    trunk/usr.sbin/bhyve/pci_virtio_rnd.c
    trunk/usr.sbin/bhyve/pm.c
    trunk/usr.sbin/bhyve/post.c
    trunk/usr.sbin/bhyve/rtc.c
    trunk/usr.sbin/bhyve/rtc.h
    trunk/usr.sbin/bhyve/smbiostbl.c
    trunk/usr.sbin/bhyve/smbiostbl.h
    trunk/usr.sbin/bhyve/spinup_ap.c
    trunk/usr.sbin/bhyve/spinup_ap.h
    trunk/usr.sbin/bhyve/task_switch.c
    trunk/usr.sbin/bhyve/uart_emul.c
    trunk/usr.sbin/bhyve/uart_emul.h
    trunk/usr.sbin/bhyve/virtio.c
    trunk/usr.sbin/bhyve/virtio.h
    trunk/usr.sbin/bhyve/xmsr.c
    trunk/usr.sbin/bhyve/xmsr.h

Added: trunk/usr.sbin/bhyve/Makefile
===================================================================
--- trunk/usr.sbin/bhyve/Makefile	                        (rev 0)
+++ trunk/usr.sbin/bhyve/Makefile	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,61 @@
+# $MidnightBSD$
+#
+# $FreeBSD: stable/10/usr.sbin/bhyve/Makefile 304569 2016-08-21 17:57:32Z mav $
+#
+
+PROG=	bhyve
+
+# Build with debugging symbols and with optimization disabled.
+DEBUG_FLAGS= -g -O0 
+
+MAN=	bhyve.8
+
+# Root of the tree used to locate the kernel sources and headers referenced
+# below.  Defaults to SRCTOP unless it has already been set.
+BHYVE_SYSDIR?=${SRCTOP}
+
+SRCS=	\
+	atkbdc.c		\
+	acpi.c			\
+	bhyverun.c		\
+	block_if.c		\
+	bootrom.c		\
+	consport.c		\
+	dbgport.c		\
+	fwctl.c			\
+	inout.c			\
+	ioapic.c		\
+	mem.c			\
+	mevent.c		\
+	mptbl.c			\
+	pci_ahci.c		\
+	pci_e82545.c		\
+	pci_emul.c		\
+	pci_hostbridge.c	\
+	pci_irq.c		\
+	pci_lpc.c		\
+	pci_passthru.c		\
+	pci_virtio_block.c	\
+	pci_virtio_net.c	\
+	pci_virtio_rnd.c	\
+	pci_uart.c		\
+	pm.c			\
+	post.c			\
+	rtc.c			\
+	smbiostbl.c		\
+	task_switch.c		\
+	uart_emul.c		\
+	virtio.c		\
+	xmsr.c			\
+	spinup_ap.c
+
+# vmm_instruction_emul.c does not live in this directory; the .PATH entry
+# lets make find it under sys/amd64/vmm.
+.PATH:  ${BHYVE_SYSDIR}/sys/amd64/vmm
+SRCS+=	vmm_instruction_emul.c
+
+# Libraries bhyve links against: DPADD names them as build dependencies,
+# LDADD passes the matching linker flags.
+DPADD=	${LIBVMMAPI} ${LIBMD} ${LIBUTIL} ${LIBPTHREAD}
+LDADD=	-lvmmapi -lmd -lutil -lpthread
+
+# Additional header search paths under the kernel source tree.
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller
+
+# Default warning level; only applied when WARNS is not already set.
+WARNS?=	2
+
+.include <bsd.prog.mk>


Property changes on: trunk/usr.sbin/bhyve/Makefile
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/acpi.c
===================================================================
--- trunk/usr.sbin/bhyve/acpi.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/acpi.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,1013 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/acpi.c 288434 2015-10-01 00:44:45Z delphij $
+ */
+
+/*
+ * bhyve ACPI table generator.
+ *
+ * Create the minimal set of ACPI tables required to boot FreeBSD (and
+ * hopefully other o/s's) by writing out ASL template files for each of
+ * the tables and then compiling them to AML with the Intel iasl compiler.
+ * The AML files are then read into guest memory.
+ *
+ *  The tables are placed in the guest's ROM area just below 1MB physical,
+ * above the MPTable.
+ *
+ *  Layout
+ *  ------
+ *   RSDP  ->   0xf2400    (36 bytes fixed)
+ *     RSDT  ->   0xf2440    (36 bytes + 4*7 table addrs, 4 used)
+ *     XSDT  ->   0xf2480    (36 bytes + 8*7 table addrs, 4 used)
+ *       MADT  ->   0xf2500  (depends on #CPUs)
+ *       FADT  ->   0xf2600  (268 bytes)
+ *       HPET  ->   0xf2740  (56 bytes)
+ *       MCFG  ->   0xf2780  (60 bytes)
+ *         FACS  ->   0xf27C0 (64 bytes)
+ *         DSDT  ->   0xf2800 (variable - can go up to 0x100000)
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/acpi.c 288434 2015-10-01 00:44:45Z delphij $");
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+
+#include <paths.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "acpi.h"
+#include "pci_emul.h"
+
+/*
+ * Define the base address of the ACPI tables, and the offsets to
+ * the individual tables
+ */
+#define BHYVE_ACPI_BASE		0xf2400
+#define RSDT_OFFSET		0x040
+#define XSDT_OFFSET		0x080
+#define MADT_OFFSET		0x100
+#define FADT_OFFSET		0x200
+#define	HPET_OFFSET		0x340
+#define	MCFG_OFFSET		0x380
+#define FACS_OFFSET		0x3C0
+#define DSDT_OFFSET		0x400
+
+/*
+ * File name pattern for the temporary ASL sources, the suffix appended
+ * for the compiled output, and the path of the compiler that is run on them.
+ */
+#define	BHYVE_ASL_TEMPLATE	"bhyve.XXXXXXX"
+#define BHYVE_ASL_SUFFIX	".aml"
+#define BHYVE_ASL_COMPILER	"/usr/sbin/iasl"
+
+/* Set from the environment in acpi_build(). */
+static int basl_keep_temps;	/* do not unlink the temporary files */
+static int basl_verbose_iasl;	/* do not redirect iasl output to /dev/null */
+/* Number of CPUs, as passed to acpi_build(); one MADT entry is written each. */
+static int basl_ncpu;
+/* Guest-physical address that all table offsets above are added to. */
+static uint32_t basl_acpi_base = BHYVE_ACPI_BASE;
+/* Filled in by vm_get_hpet_capabilities(); written as the HPET Timer Block ID. */
+static uint32_t hpet_capabilities;
+
+/*
+ * Contains the full pathname of the template to be passed
+ * to mkstemp(3) (basl_template) and mkstemps(3) (basl_stemplate, which
+ * carries the BHYVE_ASL_SUFFIX suffix).
+ */
+static char basl_template[MAXPATHLEN];
+static char basl_stemplate[MAXPATHLEN];
+
+/*
+ * State for dsdt_line(), dsdt_indent(), and dsdt_unindent().
+ */
+static FILE *dsdt_fp;
+static int dsdt_indent_level;
+static int dsdt_error;
+
+/*
+ * One temporary file used during compilation: the descriptor returned by
+ * mkstemp(3)/mkstemps(3), the stdio stream opened on top of it, and the
+ * generated file name.
+ */
+struct basl_fio {
+	int	fd;
+	FILE	*fp;
+	char	f_name[MAXPATHLEN];
+};
+
+#define EFPRINTF(...) \
+	err = fprintf(__VA_ARGS__); if (err < 0) goto err_exit;
+
+#define EFFLUSH(x) \
+	err = fflush(x); if (err != 0) goto err_exit;
+
+/*
+ * Write the iasl data-table template for the RSDP to 'fp'.
+ *
+ * The RSDT and XSDT addresses are basl_acpi_base plus the fixed
+ * RSDT_OFFSET/XSDT_OFFSET.  Returns 0 on success, or errno if any
+ * write or the final flush fails (EFPRINTF/EFFLUSH jump to err_exit).
+ */
+static int
+basl_fwrite_rsdp(FILE *fp)
+{
+	int err;
+
+	err = 0;
+
+	EFPRINTF(fp, "/*\n");
+	EFPRINTF(fp, " * bhyve RSDP template\n");
+	EFPRINTF(fp, " */\n");
+	EFPRINTF(fp, "[0008]\t\tSignature : \"RSD PTR \"\n");
+	EFPRINTF(fp, "[0001]\t\tChecksum : 43\n");
+	EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n");
+	EFPRINTF(fp, "[0001]\t\tRevision : 02\n");
+	EFPRINTF(fp, "[0004]\t\tRSDT Address : %08X\n",
+	    basl_acpi_base + RSDT_OFFSET);
+	EFPRINTF(fp, "[0004]\t\tLength : 00000024\n");
+	/* 64-bit field: upper half is zero, lower half is the 32-bit address */
+	EFPRINTF(fp, "[0008]\t\tXSDT Address : 00000000%08X\n",
+	    basl_acpi_base + XSDT_OFFSET);
+	EFPRINTF(fp, "[0001]\t\tExtended Checksum : 00\n");
+	EFPRINTF(fp, "[0003]\t\tReserved : 000000\n");
+
+	EFFLUSH(fp);
+
+	return (0);
+
+err_exit:
+	return (errno);
+}
+
+/*
+ * Write the iasl data-table template for the RSDT to 'fp'.
+ *
+ * The table lists the 32-bit addresses of the MADT, FADT, HPET and MCFG,
+ * each computed as basl_acpi_base plus the table's fixed offset.
+ * Returns 0 on success, or errno if a write or the flush fails.
+ */
+static int
+basl_fwrite_rsdt(FILE *fp)
+{
+	int err;
+
+	err = 0;
+
+	EFPRINTF(fp, "/*\n");
+	EFPRINTF(fp, " * bhyve RSDT template\n");
+	EFPRINTF(fp, " */\n");
+	EFPRINTF(fp, "[0004]\t\tSignature : \"RSDT\"\n");
+	EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n");
+	EFPRINTF(fp, "[0001]\t\tRevision : 01\n");
+	EFPRINTF(fp, "[0001]\t\tChecksum : 00\n");
+	EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n");
+	EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVRSDT  \"\n");
+	EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n");
+	/* iasl will fill in the compiler ID/revision fields */
+	EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n");
+	EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n");
+	EFPRINTF(fp, "\n");
+
+	/* Add in pointers to the MADT, FADT, HPET and MCFG */
+	EFPRINTF(fp, "[0004]\t\tACPI Table Address 0 : %08X\n",
+	    basl_acpi_base + MADT_OFFSET);
+	EFPRINTF(fp, "[0004]\t\tACPI Table Address 1 : %08X\n",
+	    basl_acpi_base + FADT_OFFSET);
+	EFPRINTF(fp, "[0004]\t\tACPI Table Address 2 : %08X\n",
+	    basl_acpi_base + HPET_OFFSET);
+	EFPRINTF(fp, "[0004]\t\tACPI Table Address 3 : %08X\n",
+	    basl_acpi_base + MCFG_OFFSET);
+
+	EFFLUSH(fp);
+
+	return (0);
+
+err_exit:
+	return (errno);
+}
+
+/*
+ * Write the iasl data-table template for the XSDT to 'fp'.
+ *
+ * Same set of tables as the RSDT (MADT, FADT, HPET, MCFG), but each
+ * address is written as 16 hex digits with the upper 32 bits zero.
+ * Returns 0 on success, or errno if a write or the flush fails.
+ */
+static int
+basl_fwrite_xsdt(FILE *fp)
+{
+	int err;
+
+	err = 0;
+
+	EFPRINTF(fp, "/*\n");
+	EFPRINTF(fp, " * bhyve XSDT template\n");
+	EFPRINTF(fp, " */\n");
+	EFPRINTF(fp, "[0004]\t\tSignature : \"XSDT\"\n");
+	EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n");
+	EFPRINTF(fp, "[0001]\t\tRevision : 01\n");
+	EFPRINTF(fp, "[0001]\t\tChecksum : 00\n");
+	EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n");
+	EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVXSDT  \"\n");
+	EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n");
+	/* iasl will fill in the compiler ID/revision fields */
+	EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n");
+	EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n");
+	EFPRINTF(fp, "\n");
+
+	/* Add in pointers to the MADT, FADT, HPET and MCFG */
+	EFPRINTF(fp, "[0004]\t\tACPI Table Address 0 : 00000000%08X\n",
+	    basl_acpi_base + MADT_OFFSET);
+	EFPRINTF(fp, "[0004]\t\tACPI Table Address 1 : 00000000%08X\n",
+	    basl_acpi_base + FADT_OFFSET);
+	EFPRINTF(fp, "[0004]\t\tACPI Table Address 2 : 00000000%08X\n",
+	    basl_acpi_base + HPET_OFFSET);
+	EFPRINTF(fp, "[0004]\t\tACPI Table Address 3 : 00000000%08X\n",
+	    basl_acpi_base + MCFG_OFFSET);
+
+	EFFLUSH(fp);
+
+	return (0);
+
+err_exit:
+	return (errno);
+}
+
+/*
+ * Write the iasl data-table template for the MADT ("APIC") to 'fp'.
+ *
+ * Emits, in order: the table header, one Processor Local APIC subtable
+ * per CPU (basl_ncpu of them, with processor id == APIC id == index),
+ * a single I/O APIC subtable, two type-02 subtables (IRQ0 and SCI_INT)
+ * and one type-04 subtable for the local APIC NMI.
+ * Returns 0 on success, or errno if a write or the flush fails.
+ */
+static int
+basl_fwrite_madt(FILE *fp)
+{
+	int err;
+	int i;
+
+	err = 0;
+
+	EFPRINTF(fp, "/*\n");
+	EFPRINTF(fp, " * bhyve MADT template\n");
+	EFPRINTF(fp, " */\n");
+	EFPRINTF(fp, "[0004]\t\tSignature : \"APIC\"\n");
+	EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n");
+	EFPRINTF(fp, "[0001]\t\tRevision : 01\n");
+	EFPRINTF(fp, "[0001]\t\tChecksum : 00\n");
+	EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n");
+	EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVMADT  \"\n");
+	EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n");
+
+	/* iasl will fill in the compiler ID/revision fields */
+	EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n");
+	EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n");
+	EFPRINTF(fp, "\n");
+
+	EFPRINTF(fp, "[0004]\t\tLocal Apic Address : FEE00000\n");
+	EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n");
+	EFPRINTF(fp, "\t\t\tPC-AT Compatibility : 1\n");
+	EFPRINTF(fp, "\n");
+
+	/* Add a Processor Local APIC entry for each CPU */
+	for (i = 0; i < basl_ncpu; i++) {
+		EFPRINTF(fp, "[0001]\t\tSubtable Type : 00\n");
+		EFPRINTF(fp, "[0001]\t\tLength : 08\n");
+		/* iasl expects hex values for the proc and apic id's */
+		EFPRINTF(fp, "[0001]\t\tProcessor ID : %02x\n", i);
+		EFPRINTF(fp, "[0001]\t\tLocal Apic ID : %02x\n", i);
+		EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n");
+		EFPRINTF(fp, "\t\t\tProcessor Enabled : 1\n");
+		EFPRINTF(fp, "\n");
+	}
+
+	/* Always a single IOAPIC entry, with ID 0 */
+	EFPRINTF(fp, "[0001]\t\tSubtable Type : 01\n");
+	EFPRINTF(fp, "[0001]\t\tLength : 0C\n");
+	/* iasl expects a hex value for the i/o apic id */
+	EFPRINTF(fp, "[0001]\t\tI/O Apic ID : %02x\n", 0);
+	EFPRINTF(fp, "[0001]\t\tReserved : 00\n");
+	EFPRINTF(fp, "[0004]\t\tAddress : fec00000\n");
+	EFPRINTF(fp, "[0004]\t\tInterrupt : 00000000\n");
+	EFPRINTF(fp, "\n");
+
+	/* Legacy IRQ0 is connected to pin 2 of the IOAPIC */
+	EFPRINTF(fp, "[0001]\t\tSubtable Type : 02\n");
+	EFPRINTF(fp, "[0001]\t\tLength : 0A\n");
+	EFPRINTF(fp, "[0001]\t\tBus : 00\n");
+	EFPRINTF(fp, "[0001]\t\tSource : 00\n");
+	EFPRINTF(fp, "[0004]\t\tInterrupt : 00000002\n");
+	EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0005\n");
+	EFPRINTF(fp, "\t\t\tPolarity : 1\n");
+	EFPRINTF(fp, "\t\t\tTrigger Mode : 1\n");
+	EFPRINTF(fp, "\n");
+
+	/*
+	 * Same subtable type as above, this time for the SCI: SCI_INT is
+	 * used as both the source and the interrupt number.
+	 */
+	EFPRINTF(fp, "[0001]\t\tSubtable Type : 02\n");
+	EFPRINTF(fp, "[0001]\t\tLength : 0A\n");
+	EFPRINTF(fp, "[0001]\t\tBus : 00\n");
+	EFPRINTF(fp, "[0001]\t\tSource : %02X\n", SCI_INT);
+	EFPRINTF(fp, "[0004]\t\tInterrupt : %08X\n", SCI_INT);
+	EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0000\n");
+	EFPRINTF(fp, "\t\t\tPolarity : 3\n");
+	EFPRINTF(fp, "\t\t\tTrigger Mode : 3\n");
+	EFPRINTF(fp, "\n");
+
+	/* Local APIC NMI is connected to LINT 1 on all CPUs */
+	EFPRINTF(fp, "[0001]\t\tSubtable Type : 04\n");
+	EFPRINTF(fp, "[0001]\t\tLength : 06\n");
+	EFPRINTF(fp, "[0001]\t\tProcessorId : FF\n");
+	EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0005\n");
+	EFPRINTF(fp, "\t\t\tPolarity : 1\n");
+	EFPRINTF(fp, "\t\t\tTrigger Mode : 1\n");
+	EFPRINTF(fp, "[0001]\t\tInterrupt : 01\n");
+	EFPRINTF(fp, "\n");
+
+	EFFLUSH(fp);
+
+	return (0);
+
+err_exit:
+	return (errno);
+}
+
+/*
+ * Write the iasl data-table template for the FADT ("FACP") to 'fp'.
+ *
+ * The FACS and DSDT addresses are basl_acpi_base plus their fixed
+ * offsets and are written twice: once as 32-bit fields and once as
+ * 64-bit fields.  The SCI interrupt, SMI command port, ACPI
+ * enable/disable values and the PM1A event/control and PM timer port
+ * addresses come from the constants in acpi.h.
+ * Returns 0 on success, or errno if a write or the flush fails.
+ */
+static int
+basl_fwrite_fadt(FILE *fp)
+{
+	int err;
+
+	err = 0;
+
+	EFPRINTF(fp, "/*\n");
+	EFPRINTF(fp, " * bhyve FADT template\n");
+	EFPRINTF(fp, " */\n");
+	EFPRINTF(fp, "[0004]\t\tSignature : \"FACP\"\n");
+	EFPRINTF(fp, "[0004]\t\tTable Length : 0000010C\n");
+	EFPRINTF(fp, "[0001]\t\tRevision : 05\n");
+	EFPRINTF(fp, "[0001]\t\tChecksum : 00\n");
+	EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n");
+	EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVFACP  \"\n");
+	EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n");
+	/* iasl will fill in the compiler ID/revision fields */
+	EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n");
+	EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n");
+	EFPRINTF(fp, "\n");
+
+	/* 32-bit pointers and legacy port-address fields */
+	EFPRINTF(fp, "[0004]\t\tFACS Address : %08X\n",
+	    basl_acpi_base + FACS_OFFSET);
+	EFPRINTF(fp, "[0004]\t\tDSDT Address : %08X\n",
+	    basl_acpi_base + DSDT_OFFSET);
+	EFPRINTF(fp, "[0001]\t\tModel : 01\n");
+	EFPRINTF(fp, "[0001]\t\tPM Profile : 00 [Unspecified]\n");
+	EFPRINTF(fp, "[0002]\t\tSCI Interrupt : %04X\n",
+	    SCI_INT);
+	EFPRINTF(fp, "[0004]\t\tSMI Command Port : %08X\n",
+	    SMI_CMD);
+	EFPRINTF(fp, "[0001]\t\tACPI Enable Value : %02X\n",
+	    BHYVE_ACPI_ENABLE);
+	EFPRINTF(fp, "[0001]\t\tACPI Disable Value : %02X\n",
+	    BHYVE_ACPI_DISABLE);
+	EFPRINTF(fp, "[0001]\t\tS4BIOS Command : 00\n");
+	EFPRINTF(fp, "[0001]\t\tP-State Control : 00\n");
+	EFPRINTF(fp, "[0004]\t\tPM1A Event Block Address : %08X\n",
+	    PM1A_EVT_ADDR);
+	EFPRINTF(fp, "[0004]\t\tPM1B Event Block Address : 00000000\n");
+	EFPRINTF(fp, "[0004]\t\tPM1A Control Block Address : %08X\n",
+	    PM1A_CNT_ADDR);
+	EFPRINTF(fp, "[0004]\t\tPM1B Control Block Address : 00000000\n");
+	EFPRINTF(fp, "[0004]\t\tPM2 Control Block Address : 00000000\n");
+	EFPRINTF(fp, "[0004]\t\tPM Timer Block Address : %08X\n",
+	    IO_PMTMR);
+	EFPRINTF(fp, "[0004]\t\tGPE0 Block Address : 00000000\n");
+	EFPRINTF(fp, "[0004]\t\tGPE1 Block Address : 00000000\n");
+	EFPRINTF(fp, "[0001]\t\tPM1 Event Block Length : 04\n");
+	EFPRINTF(fp, "[0001]\t\tPM1 Control Block Length : 02\n");
+	EFPRINTF(fp, "[0001]\t\tPM2 Control Block Length : 00\n");
+	EFPRINTF(fp, "[0001]\t\tPM Timer Block Length : 04\n");
+	EFPRINTF(fp, "[0001]\t\tGPE0 Block Length : 00\n");
+	EFPRINTF(fp, "[0001]\t\tGPE1 Block Length : 00\n");
+	EFPRINTF(fp, "[0001]\t\tGPE1 Base Offset : 00\n");
+	EFPRINTF(fp, "[0001]\t\t_CST Support : 00\n");
+	EFPRINTF(fp, "[0002]\t\tC2 Latency : 0000\n");
+	EFPRINTF(fp, "[0002]\t\tC3 Latency : 0000\n");
+	EFPRINTF(fp, "[0002]\t\tCPU Cache Size : 0000\n");
+	EFPRINTF(fp, "[0002]\t\tCache Flush Stride : 0000\n");
+	EFPRINTF(fp, "[0001]\t\tDuty Cycle Offset : 00\n");
+	EFPRINTF(fp, "[0001]\t\tDuty Cycle Width : 00\n");
+	EFPRINTF(fp, "[0001]\t\tRTC Day Alarm Index : 00\n");
+	EFPRINTF(fp, "[0001]\t\tRTC Month Alarm Index : 00\n");
+	EFPRINTF(fp, "[0001]\t\tRTC Century Index : 32\n");
+	EFPRINTF(fp, "[0002]\t\tBoot Flags (decoded below) : 0000\n");
+	EFPRINTF(fp, "\t\t\tLegacy Devices Supported (V2) : 0\n");
+	EFPRINTF(fp, "\t\t\t8042 Present on ports 60/64 (V2) : 0\n");
+	EFPRINTF(fp, "\t\t\tVGA Not Present (V4) : 1\n");
+	EFPRINTF(fp, "\t\t\tMSI Not Supported (V4) : 0\n");
+	EFPRINTF(fp, "\t\t\tPCIe ASPM Not Supported (V4) : 1\n");
+	EFPRINTF(fp, "\t\t\tCMOS RTC Not Present (V5) : 0\n");
+	EFPRINTF(fp, "[0001]\t\tReserved : 00\n");
+	EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000000\n");
+	EFPRINTF(fp, "\t\t\tWBINVD instruction is operational (V1) : 1\n");
+	EFPRINTF(fp, "\t\t\tWBINVD flushes all caches (V1) : 0\n");
+	EFPRINTF(fp, "\t\t\tAll CPUs support C1 (V1) : 1\n");
+	EFPRINTF(fp, "\t\t\tC2 works on MP system (V1) : 0\n");
+	EFPRINTF(fp, "\t\t\tControl Method Power Button (V1) : 0\n");
+	EFPRINTF(fp, "\t\t\tControl Method Sleep Button (V1) : 1\n");
+	EFPRINTF(fp, "\t\t\tRTC wake not in fixed reg space (V1) : 0\n");
+	EFPRINTF(fp, "\t\t\tRTC can wake system from S4 (V1) : 0\n");
+	EFPRINTF(fp, "\t\t\t32-bit PM Timer (V1) : 1\n");
+	EFPRINTF(fp, "\t\t\tDocking Supported (V1) : 0\n");
+	EFPRINTF(fp, "\t\t\tReset Register Supported (V2) : 1\n");
+	EFPRINTF(fp, "\t\t\tSealed Case (V3) : 0\n");
+	EFPRINTF(fp, "\t\t\tHeadless - No Video (V3) : 1\n");
+	EFPRINTF(fp, "\t\t\tUse native instr after SLP_TYPx (V3) : 0\n");
+	EFPRINTF(fp, "\t\t\tPCIEXP_WAK Bits Supported (V4) : 0\n");
+	EFPRINTF(fp, "\t\t\tUse Platform Timer (V4) : 0\n");
+	EFPRINTF(fp, "\t\t\tRTC_STS valid on S4 wake (V4) : 0\n");
+	EFPRINTF(fp, "\t\t\tRemote Power-on capable (V4) : 0\n");
+	EFPRINTF(fp, "\t\t\tUse APIC Cluster Model (V4) : 0\n");
+	EFPRINTF(fp, "\t\t\tUse APIC Physical Destination Mode (V4) : 1\n");
+	EFPRINTF(fp, "\t\t\tHardware Reduced (V5) : 0\n");
+	EFPRINTF(fp, "\t\t\tLow Power S0 Idle (V5) : 0\n");
+	EFPRINTF(fp, "\n");
+
+	/* Reset register: one-byte I/O port 0xCF9, reset value 06 */
+	EFPRINTF(fp,
+	    "[0012]\t\tReset Register : [Generic Address Structure]\n");
+	EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
+	EFPRINTF(fp, "[0001]\t\tBit Width : 08\n");
+	EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
+	EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n");
+	EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000CF9\n");
+	EFPRINTF(fp, "\n");
+
+	EFPRINTF(fp, "[0001]\t\tValue to cause reset : 06\n");
+	EFPRINTF(fp, "[0002]\t\tARM Flags (decoded below): 0000\n");
+	EFPRINTF(fp, "\t\t\tPSCI Compliant : 0\n");
+	EFPRINTF(fp, "\t\t\tMust use HVC for PSCI : 0\n");
+	EFPRINTF(fp, "[0001]\t\tFADT Minor Revision : 01\n");
+	/* 64-bit copies of the FACS/DSDT pointers; upper 32 bits are zero */
+	EFPRINTF(fp, "[0008]\t\tFACS Address : 00000000%08X\n",
+	    basl_acpi_base + FACS_OFFSET);
+	EFPRINTF(fp, "[0008]\t\tDSDT Address : 00000000%08X\n",
+	    basl_acpi_base + DSDT_OFFSET);
+	/* Generic Address Structure versions of the register blocks above */
+	EFPRINTF(fp,
+	    "[0012]\t\tPM1A Event Block : [Generic Address Structure]\n");
+	EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
+	EFPRINTF(fp, "[0001]\t\tBit Width : 20\n");
+	EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
+	EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n");
+	EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n",
+	    PM1A_EVT_ADDR);
+	EFPRINTF(fp, "\n");
+	
+	EFPRINTF(fp,
+	    "[0012]\t\tPM1B Event Block : [Generic Address Structure]\n");
+	EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
+	EFPRINTF(fp, "[0001]\t\tBit Width : 00\n");
+	EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
+	EFPRINTF(fp,
+	    "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n");
+	EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
+	EFPRINTF(fp, "\n");
+
+	EFPRINTF(fp,
+	    "[0012]\t\tPM1A Control Block : [Generic Address Structure]\n");
+	EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
+	EFPRINTF(fp, "[0001]\t\tBit Width : 10\n");
+	EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
+	EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n");
+	EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n",
+	    PM1A_CNT_ADDR);
+	EFPRINTF(fp, "\n");
+
+	EFPRINTF(fp,
+	    "[0012]\t\tPM1B Control Block : [Generic Address Structure]\n");
+	EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
+	EFPRINTF(fp, "[0001]\t\tBit Width : 00\n");
+	EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
+	EFPRINTF(fp,
+	    "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n");
+	EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
+	EFPRINTF(fp, "\n");
+
+	EFPRINTF(fp,
+	    "[0012]\t\tPM2 Control Block : [Generic Address Structure]\n");
+	EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
+	EFPRINTF(fp, "[0001]\t\tBit Width : 08\n");
+	EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
+	EFPRINTF(fp,
+	    "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n");
+	EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
+	EFPRINTF(fp, "\n");
+
+	/* Valid for bhyve */
+	EFPRINTF(fp,
+	    "[0012]\t\tPM Timer Block : [Generic Address Structure]\n");
+	EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
+	EFPRINTF(fp, "[0001]\t\tBit Width : 20\n");
+	EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
+	EFPRINTF(fp,
+	    "[0001]\t\tEncoded Access Width : 03 [DWord Access:32]\n");
+	EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n",
+	    IO_PMTMR);
+	EFPRINTF(fp, "\n");
+
+	EFPRINTF(fp, "[0012]\t\tGPE0 Block : [Generic Address Structure]\n");
+	EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
+	EFPRINTF(fp, "[0001]\t\tBit Width : 00\n");
+	EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
+	EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n");
+	EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
+	EFPRINTF(fp, "\n");
+
+	EFPRINTF(fp, "[0012]\t\tGPE1 Block : [Generic Address Structure]\n");
+	EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
+	EFPRINTF(fp, "[0001]\t\tBit Width : 00\n");
+	EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
+	EFPRINTF(fp,
+	    "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n");
+	EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
+	EFPRINTF(fp, "\n");
+
+	EFPRINTF(fp,
+	   "[0012]\t\tSleep Control Register : [Generic Address Structure]\n");
+	EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
+	EFPRINTF(fp, "[0001]\t\tBit Width : 08\n");
+	EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
+	EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n");
+	EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
+	EFPRINTF(fp, "\n");
+
+	EFPRINTF(fp,
+	    "[0012]\t\tSleep Status Register : [Generic Address Structure]\n");
+	EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
+	EFPRINTF(fp, "[0001]\t\tBit Width : 08\n");
+	EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
+	EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n");
+	EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
+
+	EFFLUSH(fp);
+
+	return (0);
+
+err_exit:
+	return (errno);
+}
+
+/*
+ * Write the iasl data-table template for the HPET table to 'fp'.
+ *
+ * The Timer Block ID is the value stored in hpet_capabilities (set by
+ * acpi_build() before the tables are generated); the register block is
+ * described as system memory at 0xFED00000.
+ * Returns 0 on success, or errno if a write or the flush fails.
+ */
+static int
+basl_fwrite_hpet(FILE *fp)
+{
+	int err;
+
+	err = 0;
+
+	EFPRINTF(fp, "/*\n");
+	EFPRINTF(fp, " * bhyve HPET template\n");
+	EFPRINTF(fp, " */\n");
+	EFPRINTF(fp, "[0004]\t\tSignature : \"HPET\"\n");
+	EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n");
+	EFPRINTF(fp, "[0001]\t\tRevision : 01\n");
+	EFPRINTF(fp, "[0001]\t\tChecksum : 00\n");
+	EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n");
+	EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVHPET  \"\n");
+	EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n");
+
+	/* iasl will fill in the compiler ID/revision fields */
+	EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n");
+	EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n");
+	EFPRINTF(fp, "\n");
+
+	EFPRINTF(fp, "[0004]\t\tTimer Block ID : %08X\n", hpet_capabilities);
+	EFPRINTF(fp,
+	    "[0012]\t\tTimer Block Register : [Generic Address Structure]\n");
+	EFPRINTF(fp, "[0001]\t\tSpace ID : 00 [SystemMemory]\n");
+	EFPRINTF(fp, "[0001]\t\tBit Width : 00\n");
+	EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
+	EFPRINTF(fp,
+		 "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n");
+	EFPRINTF(fp, "[0008]\t\tAddress : 00000000FED00000\n");
+	EFPRINTF(fp, "\n");
+
+	EFPRINTF(fp, "[0001]\t\tHPET Number : 00\n");
+	EFPRINTF(fp, "[0002]\t\tMinimum Clock Ticks : 0000\n");
+	EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n");
+	EFPRINTF(fp, "\t\t\t4K Page Protect : 1\n");
+	EFPRINTF(fp, "\t\t\t64K Page Protect : 0\n");
+	EFPRINTF(fp, "\n");
+
+	EFFLUSH(fp);
+
+	return (0);
+
+err_exit:
+	return (errno);
+}
+
+/*
+ * Write the iasl data-table template for the MCFG table to 'fp'.
+ *
+ * A single entry is written: base address pci_ecfg_base(), segment
+ * group 0, buses 00 through FF.
+ * Returns 0 on success, or errno if a write or the flush fails.
+ */
+static int
+basl_fwrite_mcfg(FILE *fp)
+{
+	int err = 0;
+
+	EFPRINTF(fp, "/*\n");
+	EFPRINTF(fp, " * bhyve MCFG template\n");
+	EFPRINTF(fp, " */\n");
+	EFPRINTF(fp, "[0004]\t\tSignature : \"MCFG\"\n");
+	EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n");
+	EFPRINTF(fp, "[0001]\t\tRevision : 01\n");
+	EFPRINTF(fp, "[0001]\t\tChecksum : 00\n");
+	EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n");
+	EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVMCFG  \"\n");
+	EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n");
+
+	/* iasl will fill in the compiler ID/revision fields */
+	EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n");
+	EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n");
+	EFPRINTF(fp, "[0008]\t\tReserved : 0\n");
+	EFPRINTF(fp, "\n");
+
+	/* NOTE(review): %lX assumes pci_ecfg_base() returns a long-sized value */
+	EFPRINTF(fp, "[0008]\t\tBase Address : %016lX\n", pci_ecfg_base());
+	EFPRINTF(fp, "[0002]\t\tSegment Group: 0000\n");
+	EFPRINTF(fp, "[0001]\t\tStart Bus: 00\n");
+	EFPRINTF(fp, "[0001]\t\tEnd Bus: FF\n");
+	EFPRINTF(fp, "[0004]\t\tReserved : 0\n");
+	EFFLUSH(fp);
+	return (0);
+err_exit:
+	return (errno);
+}
+
+/*
+ * Write the iasl data-table template for the FACS to 'fp'.
+ *
+ * All fields are constants: a 0x40-byte table, version 02, with the
+ * waking vectors, global lock and flags all zero.
+ * Returns 0 on success, or errno if a write or the flush fails.
+ */
+static int
+basl_fwrite_facs(FILE *fp)
+{
+	int err;
+
+	err = 0;
+
+	EFPRINTF(fp, "/*\n");
+	EFPRINTF(fp, " * bhyve FACS template\n");
+	EFPRINTF(fp, " */\n");
+	EFPRINTF(fp, "[0004]\t\tSignature : \"FACS\"\n");
+	EFPRINTF(fp, "[0004]\t\tLength : 00000040\n");
+	EFPRINTF(fp, "[0004]\t\tHardware Signature : 00000000\n");
+	EFPRINTF(fp, "[0004]\t\t32 Firmware Waking Vector : 00000000\n");
+	EFPRINTF(fp, "[0004]\t\tGlobal Lock : 00000000\n");
+	EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000000\n");
+	EFPRINTF(fp, "\t\t\tS4BIOS Support Present : 0\n");
+	EFPRINTF(fp, "\t\t\t64-bit Wake Supported (V2) : 0\n");
+	EFPRINTF(fp,
+	    "[0008]\t\t64 Firmware Waking Vector : 0000000000000000\n");
+	EFPRINTF(fp, "[0001]\t\tVersion : 02\n");
+	EFPRINTF(fp, "[0003]\t\tReserved : 000000\n");
+	EFPRINTF(fp, "[0004]\t\tOspmFlags (decoded below) : 00000000\n");
+	EFPRINTF(fp, "\t\t\t64-bit Wake Env Required (V2) : 0\n");
+
+	EFFLUSH(fp);
+
+	return (0);
+	
+err_exit:
+	return (errno);
+}
+
+/*
+ * Helper routines for writing to the DSDT from other modules.
+ */
+/*
+ * Append one line to the DSDT source being written to dsdt_fp.
+ *
+ * 'fmt' and the following arguments are printf-style.  A non-empty line
+ * is prefixed with two spaces per current indent level; an empty format
+ * produces a bare newline.  The function returns nothing: the first
+ * failure is latched in dsdt_error (as errno) and every later call
+ * becomes a no-op, so the caller only has to check dsdt_error once.
+ */
+void
+dsdt_line(const char *fmt, ...)
+{
+	va_list ap;
+	int err;
+
+	if (dsdt_error != 0)
+		return;
+
+	err = 0;
+
+	if (fmt[0] != '\0') {
+		/*
+		 * The braces matter: they keep the error check that
+		 * EFPRINTF performs inside the conditional, so 'err' is
+		 * never tested without having been assigned.
+		 */
+		if (dsdt_indent_level != 0) {
+			EFPRINTF(dsdt_fp, "%*c", dsdt_indent_level * 2, ' ');
+		}
+		va_start(ap, fmt);
+		err = vfprintf(dsdt_fp, fmt, ap);
+		/* Always pair va_start with va_end, even when the write failed. */
+		va_end(ap);
+		if (err < 0)
+			goto err_exit;
+	}
+	EFPRINTF(dsdt_fp, "\n");
+	return;
+
+err_exit:
+	dsdt_error = errno;
+}
+
+/*
+ * Increase the indentation used by dsdt_line() by 'levels' steps.
+ * The resulting level must not be negative.
+ */
+void
+dsdt_indent(int levels)
+{
+	int depth;
+
+	depth = dsdt_indent_level + levels;
+	dsdt_indent_level = depth;
+	assert(depth >= 0);
+}
+
+/*
+ * Decrease the indentation used by dsdt_line() by 'levels' steps.
+ * It is an error to remove more levels than are currently in effect.
+ */
+void
+dsdt_unindent(int levels)
+{
+
+	assert(levels <= dsdt_indent_level);
+	dsdt_indent_level = dsdt_indent_level - levels;
+}
+
+/*
+ * Emit an ASL "IO (Decode16, ...)" resource descriptor through dsdt_line().
+ * 'iobase' is written as both the range minimum and the range maximum,
+ * the alignment is fixed at 1, and 'length' is the descriptor length.
+ */
+void
+dsdt_fixed_ioport(uint16_t iobase, uint16_t length)
+{
+
+	dsdt_line("IO (Decode16,");
+	dsdt_line("  0x%04X,             // Range Minimum", iobase);
+	dsdt_line("  0x%04X,             // Range Maximum", iobase);
+	dsdt_line("  0x01,               // Alignment");
+	dsdt_line("  0x%02X,               // Length", length);
+	dsdt_line("  )");
+}
+
+/*
+ * Emit an ASL "IRQNoFlags () {irq}" resource descriptor through
+ * dsdt_line(), with 'irq' printed in decimal.
+ */
+void
+dsdt_fixed_irq(uint8_t irq)
+{
+
+	dsdt_line("IRQNoFlags ()");
+	dsdt_line("  {%d}", irq);
+}
+
+/*
+ * Emit an ASL "Memory32Fixed (ReadWrite, base, length)" resource
+ * descriptor through dsdt_line().
+ */
+void
+dsdt_fixed_mem32(uint32_t base, uint32_t length)
+{
+
+	dsdt_line("Memory32Fixed (ReadWrite,");
+	dsdt_line("  0x%08X,         // Address Base", base);
+	dsdt_line("  0x%08X,         // Address Length", length);
+	dsdt_line("  )");
+}
+
+/*
+ * Write the ASL source for the DSDT to 'fp'.
+ *
+ * Resets the dsdt_line() state to target 'fp', emits the definition
+ * block header and the _S5 package, lets pci_write_dsdt() add its
+ * part, and then appends an HPET device under _SB.PC00 with a fixed
+ * 0x400-byte memory resource at 0xFED00000.
+ *
+ * Returns 0 on success, the error latched by dsdt_line() if any line
+ * could not be written, or errno if the final flush fails.
+ */
+static int
+basl_fwrite_dsdt(FILE *fp)
+{
+	int err;
+
+	err = 0;
+	dsdt_fp = fp;
+	dsdt_error = 0;
+	dsdt_indent_level = 0;
+
+	dsdt_line("/*");
+	dsdt_line(" * bhyve DSDT template");
+	dsdt_line(" */");
+	dsdt_line("DefinitionBlock (\"bhyve_dsdt.aml\", \"DSDT\", 2,"
+		 "\"BHYVE \", \"BVDSDT  \", 0x00000001)");
+	dsdt_line("{");
+	dsdt_line("  Name (_S5, Package ()");
+	dsdt_line("  {");
+	dsdt_line("      0x05,");
+	dsdt_line("      Zero,");
+	dsdt_line("  })");
+
+	/* Defined outside this file; expected to add lines via dsdt_line() */
+	pci_write_dsdt();
+
+	dsdt_line("");
+	dsdt_line("  Scope (_SB.PC00)");
+	dsdt_line("  {");
+	dsdt_line("    Device (HPET)");
+	dsdt_line("    {");
+	dsdt_line("      Name (_HID, EISAID(\"PNP0103\"))");
+	dsdt_line("      Name (_UID, 0)");
+	dsdt_line("      Name (_CRS, ResourceTemplate ()");
+	dsdt_line("      {");
+	/* 4 levels * 2 spaces lines the resource up inside the template */
+	dsdt_indent(4);
+	dsdt_fixed_mem32(0xFED00000, 0x400);
+	dsdt_unindent(4);
+	dsdt_line("      })");
+	dsdt_line("    }");
+	dsdt_line("  }");
+	dsdt_line("}");
+
+	/* dsdt_line() does not return errors; pick up the first one here */
+	if (dsdt_error != 0)
+		return (dsdt_error);
+
+	EFFLUSH(fp);
+
+	return (0);
+
+err_exit:
+	return (errno);
+}
+
+/*
+ * Create a temporary file and open a stdio stream on it.
+ *
+ * If 'suffix' is non-zero the name is generated from basl_stemplate
+ * with mkstemps(3), keeping the BHYVE_ASL_SUFFIX suffix; otherwise it is
+ * generated from basl_template with mkstemp(3).  On success bf->f_name,
+ * bf->fd and bf->fp are all valid and 0 is returned.
+ *
+ * Returns 1 on failure.  If the file was created but fdopen(3) failed,
+ * the file is removed and the descriptor closed before returning, so
+ * the caller never sees a half-initialised 'bf' reported as success.
+ */
+static int
+basl_open(struct basl_fio *bf, int suffix)
+{
+
+	if (suffix) {
+		strlcpy(bf->f_name, basl_stemplate, MAXPATHLEN);
+		bf->fd = mkstemps(bf->f_name, strlen(BHYVE_ASL_SUFFIX));
+	} else {
+		strlcpy(bf->f_name, basl_template, MAXPATHLEN);
+		bf->fd = mkstemp(bf->f_name);
+	}
+
+	/* Failure is reported as -1; descriptor 0 is a valid result. */
+	if (bf->fd < 0)
+		return (1);
+
+	bf->fp = fdopen(bf->fd, "w+");
+	if (bf->fp == NULL) {
+		unlink(bf->f_name);
+		close(bf->fd);
+		bf->fd = -1;
+		return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Release a temporary file opened by basl_open(): remove it from the
+ * file system unless the user asked to keep temporaries, then close
+ * the stream.
+ */
+static void
+basl_close(struct basl_fio *bf)
+{
+
+	if (basl_keep_temps == 0) {
+		unlink(bf->f_name);
+	}
+	fclose(bf->fp);
+}
+
+/*
+ * Open the pair of temporary files used for one compilation: 'in'
+ * (no suffix) and 'out' (with suffix).  Returns 0 when both are open;
+ * otherwise returns the basl_open() error, with 'in' closed again if
+ * only 'out' failed.
+ */
+static int
+basl_start(struct basl_fio *in, struct basl_fio *out)
+{
+	int rc;
+
+	rc = basl_open(in, 0);
+	if (rc)
+		return (rc);
+
+	rc = basl_open(out, 1);
+	if (rc)
+		basl_close(in);
+
+	return (rc);
+}
+
+/*
+ * Counterpart of basl_start(): close both temporary files, input first.
+ */
+static void
+basl_end(struct basl_fio *in, struct basl_fio *out)
+{
+
+	basl_close(in);
+	basl_close(out);
+}
+
+/*
+ * Copy the contents of the file behind 'fd' into guest memory at
+ * basl_acpi_base + 'off'.
+ *
+ * The number of bytes to copy is the file size reported by fstat(2).
+ * read(2) may return fewer bytes than requested, so the copy loops
+ * until the whole file has been transferred.
+ *
+ * Returns 0 on success, errno if fstat(2) or read(2) fails, EFAULT if
+ * the destination range cannot be mapped by paddr_guest2host(), or EIO
+ * if end-of-file is reached before the expected number of bytes.
+ */
+static int
+basl_load(struct vmctx *ctx, int fd, uint64_t off)
+{
+	struct stat sb;
+	char *gaddr;
+	size_t resid;
+	ssize_t n;
+
+	if (fstat(fd, &sb) < 0)
+		return (errno);
+
+	gaddr = paddr_guest2host(ctx, basl_acpi_base + off, sb.st_size);
+	if (gaddr == NULL)
+		return (EFAULT);
+
+	resid = sb.st_size;
+	while (resid > 0) {
+		n = read(fd, gaddr, resid);
+		if (n < 0) {
+			/* Interrupted before any data arrived: just retry. */
+			if (errno == EINTR)
+				continue;
+			return (errno);
+		}
+		/* File is shorter than fstat(2) said; table would be partial. */
+		if (n == 0)
+			return (EIO);
+		gaddr += n;
+		resid -= n;
+	}
+
+	return (0);
+}
+
+/*
+ * Generate one table, compile it and load the result into the guest.
+ *
+ * 'fwrite_section' writes the source for the table into a temporary
+ * file; the compiler named by BHYVE_ASL_COMPILER is then run on it via
+ * system(3), and the compiled output file is copied into guest memory
+ * at 'offset' by basl_load().  Both temporary files are closed again
+ * before returning, whatever the outcome.
+ *
+ * Returns 0 on success or the first non-zero result of any step (the
+ * value from system(3) is returned unmodified).
+ */
+static int
+basl_compile(struct vmctx *ctx, int (*fwrite_section)(FILE *), uint64_t offset)
+{
+	static char iaslbuf[3*MAXPATHLEN + 10];
+	struct basl_fio io[2];
+	struct basl_fio *src, *dst;
+	char *fmt;
+	int err;
+
+	src = &io[0];
+	dst = &io[1];
+
+	err = basl_start(src, dst);
+	if (err)
+		return (err);
+
+	err = (*fwrite_section)(src->fp);
+	if (err)
+		goto done;
+
+	/*
+	 * iasl reports the results of the compilation on stdout.  Unless
+	 * the user asked for verbose output for debugging, run it through
+	 * the shell so that stdout can be redirected to /dev/null.
+	 */
+	if (basl_verbose_iasl)
+		fmt = "%s -p %s %s";
+	else
+		fmt = "/bin/sh -c \"%s -p %s %s\" 1> /dev/null";
+
+	snprintf(iaslbuf, sizeof(iaslbuf), fmt, BHYVE_ASL_COMPILER,
+	    dst->f_name, src->f_name);
+	err = system(iaslbuf);
+	if (err)
+		goto done;
+
+	/* Copy the aml output file into guest memory at the given offset */
+	err = basl_load(ctx, dst->fd, offset);
+
+done:
+	basl_end(src, dst);
+	return (err);
+}
+
+/*
+ * Build the two file name templates used for the temporary files:
+ * basl_template ("<tmpdir>/" BHYVE_ASL_TEMPLATE) for mkstemp(3) and
+ * basl_stemplate (the same with BHYVE_ASL_SUFFIX appended) for
+ * mkstemps(3).
+ *
+ * The directory is taken from the first of these that is set and
+ * non-empty: $BHYVE_TMPDIR, $TMPDIR, _PATH_TMP.
+ *
+ * Returns 0 on success or E2BIG if a resulting path does not fit.
+ */
+static int
+basl_make_templates(void)
+{
+	const char *tmpdir;
+	size_t len;
+	int n;
+
+	/*
+	 * Each fallback is consulted only when the previous candidate is
+	 * missing or empty, so a valid BHYVE_TMPDIR is never overridden
+	 * by TMPDIR.
+	 */
+	tmpdir = getenv("BHYVE_TMPDIR");
+	if (tmpdir == NULL || *tmpdir == '\0')
+		tmpdir = getenv("TMPDIR");
+	if (tmpdir == NULL || *tmpdir == '\0')
+		tmpdir = _PATH_TMP;
+
+	len = strlen(tmpdir);
+	if (len >= MAXPATHLEN)
+		return (E2BIG);
+
+	/* Drop trailing slashes; exactly one separator is added below. */
+	while (len > 0 && tmpdir[len - 1] == '/')
+		len--;
+
+	n = snprintf(basl_template, sizeof(basl_template), "%.*s/%s",
+	    (int)len, tmpdir, BHYVE_ASL_TEMPLATE);
+	if (n < 0 || (size_t)n >= sizeof(basl_template))
+		return (E2BIG);
+
+	n = snprintf(basl_stemplate, sizeof(basl_stemplate), "%s%s",
+	    basl_template, BHYVE_ASL_SUFFIX);
+	if (n < 0 || (size_t)n >= sizeof(basl_stemplate))
+		return (E2BIG);
+
+	return (0);
+}
+
+/*
+ * The tables to generate, in the order they are compiled and loaded:
+ * the function that writes the source for each table and the offset
+ * from basl_acpi_base at which the compiled table is placed.  The list
+ * is terminated by an entry whose 'wsect' is NULL.
+ */
+static struct {
+	int	(*wsect)(FILE *fp);
+	uint64_t  offset;
+} basl_ftables[] =
+{
+	{ .wsect = basl_fwrite_rsdp, .offset = 0 },
+	{ .wsect = basl_fwrite_rsdt, .offset = RSDT_OFFSET },
+	{ .wsect = basl_fwrite_xsdt, .offset = XSDT_OFFSET },
+	{ .wsect = basl_fwrite_madt, .offset = MADT_OFFSET },
+	{ .wsect = basl_fwrite_fadt, .offset = FADT_OFFSET },
+	{ .wsect = basl_fwrite_hpet, .offset = HPET_OFFSET },
+	{ .wsect = basl_fwrite_mcfg, .offset = MCFG_OFFSET },
+	{ .wsect = basl_fwrite_facs, .offset = FACS_OFFSET },
+	{ .wsect = basl_fwrite_dsdt, .offset = DSDT_OFFSET },
+	{ .wsect = NULL, .offset = 0 }
+};
+
+/*
+ * Generate all ACPI tables for a guest with 'ncpu' CPUs and copy them
+ * into the memory of the VM described by 'ctx'.
+ *
+ * Two environment variables alter the behaviour, whatever their value:
+ * BHYVE_ACPI_VERBOSE_IASL leaves the compiler output on stdout, and
+ * BHYVE_ACPI_KEEPTMPS keeps the generated temporary files.
+ *
+ * Returns 0 on success, or the first non-zero result from
+ * vm_get_hpet_capabilities(), basl_make_templates() or basl_compile().
+ */
+int
+acpi_build(struct vmctx *ctx, int ncpu)
+{
+	int err;
+	int idx;
+
+	basl_ncpu = ncpu;
+
+	err = vm_get_hpet_capabilities(ctx, &hpet_capabilities);
+	if (err != 0)
+		return (err);
+
+	/* Debug aid: send the iasl compiler output to stdout, not /dev/null */
+	if (getenv("BHYVE_ACPI_VERBOSE_IASL") != NULL)
+		basl_verbose_iasl = 1;
+
+	/* Debug aid: keep the generated files instead of deleting them */
+	if (getenv("BHYVE_ACPI_KEEPTMPS") != NULL)
+		basl_keep_temps = 1;
+
+	err = basl_make_templates();
+
+	/*
+	 * Walk the table list, compiling each entry and copying it into
+	 * guest memory; stop at the first failure.
+	 */
+	for (idx = 0; err == 0 && basl_ftables[idx].wsect != NULL; idx++) {
+		err = basl_compile(ctx, basl_ftables[idx].wsect,
+		    basl_ftables[idx].offset);
+	}
+
+	return (err);
+}


Property changes on: trunk/usr.sbin/bhyve/acpi.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/acpi.h
===================================================================
--- trunk/usr.sbin/bhyve/acpi.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/acpi.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,55 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/acpi.h 268972 2014-07-22 03:14:37Z jhb $
+ */
+/*
+ * Constants and entry points shared by bhyve's ACPI code.
+ *
+ * NOTE(review): the prototypes below use uint8_t, uint16_t and uint32_t,
+ * but this header includes nothing itself, so the includer must already
+ * have the fixed-width integer types in scope.
+ * NOTE(review): SMI_CMD and the PM1A_x_ADDR and IO_PMTMR values look like
+ * I/O port numbers and BHYVE_ACPI_ENABLE/DISABLE like values written to
+ * SMI_CMD; confirm against the code that uses them.
+ */
+#ifndef _ACPI_H_
+#define _ACPI_H_
+
+#define	SCI_INT			9	/* presumably the SCI IRQ; confirm */
+
+#define	SMI_CMD			0xb2
+#define	BHYVE_ACPI_ENABLE	0xa0
+#define	BHYVE_ACPI_DISABLE	0xa1
+
+#define	PM1A_EVT_ADDR		0x400
+#define	PM1A_CNT_ADDR		0x404
+
+#define	IO_PMTMR		0x408	/* 4-byte i/o port for the timer */
+
+struct vmctx;	/* opaque here; only pointers to it are used below */
+
+int	acpi_build(struct vmctx *ctx, int ncpu);
+void	dsdt_line(const char *fmt, ...);
+void	dsdt_fixed_ioport(uint16_t iobase, uint16_t length);
+void	dsdt_fixed_irq(uint8_t irq);
+void	dsdt_fixed_mem32(uint32_t base, uint32_t length);
+void	dsdt_indent(int levels);
+void	dsdt_unindent(int levels);
+void	sci_init(struct vmctx *ctx);
+
+#endif /* _ACPI_H_ */


Property changes on: trunk/usr.sbin/bhyve/acpi.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/ahci.h
===================================================================
--- trunk/usr.sbin/bhyve/ahci.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/ahci.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,323 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 1998 - 2008 Søren Schmidt <sos at FreeBSD.org>
+ * Copyright (c) 2009-2012 Alexander Motin <mav at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer,
+ *    without modification, immediately at the beginning of the file.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/ahci.h 280733 2015-03-27 08:43:45Z mav $
+ */
+
+#ifndef _AHCI_H_
+#define	_AHCI_H_
+
+/*
+ * ATA register defines
+ *
+ * The unindented names (ATA_DATA through ATA_CONTROL) are numbered
+ * consecutively from 0 to 12.  The indented names listed under a register
+ * are the bit values documented for it.  Two status values are shared by
+ * two names each: 0x10 (ATA_S_DSC and ATA_S_SERVICE) and 0x20 (ATA_S_DWF
+ * and ATA_S_DMA).
+ */
+#define ATA_DATA                        0       /* (RW) data */
+
+#define ATA_FEATURE                     1       /* (W) feature */
+#define         ATA_F_DMA               0x01    /* enable DMA */
+#define         ATA_F_OVL               0x02    /* enable overlap */
+
+#define ATA_COUNT                       2       /* (W) sector count */
+
+#define ATA_SECTOR                      3       /* (RW) sector # */
+#define ATA_CYL_LSB                     4       /* (RW) cylinder# LSB */
+#define ATA_CYL_MSB                     5       /* (RW) cylinder# MSB */
+#define ATA_DRIVE                       6       /* (W) Sector/Drive/Head */
+#define         ATA_D_LBA               0x40    /* use LBA addressing */
+#define         ATA_D_IBM               0xa0    /* 512 byte sectors, ECC */
+
+#define ATA_COMMAND                     7       /* (W) command */
+
+#define ATA_ERROR                       8       /* (R) error */
+#define         ATA_E_ILI               0x01    /* illegal length */
+#define         ATA_E_NM                0x02    /* no media */
+#define         ATA_E_ABORT             0x04    /* command aborted */
+#define         ATA_E_MCR               0x08    /* media change request */
+#define         ATA_E_IDNF              0x10    /* ID not found */
+#define         ATA_E_MC                0x20    /* media changed */
+#define         ATA_E_UNC               0x40    /* uncorrectable data */
+#define         ATA_E_ICRC              0x80    /* UDMA crc error */
+#define		ATA_E_ATAPI_SENSE_MASK	0xf0	/* ATAPI sense key mask */
+
+#define ATA_IREASON                     9       /* (R) interrupt reason */
+#define         ATA_I_CMD               0x01    /* cmd (1) | data (0) */
+#define         ATA_I_IN                0x02    /* read (1) | write (0) */
+#define         ATA_I_RELEASE           0x04    /* released bus (1) */
+#define         ATA_I_TAGMASK           0xf8    /* tag mask */
+
+#define ATA_STATUS                      10      /* (R) status */
+#define ATA_ALTSTAT                     11      /* (R) alternate status */
+#define         ATA_S_ERROR             0x01    /* error */
+#define         ATA_S_INDEX             0x02    /* index */
+#define         ATA_S_CORR              0x04    /* data corrected */
+#define         ATA_S_DRQ               0x08    /* data request */
+#define         ATA_S_DSC               0x10    /* drive seek completed */
+#define         ATA_S_SERVICE           0x10    /* drive needs service */
+#define         ATA_S_DWF               0x20    /* drive write fault */
+#define         ATA_S_DMA               0x20    /* DMA ready */
+#define         ATA_S_READY             0x40    /* drive ready */
+#define         ATA_S_BUSY              0x80    /* busy */
+
+#define ATA_CONTROL                     12      /* (W) control */
+#define         ATA_A_IDS               0x02    /* disable interrupts */
+#define         ATA_A_RESET             0x04    /* RESET controller */
+#define         ATA_A_4BIT              0x08    /* 4 head bits */
+#define         ATA_A_HOB               0x80    /* High Order Byte enable */
+
+/*
+ * SATA register defines
+ *
+ * ATA_SSTATUS, ATA_SERROR, ATA_SCONTROL and ATA_SACTIVE are numbered 13
+ * to 16.  For ATA_SSTATUS and ATA_SCONTROL the indented names give a mask
+ * and the values defined inside it for three fields: DET (mask 0x00f),
+ * SPD (mask 0x0f0) and IPM (mask 0xf00).  The ATA_SE_* names are single
+ * bits listed under ATA_SERROR.
+ */
+#define ATA_SSTATUS                     13
+#define         ATA_SS_DET_MASK         0x0000000f
+#define         ATA_SS_DET_NO_DEVICE    0x00000000
+#define         ATA_SS_DET_DEV_PRESENT  0x00000001
+#define         ATA_SS_DET_PHY_ONLINE   0x00000003
+#define         ATA_SS_DET_PHY_OFFLINE  0x00000004
+
+#define         ATA_SS_SPD_MASK         0x000000f0
+#define         ATA_SS_SPD_NO_SPEED     0x00000000
+#define         ATA_SS_SPD_GEN1         0x00000010
+#define         ATA_SS_SPD_GEN2         0x00000020
+#define         ATA_SS_SPD_GEN3         0x00000030
+
+#define         ATA_SS_IPM_MASK         0x00000f00
+#define         ATA_SS_IPM_NO_DEVICE    0x00000000
+#define         ATA_SS_IPM_ACTIVE       0x00000100
+#define         ATA_SS_IPM_PARTIAL      0x00000200
+#define         ATA_SS_IPM_SLUMBER      0x00000600
+#define         ATA_SS_IPM_DEVSLEEP     0x00000800
+
+#define ATA_SERROR                      14
+#define         ATA_SE_DATA_CORRECTED   0x00000001
+#define         ATA_SE_COMM_CORRECTED   0x00000002
+#define         ATA_SE_DATA_ERR         0x00000100
+#define         ATA_SE_COMM_ERR         0x00000200
+#define         ATA_SE_PROT_ERR         0x00000400
+#define         ATA_SE_HOST_ERR         0x00000800
+#define         ATA_SE_PHY_CHANGED      0x00010000
+#define         ATA_SE_PHY_IERROR       0x00020000
+#define         ATA_SE_COMM_WAKE        0x00040000
+#define         ATA_SE_DECODE_ERR       0x00080000
+#define         ATA_SE_PARITY_ERR       0x00100000
+#define         ATA_SE_CRC_ERR          0x00200000
+#define         ATA_SE_HANDSHAKE_ERR    0x00400000
+#define         ATA_SE_LINKSEQ_ERR      0x00800000
+#define         ATA_SE_TRANSPORT_ERR    0x01000000
+#define         ATA_SE_UNKNOWN_FIS      0x02000000
+#define         ATA_SE_EXCHANGED        0x04000000
+
+#define ATA_SCONTROL                    15
+#define         ATA_SC_DET_MASK         0x0000000f
+#define         ATA_SC_DET_IDLE         0x00000000
+#define         ATA_SC_DET_RESET        0x00000001
+#define         ATA_SC_DET_DISABLE      0x00000004
+
+#define         ATA_SC_SPD_MASK         0x000000f0
+#define         ATA_SC_SPD_NO_SPEED     0x00000000
+#define         ATA_SC_SPD_SPEED_GEN1   0x00000010
+#define         ATA_SC_SPD_SPEED_GEN2   0x00000020
+#define         ATA_SC_SPD_SPEED_GEN3   0x00000030
+
+#define         ATA_SC_IPM_MASK         0x00000f00
+#define         ATA_SC_IPM_NONE         0x00000000
+#define         ATA_SC_IPM_DIS_PARTIAL  0x00000100
+#define         ATA_SC_IPM_DIS_SLUMBER  0x00000200
+#define         ATA_SC_IPM_DIS_DEVSLEEP 0x00000400
+
+#define ATA_SACTIVE                     16
+
+#define AHCI_MAX_PORTS			32
+#define AHCI_MAX_SLOTS			32	/* multiplier in AHCI_CT_OFFSET */
+#define AHCI_MAX_IRQS			16
+
+/*
+ * SATA AHCI v1.0 register defines
+ *
+ * Unindented names hold register offsets; indented names are the masks,
+ * shifts and flag bits listed for the register above them.  Where a
+ * field has both a mask and a x_SHIFT name, the shift is the bit
+ * position of the mask's lowest set bit (e.g. AHCI_CAP_NCS 0x00001F00
+ * with AHCI_CAP_NCS_SHIFT 8).
+ */
+#define AHCI_CAP                    0x00
+#define		AHCI_CAP_NPMASK	0x0000001f
+#define		AHCI_CAP_SXS	0x00000020
+#define		AHCI_CAP_EMS	0x00000040
+#define		AHCI_CAP_CCCS	0x00000080
+#define		AHCI_CAP_NCS	0x00001F00
+#define		AHCI_CAP_NCS_SHIFT	8
+#define		AHCI_CAP_PSC	0x00002000
+#define		AHCI_CAP_SSC	0x00004000
+#define		AHCI_CAP_PMD	0x00008000
+#define		AHCI_CAP_FBSS	0x00010000
+#define		AHCI_CAP_SPM	0x00020000
+#define		AHCI_CAP_SAM	0x00080000
+#define		AHCI_CAP_ISS	0x00F00000
+#define		AHCI_CAP_ISS_SHIFT	20
+#define		AHCI_CAP_SCLO	0x01000000
+#define		AHCI_CAP_SAL	0x02000000
+#define		AHCI_CAP_SALP	0x04000000
+#define		AHCI_CAP_SSS	0x08000000
+#define		AHCI_CAP_SMPS	0x10000000
+#define		AHCI_CAP_SSNTF	0x20000000
+#define		AHCI_CAP_SNCQ	0x40000000
+#define		AHCI_CAP_64BIT	0x80000000
+
+#define AHCI_GHC                    0x04
+#define         AHCI_GHC_AE         0x80000000
+#define         AHCI_GHC_MRSM       0x00000004
+#define         AHCI_GHC_IE         0x00000002
+#define         AHCI_GHC_HR         0x00000001
+
+#define AHCI_IS                     0x08
+#define AHCI_PI                     0x0c
+#define AHCI_VS                     0x10
+
+#define AHCI_CCCC                   0x14
+#define		AHCI_CCCC_TV_MASK	0xffff0000
+#define		AHCI_CCCC_TV_SHIFT	16
+#define		AHCI_CCCC_CC_MASK	0x0000ff00
+#define		AHCI_CCCC_CC_SHIFT	8
+#define		AHCI_CCCC_INT_MASK	0x000000f8
+#define		AHCI_CCCC_INT_SHIFT	3
+#define		AHCI_CCCC_EN		0x00000001
+#define AHCI_CCCP                   0x18
+
+#define AHCI_EM_LOC                 0x1C
+#define AHCI_EM_CTL                 0x20
+#define 	AHCI_EM_MR              0x00000001
+#define 	AHCI_EM_TM              0x00000100
+#define 	AHCI_EM_RST             0x00000200
+#define 	AHCI_EM_LED             0x00010000
+#define 	AHCI_EM_SAFTE           0x00020000
+#define 	AHCI_EM_SES2            0x00040000
+#define 	AHCI_EM_SGPIO           0x00080000
+#define 	AHCI_EM_SMB             0x01000000
+#define 	AHCI_EM_XMT             0x02000000
+#define 	AHCI_EM_ALHD            0x04000000
+#define 	AHCI_EM_PM              0x08000000
+
+#define AHCI_CAP2                   0x24
+#define		AHCI_CAP2_BOH	0x00000001
+#define		AHCI_CAP2_NVMP	0x00000002
+#define		AHCI_CAP2_APST	0x00000004
+#define		AHCI_CAP2_SDS	0x00000008
+#define		AHCI_CAP2_SADM	0x00000010
+#define		AHCI_CAP2_DESO	0x00000020
+
+#define AHCI_OFFSET                 0x100	/* per-port base? TODO confirm */
+#define AHCI_STEP                   0x80	/* per-port stride? TODO confirm */
+/*
+ * AHCI_P_x register offsets with their masks, shifts and flag bits.
+ * NOTE(review): the "_P_" prefix suggests per-port registers; confirm
+ * against the code that uses them.
+ *
+ * The AHCI_P_IX_x bits are listed once, after AHCI_P_IS and AHCI_P_IE.
+ * AHCI_P_CMD_NOOP through AHCI_P_CMD_DEVSLEEP are values that lie inside
+ * AHCI_P_CMD_ICC_MASK (0xf0000000), not independent flag bits.
+ */
+#define AHCI_P_CLB                  0x00
+#define AHCI_P_CLBU                 0x04
+#define AHCI_P_FB                   0x08
+#define AHCI_P_FBU                  0x0c
+#define AHCI_P_IS                   0x10
+#define AHCI_P_IE                   0x14
+#define         AHCI_P_IX_DHR       0x00000001
+#define         AHCI_P_IX_PS        0x00000002
+#define         AHCI_P_IX_DS        0x00000004
+#define         AHCI_P_IX_SDB       0x00000008
+#define         AHCI_P_IX_UF        0x00000010
+#define         AHCI_P_IX_DP        0x00000020
+#define         AHCI_P_IX_PC        0x00000040
+#define         AHCI_P_IX_MP        0x00000080
+
+#define         AHCI_P_IX_PRC       0x00400000
+#define         AHCI_P_IX_IPM       0x00800000
+#define         AHCI_P_IX_OF        0x01000000
+#define         AHCI_P_IX_INF       0x04000000
+#define         AHCI_P_IX_IF        0x08000000
+#define         AHCI_P_IX_HBD       0x10000000
+#define         AHCI_P_IX_HBF       0x20000000
+#define         AHCI_P_IX_TFE       0x40000000
+#define         AHCI_P_IX_CPD       0x80000000
+
+#define AHCI_P_CMD                  0x18
+#define         AHCI_P_CMD_ST       0x00000001
+#define         AHCI_P_CMD_SUD      0x00000002
+#define         AHCI_P_CMD_POD      0x00000004
+#define         AHCI_P_CMD_CLO      0x00000008
+#define         AHCI_P_CMD_FRE      0x00000010
+#define         AHCI_P_CMD_CCS_MASK 0x00001f00
+#define         AHCI_P_CMD_CCS_SHIFT 8
+#define         AHCI_P_CMD_ISS      0x00002000
+#define         AHCI_P_CMD_FR       0x00004000
+#define         AHCI_P_CMD_CR       0x00008000
+#define         AHCI_P_CMD_CPS      0x00010000
+#define         AHCI_P_CMD_PMA      0x00020000
+#define         AHCI_P_CMD_HPCP     0x00040000
+#define         AHCI_P_CMD_MPSP     0x00080000
+#define         AHCI_P_CMD_CPD      0x00100000
+#define         AHCI_P_CMD_ESP      0x00200000
+#define         AHCI_P_CMD_FBSCP    0x00400000
+#define         AHCI_P_CMD_APSTE    0x00800000
+#define         AHCI_P_CMD_ATAPI    0x01000000
+#define         AHCI_P_CMD_DLAE     0x02000000
+#define         AHCI_P_CMD_ALPE     0x04000000
+#define         AHCI_P_CMD_ASP      0x08000000
+#define         AHCI_P_CMD_ICC_MASK 0xf0000000
+#define         AHCI_P_CMD_NOOP     0x00000000
+#define         AHCI_P_CMD_ACTIVE   0x10000000
+#define         AHCI_P_CMD_PARTIAL  0x20000000
+#define         AHCI_P_CMD_SLUMBER  0x60000000
+#define         AHCI_P_CMD_DEVSLEEP 0x80000000
+
+#define AHCI_P_TFD                  0x20
+#define AHCI_P_SIG                  0x24
+#define AHCI_P_SSTS                 0x28
+#define AHCI_P_SCTL                 0x2c
+#define AHCI_P_SERR                 0x30
+#define AHCI_P_SACT                 0x34
+#define AHCI_P_CI                   0x38
+#define AHCI_P_SNTF                 0x3C
+#define AHCI_P_FBS                  0x40
+#define 	AHCI_P_FBS_EN       0x00000001
+#define 	AHCI_P_FBS_DEC      0x00000002
+#define 	AHCI_P_FBS_SDE      0x00000004
+#define 	AHCI_P_FBS_DEV      0x00000f00
+#define 	AHCI_P_FBS_DEV_SHIFT 8
+#define 	AHCI_P_FBS_ADO      0x0000f000
+#define 	AHCI_P_FBS_ADO_SHIFT 12
+#define 	AHCI_P_FBS_DWE      0x000f0000
+#define 	AHCI_P_FBS_DWE_SHIFT 16
+#define AHCI_P_DEVSLP               0x44
+#define 	AHCI_P_DEVSLP_ADSE  0x00000001
+#define 	AHCI_P_DEVSLP_DSP   0x00000002
+#define 	AHCI_P_DEVSLP_DETO  0x000003fc
+#define 	AHCI_P_DEVSLP_DETO_SHIFT 2
+#define 	AHCI_P_DEVSLP_MDAT  0x00007c00
+#define 	AHCI_P_DEVSLP_MDAT_SHIFT 10
+#define 	AHCI_P_DEVSLP_DITO  0x01ff8000
+#define 	AHCI_P_DEVSLP_DITO_SHIFT 15
+#define 	AHCI_P_DEVSLP_DM    0x0e000000
+#define 	AHCI_P_DEVSLP_DM_SHIFT 25
+
+/*
+ * Just to be sure, if building as module.
+ *
+ * Forces MAXPHYS to at least 512 KiB: a smaller definition is replaced,
+ * and so is a missing one, because an undefined name evaluates as 0 in
+ * the #if below.
+ * NOTE(review): the replacement text "512 * 1024" is not parenthesized,
+ * so an expression such as "x % MAXPHYS" would not group as intended.
+ * The only use in this header is btoc(MAXPHYS); whether that is safe
+ * depends on btoc() parenthesizing its argument - confirm.
+ */
+#if MAXPHYS < 512 * 1024
+#undef MAXPHYS
+#define MAXPHYS				512 * 1024
+#endif
+/*
+ * Pessimistic prognosis on number of required S/G entries.
+ * roundup() and btoc() are not defined in this header; the includer has
+ * to provide them before this macro is expanded.
+ */
+#define AHCI_SG_ENTRIES	(roundup(btoc(MAXPHYS) + 1, 8))
+/* Command list. 32 commands. First, 1Kbyte aligned. */
+#define AHCI_CL_OFFSET              0
+#define AHCI_CL_SIZE                32
+/*
+ * Command tables. Up to 32 commands, Each, 128byte aligned.
+ * The tables start right after AHCI_MAX_SLOTS command-list entries of
+ * AHCI_CL_SIZE bytes; each table is 128 bytes plus 16 bytes per S/G entry.
+ */
+#define AHCI_CT_OFFSET              (AHCI_CL_OFFSET + AHCI_CL_SIZE * AHCI_MAX_SLOTS)
+#define AHCI_CT_SIZE                (128 + AHCI_SG_ENTRIES * 16)
+/*
+ * Total main work area.
+ * This expands to an expression that reads ch->numslots, so it can only
+ * be used where a suitable variable named "ch" is in scope.
+ */
+#define AHCI_WORK_SIZE              (AHCI_CT_OFFSET + AHCI_CT_SIZE * ch->numslots)
+
+#endif /* _AHCI_H_ */


Property changes on: trunk/usr.sbin/bhyve/ahci.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/atkbdc.c
===================================================================
--- trunk/usr.sbin/bhyve/atkbdc.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/atkbdc.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,91 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale at pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/atkbdc.c 270159 2014-08-19 01:20:24Z grehan $");
+
+#include <sys/types.h>
+
+#include <machine/vmm.h>
+
+#include <vmmapi.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "inout.h"
+#include "pci_lpc.h"
+
+#define	KBD_DATA_PORT		0x60	/* served by atkbdc_data_handler */
+
+#define	KBD_STS_CTL_PORT	0x64	/* served by atkbdc_sts_ctl_handler */
+#define	 KBD_SYS_FLAG		0x4	/* value returned by reads of 0x64 */
+
+#define	KBDC_RESET		0xfe	/* written to 0x64: reset the VM */
+/*
+ * I/O handler registered for KBD_DATA_PORT.
+ *
+ * Only single-byte accesses are accepted; any other width returns -1.
+ * No keyboard state sits behind the port: *eax is set to 0 for reads and
+ * writes alike (the "in" flag is not examined) and 0 is returned.
+ * ctx, vcpu, port and arg are unused.
+ */
+static int
+atkbdc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+	if (bytes != 1)
+		return (-1);
+
+	*eax = 0;
+
+	return (0);
+}
+/*
+ * I/O handler registered for KBD_STS_CTL_PORT.
+ *
+ * Only single-byte accesses are accepted; any other width returns -1.
+ * A read stores KBD_SYS_FLAG in *eax.  A write of KBDC_RESET calls
+ * vm_suspend() with VM_SUSPEND_RESET; every other written value is
+ * ignored.  All of these cases return 0.
+ *
+ * The vm_suspend() result is checked only by assert(): it must be 0, or
+ * errno must be EALREADY.
+ * NOTE(review): with NDEBUG the check disappears and "error" is then
+ * assigned but never read.
+ * NOTE(review): the switch compares the whole 32-bit *eax even though the
+ * access is one byte wide; confirm the caller clears the upper bits.
+ */
+static int
+atkbdc_sts_ctl_handler(struct vmctx *ctx, int vcpu, int in, int port,
+    int bytes, uint32_t *eax, void *arg)
+{
+	int error, retval;
+
+	if (bytes != 1)
+		return (-1);
+
+	retval = 0;			/* never changed below */
+	if (in) {
+		*eax = KBD_SYS_FLAG;	/* system passed POST */
+	} else {
+		switch (*eax) {		/* no default: other writes dropped */
+		case KBDC_RESET:	/* Pulse "reset" line. */
+			error = vm_suspend(ctx, VM_SUSPEND_RESET);
+			assert(error == 0 || errno == EALREADY);
+			break;
+		}
+	}
+
+	return (retval);
+}
+/*
+ * Register the two handlers above for in and out accesses
+ * (IOPORT_F_INOUT) and pass each port, with a count of 1, to SYSRES_IO().
+ *
+ * NOTE(review): the first INOUT_PORT() name is spelled "atkdbc" and the
+ * second "atkbdc".  If INOUT_PORT() builds an identifier from that name
+ * (check inout.h), the two names have to differ within this file, so do
+ * not "correct" the spelling without giving the entries distinct names.
+ */
+INOUT_PORT(atkdbc, KBD_DATA_PORT, IOPORT_F_INOUT, atkbdc_data_handler);
+SYSRES_IO(KBD_DATA_PORT, 1);
+INOUT_PORT(atkbdc, KBD_STS_CTL_PORT,  IOPORT_F_INOUT,
+    atkbdc_sts_ctl_handler);
+SYSRES_IO(KBD_STS_CTL_PORT, 1);


Property changes on: trunk/usr.sbin/bhyve/atkbdc.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/bhyve.8
===================================================================
--- trunk/usr.sbin/bhyve/bhyve.8	                        (rev 0)
+++ trunk/usr.sbin/bhyve/bhyve.8	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,353 @@
+.\" $MidnightBSD$
+.\" Copyright (c) 2013 Peter Grehan
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD: stable/10/usr.sbin/bhyve/bhyve.8 304425 2016-08-18 11:56:07Z mav $
+.\"
+.Dd July 9, 2016
+.Dt BHYVE 8
+.Os
+.Sh NAME
+.Nm bhyve
+.Nd "run a guest operating system inside a virtual machine"
+.Sh SYNOPSIS
+.Nm
+.Op Fl abehuwxACHPSWY
+.Op Fl c Ar numcpus
+.Op Fl g Ar gdbport
+.Op Fl l Ar lpcdev Ns Op , Ns Ar conf
+.Op Fl m Ar size Ns Op Ar K|k|M|m|G|g|T|t
+.Op Fl p Ar vcpu:hostcpu
+.Op Fl s Ar slot,emulation Ns Op , Ns Ar conf
+.Op Fl U Ar uuid
+.Ar vmname
+.Sh DESCRIPTION
+.Nm
+is a hypervisor that runs guest operating systems inside a
+virtual machine.
+.Pp
+Parameters such as the number of virtual CPUs, amount of guest memory, and
+I/O connectivity can be specified with command-line parameters.
+.Pp
+The guest operating system must be loaded with
+.Xr bhyveload 8
+or a similar boot loader before running
+.Nm .
+.Pp
+.Nm
+runs until the guest operating system reboots or an unhandled hypervisor
+exit is detected.
+.Sh OPTIONS
+.Bl -tag -width 10n
+.It Fl a
+The guest's local APIC is configured in xAPIC mode.
+The xAPIC mode is the default setting so this option is redundant.
+It will be deprecated in a future version.
+.It Fl A
+Generate ACPI tables.
+Required for
+.Fx Ns /amd64
+guests.
+.It Fl b
+Enable a low-level console device supported by
+.Fx
+kernels compiled with
+.Cd "device bvmconsole" .
+This option will be deprecated in a future version.
+.It Fl c Ar numcpus
+Number of guest virtual CPUs.
+The default is 1 and the maximum is 16.
+.It Fl C
+Include guest memory in core file.
+.It Fl e
+Force
+.Nm
+to exit when a guest issues an access to an I/O port that is not emulated.
+This is intended for debug purposes.
+.It Fl g Ar gdbport
+For
+.Fx
+kernels compiled with
+.Cd "device bvmdebug" ,
+allow a remote kernel kgdb to be relayed to the guest kernel gdb stub
+via a local IPv4 address and this port.
+This option will be deprecated in a future version.
+.It Fl h
+Print help message and exit.
+.It Fl H
+Yield the virtual CPU thread when a HLT instruction is detected.
+If this option is not specified, virtual CPUs will use 100% of a host CPU.
+.It Fl l Ar lpcdev Ns Op , Ns Ar conf
+Allow devices behind the LPC PCI-ISA bridge to be configured.
+The only supported devices are the TTY-class devices
+.Ar com1
+and
+.Ar com2
+and the boot ROM device
+.Ar bootrom .
+.It Fl m Ar size Ns Op Ar K|k|M|m|G|g|T|t
+Guest physical memory size in bytes.
+This must be the same size that was given to
+.Xr bhyveload 8 .
+.Pp
+The size argument may be suffixed with one of K, M, G or T (either upper
+or lower case) to indicate a multiple of kilobytes, megabytes, gigabytes,
+or terabytes.
+If no suffix is given, the value is assumed to be in megabytes.
+.It Fl p Ar vcpu:hostcpu
+Pin guest's virtual CPU
+.Em vcpu
+to
+.Em hostcpu .
+.It Fl P
+Force the guest virtual CPU to exit when a PAUSE instruction is detected.
+.It Fl s Ar slot,emulation Ns Op , Ns Ar conf
+Configure a virtual PCI slot and function.
+.Pp
+.Nm
+provides PCI bus emulation and virtual devices that can be attached to
+slots on the bus.
+There are 32 available slots, with the option of providing up to 8 functions
+per slot.
+.Bl -tag -width 10n
+.It Ar slot
+.Ar pcislot[:function]
+.Ar bus:pcislot:function
+.Pp
+The
+.Ar pcislot
+value is 0 to 31.
+The optional
+.Ar function
+value is 0 to 7.
+The optional
+.Ar bus
+value is 0 to 255.
+If not specified, the
+.Ar function
+value defaults to 0.
+If not specified, the
+.Ar bus
+value defaults to 0.
+.It Ar emulation
+.Bl -tag -width 10n
+.It Li hostbridge | Li amd_hostbridge
+.Pp
+Provide a simple host bridge.
+This is usually configured at slot 0, and is required by most guest
+operating systems.
+The
+.Li amd_hostbridge
+emulation is identical but uses a PCI vendor ID of
+.Li AMD .
+.It Li passthru
+PCI pass-through device.
+.It Li virtio-net
+Virtio network interface.
+.It Li virtio-blk
+Virtio block storage interface.
+.It Li virtio-rnd
+Virtio RNG interface.
+.It Li ahci
+AHCI controller attached to arbitrary devices.
+.It Li ahci-cd
+AHCI controller attached to an ATAPI CD/DVD.
+.It Li ahci-hd
+AHCI controller attached to a SATA hard-drive.
+.It Li e1000
+Intel e82545 network interface.
+.It Li uart
+PCI 16550 serial device.
+.It Li lpc
+LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports and a boot ROM.
+The LPC bridge emulation can only be configured on bus 0.
+.El
+.It Op Ar conf
+This optional parameter describes the backend for device emulations.
+If
+.Ar conf
+is not specified, the device emulation has no backend and can be
+considered unconnected.
+.Pp
+Network devices:
+.Bl -tag -width 10n
+.It Ar tapN Ns Op , Ns Ar mac=xx:xx:xx:xx:xx:xx
+.It Ar vmnetN Ns Op , Ns Ar mac=xx:xx:xx:xx:xx:xx
+.Pp
+If
+.Ar mac
+is not specified, the MAC address is derived from a fixed OUI and the
+remaining bytes from an MD5 hash of the slot and function numbers and
+the device name.
+.Pp
+The MAC address is an ASCII string in
+.Xr ethers 5
+format.
+.El
+.Pp
+Block storage devices:
+.Bl -tag -width 10n
+.It Pa /filename Ns Oo , Ns Ar block-device-options Oc
+.It Pa /dev/xxx Ns Oo , Ns Ar block-device-options Oc
+.El
+.Pp
+The
+.Ar block-device-options
+are:
+.Bl -tag -width 8n
+.It Li nocache
+Open the file with
+.Dv O_DIRECT .
+.It Li direct
+Open the file using
+.Dv O_SYNC .
+.It Li ro
+Force the file to be opened read-only.
+.It Li sectorsize= Ns Ar logical Ns Oo / Ns Ar physical Oc
+Specify the logical and physical sector sizes of the emulated disk.
+The physical sector size is optional and is equal to the logical sector size
+if not explicitly specified.
+.El
+.Pp
+TTY devices:
+.Bl -tag -width 10n
+.It Li stdio
+Connect the serial port to the standard input and output of
+the
+.Nm
+process.
+.It Pa /dev/xxx
+Use the host TTY device for serial port I/O.
+.El
+.Pp
+Boot ROM device:
+.Bl -tag -width 10n
+.It Pa romfile
+Map
+.Ar romfile
+in the guest address space reserved for boot firmware.
+.El
+.Pp
+Pass-through devices:
+.Bl -tag -width 10n
+.It Ns Ar bus Ns / Ns Ar slot Ns / Ns Ar function
+Connect to a PCI device on the host at the selector described by
+.Ar bus ,
+.Ar slot ,
+and
+.Ar function
+numbers.
+.El
+.Pp
+Guest memory must be wired using the
+.Fl S
+option when a pass-through device is configured.
+.Pp
+The host device must have been reserved at boot-time using the
+.Va pptdev
+loader variable as described in
+.Xr vmm 4 .
+.El
+.It Fl S
+Wire guest memory.
+.It Fl u
+RTC keeps UTC time.
+.It Fl U Ar uuid
+Set the universally unique identifier
+.Pq UUID
+in the guest's System Management BIOS System Information structure.
+By default a UUID is generated from the host's hostname and
+.Ar vmname .
+.It Fl w
+Ignore accesses to unimplemented Model Specific Registers (MSRs).
+This is intended for debug purposes.
+.It Fl W
+Force virtio PCI device emulations to use MSI interrupts instead of MSI-X
+interrupts.
+.It Fl x
+The guest's local APIC is configured in x2APIC mode.
+.It Fl Y
+Disable MPtable generation.
+.It Ar vmname
+Alphanumeric name of the guest.
+This should be the same as that created by
+.Xr bhyveload 8 .
+.El
+.Sh EXAMPLES
+The guest operating system must have been loaded with
+.Xr bhyveload 8
+or a similar boot loader before
+.Nm
+can be run.
+.Pp
+To run a virtual machine with 1GB of memory, two virtual CPUs, a virtio
+block device backed by the
+.Pa /my/image
+filesystem image, and a serial port for the console:
+.Bd -literal -offset indent
+bhyve -c 2 -s 0,hostbridge -s 1,lpc -s 2,virtio-blk,/my/image \\
+  -l com1,stdio -A -H -P -m 1G vm1
+.Ed
+.Pp
+Run a 24GB single-CPU virtual machine with three network ports, one of which
+has a MAC address specified:
+.Bd -literal -offset indent
+bhyve -s 0,hostbridge -s 1,lpc -s 2:0,virtio-net,tap0 \\
+  -s 2:1,virtio-net,tap1 \\
+  -s 2:2,virtio-net,tap2,mac=00:be:fa:76:45:00 \\
+  -s 3,virtio-blk,/my/image -l com1,stdio \\
+  -A -H -P -m 24G bigvm
+.Ed
+.Pp
+Run an 8GB quad-CPU virtual machine with 8 AHCI SATA disks, an AHCI ATAPI
+CD-ROM, a single virtio network port, an AMD hostbridge, and the console
+port connected to an
+.Xr nmdm 4
+null-modem device.
+.Bd -literal -offset indent
+bhyve -c 4 \\
+  -s 0,amd_hostbridge -s 1,lpc \\
+  -s 1:0,ahci,hd:/images/disk.1,hd:/images/disk.2,\\
+hd:/images/disk.3,hd:/images/disk.4,\\
+hd:/images/disk.5,hd:/images/disk.6,\\
+hd:/images/disk.7,hd:/images/disk.8,\\
+cd:/images/install.iso \\
+  -s 3,virtio-net,tap0 \\
+  -l com1,/dev/nmdm0A \\
+  -A -H -P -m 8G ahcivm
+.Ed
+.Sh SEE ALSO
+.Xr bhyve 4 ,
+.Xr nmdm 4 ,
+.Xr vmm 4 ,
+.Xr ethers 5 ,
+.Xr bhyvectl 8 ,
+.Xr bhyveload 8
+.Sh HISTORY
+.Nm
+first appeared in
+.Fx 10.0 .
+.Sh AUTHORS
+.An Neel Natu Aq neel at freebsd.org
+.An Peter Grehan Aq grehan at freebsd.org


Property changes on: trunk/usr.sbin/bhyve/bhyve.8
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/bhyverun.c
===================================================================
--- trunk/usr.sbin/bhyve/bhyverun.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/bhyverun.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,970 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/bhyverun.c 302705 2016-07-13 06:09:34Z ngie $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/bhyverun.c 302705 2016-07-13 06:09:34Z ngie $");
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+
+#include <machine/atomic.h>
+#include <machine/segments.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <sysexits.h>
+#include <stdbool.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "acpi.h"
+#include "inout.h"
+#include "dbgport.h"
+#include "fwctl.h"
+#include "ioapic.h"
+#include "mem.h"
+#include "mevent.h"
+#include "mptbl.h"
+#include "pci_emul.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+#include "smbiostbl.h"
+#include "xmsr.h"
+#include "spinup_ap.h"
+#include "rtc.h"
+
+#define GUEST_NIO_PORT		0x488	/* guest upcalls via i/o port */
+
+#define MB		(1024UL * 1024)
+#define GB		(1024UL * MB)
+
+typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
+extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
+
+char *vmname;
+
+int guest_ncpus;
+char *guest_uuid_str;
+
+static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
+static int virtio_msix = 1;
+static int x2apic_mode = 0;	/* default is xAPIC */
+
+static int strictio;
+static int strictmsr = 1;
+
+static int acpi;
+
+static char *progname;
+static const int BSP = 0;
+
+static cpuset_t cpumask;
+
+static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
+
+static struct vm_exit vmexit[VM_MAXCPU];
+
+struct bhyvestats {
+        uint64_t        vmexit_bogus;	/* counted in vmexit_bogus() */
+	uint64_t	vmexit_reqidle;	/* counted in vmexit_reqidle() */
+        uint64_t        vmexit_hlt;	/* counted in vmexit_hlt() */
+        uint64_t        vmexit_pause;	/* counted in vmexit_pause() */
+        uint64_t        vmexit_mtrap;	/* counted in vmexit_mtrap() */
+        uint64_t        vmexit_inst_emul;	/* counted in vmexit_inst_emul() */
+        uint64_t        cpu_switch_rotate;	/* NOTE(review): no writer here */
+        uint64_t        cpu_switch_direct;	/* NOTE(review): no writer here */
+} stats;
+
+struct mt_vmm_info {
+	pthread_t	mt_thr;		/* thread id stored by pthread_create() */
+	struct vmctx	*mt_ctx;	/* set by fbsdrun_addcpu() */
+	int		mt_vcpu;	/* vcpu number, set by fbsdrun_addcpu() */
+} mt_vmm_info[VM_MAXCPU];
+
+static cpuset_t *vcpumap[VM_MAXCPU] = { NULL };
+
+static void
+usage(int code)
+{
+	/* Print the option summary to stderr, then exit with status 'code'. */
+        fprintf(stderr,
+                "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
+		"       %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
+		"       -a: local apic is in xAPIC mode (deprecated)\n"
+		"       -A: create ACPI tables\n"
+		"       -c: # cpus (default 1)\n"
+		"       -C: include guest memory in core file\n"
+		"       -e: exit on unhandled I/O access\n"
+		"       -g: gdb port\n"
+		"       -h: help\n"
+		"       -H: vmexit from the guest on hlt\n"
+		"       -l: LPC device configuration\n"
+		"       -m: memory size in MB\n"
+		"       -p: pin 'vcpu' to 'hostcpu'\n"
+		"       -P: vmexit from the guest on pause\n"
+		"       -s: <slot,driver,configinfo> PCI slot config\n"
+		"       -S: guest memory cannot be swapped\n"
+		"       -u: RTC keeps UTC time\n"
+		"       -U: uuid\n"
+		"       -w: ignore unimplemented MSRs\n"
+		"       -W: force virtio to use single-vector MSI\n"
+		"       -x: local apic is in x2APIC mode\n"
+		"       -Y: disable MPtable generation\n",
+		progname, (int)strlen(progname), "");
+	/* The "%*s" above pads line two of the synopsis to progname's width. */
+	exit(code);
+}
+
+static int
+pincpu_parse(const char *opt)
+{
+	int vcpu, pcpu;
+	/* Parse "vcpu:hostcpu"; 0 on success, -1 after printing a diagnostic. */
+	if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) {
+		fprintf(stderr, "invalid format: %s\n", opt);
+		return (-1);
+	}
+
+	if (vcpu < 0 || vcpu >= VM_MAXCPU) {
+		fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n",
+		    vcpu, VM_MAXCPU - 1);
+		return (-1);
+	}
+
+	if (pcpu < 0 || pcpu >= CPU_SETSIZE) {
+		fprintf(stderr, "hostcpu '%d' outside valid range from "
+		    "0 to %d\n", pcpu, CPU_SETSIZE - 1);
+		return (-1);
+	}
+	/* First pin for this vcpu: allocate and clear its host-cpu set. */
+	if (vcpumap[vcpu] == NULL) {
+		if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) {
+			perror("malloc");
+			return (-1);
+		}
+		CPU_ZERO(vcpumap[vcpu]);
+	}
+	CPU_SET(pcpu, vcpumap[vcpu]);
+	return (0);
+}
+
+void
+vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid,
+    int errcode)
+{
+	struct vmctx *ctx;
+	int error, restart_instruction;
+	/* Inject exception 'vector' into 'vcpu'; 'arg' is the struct vmctx. */
+	ctx = arg;
+	restart_instruction = 1;
+
+	error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode,
+	    restart_instruction);
+	assert(error == 0);
+}
+
+void *
+paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
+{
+	/* The translation is delegated unchanged to vm_map_gpa(). */
+	return (vm_map_gpa(ctx, gaddr, len));
+}
+
+int
+fbsdrun_vmexit_on_pause(void)
+{
+	/* Non-zero once main() has seen the -P option. */
+	return (guest_vmexit_on_pause);
+}
+
+int
+fbsdrun_vmexit_on_hlt(void)
+{
+	/* Non-zero once main() has seen the -H option. */
+	return (guest_vmexit_on_hlt);
+}
+
+int
+fbsdrun_virtio_msix(void)
+{
+	/* 1 by default; main() clears it when -W is given. */
+	return (virtio_msix);
+}
+
+static void *
+fbsdrun_start_thread(void *param)
+{
+	char tname[MAXCOMLEN + 1];
+	struct mt_vmm_info *mtp;
+	int vcpu;
+	/* Thread entry for one vcpu; 'param' is its mt_vmm_info[] element. */
+	mtp = param;
+	vcpu = mtp->mt_vcpu;
+
+	snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
+	pthread_set_name_np(mtp->mt_thr, tname);
+	/* Begin at the rip that fbsdrun_addcpu() stored in vmexit[vcpu]. */
+	vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
+
+	/* reached only when vm_loop() returns, i.e. after a vm_run() error */
+	exit(1);
+	return (NULL);
+}
+
+void
+fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip)
+{
+	int error;
+	/* Activate 'newcpu' and start the pthread that runs it from 'rip'. */
+	assert(fromcpu == BSP);
+
+	/*
+	 * The 'newcpu' must be activated in the context of 'fromcpu'. If
+	 * vm_activate_cpu() is delayed until newcpu's pthread starts running
+	 * then vmm.ko is out-of-sync with bhyve and this can create a race
+	 * with vm_suspend().
+	 */
+	error = vm_activate_cpu(ctx, newcpu);
+	if (error != 0)
+		err(EX_OSERR, "could not activate CPU %d", newcpu);
+
+	CPU_SET_ATOMIC(newcpu, &cpumask);
+
+	/*
+	 * Set up the vmexit struct to allow execution to start
+	 * at the given RIP
+	 */
+	vmexit[newcpu].rip = rip;
+	vmexit[newcpu].inst_length = 0;
+
+	mt_vmm_info[newcpu].mt_ctx = ctx;
+	mt_vmm_info[newcpu].mt_vcpu = newcpu;
+
+	error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL,
+	    fbsdrun_start_thread, &mt_vmm_info[newcpu]);
+	assert(error == 0);
+}
+
+static int
+fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
+{
+	/* Remove 'vcpu' from cpumask; non-zero return means it is now empty. */
+	if (!CPU_ISSET(vcpu, &cpumask)) {
+		fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu);
+		exit(1);
+	}
+
+	CPU_CLR_ATOMIC(vcpu, &cpumask);
+	return (CPU_EMPTY(&cpumask));
+}
+
+static int
+vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
+		     uint32_t eax)
+{
+#if BHYVE_DEBUG
+	/*
+	 * Put guest-driven debug here; called for OUTs to GUEST_NIO_PORT.
+	 */
+#endif
+        return (VMEXIT_CONTINUE);
+}
+
+/*
+ * Handle an IN/OUT vmexit.  An OUT to GUEST_NIO_PORT is a guest notification;
+ * anything else goes to emulate_inout(), aborting (after a log) on failure.
+ */
+static int
+vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
+{
+	int error;
+	int bytes, port, in, out;
+	int vcpu;
+
+	vcpu = *pvcpu;
+	port = vme->u.inout.port;
+	bytes = vme->u.inout.bytes;
+	in = vme->u.inout.in;
+	out = !in;
+
+	/* Extra-special case of host notifications */
+	if (out && port == GUEST_NIO_PORT)
+		return (vmexit_handle_notify(ctx, vme, pvcpu,
+		    vme->u.inout.eax));
+
+	error = emulate_inout(ctx, vcpu, vme, strictio);
+	if (error == 0)
+		return (VMEXIT_CONTINUE);
+
+	/* Log this exit's rip: 'vme', not the file-scope vmexit[] array. */
+	fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", in ? "in" : "out",
+	    bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port, vme->rip);
+	return (VMEXIT_ABORT);
+}
+
+/*
+ * RDMSR exit: emulate the read and hand the value back to the guest in
+ * RAX (low 32 bits) and RDX (high 32 bits).  A failed emulation is logged;
+ * with strictmsr set vm_inject_gp() is called and no register is written.
+ */
+static int
+vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
+{
+	uint64_t val = 0;
+	const int vcpu = *pvcpu;
+	int error;
+
+	if (emulate_rdmsr(ctx, vcpu, vme->u.msr.code, &val) != 0) {
+		fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
+		    vme->u.msr.code, vcpu);
+		if (strictmsr) {
+			vm_inject_gp(ctx, vcpu);
+			return (VMEXIT_CONTINUE);
+		}
+	}
+
+	error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, (uint32_t)val);
+	assert(error == 0);
+	error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RDX,
+	    (uint32_t)(val >> 32));
+	assert(error == 0);
+	return (VMEXIT_CONTINUE);
+}
+
+/*
+ * WRMSR exit: emulate; on failure log and, if strictmsr, vm_inject_gp().
+ */
+static int
+vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
+{
+	const int vcpu = *pvcpu;
+
+	if (emulate_wrmsr(ctx, vcpu, vme->u.msr.code, vme->u.msr.wval) != 0) {
+		fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
+		    vme->u.msr.code, vme->u.msr.wval, vcpu);
+		if (strictmsr)
+			vm_inject_gp(ctx, vcpu);
+	}
+	return (VMEXIT_CONTINUE);
+}
+
+static int
+vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
+{
+	/* Forward the request to spinup_ap(); its return value is ignored. */
+	(void)spinup_ap(ctx, *pvcpu,
+		    vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
+
+	return (VMEXIT_CONTINUE);
+}
+
+#define	DEBUG_EPT_MISCONFIG
+#ifdef DEBUG_EPT_MISCONFIG
+#define	EXIT_REASON_EPT_MISCONFIG	49
+#define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400
+#define	VMCS_IDENT(x)			((x) | 0x80000000)
+
+static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
+static int ept_misconfig_ptenum;
+#endif
+
+static int
+vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	/* Dump the VMX exit details to stderr; always returns VMEXIT_ABORT. */
+	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
+	fprintf(stderr, "\treason\t\tVMX\n");
+	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
+	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
+	fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status);
+	fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
+	fprintf(stderr, "\tqualification\t0x%016lx\n",
+	    vmexit->u.vmx.exit_qualification);
+	fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type);
+	fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error);
+#ifdef DEBUG_EPT_MISCONFIG
+	if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
+		vm_get_register(ctx, *pvcpu,
+		    VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
+		    &ept_misconfig_gpa);
+		vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
+		    &ept_misconfig_ptenum);
+		fprintf(stderr, "\tEPT misconfiguration:\n");
+		fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa);
+		fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n",
+		    ept_misconfig_ptenum, ept_misconfig_pte[0],
+		    ept_misconfig_pte[1], ept_misconfig_pte[2],
+		    ept_misconfig_pte[3]);
+	}
+#endif	/* DEBUG_EPT_MISCONFIG */
+	return (VMEXIT_ABORT);
+}
+
+static int
+vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	/* Dump the SVM exit details to stderr; always returns VMEXIT_ABORT. */
+	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
+	fprintf(stderr, "\treason\t\tSVM\n");
+	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
+	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
+	fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode);
+	fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1);
+	fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2);
+	return (VMEXIT_ABORT);
+}
+
+static int
+vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	/* Statistics only: the exit is counted and the vcpu resumed. */
+	assert(vmexit->inst_length == 0);
+
+	stats.vmexit_bogus++;
+
+	return (VMEXIT_CONTINUE);
+}
+
+static int
+vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	/* Statistics only: the exit is counted and the vcpu resumed. */
+	assert(vmexit->inst_length == 0);
+
+	stats.vmexit_reqidle++;
+
+	return (VMEXIT_CONTINUE);
+}
+
+static int
+vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	/* Installed in handler[] by fbsdrun_set_capabilities() when -H is set. */
+	stats.vmexit_hlt++;
+
+	/*
+	 * Just continue execution with the next instruction. We use
+	 * the HLT VM exit as a way to be friendly with the host
+	 * scheduler.
+	 */
+	return (VMEXIT_CONTINUE);
+}
+
+static int
+vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	/* Installed in handler[] by fbsdrun_set_capabilities() when -P is set. */
+	stats.vmexit_pause++;
+
+	return (VMEXIT_CONTINUE);
+}
+
+static int
+vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	/* Statistics only: the exit is counted and the vcpu resumed. */
+	assert(vmexit->inst_length == 0);
+
+	stats.vmexit_mtrap++;
+
+	return (VMEXIT_CONTINUE);
+}
+
+static int
+vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	int err, i;
+	struct vie *vie;
+	/* Emulate the access with emulate_mem(); VMEXIT_ABORT if that fails. */
+	stats.vmexit_inst_emul++;
+
+	vie = &vmexit->u.inst_emul.vie;
+	err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
+	    vie, &vmexit->u.inst_emul.paging);
+
+	if (err) {
+		if (err == ESRCH) {
+			fprintf(stderr, "Unhandled memory access to 0x%lx\n",
+			    vmexit->u.inst_emul.gpa);
+		}
+		/* Dump the vie's valid instruction bytes to aid diagnosis. */
+		fprintf(stderr, "Failed to emulate instruction [");
+		for (i = 0; i < vie->num_valid; i++) {
+			fprintf(stderr, "0x%02x%s", vie->inst[i],
+			    i != (vie->num_valid - 1) ? " " : "");
+		}
+		fprintf(stderr, "] at 0x%lx\n", vmexit->rip);
+		return (VMEXIT_ABORT);
+	}
+
+	return (VMEXIT_CONTINUE);
+}
+
+static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER;
+
+static int
+vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	enum vm_suspend_how how;
+	/* Retire this vcpu; the BSP exits the process once all are retired. */
+	how = vmexit->u.suspended.how;
+
+	fbsdrun_deletecpu(ctx, *pvcpu);
+	/* An AP wakes the BSP (which waits below) and ends its own thread. */
+	if (*pvcpu != BSP) {
+		pthread_mutex_lock(&resetcpu_mtx);
+		pthread_cond_signal(&resetcpu_cond);
+		pthread_mutex_unlock(&resetcpu_mtx);
+		pthread_exit(NULL);
+	}
+	/* BSP only from here on: wait until cpumask is empty. */
+	pthread_mutex_lock(&resetcpu_mtx);
+	while (!CPU_EMPTY(&cpumask)) {
+		pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx);
+	}
+	pthread_mutex_unlock(&resetcpu_mtx);
+	/* The suspend reason is reported through the process exit status. */
+	switch (how) {
+	case VM_SUSPEND_RESET:
+		exit(0);
+	case VM_SUSPEND_POWEROFF:
+		exit(1);
+	case VM_SUSPEND_HALT:
+		exit(2);
+	case VM_SUSPEND_TRIPLEFAULT:
+		exit(3);
+	default:
+		fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
+		exit(100);
+	}
+	return (0);	/* NOTREACHED */
+}
+
+static vmexit_handler_t handler[VM_EXITCODE_MAX] = {	/* NULL: see vm_loop() */
+	[VM_EXITCODE_INOUT]  = vmexit_inout,
+	[VM_EXITCODE_INOUT_STR]  = vmexit_inout,	/* same handler as INOUT */
+	[VM_EXITCODE_VMX]    = vmexit_vmx,
+	[VM_EXITCODE_SVM]    = vmexit_svm,
+	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
+	[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
+	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
+	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
+	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
+	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
+	[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
+	[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
+	[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
+};	/* HLT and PAUSE slots are set by fbsdrun_set_capabilities() */
+
+static void
+vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
+{
+	int error, rc;
+	enum vm_exitcode exitcode;
+	cpuset_t active_cpus;
+	/* Run 'vcpu' from 'startrip', dispatching every exit via handler[]. */
+	if (vcpumap[vcpu] != NULL) {
+		error = pthread_setaffinity_np(pthread_self(),
+		    sizeof(cpuset_t), vcpumap[vcpu]);
+		assert(error == 0);
+	}
+	/* NOTE(review): the vm_active_cpus() return value is never examined. */
+	error = vm_active_cpus(ctx, &active_cpus);
+	assert(CPU_ISSET(vcpu, &active_cpus));
+
+	error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
+	assert(error == 0);
+	/* Left via break only when vm_run() fails; handlers may also exit(). */
+	while (1) {
+		error = vm_run(ctx, vcpu, &vmexit[vcpu]);
+		if (error != 0)
+			break;
+
+		exitcode = vmexit[vcpu].exitcode;
+		if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
+			fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
+			    exitcode);
+			exit(1);
+		}
+
+                rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);
+
+		switch (rc) {
+		case VMEXIT_CONTINUE:
+			break;
+		case VMEXIT_ABORT:
+			abort();
+		default:
+			exit(1);
+		}
+	}
+	fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
+}
+
+/*
+ * Return how many vcpus the guest may have: VM_MAXCPU when querying the
+ * UNRESTRICTED_GUEST capability on the BSP succeeds, otherwise just one.
+ */
+static int
+num_vcpus_allowed(struct vmctx *ctx)
+{
+	int capval;
+
+	/* Only the query's success matters here; capval is not examined. */
+	if (vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST,
+	    &capval) != 0)
+		return (1);
+
+	return (VM_MAXCPU);
+}
+
+void
+fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
+{
+	int err, tmp;
+	/* Apply the -H, -P and x2APIC command-line settings to vcpu 'cpu'. */
+	if (fbsdrun_vmexit_on_hlt()) {
+		err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp);
+		if (err < 0) {
+			fprintf(stderr, "VM exit on HLT not supported\n");
+			exit(1);
+		}
+		vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1);
+		if (cpu == BSP)
+			handler[VM_EXITCODE_HLT] = vmexit_hlt;
+	}
+
+        if (fbsdrun_vmexit_on_pause()) {
+		/*
+		 * pause exit support required for this mode
+		 */
+		err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp);
+		if (err < 0) {
+			fprintf(stderr,
+			    "SMP mux requested, no pause support\n");
+			exit(1);
+		}
+		vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1);
+		if (cpu == BSP)
+			handler[VM_EXITCODE_PAUSE] = vmexit_pause;
+        }
+
+	if (x2apic_mode)
+		err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED);
+	else
+		err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED);
+
+	if (err) {
+		fprintf(stderr, "Unable to set x2apic state (%d)\n", err);
+		exit(1);
+	}
+	/* NOTE(review): the result of enabling INVPCID is not checked. */
+	vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1);
+}
+
+static struct vmctx *
+do_open(const char *vmname)
+{
+	struct vmctx *ctx;
+	int error;
+	bool reinit, romboot;
+	/* Create or attach to VM 'vmname' and return its open context. */
+	reinit = romboot = false;
+
+	if (lpc_bootrom())
+		romboot = true;
+
+	error = vm_create(vmname);
+	if (error) {
+		if (errno == EEXIST) {
+			if (romboot) {
+				reinit = true;
+			} else {
+				/*
+				 * The virtual machine has been setup by the
+				 * userspace bootloader.
+				 */
+			}
+		} else {
+			perror("vm_create");
+			exit(1);
+		}
+	} else {
+		if (!romboot) {
+			/*
+			 * If the virtual machine was just created then a
+			 * bootrom must be configured to boot it.
+			 */
+			fprintf(stderr, "virtual machine cannot be booted\n");
+			exit(1);
+		}
+	}
+
+	ctx = vm_open(vmname);
+	if (ctx == NULL) {
+		perror("vm_open");
+		exit(1);
+	}
+	/* VM already existed and a bootrom is configured: vm_reinit() it. */
+	if (reinit) {
+		error = vm_reinit(ctx);
+		if (error) {
+			perror("vm_reinit");
+			exit(1);
+		}
+	}
+	return (ctx);
+}
+
+/*
+ * bhyve entry point: parse the command line, open the VM, set up guest
+ * memory, device emulation and firmware tables, start the BSP's vcpu
+ * thread and then run the mevent dispatch loop on this thread.
+ */
+int
+main(int argc, char *argv[])
+{
+	int c, error, gdb_port, err, bvmcons;
+	int max_vcpus, mptgen, memflags, rtc_localtime;
+	struct vmctx *ctx;
+	uint64_t rip;
+	size_t memsize;
+	char *optstr;
+
+	bvmcons = 0;
+	progname = basename(argv[0]);
+	gdb_port = 0;
+	guest_ncpus = 1;
+	memsize = 256 * MB;
+	mptgen = 1;
+	rtc_localtime = 1;
+	memflags = 0;
+
+	optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:";
+	while ((c = getopt(argc, argv, optstr)) != -1) {
+		switch (c) {
+		case 'a':
+			x2apic_mode = 0;
+			break;
+		case 'A':
+			acpi = 1;
+			break;
+		case 'b':
+			bvmcons = 1;
+			break;
+		case 'p':
+			if (pincpu_parse(optarg) != 0) {
+				errx(EX_USAGE, "invalid vcpu pinning "
+				    "configuration '%s'", optarg);
+			}
+			break;
+		case 'c':
+			guest_ncpus = atoi(optarg);
+			break;
+		case 'C':
+			memflags |= VM_MEM_F_INCORE;
+			break;
+		case 'g':
+			gdb_port = atoi(optarg);
+			break;
+		case 'l':
+			if (lpc_device_parse(optarg) != 0) {
+				errx(EX_USAGE, "invalid lpc device "
+				    "configuration '%s'", optarg);
+			}
+			break;
+		case 's':
+			if (pci_parse_slot(optarg) != 0)
+				exit(1);
+			break;
+		case 'S':
+			memflags |= VM_MEM_F_WIRED;
+			break;
+		case 'm':
+			error = vm_parse_memsize(optarg, &memsize);
+			if (error)
+				errx(EX_USAGE, "invalid memsize '%s'", optarg);
+			break;
+		case 'H':
+			guest_vmexit_on_hlt = 1;
+			break;
+		case 'I':
+			/*
+			 * The "-I" option was used to add an ioapic to the
+			 * virtual machine.
+			 *
+			 * An ioapic is now provided unconditionally for each
+			 * virtual machine and this option is now deprecated.
+			 */
+			break;
+		case 'P':
+			guest_vmexit_on_pause = 1;
+			break;
+		case 'e':
+			strictio = 1;
+			break;
+		case 'u':
+			rtc_localtime = 0;
+			break;
+		case 'U':
+			guest_uuid_str = optarg;
+			break;
+		case 'w':
+			strictmsr = 0;
+			break;
+		case 'W':
+			virtio_msix = 0;
+			break;
+		case 'x':
+			x2apic_mode = 1;
+			break;
+		case 'Y':
+			mptgen = 0;
+			break;
+		case 'h':
+			usage(0);	/* NOTREACHED: usage() calls exit() */
+		default:
+			usage(1);
+		}
+	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc != 1)
+		usage(1);
+
+	vmname = argv[0];
+	ctx = do_open(vmname);
+
+	if (guest_ncpus < 1) {
+		fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
+		exit(1);
+	}
+
+	max_vcpus = num_vcpus_allowed(ctx);
+	if (guest_ncpus > max_vcpus) {
+		fprintf(stderr, "%d vCPUs requested but only %d available\n",
+		    guest_ncpus, max_vcpus);
+		exit(1);
+	}
+
+	fbsdrun_set_capabilities(ctx, BSP);
+
+	vm_set_memflags(ctx, memflags);
+	err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
+	if (err) {
+		fprintf(stderr, "Unable to setup memory (%d)\n", errno);
+		exit(1);
+	}
+
+	error = init_msr();
+	if (error) {
+		fprintf(stderr, "init_msr error %d\n", error);
+		exit(1);
+	}
+
+	init_mem();
+	init_inout();
+	pci_irq_init(ctx);
+	ioapic_init(ctx);
+
+	rtc_init(ctx, rtc_localtime);
+	sci_init(ctx);
+
+	/* Exit if a device emulation finds an error in its initialization. */
+	if (init_pci(ctx) != 0)
+		exit(1);
+
+	if (gdb_port != 0)
+		init_dbgport(gdb_port);
+
+	if (bvmcons)
+		init_bvmcons();
+
+	if (lpc_bootrom()) {
+		if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) {
+			fprintf(stderr, "ROM boot failed: unrestricted guest "
+			    "capability not available\n");
+			exit(1);
+		}
+		error = vcpu_reset(ctx, BSP);
+		assert(error == 0);
+	}
+
+	/* The BSP is started below at whatever RIP it holds at this point. */
+	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
+	assert(error == 0);
+
+	/* Build the guest tables: MP table, SMBIOS and (with -A) ACPI. */
+	if (mptgen) {
+		error = mptable_build(ctx, guest_ncpus);
+		if (error)
+			exit(1);
+	}
+
+	error = smbios_build(ctx);
+	assert(error == 0);
+
+	if (acpi) {
+		error = acpi_build(ctx, guest_ncpus);
+		assert(error == 0);
+	}
+
+	if (lpc_bootrom())
+		fwctl_init();
+
+	/*
+	 * Change the proc title to include the VM name.
+	 */
+	setproctitle("%s", vmname);
+
+	/*
+	 * Add CPU 0
+	 */
+	fbsdrun_addcpu(ctx, BSP, BSP, rip);
+
+	/*
+	 * Head off to the main event dispatch loop
+	 */
+	mevent_dispatch();
+
+	exit(1);
+}


Property changes on: trunk/usr.sbin/bhyve/bhyverun.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/bhyverun.h
===================================================================
--- trunk/usr.sbin/bhyve/bhyverun.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/bhyverun.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,50 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/bhyverun.h 302705 2016-07-13 06:09:34Z ngie $
+ */
+
+#ifndef	_FBSDRUN_H_
+#define	_FBSDRUN_H_
+/* Return codes of the vmexit handlers, as switched on in vm_loop(). */
+#define	VMEXIT_CONTINUE		(0)
+#define	VMEXIT_ABORT		(-1)
+
+struct vmctx;
+extern int guest_ncpus;
+extern char *guest_uuid_str;
+extern char *vmname;
+
+void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len);
+
+void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu);
+void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip);
+int  fbsdrun_muxed(void);	/* NOTE(review): no definition in bhyverun.c */
+int  fbsdrun_vmexit_on_hlt(void);
+int  fbsdrun_vmexit_on_pause(void);
+int  fbsdrun_disable_x2apic(void);	/* NOTE(review): no definition in bhyverun.c */
+int  fbsdrun_virtio_msix(void);
+#endif


Property changes on: trunk/usr.sbin/bhyve/bhyverun.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/block_if.c
===================================================================
--- trunk/usr.sbin/bhyve/block_if.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/block_if.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,823 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013  Peter Grehan <grehan at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/block_if.c 284900 2015-06-28 03:22:26Z neel $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/block_if.c 284900 2015-06-28 03:22:26Z neel $");
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/disk.h>
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <machine/atomic.h>
+
+#include "bhyverun.h"
+#include "mevent.h"
+#include "block_if.h"
+
+/* Magic value kept in bc_magic; the public entry points assert on it. */
+#define BLOCKIF_SIG	0xb109b109
+
+/* Number of worker threads blockif_open() starts for each context. */
+#define BLOCKIF_NUMTHR	8
+/* Number of request elements in each context's pool (bc_reqs). */
+#define BLOCKIF_MAXREQ	(64 + BLOCKIF_NUMTHR)
+
+/* Operation carried by a queued request; dispatched on in blockif_proc(). */
+enum blockop {
+	BOP_READ,	/* preadv()/pread() from the backing fd */
+	BOP_WRITE,	/* pwritev()/pwrite() to the backing fd */
+	BOP_FLUSH,	/* DIOCGFLUSH ioctl on char devices, fsync() otherwise */
+	BOP_DELETE	/* DIOCGDELETE ioctl (char devices only) */
+};
+
+/* Life-cycle state of a request element (struct blockif_elem). */
+enum blockstat {
+	BST_FREE,	/* on bc_freeq, not associated with a request */
+	BST_BLOCK,	/* on bc_pendq, held back behind an earlier request */
+	BST_PEND,	/* on bc_pendq, eligible to be dequeued by a worker */
+	BST_BUSY,	/* on bc_busyq, being processed by thread be_tid */
+	BST_DONE	/* blockif_proc() finished; not yet returned to bc_freeq */
+};
+
+/* Bookkeeping wrapper that tracks one caller request through the queues. */
+struct blockif_elem {
+	TAILQ_ENTRY(blockif_elem) be_link;	/* linkage on free/pend/busy queue */
+	struct blockif_req  *be_req;		/* caller's request, NULL when free */
+	enum blockop	     be_op;
+	enum blockstat	     be_status;
+	pthread_t            be_tid;		/* worker that dequeued it */
+	/*
+	 * End offset of the request (br_offset plus the sum of the iovec
+	 * lengths), or OFF_MAX for operations without a range; set in
+	 * blockif_enqueue().
+	 */
+	off_t		     be_block;
+};
+
+/* Per-backing-store state created by blockif_open(). */
+struct blockif_ctxt {
+	int			bc_magic;	/* BLOCKIF_SIG while open, 0 after close */
+	int			bc_fd;		/* backing file/device descriptor */
+	int			bc_ischr;	/* backing store is a character device */
+	int			bc_isgeom;	/* DIOCGPROVIDERNAME succeeded at open */
+	int			bc_candelete;	/* "GEOM::candelete" attribute value */
+	int			bc_rdonly;	/* opened read-only */
+	off_t			bc_size;	/* size in bytes */
+	int			bc_sectsz;	/* logical sector size */
+	int			bc_psectsz;	/* physical sector (stripe) size */
+	int			bc_psectoff;	/* physical sector (stripe) offset */
+	int			bc_closing;	/* set by blockif_close() to stop workers */
+	pthread_t		bc_btid[BLOCKIF_NUMTHR];	/* worker threads */
+        pthread_mutex_t		bc_mtx;		/* protects the queues below */
+        pthread_cond_t		bc_cond;	/* signalled when work is queued */
+
+	/* Request elements and free/pending/busy queues */
+	TAILQ_HEAD(, blockif_elem) bc_freeq;       
+	TAILQ_HEAD(, blockif_elem) bc_pendq;
+	TAILQ_HEAD(, blockif_elem) bc_busyq;
+	struct blockif_elem	bc_reqs[BLOCKIF_MAXREQ];
+};
+
+/* Ensures blockif_init() runs once, on the first blockif_open() call. */
+static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;
+
+/*
+ * Hand-shake record used by blockif_cancel(): the canceller pushes one of
+ * these on the global list, signals the worker with SIGCONT, and sleeps on
+ * bse_cond until blockif_sigcont_handler() clears bse_pending.
+ */
+struct blockif_sig_elem {
+	pthread_mutex_t			bse_mtx;
+	pthread_cond_t			bse_cond;
+	int				bse_pending;	/* 1 until the handler has seen it */
+	struct blockif_sig_elem		*bse_next;
+};
+
+/* Head of the singly linked list above; updated with atomic_cmpset_ptr(). */
+static struct blockif_sig_elem *blockif_bse_head;
+
+/*
+ * Take a free element, attach 'breq'/'op' to it and append it to the
+ * pending queue.
+ *
+ * The caller must already have verified that bc_freeq is not empty (this
+ * function asserts it); blockif_request() does so with bc_mtx held.
+ *
+ * Returns non-zero when the element was queued as BST_PEND (ready for a
+ * worker), zero when it was queued as BST_BLOCK.
+ */
+static int
+blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
+		enum blockop op)
+{
+	struct blockif_elem *be, *tbe;
+	off_t off;
+	int i;
+
+	be = TAILQ_FIRST(&bc->bc_freeq);
+	assert(be != NULL);
+	assert(be->be_status == BST_FREE);
+	TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
+	be->be_req = breq;
+	be->be_op = op;
+	/* Compute the offset just past the end of this request. */
+	switch (op) {
+	case BOP_READ:
+	case BOP_WRITE:
+	case BOP_DELETE:
+		off = breq->br_offset;
+		for (i = 0; i < breq->br_iovcnt; i++)
+			off += breq->br_iov[i].iov_len;
+		break;
+	default:
+		/* No byte range (e.g. flush). */
+		off = OFF_MAX;
+	}
+	be->be_block = off;
+	/*
+	 * Look for an already queued or in-flight element that ends exactly
+	 * where this request starts; if there is one, this request is held
+	 * back (BST_BLOCK) until blockif_complete() releases it.
+	 */
+	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
+		if (tbe->be_block == breq->br_offset)
+			break;
+	}
+	if (tbe == NULL) {
+		TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
+			if (tbe->be_block == breq->br_offset)
+				break;
+		}
+	}
+	if (tbe == NULL)
+		be->be_status = BST_PEND;
+	else
+		be->be_status = BST_BLOCK;
+	TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
+	return (be->be_status == BST_PEND);
+}
+
+/*
+ * Hand the first runnable (BST_PEND) element of the pending queue to worker
+ * thread 't': the element is moved to the busy queue, marked BST_BUSY and
+ * stored through 'bep'.  Elements that are not runnable must be BST_BLOCK.
+ *
+ * Returns 1 when an element was handed out, 0 when nothing is runnable (in
+ * which case *bep is left untouched).
+ */
+static int
+blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
+{
+	struct blockif_elem *cand;
+
+	for (cand = TAILQ_FIRST(&bc->bc_pendq); cand != NULL;
+	    cand = TAILQ_NEXT(cand, be_link)) {
+		if (cand->be_status != BST_PEND) {
+			/* Anything else on this queue has to be blocked. */
+			assert(cand->be_status == BST_BLOCK);
+			continue;
+		}
+
+		TAILQ_REMOVE(&bc->bc_pendq, cand, be_link);
+		cand->be_status = BST_BUSY;
+		cand->be_tid = t;
+		TAILQ_INSERT_TAIL(&bc->bc_busyq, cand, be_link);
+		*bep = cand;
+		return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Retire element 'be': unlink it from whichever queue it is on, release
+ * requests that were waiting behind it, and return it to the free queue.
+ *
+ * Called with bc_mtx held, both by the worker after blockif_proc() and by
+ * blockif_cancel() for a request that is still pending.
+ */
+static void
+blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
+{
+	struct blockif_elem *tbe;
+
+	/* BUSY/DONE elements live on the busy queue, everything else on pendq. */
+	if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
+		TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
+	else
+		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
+	/*
+	 * Any pending request that starts exactly where this one ended is
+	 * made runnable (this is the counterpart of the BST_BLOCK decision
+	 * in blockif_enqueue()).
+	 */
+	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
+		if (tbe->be_req->br_offset == be->be_block)
+			tbe->be_status = BST_PEND;
+	}
+	be->be_tid = 0;
+	be->be_status = BST_FREE;
+	be->be_req = NULL;
+	TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
+}
+
+/*
+ * Perform the I/O described by element 'be' and report the result through
+ * the request's callback.
+ *
+ * 'buf' is an optional bounce buffer of MAXPHYS bytes (the worker allocates
+ * one only for GEOM-backed contexts).  It is ignored for requests with at
+ * most one iovec; otherwise reads and writes are done in chunks of up to
+ * MAXPHYS bytes through 'buf' and copied to/from the iovec array by hand.
+ * Without a bounce buffer a single preadv()/pwritev() is issued.
+ *
+ * br_resid is decremented by the number of bytes transferred.  On return the
+ * element is marked BST_DONE and br_callback has been invoked with 0 or an
+ * errno value.  The worker calls this with bc_mtx dropped.
+ *
+ * NOTE(review): in the bounce-buffer paths only a negative return from
+ * pread()/pwrite() is treated as failure; a short transfer is accounted as
+ * if all 'len' bytes had been moved.
+ */
+static void
+blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
+{
+	struct blockif_req *br;
+	off_t arg[2];
+	ssize_t clen, len, off, boff, voff;
+	int i, err;
+
+	br = be->be_req;
+	/* A single iovec never needs bouncing. */
+	if (br->br_iovcnt <= 1)
+		buf = NULL;
+	err = 0;
+	switch (be->be_op) {
+	case BOP_READ:
+		if (buf == NULL) {
+			if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
+				   br->br_offset)) < 0)
+				err = errno;
+			else
+				br->br_resid -= len;
+			break;
+		}
+		/*
+		 * i/voff: current iovec and offset within it.
+		 * off: bytes of the request already handled.
+		 */
+		i = 0;
+		off = voff = 0;
+		while (br->br_resid > 0) {
+			len = MIN(br->br_resid, MAXPHYS);
+			if (pread(bc->bc_fd, buf, len, br->br_offset +
+			    off) < 0) {
+				err = errno;
+				break;
+			}
+			/* Scatter the chunk just read across the iovecs. */
+			boff = 0;
+			do {
+				clen = MIN(len - boff, br->br_iov[i].iov_len -
+				    voff);
+				memcpy(br->br_iov[i].iov_base + voff,
+				    buf + boff, clen);
+				if (clen < br->br_iov[i].iov_len - voff)
+					voff += clen;
+				else {
+					i++;
+					voff = 0;
+				}
+				boff += clen;
+			} while (boff < len);
+			off += len;
+			br->br_resid -= len;
+		}
+		break;
+	case BOP_WRITE:
+		if (bc->bc_rdonly) {
+			err = EROFS;
+			break;
+		}
+		if (buf == NULL) {
+			if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
+				    br->br_offset)) < 0)
+				err = errno;
+			else
+				br->br_resid -= len;
+			break;
+		}
+		i = 0;
+		off = voff = 0;
+		while (br->br_resid > 0) {
+			len = MIN(br->br_resid, MAXPHYS);
+			/* Gather the next chunk from the iovecs, then write it. */
+			boff = 0;
+			do {
+				clen = MIN(len - boff, br->br_iov[i].iov_len -
+				    voff);
+				memcpy(buf + boff,
+				    br->br_iov[i].iov_base + voff, clen);
+				if (clen < br->br_iov[i].iov_len - voff)
+					voff += clen;
+				else {
+					i++;
+					voff = 0;
+				}
+				boff += clen;
+			} while (boff < len);
+			if (pwrite(bc->bc_fd, buf, len, br->br_offset +
+			    off) < 0) {
+				err = errno;
+				break;
+			}
+			off += len;
+			br->br_resid -= len;
+		}
+		break;
+	case BOP_FLUSH:
+		if (bc->bc_ischr) {
+			if (ioctl(bc->bc_fd, DIOCGFLUSH))
+				err = errno;
+		} else if (fsync(bc->bc_fd))
+			err = errno;
+		break;
+	case BOP_DELETE:
+		if (!bc->bc_candelete)
+			err = EOPNOTSUPP;
+		else if (bc->bc_rdonly)
+			err = EROFS;
+		else if (bc->bc_ischr) {
+			/* DIOCGDELETE takes { offset, length }. */
+			arg[0] = br->br_offset;
+			arg[1] = br->br_resid;
+			if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
+				err = errno;
+			else
+				br->br_resid = 0;
+		} else
+			err = EOPNOTSUPP;
+		break;
+	default:
+		err = EINVAL;
+		break;
+	}
+
+	/* Written without bc_mtx; blockif_cancel() polls this field. */
+	be->be_status = BST_DONE;
+
+	(*br->br_callback)(br, err);
+}
+
+/*
+ * Worker thread body.  'arg' is the owning struct blockif_ctxt.
+ *
+ * The thread repeatedly pulls runnable requests off the pending queue,
+ * processes each one with bc_mtx released, and retires it with the mutex
+ * re-acquired.  When nothing is runnable it exits if bc_closing is set and
+ * otherwise sleeps on bc_cond.
+ */
+static void *
+blockif_thr(void *arg)
+{
+	struct blockif_ctxt *bc = arg;
+	struct blockif_elem *work;
+	pthread_t self;
+	uint8_t *bounce;
+
+	/* Only GEOM-backed contexts get a bounce buffer. */
+	bounce = bc->bc_isgeom ? malloc(MAXPHYS) : NULL;
+	self = pthread_self();
+
+	pthread_mutex_lock(&bc->bc_mtx);
+	while (1) {
+		if (blockif_dequeue(bc, self, &work)) {
+			/* Do the actual I/O without holding the queue lock. */
+			pthread_mutex_unlock(&bc->bc_mtx);
+			blockif_proc(bc, work, bounce);
+			pthread_mutex_lock(&bc->bc_mtx);
+			blockif_complete(bc, work);
+			continue;
+		}
+
+		/* Check ctxt status here to see if exit requested */
+		if (bc->bc_closing)
+			break;
+		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
+	}
+	pthread_mutex_unlock(&bc->bc_mtx);
+
+	/* free(NULL) is a no-op, so the non-GEOM case needs no guard. */
+	free(bounce);
+	pthread_exit(NULL);
+	return (NULL);
+}
+
+/*
+ * mevent callback registered for SIGCONT by blockif_init().
+ *
+ * Drains the global blockif_bse_head list: each record is popped with a
+ * compare-and-set on the list head, its bse_pending flag is cleared under
+ * bse_mtx, and the thread sleeping in blockif_cancel() is woken through
+ * bse_cond.  None of the three arguments is used.
+ */
+static void
+blockif_sigcont_handler(int signal, enum ev_type type, void *arg)
+{
+	struct blockif_sig_elem *bse;
+
+	for (;;) {
+		/*
+		 * Process the entire list even if not intended for
+		 * this thread.
+		 */
+		do {
+			bse = blockif_bse_head;
+			if (bse == NULL)
+				return;
+			/* Retry if the head changed between the read and the swap. */
+		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
+					    (uintptr_t)bse,
+					    (uintptr_t)bse->bse_next));
+
+		pthread_mutex_lock(&bse->bse_mtx);
+		bse->bse_pending = 0;
+		pthread_cond_signal(&bse->bse_cond);
+		pthread_mutex_unlock(&bse->bse_mtx);
+	}
+}
+
+/*
+ * One-time setup, run through pthread_once() from blockif_open(): registers
+ * blockif_sigcont_handler() with mevent for SIGCONT and sets the process
+ * disposition of SIGCONT to SIG_IGN.
+ * NOTE(review): presumably the signal is only meant to interrupt a worker's
+ * blocking system call while the event itself is picked up through mevent;
+ * confirm against mevent.c.
+ */
+static void
+blockif_init(void)
+{
+	mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL);
+	(void) signal(SIGCONT, SIG_IGN);
+}
+
+/*
+ * Open a backing file or character device and start its worker threads.
+ *
+ * 'optstr' is a comma separated list whose first element is the pathname;
+ * the remaining elements may be "nocache" (O_DIRECT), "sync" or "direct"
+ * (O_SYNC), "ro" (read-only) and "sectorsize=<logical>[/<physical>]".
+ * 'ident' is only used to name the worker threads ("blk-<ident>-<n>").
+ *
+ * If a read/write open fails the open is retried read-only and the context
+ * is marked read-only.  For character devices the media size, sector size,
+ * stripe geometry, delete capability and GEOM provider status are queried
+ * with ioctls; for regular files the size and block size come from fstat().
+ *
+ * Returns the new context, or NULL after printing a diagnostic to stderr.
+ * The private copy of 'optstr' is released on every path.
+ */
+struct blockif_ctxt *
+blockif_open(const char *optstr, const char *ident)
+{
+	char tname[MAXCOMLEN + 1];
+	char name[MAXPATHLEN];
+	char *nopt, *xopts, *cp;
+	struct blockif_ctxt *bc;
+	struct stat sbuf;
+	struct diocgattr_arg arg;
+	off_t size, psectsz, psectoff;
+	int extra, fd, i, sectsz;
+	int nocache, sync, ro, candelete, geom, ssopt, pssopt;
+
+	pthread_once(&blockif_once, blockif_init);
+
+	fd = -1;
+	ssopt = 0;
+	pssopt = 0;
+	nocache = 0;
+	sync = 0;
+	ro = 0;
+
+	/*
+	 * The first element in the optstring is always a pathname.
+	 * Optional elements follow
+	 */
+	nopt = xopts = strdup(optstr);
+	if (nopt == NULL) {
+		perror("strdup");
+		goto err;
+	}
+	while (xopts != NULL) {
+		cp = strsep(&xopts, ",");
+		if (cp == nopt)		/* file or device pathname */
+			continue;
+		else if (!strcmp(cp, "nocache"))
+			nocache = 1;
+		else if (!strcmp(cp, "sync") || !strcmp(cp, "direct"))
+			sync = 1;
+		else if (!strcmp(cp, "ro"))
+			ro = 1;
+		else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2)
+			;
+		else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1)
+			pssopt = ssopt;
+		else {
+			fprintf(stderr, "Invalid device option \"%s\"\n", cp);
+			goto err;
+		}
+	}
+
+	extra = 0;
+	if (nocache)
+		extra |= O_DIRECT;
+	if (sync)
+		extra |= O_SYNC;
+
+	/* strsep() NUL-terminated the pathname in place, so nopt is the path. */
+	fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra);
+	if (fd < 0 && !ro) {
+		/* The read/write open failed: fall back to a read-only open */
+		fd = open(nopt, O_RDONLY | extra);
+		ro = 1;
+	}
+
+	if (fd < 0) {
+		perror("Could not open backing file");
+		goto err;
+	}
+
+	if (fstat(fd, &sbuf) < 0) {
+		perror("Could not stat backing file");
+		goto err;
+	}
+
+	/*
+	 * Deal with raw devices
+	 */
+	size = sbuf.st_size;
+	sectsz = DEV_BSIZE;
+	psectsz = psectoff = 0;
+	candelete = geom = 0;
+	if (S_ISCHR(sbuf.st_mode)) {
+		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
+		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
+			perror("Could not fetch dev blk/sector size");
+			goto err;
+		}
+		assert(size != 0);
+		assert(sectsz != 0);
+		if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0)
+			ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff);
+		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
+		arg.len = sizeof(arg.value.i);
+		if (ioctl(fd, DIOCGATTR, &arg) == 0)
+			candelete = arg.value.i;
+		/* Only the success of this ioctl matters; 'name' is not used. */
+		if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
+			geom = 1;
+	} else
+		psectsz = sbuf.st_blksize;
+
+	if (ssopt != 0) {
+		if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 ||
+		    ssopt > pssopt) {
+			fprintf(stderr, "Invalid sector size %d/%d\n",
+			    ssopt, pssopt);
+			goto err;
+		}
+
+		/*
+		 * Some backend drivers (e.g. cd0, ada0) require that the I/O
+		 * size be a multiple of the device's sector size.
+		 *
+		 * Validate that the emulated sector size complies with this
+		 * requirement.
+		 */
+		if (S_ISCHR(sbuf.st_mode)) {
+			if (ssopt < sectsz || (ssopt % sectsz) != 0) {
+				fprintf(stderr, "Sector size %d incompatible "
+				    "with underlying device sector size %d\n",
+				    ssopt, sectsz);
+				goto err;
+			}
+		}
+
+		sectsz = ssopt;
+		psectsz = pssopt;
+		psectoff = 0;
+	}
+
+	bc = calloc(1, sizeof(struct blockif_ctxt));
+	if (bc == NULL) {
+		perror("calloc");
+		goto err;
+	}
+
+	bc->bc_magic = BLOCKIF_SIG;
+	bc->bc_fd = fd;
+	bc->bc_ischr = S_ISCHR(sbuf.st_mode);
+	bc->bc_isgeom = geom;
+	bc->bc_candelete = candelete;
+	bc->bc_rdonly = ro;
+	bc->bc_size = size;
+	bc->bc_sectsz = sectsz;
+	bc->bc_psectsz = psectsz;
+	bc->bc_psectoff = psectoff;
+	pthread_mutex_init(&bc->bc_mtx, NULL);
+	pthread_cond_init(&bc->bc_cond, NULL);
+	TAILQ_INIT(&bc->bc_freeq);
+	TAILQ_INIT(&bc->bc_pendq);
+	TAILQ_INIT(&bc->bc_busyq);
+	for (i = 0; i < BLOCKIF_MAXREQ; i++) {
+		bc->bc_reqs[i].be_status = BST_FREE;
+		TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
+	}
+
+	for (i = 0; i < BLOCKIF_NUMTHR; i++) {
+		pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
+		snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
+		pthread_set_name_np(bc->bc_btid[i], tname);
+	}
+
+	/* The option string copy is no longer needed once the fd is open. */
+	free(nopt);
+	return (bc);
+err:
+	if (fd >= 0)
+		close(fd);
+	free(nopt);
+	return (NULL);
+}
+
+/*
+ * Common submission path for read/write/flush/delete.
+ *
+ * Under bc_mtx: if a free element is available the request is enqueued and,
+ * when it is immediately runnable, one worker is woken.  Returns 0 on
+ * success or E2BIG when the caller has exceeded the queue limit.
+ */
+static int
+blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
+		enum blockop op)
+{
+	int status;
+
+	pthread_mutex_lock(&bc->bc_mtx);
+	if (TAILQ_EMPTY(&bc->bc_freeq)) {
+		/*
+		 * Callers are not allowed to enqueue more than
+		 * the specified blockif queue limit. Return an
+		 * error to indicate that the queue length has been
+		 * exceeded.
+		 */
+		status = E2BIG;
+	} else {
+		status = 0;
+		/*
+		 * Enqueue and inform the block i/o thread
+		 * that there is work available
+		 */
+		if (blockif_enqueue(bc, breq, op))
+			pthread_cond_signal(&bc->bc_cond);
+	}
+	pthread_mutex_unlock(&bc->bc_mtx);
+
+	return (status);
+}
+
+/*
+ * Queue 'breq' as a read.  Returns 0 if queued, E2BIG if the request pool
+ * is exhausted; completion is reported through breq->br_callback.
+ */
+int
+blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
+{
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (blockif_request(bc, breq, BOP_READ));
+}
+
+/*
+ * Queue 'breq' as a write.  Returns 0 if queued, E2BIG if the request pool
+ * is exhausted; a read-only context completes the request with EROFS.
+ */
+int
+blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
+{
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (blockif_request(bc, breq, BOP_WRITE));
+}
+
+/*
+ * Queue 'breq' as a flush of the backing store.  Returns 0 if queued,
+ * E2BIG if the request pool is exhausted.
+ */
+int
+blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
+{
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (blockif_request(bc, breq, BOP_FLUSH));
+}
+
+/*
+ * Queue 'breq' as a delete of br_resid bytes starting at br_offset.
+ * Returns 0 if queued, E2BIG if the request pool is exhausted; whether the
+ * backing store supports deletion is only checked when the request runs.
+ */
+int
+blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
+{
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (blockif_request(bc, breq, BOP_DELETE));
+}
+
+/*
+ * Try to cancel a previously submitted request.
+ *
+ * Returns:
+ *   0      the request was still on the pending queue and has been removed;
+ *          its callback is not invoked.
+ *   EINVAL the request is neither pending nor in flight.
+ *   EBUSY  the request was in flight; the worker processing it has been
+ *          interrupted with SIGCONT and the callback may or may not have
+ *          run already.
+ *
+ * bc_mtx is held for the whole call, including while waiting for the
+ * SIGCONT handler to acknowledge each interruption attempt.
+ */
+int
+blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
+{
+	struct blockif_elem *be;
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+
+	pthread_mutex_lock(&bc->bc_mtx);
+	/*
+	 * Check pending requests.
+	 */
+	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
+		if (be->be_req == breq)
+			break;
+	}
+	if (be != NULL) {
+		/*
+		 * Found it.
+		 */
+		blockif_complete(bc, be);
+		pthread_mutex_unlock(&bc->bc_mtx);
+
+		return (0);
+	}
+
+	/*
+	 * Check in-flight requests.
+	 */
+	TAILQ_FOREACH(be, &bc->bc_busyq, be_link) {
+		if (be->be_req == breq)
+			break;
+	}
+	if (be == NULL) {
+		/*
+		 * Didn't find it.
+		 */
+		pthread_mutex_unlock(&bc->bc_mtx);
+		return (EINVAL);
+	}
+
+	/*
+	 * Interrupt the processing thread to force it return
+	 * prematurely via its normal callback path.
+	 */
+	while (be->be_status == BST_BUSY) {
+		struct blockif_sig_elem bse, *old_head;
+
+		pthread_mutex_init(&bse.bse_mtx, NULL);
+		pthread_cond_init(&bse.bse_cond, NULL);
+
+		bse.bse_pending = 1;
+
+		/* Push the on-stack record onto the lock-free global list. */
+		do {
+			old_head = blockif_bse_head;
+			bse.bse_next = old_head;
+		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
+					    (uintptr_t)old_head,
+					    (uintptr_t)&bse));
+
+		pthread_kill(be->be_tid, SIGCONT);
+
+		/*
+		 * Wait until the SIGCONT handler has popped the record and
+		 * cleared bse_pending; only then may the stack storage be
+		 * reused or released.
+		 */
+		pthread_mutex_lock(&bse.bse_mtx);
+		while (bse.bse_pending)
+			pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx);
+		pthread_mutex_unlock(&bse.bse_mtx);
+
+		/*
+		 * The handler no longer references the record, so release
+		 * whatever the init calls above allocated.
+		 */
+		pthread_cond_destroy(&bse.bse_cond);
+		pthread_mutex_destroy(&bse.bse_mtx);
+	}
+
+	pthread_mutex_unlock(&bc->bc_mtx);
+
+	/*
+	 * The processing thread has been interrupted.  Since it's not
+	 * clear if the callback has been invoked yet, return EBUSY.
+	 */
+	return (EBUSY);
+}
+
+/*
+ * Shut down a context created by blockif_open().
+ *
+ * Sets bc_closing, wakes all workers and joins them (each worker finishes
+ * the runnable requests it can dequeue before it exits), then releases the
+ * synchronization objects, the descriptor and the context itself.  'bc'
+ * must not be used afterwards.  Always returns 0.
+ */
+int
+blockif_close(struct blockif_ctxt *bc)
+{
+	int i;
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+
+	/*
+	 * Stop the block i/o thread
+	 */
+	pthread_mutex_lock(&bc->bc_mtx);
+	bc->bc_closing = 1;
+	pthread_mutex_unlock(&bc->bc_mtx);
+	pthread_cond_broadcast(&bc->bc_cond);
+	for (i = 0; i < BLOCKIF_NUMTHR; i++)
+		pthread_join(bc->bc_btid[i], NULL);
+
+	/* XXX Cancel queued i/o's ??? */
+
+	/*
+	 * Release resources.  All workers have been joined, so nothing can
+	 * still be using the mutex or the condition variable.
+	 */
+	pthread_cond_destroy(&bc->bc_cond);
+	pthread_mutex_destroy(&bc->bc_mtx);
+	bc->bc_magic = 0;
+	close(bc->bc_fd);
+	free(bc);
+
+	return (0);
+}
+
+/*
+ * Return virtual C/H/S values for a given block. Use the algorithm
+ * outlined in the VHD specification to calculate values.
+ *
+ * The sector count is bc_size / bc_sectsz, clamped to 65535 * 16 * 255.
+ * Results are stored through 'c' (cylinders), 'h' (heads) and 's' (sectors
+ * per track).
+ */
+void
+blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
+{
+	off_t sectors;		/* total sectors of the block dev */
+	off_t hcyl;		/* cylinders times heads */
+	uint16_t secpt;		/* sectors per track */
+	uint8_t heads;
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+
+	sectors = bc->bc_size / bc->bc_sectsz;
+
+	/* Clamp the size to the largest possible with CHS */
+	if (sectors > 65535UL*16*255)
+		sectors = 65535UL*16*255;
+
+	if (sectors >= 65536UL*16*63) {
+		/* Large disk: fixed 16 heads, 255 sectors per track. */
+		secpt = 255;
+		heads = 16;
+		hcyl = sectors / secpt;
+	} else {
+		/* Start with 17 sectors/track and widen until it fits. */
+		secpt = 17;
+		hcyl = sectors / secpt;
+		/*
+		 * The quotient can exceed 255 and is truncated by the uint8_t
+		 * store; in that case hcyl is still >= heads * 1024, so the
+		 * check below replaces 'heads' with 16 anyway.
+		 */
+		heads = (hcyl + 1023) / 1024;
+
+		if (heads < 4)
+			heads = 4;
+
+		if (hcyl >= (heads * 1024) || heads > 16) {
+			secpt = 31;
+			heads = 16;
+			hcyl = sectors / secpt;
+		}
+		if (hcyl >= (heads * 1024)) {
+			secpt = 63;
+			heads = 16;
+			hcyl = sectors / secpt;
+		}
+	}
+
+	*c = hcyl / heads;
+	*h = heads;
+	*s = secpt;
+}
+
+/*
+ * Accessors
+ */
+/* Return the size of the backing store in bytes, as determined at open. */
+off_t
+blockif_size(struct blockif_ctxt *bc)
+{
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (bc->bc_size);
+}
+
+/* Return the logical sector size chosen at open. */
+int
+blockif_sectsz(struct blockif_ctxt *bc)
+{
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (bc->bc_sectsz);
+}
+
+/* Store the physical sector size and offset chosen at open in *size, *off. */
+void
+blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
+{
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	*size = bc->bc_psectsz;
+	*off = bc->bc_psectoff;
+}
+
+/*
+ * Return the queue depth advertised to callers: one less than the size of
+ * the request element pool.  The value does not depend on 'bc'.
+ */
+int
+blockif_queuesz(struct blockif_ctxt *bc)
+{
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (BLOCKIF_MAXREQ - 1);
+}
+
+/*
+ * Return non-zero if the backing store was opened read-only, either on
+ * request or because the read/write open failed.
+ */
+int
+blockif_is_ro(struct blockif_ctxt *bc)
+{
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (bc->bc_rdonly);
+}
+
+/*
+ * Return the "GEOM::candelete" attribute read at open; it stays 0 for
+ * anything that is not a character device.
+ */
+int
+blockif_candelete(struct blockif_ctxt *bc)
+{
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (bc->bc_candelete);
+}


Property changes on: trunk/usr.sbin/bhyve/block_if.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/block_if.h
===================================================================
--- trunk/usr.sbin/bhyve/block_if.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/block_if.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,71 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013  Peter Grehan <grehan at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/block_if.h 282307 2015-05-01 17:30:59Z mav $
+ */
+
+/*
+ * The block API to be used by bhyve block-device emulations. The routines
+ * are thread safe, with no assumptions about the context of the completion
+ * callback - it may occur in the caller's context, or asynchronously in
+ * another thread.
+ */
+
+#ifndef _BLOCK_IF_H_
+#define _BLOCK_IF_H_
+
+#include <sys/uio.h>
+#include <sys/unistd.h>
+
+#define BLOCKIF_IOV_MAX		33	/* not practical to be IOV_MAX */
+
+/* One I/O request; filled in by the caller and handed to blockif_*(). */
+struct blockif_req {
+	struct iovec	br_iov[BLOCKIF_IOV_MAX];	/* data buffers */
+	int		br_iovcnt;	/* number of br_iov entries in use */
+	off_t		br_offset;	/* starting byte offset in the backing store */
+	/*
+	 * Bytes still to transfer; block_if.c decrements it as data moves
+	 * and zeroes it after a successful delete.
+	 */
+	ssize_t		br_resid;
+	/* Invoked once processing ends, with 0 or an errno value. */
+	void		(*br_callback)(struct blockif_req *req, int err);
+	/* Not touched by block_if.c; presumably a caller cookie -- confirm. */
+	void		*br_param;
+};
+
+struct blockif_ctxt;
+struct blockif_ctxt *blockif_open(const char *optstr, const char *ident);
+off_t	blockif_size(struct blockif_ctxt *bc);
+void	blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h,
+    uint8_t *s);
+int	blockif_sectsz(struct blockif_ctxt *bc);
+void	blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off);
+int	blockif_queuesz(struct blockif_ctxt *bc);
+int	blockif_is_ro(struct blockif_ctxt *bc);
+int	blockif_candelete(struct blockif_ctxt *bc);
+/* The submission calls return 0 when queued or E2BIG when the queue is full. */
+int	blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq);
+int	blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq);
+int	blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq);
+int	blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq);
+/* Returns 0 (removed while pending), EINVAL (unknown) or EBUSY (in flight). */
+int	blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
+int	blockif_close(struct blockif_ctxt *bc);
+
+#endif /* _BLOCK_IF_H_ */


Property changes on: trunk/usr.sbin/bhyve/block_if.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/bootrom.c
===================================================================
--- trunk/usr.sbin/bhyve/bootrom.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/bootrom.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,112 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2015 Neel Natu <neel at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/bootrom.c 295124 2016-02-01 14:56:11Z grehan $");
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <machine/vmm.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+
+#include <vmmapi.h>
+#include "bhyverun.h"
+#include "bootrom.h"
+
+#define	MAX_BOOTROM_SIZE	(16 * 1024 * 1024)	/* 16 MB */
+
+/*
+ * Load 'romfile' into a "bootrom" device memory segment and map it into the
+ * guest so that it ends at the 4GB boundary.
+ *
+ * The file must be between one page and MAX_BOOTROM_SIZE bytes long and a
+ * multiple of the page size.  It is mapped read/execute for the guest and
+ * filled one page at a time from the file.
+ *
+ * Returns 0 on success and -1 on failure; the file descriptor is closed on
+ * every path.
+ */
+int
+bootrom_init(struct vmctx *ctx, const char *romfile)
+{
+	struct stat sbuf;
+	vm_paddr_t gpa;
+	ssize_t rlen;
+	char *ptr;
+	int fd, i, rv, prot;
+
+	rv = -1;
+	fd = open(romfile, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "Error opening bootrom \"%s\": %s\n",
+		    romfile, strerror(errno));
+		goto done;
+	}
+
+	if (fstat(fd, &sbuf) < 0) {
+		fprintf(stderr, "Could not fstat bootrom file \"%s\": %s\n",
+		    romfile, strerror(errno));
+		goto done;
+	}
+
+	/*
+	 * Limit bootrom size to 16MB so it doesn't encroach into reserved
+	 * MMIO space (e.g. APIC, HPET, MSI).
+	 */
+	if (sbuf.st_size > MAX_BOOTROM_SIZE || sbuf.st_size < PAGE_SIZE) {
+		/* st_size is an off_t; cast so the argument matches %lld. */
+		fprintf(stderr, "Invalid bootrom size %lld\n",
+		    (long long)sbuf.st_size);
+		goto done;
+	}
+
+	if (sbuf.st_size & PAGE_MASK) {
+		fprintf(stderr, "Bootrom size %lld is not a multiple of the "
+		    "page size\n", (long long)sbuf.st_size);
+		goto done;
+	}
+
+	ptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", sbuf.st_size);
+	if (ptr == MAP_FAILED)
+		goto done;
+
+	/* Map the bootrom into the guest address space */
+	prot = PROT_READ | PROT_EXEC;
+	gpa = (1ULL << 32) - sbuf.st_size;
+	if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, 0, sbuf.st_size, prot) != 0)
+		goto done;
+
+	/* Read 'romfile' into the guest address space */
+	for (i = 0; i < sbuf.st_size / PAGE_SIZE; i++) {
+		rlen = read(fd, ptr + i * PAGE_SIZE, PAGE_SIZE);
+		if (rlen != PAGE_SIZE) {
+			/* rlen is a ssize_t, hence %zd. */
+			fprintf(stderr, "Incomplete read of page %d of bootrom "
+			    "file %s: %zd bytes\n", i, romfile, rlen);
+			goto done;
+		}
+	}
+	rv = 0;
+done:
+	if (fd >= 0)
+		close(fd);
+	return (rv);
+}


Property changes on: trunk/usr.sbin/bhyve/bootrom.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/bootrom.h
===================================================================
--- trunk/usr.sbin/bhyve/bootrom.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/bootrom.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,39 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2015 Neel Natu <neel at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/bootrom.h 295124 2016-02-01 14:56:11Z grehan $
+ */
+
+#ifndef	_BOOTROM_H_
+#define	_BOOTROM_H_
+
+#include <stdbool.h>
+
+struct vmctx;
+
+/*
+ * Load the boot ROM image 'romfile' into the guest.  Returns 0 on success
+ * and -1 on failure (diagnostics for file problems go to stderr).
+ */
+int	bootrom_init(struct vmctx *ctx, const char *romfile);
+
+#endif	/* _BOOTROM_H_ */


Property changes on: trunk/usr.sbin/bhyve/bootrom.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/consport.c
===================================================================
--- trunk/usr.sbin/bhyve/consport.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/consport.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,154 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/consport.c 267928 2014-06-26 19:19:06Z jhb $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/consport.c 267928 2014-06-26 19:19:06Z jhb $");
+
+#include <sys/types.h>
+#include <sys/select.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <termios.h>
+#include <unistd.h>
+#include <stdbool.h>
+
+#include "inout.h"
+#include "pci_lpc.h"
+
+/* I/O port of the bvm console device. */
+#define	BVM_CONSOLE_PORT	0x220
+/* Signature ("bv") returned for 2-byte reads of the console port. */
+#define	BVM_CONS_SIG		('b' << 8 | 'v')
+
+/* tio_orig: settings saved by ttyopen(); tio_new: the raw-mode settings. */
+static struct termios tio_orig, tio_new;
+
+/* atexit() hook: put stdin back to the settings saved by ttyopen(). */
+static void
+ttyclose(void)
+{
+	tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
+}
+
+/*
+ * Switch stdin to raw mode and arrange for the original settings to be
+ * restored at exit.
+ *
+ * cfmakeraw() modifies the termios structure it is given rather than
+ * building one from scratch, so tio_new has to start out as a copy of the
+ * current settings.  Otherwise every field cfmakeraw() leaves alone (for
+ * example the line speeds) would be pushed to the terminal as zero.
+ */
+static void
+ttyopen(void)
+{
+	tcgetattr(STDIN_FILENO, &tio_orig);
+
+	tio_new = tio_orig;
+	cfmakeraw(&tio_new);
+	tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);
+
+	atexit(ttyclose);
+}
+
+/*
+ * Poll stdin without blocking (zero timeout).  Returns true only when
+ * select() reports the descriptor readable; a timeout or a select() error
+ * both yield false.
+ */
+static bool
+tty_char_available(void)
+{
+	struct timeval no_wait;
+	fd_set readable;
+
+	no_wait.tv_sec = 0;
+	no_wait.tv_usec = 0;
+	FD_ZERO(&readable);
+	FD_SET(STDIN_FILENO, &readable);
+
+	return (select(STDIN_FILENO + 1, &readable, NULL, NULL, &no_wait) > 0);
+}
+
+/*
+ * Non-blocking single character read from stdin.
+ *
+ * Returns the character as a value in 0..255, or -1 when no input is
+ * available or the read does not deliver exactly one byte.  select() also
+ * reports end-of-file as readable, so the read() result must be checked
+ * before the buffer is used.
+ */
+static int
+ttyread(void)
+{
+	unsigned char rb;
+
+	if (!tty_char_available())
+		return (-1);
+	if (read(STDIN_FILENO, &rb, 1) != 1)
+		return (-1);
+	return (rb);
+}
+
+/* Write one byte to stdout; the result of write() is deliberately ignored. */
+static void
+ttywrite(unsigned char wb)
+{
+	(void) write(STDOUT_FILENO, &wb, 1);
+}
+
+/*
+ * I/O port handler for the bvm console.
+ *
+ *  - 2-byte read: returns the BVM_CONS_SIG signature.
+ *  - 1-byte read: returns 0xff.
+ *  - 4-byte read: returns the next input character, or -1 if none.
+ *  - 4-byte write: outputs the low byte of *eax.
+ *  - any other access: fails with -1.
+ *
+ * The terminal is switched to raw mode on the first 4-byte access.
+ * 'ctx', 'vcpu', 'port' and 'arg' are not used.
+ */
+static int
+console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		uint32_t *eax, void *arg)
+{
+	static int tty_ready;
+
+	if (in) {
+		switch (bytes) {
+		case 2:
+			*eax = BVM_CONS_SIG;
+			return (0);
+		case 1:
+			/*
+			 * Guests might probe this port to look for old ISA
+			 * devices using single-byte reads.  Return 0xff for
+			 * those.
+			 */
+			*eax = 0xff;
+			return (0);
+		default:
+			break;
+		}
+	}
+
+	if (bytes != 4)
+		return (-1);
+
+	if (tty_ready == 0) {
+		ttyopen();
+		tty_ready = 1;
+	}
+
+	if (!in) {
+		ttywrite(*eax);
+		return (0);
+	}
+
+	*eax = ttyread();
+	return (0);
+}
+
+/*
+ * NOTE(review): SYSRES_IO comes from a project header that is not visible
+ * here; presumably it reserves 4 I/O ports starting at BVM_CONSOLE_PORT.
+ */
+SYSRES_IO(BVM_CONSOLE_PORT, 4);
+
+/*
+ * Registration record for the console port.  The initializer is positional;
+ * struct inout_port is declared in inout.h, so the field meanings noted
+ * below are assumptions to confirm against that header.
+ */
+static struct inout_port consport = {
+	"bvmcons",		/* presumably the device name */
+	BVM_CONSOLE_PORT,	/* presumably the base port */
+	1,			/* presumably the number of ports */
+	IOPORT_F_INOUT,		/* presumably: handle both reads and writes */
+	console_handler
+};
+
+/* Register the bvm console port handler with the in/out emulation layer. */
+void
+init_bvmcons(void)
+{
+
+	register_inout(&consport);
+}


Property changes on: trunk/usr.sbin/bhyve/consport.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/dbgport.c
===================================================================
--- trunk/usr.sbin/bhyve/dbgport.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/dbgport.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,160 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/dbgport.c 309401 2016-12-02 08:21:25Z julian $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/dbgport.c 309401 2016-12-02 08:21:25Z julian $");
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/uio.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "inout.h"
+#include "dbgport.h"
+#include "pci_lpc.h"
+
+#define	BVM_DBG_PORT	0x224
+#define	BVM_DBG_SIG	('B' << 8 | 'V')
+
+static int listen_fd, conn_fd;
+
+static struct sockaddr_in sin;
+
+/*
+ * I/O port handler for the debug port, which relays single bytes between
+ * the guest and a TCP client connected to listen_fd.
+ *
+ *  - 2-byte read:  returns the BVM_DBG_SIG signature.
+ *  - 4-byte read:  returns the next byte from the client (0..255), or -1
+ *                  (0xffffffff) when the non-blocking socket has no data.
+ *  - 4-byte write: sends the low byte of *eax to the client.
+ *
+ * Any other access size fails with -1.  While no client is connected the
+ * handler blocks in accept4() until one arrives; if a read or write on
+ * the connection fails, the connection is closed and the handler waits
+ * for a new client before retrying the same operation.
+ */
+static int
+dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+	    uint32_t *eax, void *arg)
+{
+	int nwritten, nread, printonce;
+	int on = 1;
+	char ch;
+
+	if (bytes == 2 && in) {
+		*eax = BVM_DBG_SIG;
+		return (0);
+	}
+
+	if (bytes != 4)
+		return (-1);
+
+again:
+	printonce = 0;
+	while (conn_fd < 0) {
+		if (!printonce) {
+			printf("Waiting for connection from gdb\r\n");
+			printonce = 1;
+		}
+		conn_fd = accept4(listen_fd, NULL, NULL, SOCK_NONBLOCK);
+		if (conn_fd >= 0) {
+			/* Avoid SIGPIPE after the client drops off. */
+			(void)setsockopt(conn_fd, SOL_SOCKET, SO_NOSIGPIPE,
+			    &on, sizeof(on));
+			/* Improve latency for one byte at a time transfers. */
+			(void)setsockopt(conn_fd, IPPROTO_TCP, TCP_NODELAY,
+			    &on, sizeof(on));
+		} else if (errno != EINTR) {
+			perror("accept");
+		}
+	}
+
+	if (in) {
+		nread = read(conn_fd, &ch, 1);
+		if (nread == -1 && errno == EAGAIN)
+			*eax = -1;
+		else if (nread == 1) {
+			/*
+			 * Convert through unsigned char: a plain (possibly
+			 * signed) char would sign-extend bytes >= 0x80, and
+			 * 0xff would become indistinguishable from the -1
+			 * "no data" marker above.
+			 */
+			*eax = (unsigned char)ch;
+		} else {
+			close(conn_fd);
+			conn_fd = -1;
+			goto again;
+		}
+	} else {
+		ch = *eax;
+		nwritten = write(conn_fd, &ch, 1);
+		if (nwritten != 1) {
+			close(conn_fd);
+			conn_fd = -1;
+			goto again;
+		}
+	}
+	return (0);
+}
+
+/*
+ * Port registration record for the debug port.  The positional
+ * initializers fill, in order: name, port, size, flags, handler; the
+ * unlisted 'arg' member is zero-initialized.
+ */
+static struct inout_port dbgport = {
+	"bvmdbg",
+	BVM_DBG_PORT,
+	1,
+	IOPORT_F_INOUT,
+	dbg_handler
+};
+
+SYSRES_IO(BVM_DBG_PORT, 4);
+
+/*
+ * Create the debug port's listening socket and register its I/O handler.
+ *
+ * A TCP socket is opened with SO_REUSEADDR, bound to INADDR_ANY on port
+ * 'sport' and put into the listening state with a backlog of 1.  Any
+ * failure is reported with perror() and terminates the process with
+ * exit status 1.  conn_fd is reset to -1 so that the handler waits for a
+ * client on first use.
+ */
+void
+init_dbgport(int sport)
+{
+	int reuse = 1;
+
+	conn_fd = -1;
+
+	listen_fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (listen_fd < 0) {
+		perror("socket");
+		exit(1);
+	}
+
+	if (setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &reuse,
+	    sizeof(reuse)) < 0) {
+		perror("setsockopt");
+		exit(1);
+	}
+
+	/* Listen on every local address at the requested port. */
+	sin.sin_len = sizeof(sin);
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = htonl(INADDR_ANY);
+	sin.sin_port = htons(sport);
+
+	if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
+		perror("bind");
+		exit(1);
+	}
+
+	if (listen(listen_fd, 1) < 0) {
+		perror("listen");
+		exit(1);
+	}
+
+	register_inout(&dbgport);
+}


Property changes on: trunk/usr.sbin/bhyve/dbgport.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/dbgport.h
===================================================================
--- trunk/usr.sbin/bhyve/dbgport.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/dbgport.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,35 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/dbgport.h 256156 2013-10-08 16:36:17Z neel $
+ */
+
+#ifndef _DBGPORT_H_
+#define	_DBGPORT_H_
+
+/*
+ * Set up the debug port: 'port' is the TCP port number the listening
+ * socket is bound to.  The implementation exits the process on failure.
+ */
+void	init_dbgport(int port);
+
+#endif


Property changes on: trunk/usr.sbin/bhyve/dbgport.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/fwctl.c
===================================================================
--- trunk/usr.sbin/bhyve/fwctl.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/fwctl.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,550 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2015  Peter Grehan <grehan at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/fwctl.c 295124 2016-02-01 14:56:11Z grehan $
+ */
+
+/*
+ * Guest firmware interface. Uses i/o ports x510/x511 as Qemu does,
+ * but with a request/response messaging protocol.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/fwctl.c 295124 2016-02-01 14:56:11Z grehan $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "bhyverun.h"
+#include "inout.h"
+#include "fwctl.h"
+
+/*
+ * Messaging protocol base operations
+ */
+#define	OP_NULL		1
+#define	OP_ECHO		2
+#define	OP_GET		3
+#define	OP_GET_LEN	4
+#define	OP_SET		5
+#define	OP_MAX		OP_SET
+
+/* I/O ports */
+#define	FWCTL_OUT	0x510
+#define	FWCTL_IN	0x511
+
+/*
+ * Back-end state-machine
+ *
+ * Transitions, as driven by the port handlers in this file:
+ *   DORMANT    - initial value, until fwctl_init() runs.
+ *   IDENT_WAIT - set by fwctl_init(); a 16-bit write of 0 moves on.
+ *   IDENT_SEND - byte reads return the signature one byte at a time;
+ *                after the last byte the state becomes REQ.
+ *   REQ        - 32-bit writes are fed to the request parser; once a
+ *                full request has been received the state becomes RESP.
+ *   RESP       - 32-bit reads drain the response; when it is exhausted
+ *                the state returns to REQ.
+ */
+enum state {
+	DORMANT,
+	IDENT_WAIT,
+	IDENT_SEND,
+	REQ,
+	RESP
+} be_state = DORMANT;
+
+static uint8_t sig[] = { 'B', 'H', 'Y', 'V' };
+static u_int ident_idx;
+
+/*
+ * Per-operation callback table.
+ *   op_start  - called once the 12-byte header has been received, with
+ *               the payload length; a non-zero return is treated as an
+ *               error code and diverts the request to the error op.
+ *   op_data   - called for each 32-bit payload word, together with the
+ *               number of payload bytes still outstanding.
+ *   op_result - called when the request is complete; stores the response
+ *               iovec (or NULL) and returns the response error code.
+ *   op_done   - called after the response has been fully read back.
+ * The 'op' member is not assigned by any table visible in this file.
+ */
+struct op_info {
+	int op;
+	int  (*op_start)(int len);
+	void (*op_data)(uint32_t data, int len);
+	int  (*op_result)(struct iovec **data);
+	void (*op_done)(struct iovec *data);
+};
+/* Dispatch table indexed by request type; NULL means "unsupported". */
+static struct op_info *ops[OP_MAX+1];
+
+/*
+ * Return the final, partial word of a response as a 0-padded uint32_t.
+ *
+ * Copies the first 'len' bytes found at 'data' into the lowest-addressed
+ * bytes of the result and leaves the remaining bytes zero.  'len' is
+ * clamped to sizeof(uint32_t) so an oversized length can never write past
+ * the result word.
+ */
+static uint32_t
+fwctl_send_rest(uint32_t *data, size_t len)
+{
+	uint32_t word;
+
+	word = 0;
+	if (len > sizeof(word))
+		len = sizeof(word);
+
+	/* Byte-wise copy; 'data' need not hold a complete uint32_t. */
+	memcpy(&word, data, len);
+
+	return (word);
+}
+
+/*
+ * error op dummy proto - drop all data sent and return an error
+ *
+ * Used for unknown request types and for requests whose real op_start
+ * callback reported an error.
+ */
+/* Error code that errop_result() will report for the current request. */
+static int errop_code;
+
+/* Record the error code to be reported by the error op. */
+static void
+errop_set(int err)
+{
+
+	errop_code = err;
+}
+
+/*
+ * op_start for the error op: resets the stored code to ENOENT and
+ * returns it, whatever the length.
+ */
+static int
+errop_start(int len)
+{
+	errop_code = ENOENT;
+
+	/* any length is tolerated; the return value is always ENOENT */
+	return (errop_code);
+}
+
+/* op_data for the error op: payload words are discarded. */
+static void
+errop_data(uint32_t data, int len)
+{
+
+	/* ignore */
+}
+
+/* op_result for the error op: no payload, returns the stored code. */
+static int
+errop_result(struct iovec **data)
+{
+
+	/* no data to send back; the stored error code is the result */
+	*data = NULL;
+	return (errop_code);
+}
+
+/* op_done for the error op: nothing was allocated, nothing to release. */
+static void
+errop_done(struct iovec *data)
+{
+
+	/* assert data is NULL */
+}
+
+/* Callback table for the error op ('op' member left zero). */
+static struct op_info errop_info = {
+	.op_start  = errop_start,
+	.op_data   = errop_data,
+	.op_result = errop_result,
+	.op_done   = errop_done
+};
+
+/* OID search */
+SET_DECLARE(ctl_set, struct ctl);
+
+CTL_NODE("hw.ncpu", &guest_ncpus, sizeof(guest_ncpus));
+
+/*
+ * Search the ctl_set linker set for an entry whose OID matches 'str'.
+ *
+ * The comparison is strncmp() limited to 'maxlen' characters, so only
+ * that many leading characters take part in the match.  Returns the
+ * first matching entry, or NULL when none matches.
+ */
+static struct ctl *
+ctl_locate(const char *str, int maxlen)
+{
+	struct ctl **entry;
+
+	SET_FOREACH(entry, ctl_set) {
+		if (strncmp(str, (*entry)->c_oid, maxlen) == 0)
+			return (*entry);
+	}
+
+	return (NULL);
+}
+
+/* uefi-sysctl get-len / get: shared state for the two "fget" operations */
+#define FGET_STRSZ	80		/* capacity of the OID name buffer */
+static struct iovec fget_biov[2];	/* response vector; [1] is a terminator */
+static char fget_str[FGET_STRSZ];	/* OID name as received from the guest */
+/* Response image for the "get" op: length word followed by the data. */
+static struct {
+	size_t f_sz;
+	uint32_t f_data[1024];
+} fget_buf;
+static int fget_cnt;			/* bytes of fget_str filled so far */
+static size_t fget_size;		/* response payload for the "get-len" op */
+
+/*
+ * op_start for the get / get-len operations.
+ *
+ * 'len' is the guest-supplied payload size (the OID name length).  It is
+ * derived from an unsigned 32-bit field, so a very large request shows up
+ * here as a negative int; both negative and oversized lengths are
+ * rejected with E2BIG.  On success the name buffer fill count is reset
+ * and 0 is returned.
+ */
+static int
+fget_start(int len)
+{
+
+	if (len < 0 || len > FGET_STRSZ)
+		return (E2BIG);
+
+	fget_cnt = 0;
+
+	return (0);
+}
+
+/*
+ * op_data for the get / get-len operations: append one 32-bit word of
+ * the OID name to fget_str.
+ *
+ * The word is dropped when it would not fit entirely inside fget_str, so
+ * a guest that sends more data than announced cannot write past the
+ * buffer.  memcpy() is used instead of a type-punned store to avoid
+ * alignment and aliasing problems.  'len' (bytes still outstanding) is
+ * not used.
+ */
+static void
+fget_data(uint32_t data, int len)
+{
+
+	if (fget_cnt < 0 ||
+	    (size_t)fget_cnt + sizeof(data) > sizeof(fget_str))
+		return;
+
+	memcpy(&fget_str[fget_cnt], &data, sizeof(data));
+	fget_cnt += sizeof(data);
+}
+
+/*
+ * Build the response for a get (val != 0) or get-len (val == 0) request.
+ *
+ * The OID name collected in fget_str is looked up with ctl_locate().
+ *  - Not found: *data is set to NULL and ENOENT is returned.
+ *  - get:       the response is the entry's length followed by its data,
+ *               staged in fget_buf.  An entry that does not fit in
+ *               fget_buf.f_data yields *data == NULL and E2BIG instead of
+ *               overrunning the staging buffer.
+ *  - get-len:   the response is the entry's length only.
+ * On success *data points at fget_biov (terminated by an empty element)
+ * and 0 is returned.
+ */
+static int
+fget_result(struct iovec **data, int val)
+{
+	struct ctl *cp;
+
+	/* Locate the OID */
+	cp = ctl_locate(fget_str, fget_cnt);
+	if (cp == NULL) {
+		*data = NULL;
+		return (ENOENT);
+	}
+
+	if (val) {
+		if (cp->c_len < 0 ||
+		    (size_t)cp->c_len > sizeof(fget_buf.f_data)) {
+			*data = NULL;
+			return (E2BIG);
+		}
+
+		/* For now, copy the len/data into a buffer */
+		memset(&fget_buf, 0, sizeof(fget_buf));
+		fget_buf.f_sz = cp->c_len;
+		memcpy(fget_buf.f_data, cp->c_data, cp->c_len);
+		fget_biov[0].iov_base = (char *)&fget_buf;
+		fget_biov[0].iov_len  = sizeof(fget_buf.f_sz) +
+			cp->c_len;
+	} else {
+		fget_size = cp->c_len;
+		fget_biov[0].iov_base = (char *)&fget_size;
+		fget_biov[0].iov_len  = sizeof(fget_size);
+	}
+
+	fget_biov[1].iov_base = NULL;
+	fget_biov[1].iov_len  = 0;
+	*data = fget_biov;
+
+	return (0);
+}
+
+/* op_done for get / get-len: the response lives in static storage. */
+static void
+fget_done(struct iovec *data)
+{
+
+	/* nothing needs to be freed */
+}
+
+/* op_result for get-len: respond with the length of the located OID. */
+static int
+fget_len_result(struct iovec **data)
+{
+	return (fget_result(data, 0));
+}
+
+/* op_result for get: respond with the length and value of the OID. */
+static int
+fget_val_result(struct iovec **data)
+{
+	return (fget_result(data, 1));
+}
+
+/* Callback table installed for OP_GET_LEN by fwctl_init(). */
+static struct op_info fgetlen_info = {
+	.op_start  = fget_start,
+	.op_data   = fget_data,
+	.op_result = fget_len_result,
+	.op_done   = fget_done
+};
+
+/* Callback table installed for OP_GET by fwctl_init(). */
+static struct op_info fgetval_info = {
+	.op_start  = fget_start,
+	.op_data   = fget_data,
+	.op_result = fget_val_result,
+	.op_done   = fget_done
+};
+
+/*
+ * State of the request currently being received and of the response
+ * being sent back.  Cleared by fwctl_response_done().
+ */
+static struct req_info {
+	int      req_error;	/* not referenced elsewhere in this file */
+	u_int    req_count;	/* header words received so far (0..3) */
+	uint32_t req_size;	/* message size; payload bytes outstanding */
+	uint32_t req_type;	/* operation code from the header */
+	uint32_t req_txid;	/* transaction id, echoed in the response */
+	struct op_info *req_op;	/* callbacks selected for this request */
+	int	 resp_error;	/* error code returned by op_result */
+	int	 resp_count;	/* response header words sent so far */
+	int	 resp_size;	/* response payload size in bytes */
+	int	 resp_off;	/* payload bytes already sent */
+	struct iovec *resp_biov;	/* response payload, or NULL */
+} rinfo;
+
+/*
+ * Finish a response: give the operation a chance to release its response
+ * data via op_done, then zero all request/response state.
+ */
+static void
+fwctl_response_done(void)
+{
+
+	(*rinfo.req_op->op_done)(rinfo.resp_biov);
+
+	/* reinit the req data struct */
+	memset(&rinfo, 0, sizeof(rinfo));
+}
+
+/*
+ * Called when a complete request has been received: ask the operation
+ * for its result and prime the response bookkeeping.  Only the first
+ * element of the returned iovec is used; a NULL iovec means an empty
+ * payload.
+ */
+static void
+fwctl_request_done(void)
+{
+
+	rinfo.resp_error = (*rinfo.req_op->op_result)(&rinfo.resp_biov);
+
+	/* XXX only a single vector supported at the moment */
+	rinfo.resp_off = 0;
+	rinfo.resp_size = (rinfo.resp_biov != NULL) ?
+	    rinfo.resp_biov[0].iov_len : 0;
+}
+
+/*
+ * Called once the three header words of a request have arrived.
+ *
+ * Strips the 12-byte header from the announced size, selects the
+ * callbacks for the request type (the error op when the type is unknown)
+ * and runs op_start.  If op_start fails, its error code is stored and
+ * the request is diverted to the error op.
+ *
+ * Returns 1 when the request carries no payload and has therefore
+ * already been completed, 0 when payload words are still expected.
+ */
+static int
+fwctl_request_start(void)
+{
+	struct op_info *op;
+	int err;
+
+	/* Data size doesn't include header */
+	rinfo.req_size -= 12;
+
+	if (rinfo.req_type <= OP_MAX && ops[rinfo.req_type] != NULL)
+		op = ops[rinfo.req_type];
+	else
+		op = &errop_info;
+	rinfo.req_op = op;
+
+	err = (*op->op_start)(rinfo.req_size);
+	if (err != 0) {
+		errop_set(err);
+		rinfo.req_op = &errop_info;
+	}
+
+	/* More payload to come? */
+	if (rinfo.req_size != 0)
+		return (0);
+
+	/* Catch case of zero-length message here */
+	fwctl_request_done();
+	return (1);
+}
+
+/*
+ * Consume one 32-bit payload word of the current request.
+ *
+ * rinfo.req_size is unsigned, so the outstanding byte count is reduced
+ * with saturation: a final partial word (1..3 bytes left) brings it to 0
+ * rather than wrapping around to a huge value, which would keep the
+ * request open indefinitely and keep feeding data to op_data.
+ *
+ * Returns 1 when this word completed the request, 0 otherwise.
+ */
+static int
+fwctl_request_data(uint32_t value)
+{
+	int remlen;
+
+	/* Make sure remaining size never drops below 0 */
+	if (rinfo.req_size <= sizeof(uint32_t))
+		rinfo.req_size = 0;
+	else
+		rinfo.req_size -= sizeof(uint32_t);
+	remlen = (int)rinfo.req_size;
+
+	(*rinfo.req_op->op_data)(value, remlen);
+
+	if (rinfo.req_size < sizeof(uint32_t)) {
+		fwctl_request_done();
+		return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Feed one 32-bit word written by the guest into the request parser.
+ *
+ * Words arrive in the order: total size, type, transaction id, then
+ * payload.  rinfo.req_count tracks which header word is expected next.
+ * A total size smaller than the 12-byte header terminates the process.
+ *
+ * Returns 1 when the request is complete, 0 when more words are needed.
+ */
+static int
+fwctl_request(uint32_t value)
+{
+
+	int ret;
+
+	ret = 0;
+
+	switch (rinfo.req_count) {
+	case 0:
+		/* Verify size */
+		if (value < 12) {
+			printf("msg size error");
+			exit(1);
+		}
+		rinfo.req_size = value;
+		rinfo.req_count = 1;
+		break;
+	case 1:
+		rinfo.req_type = value;
+		rinfo.req_count++;
+		break;
+	case 2:
+		rinfo.req_txid = value;
+		rinfo.req_count++;
+		/* Header complete: pick the op and handle empty payloads. */
+		ret = fwctl_request_start();
+		break;
+	default:
+		/* req_count stays at 3; every further word is payload. */
+		ret = fwctl_request_data(value);
+		break;
+	}
+
+	return (ret);
+}
+
+/*
+ * Produce the next 32-bit word of the response in *retval.
+ *
+ * Words are emitted in the order: total length (16-byte header plus the
+ * payload rounded up to a multiple of 4), request type, transaction id,
+ * error code, then the payload taken from the first response iovec.  A
+ * trailing partial word is zero-padded by fwctl_send_rest().
+ *
+ * Returns 1 when the word just produced was the last one (the request
+ * state has then been reset), 0 otherwise.
+ */
+static int
+fwctl_response(uint32_t *retval)
+{
+	uint32_t *dp;
+	int remlen;
+
+	switch(rinfo.resp_count) {
+	case 0:
+		/* 4 x u32 header len + data */
+		*retval = 4*sizeof(uint32_t) +
+		    roundup(rinfo.resp_size, sizeof(uint32_t));
+		rinfo.resp_count++;
+		break;
+	case 1:
+		*retval = rinfo.req_type;
+		rinfo.resp_count++;
+		break;
+	case 2:
+		*retval = rinfo.req_txid;
+		rinfo.resp_count++;
+		break;
+	case 3:
+		*retval = rinfo.resp_error;
+		rinfo.resp_count++;
+		break;
+	default:
+		/*
+		 * NOTE(review): *retval is left untouched if remlen <= 0.
+		 * That looks unreachable because the completion check below
+		 * ends the response as soon as the payload is exhausted.
+		 */
+		remlen = rinfo.resp_size - rinfo.resp_off;
+		dp = (uint32_t *)
+		    ((uint8_t *)rinfo.resp_biov->iov_base + rinfo.resp_off);
+		if (remlen >= sizeof(uint32_t)) {
+			*retval = *dp;
+		} else if (remlen > 0) {
+			*retval = fwctl_send_rest(dp, remlen);
+		}
+		rinfo.resp_off += sizeof(uint32_t);
+		break;
+	}
+
+	/* Header fully sent and no payload left: the response is done. */
+	if (rinfo.resp_count > 3 &&
+	    rinfo.resp_size - rinfo.resp_off <= 0) {
+		fwctl_response_done();
+		return (1);
+	}
+
+	return (0);
+}
+
+
+/*
+ * i/o port handling.
+ */
+/*
+ * Byte read from the firmware control port.
+ *
+ * In the IDENT_SEND state this returns the next signature byte and, once
+ * the last one has been handed out, switches the back end to REQ.  In
+ * every other state the read returns 0xff.
+ */
+static uint8_t
+fwctl_inb(void)
+{
+	uint8_t byte;
+
+	if (be_state != IDENT_SEND)
+		return (0xff);
+
+	byte = sig[ident_idx++];
+	if (ident_idx >= sizeof(sig))
+		be_state = REQ;
+
+	return (byte);
+}
+
+/*
+ * 16-bit write to the firmware control port.
+ *
+ * Writing 0 while the back end is in IDENT_WAIT starts the signature
+ * hand-out (IDENT_SEND, index reset).  All other writes are ignored.
+ */
+static void
+fwctl_outw(uint16_t val)
+{
+
+	if (be_state == IDENT_WAIT && val == 0) {
+		be_state = IDENT_SEND;
+		ident_idx = 0;
+	}
+}
+
+/*
+ * 32-bit read from the firmware control port.
+ *
+ * In the RESP state this returns the next response word and moves the
+ * back end to REQ once the response is exhausted.  In any other state it
+ * returns 0xffffffff.  The result is pre-set to 0xffffffff so that a
+ * defined value is returned even if fwctl_response() stores no word.
+ */
+static uint32_t
+fwctl_inl(void)
+{
+	uint32_t retval;
+
+	retval = 0xffffffff;
+
+	if (be_state == RESP && fwctl_response(&retval))
+		be_state = REQ;
+
+	return (retval);
+}
+
+/*
+ * 32-bit write to the firmware control port.
+ *
+ * In the REQ state the word is passed to the request parser; when that
+ * reports a complete request the back end switches to RESP.  Writes in
+ * any other state are ignored.
+ */
+static void
+fwctl_outl(uint32_t val)
+{
+
+	if (be_state == REQ && fwctl_request(val))
+		be_state = RESP;
+}
+
+/*
+ * I/O port handler shared by both firmware control ports; dispatches on
+ * direction and access size only.
+ *
+ *   read,  1 byte : fwctl_inb()
+ *   read,  4 bytes: fwctl_inl()
+ *   read,  other  : 0xffff
+ *   write, 2 bytes: fwctl_outw()
+ *   write, 4 bytes: fwctl_outl()
+ *   write, other  : ignored
+ *
+ * Always returns 0.
+ */
+static int
+fwctl_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+
+	if (in) {
+		switch (bytes) {
+		case 1:
+			*eax = fwctl_inb();
+			break;
+		case 4:
+			*eax = fwctl_inl();
+			break;
+		default:
+			*eax = 0xffff;
+			break;
+		}
+		return (0);
+	}
+
+	switch (bytes) {
+	case 2:
+		fwctl_outw(*eax);
+		break;
+	case 4:
+		fwctl_outl(*eax);
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+INOUT_PORT(fwctl_wreg, FWCTL_OUT, IOPORT_F_INOUT, fwctl_handler);
+INOUT_PORT(fwctl_rreg, FWCTL_IN,  IOPORT_F_IN,    fwctl_handler);
+
+/*
+ * Install the callbacks for the supported operations (OP_GET_LEN and
+ * OP_GET) and arm the back end so it waits for the guest's ident write.
+ * Other request types keep a NULL entry and are answered by the error op.
+ */
+void
+fwctl_init(void)
+{
+
+	ops[OP_GET_LEN] = &fgetlen_info;
+	ops[OP_GET]     = &fgetval_info;
+
+	be_state = IDENT_WAIT;
+}


Property changes on: trunk/usr.sbin/bhyve/fwctl.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/fwctl.h
===================================================================
--- trunk/usr.sbin/bhyve/fwctl.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/fwctl.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,55 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2015  Peter Grehan <grehan at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/fwctl.h 295124 2016-02-01 14:56:11Z grehan $
+ */
+
+#ifndef _FWCTL_H_
+#define _FWCTL_H_
+
+#include <sys/linker_set.h>
+
+/*
+ * Linker set api for export of information to guest firmware via
+ * a sysctl-like OID interface
+ */
+/* One exported value: an OID name plus a pointer to, and size of, its data. */
+struct ctl {
+	const char *c_oid;	/* OID name string, e.g. "hw.ncpu" */
+	const void *c_data;	/* address of the exported value */
+	const int c_len;	/* size of the exported value in bytes */
+};
+
+/*
+ * Define a static struct ctl for (oid, data, len) and add it to the
+ * 'ctl_set' linker set.  The variable name is derived from __LINE__, so
+ * at most one CTL_NODE may appear per source line.
+ */
+#define CTL_NODE(oid, data, len)				\
+	static struct ctl __CONCAT(__ctl, __LINE__) = {		\
+		oid,						\
+		(data),						\
+		(len),						\
+	};							\
+	DATA_SET(ctl_set, __CONCAT(__ctl, __LINE__))
+
+void	fwctl_init(void);
+
+#endif /* _FWCTL_H_ */


Property changes on: trunk/usr.sbin/bhyve/fwctl.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/inout.c
===================================================================
--- trunk/usr.sbin/bhyve/inout.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/inout.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,298 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/inout.c 284900 2015-06-28 03:22:26Z neel $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/inout.c 284900 2015-06-28 03:22:26Z neel $");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/_iovec.h>
+#include <sys/mman.h>
+
+#include <x86/psl.h>
+#include <x86/segments.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+#include <vmmapi.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "bhyverun.h"
+#include "inout.h"
+
+SET_DECLARE(inout_port_set, struct inout_port);
+
+/* Size of the I/O port space: one table slot per port number. */
+#define	MAX_IOPORTS	(1 << 16)
+
+/* Abort unless [port, port + size) is a non-empty range inside the table. */
+#define	VERIFY_IOPORT(port, size) \
+	assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS)
+
+/* Per-port dispatch table, indexed directly by port number. */
+static struct {
+	const char	*name;
+	int		flags;
+	inout_func_t	handler;
+	void		*arg;
+} inout_handlers[MAX_IOPORTS];
+
+/*
+ * Handler for ports nobody has claimed.
+ *
+ * Reads of 1, 2 or 4 bytes return all-ones of the matching width in
+ * *eax; reads of any other size leave *eax untouched.  Writes are
+ * discarded.  Always returns 0.
+ */
+static int
+default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+              uint32_t *eax, void *arg)
+{
+
+	if (in && (bytes == 1 || bytes == 2 || bytes == 4))
+		*eax = 0xffffffffu >> (32 - 8 * bytes);
+
+	return (0);
+}
+
+/*
+ * Point every port in [start, start + size) at the default handler.
+ * The range is marked IOPORT_F_DEFAULT so that a later register_inout()
+ * by a device model is allowed to claim it.
+ */
+static void
+register_default_iohandler(int start, int size)
+{
+	struct inout_port iop = {
+		.name = "default",
+		.port = start,
+		.size = size,
+		.flags = IOPORT_F_INOUT | IOPORT_F_DEFAULT,
+		.handler = default_inout,
+		.arg = NULL
+	};
+
+	VERIFY_IOPORT(start, size);
+
+	register_inout(&iop);
+}
+
+/*
+ * Emulate the in/out instruction described by 'vmexit'.
+ *
+ * The handler registered for the port is looked up in inout_handlers.
+ * The call fails with -1 when 'strict' is set and the port only has the
+ * default handler, or when the handler's flags do not permit the
+ * requested direction.
+ *
+ * String instructions are emulated one element at a time, at most 16
+ * elements per call; the index register (and, with a repeat prefix, the
+ * count register) is written back and the instruction is restarted if
+ * elements remain.  For non-string instructions the handler is called
+ * once and, for a successful read, the result is merged into the
+ * guest's RAX.
+ *
+ * Returns 0 on success (including the case where the guest must first
+ * handle a fault), otherwise the handler's non-zero result or -1.
+ */
+int
+emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
+{
+	int addrsize, bytes, flags, in, port, prot, rep;
+	uint32_t eax, val;
+	inout_func_t handler;
+	void *arg;
+	int error, fault, retval;
+	enum vm_reg_name idxreg;
+	uint64_t gla, index, iterations, count;
+	struct vm_inout_str *vis;
+	struct iovec iov[2];
+
+	bytes = vmexit->u.inout.bytes;
+	in = vmexit->u.inout.in;
+	port = vmexit->u.inout.port;
+
+	assert(port < MAX_IOPORTS);
+	assert(bytes == 1 || bytes == 2 || bytes == 4);
+
+	handler = inout_handlers[port].handler;
+
+	if (strict && handler == default_inout)
+		return (-1);
+
+	flags = inout_handlers[port].flags;
+	arg = inout_handlers[port].arg;
+
+	/* Reject accesses in a direction the handler did not register for. */
+	if (in) {
+		if (!(flags & IOPORT_F_IN))
+			return (-1);
+	} else {
+		if (!(flags & IOPORT_F_OUT))
+			return (-1);
+	}
+
+	retval = 0;
+	if (vmexit->u.inout.string) {
+		vis = &vmexit->u.inout_str;
+		rep = vis->inout.rep;
+		addrsize = vis->addrsize;
+		/* An 'in' stores to guest memory, an 'out' loads from it. */
+		prot = in ? PROT_WRITE : PROT_READ;
+		assert(addrsize == 2 || addrsize == 4 || addrsize == 8);
+
+		/* Index register */
+		idxreg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
+		index = vis->index & vie_size2mask(addrsize);
+
+		/* Count register */
+		count = vis->count & vie_size2mask(addrsize);
+
+		/* Limit number of back-to-back in/out emulations to 16 */
+		iterations = MIN(count, 16);
+		while (iterations > 0) {
+			assert(retval == 0);
+			if (vie_calculate_gla(vis->paging.cpu_mode,
+			    vis->seg_name, &vis->seg_desc, index, bytes,
+			    addrsize, prot, &gla)) {
+				vm_inject_gp(ctx, vcpu);
+				break;
+			}
+
+			error = vm_copy_setup(ctx, vcpu, &vis->paging, gla,
+			    bytes, prot, iov, nitems(iov), &fault);
+			if (error) {
+				retval = -1;  /* Unrecoverable error */
+				break;
+			} else if (fault) {
+				retval = 0;  /* Resume guest to handle fault */
+				break;
+			}
+
+			if (vie_alignment_check(vis->paging.cpl, bytes,
+			    vis->cr0, vis->rflags, gla)) {
+				vm_inject_ac(ctx, vcpu, 0);
+				break;
+			}
+
+			/* For 'out', fetch the element from guest memory. */
+			val = 0;
+			if (!in)
+				vm_copyin(ctx, vcpu, iov, &val, bytes);
+
+			retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
+			if (retval != 0)
+				break;
+
+			/* For 'in', store the element into guest memory. */
+			if (in)
+				vm_copyout(ctx, vcpu, &val, iov, bytes);
+
+			/* Update index */
+			if (vis->rflags & PSL_D)
+				index -= bytes;
+			else
+				index += bytes;
+
+			count--;
+			iterations--;
+		}
+
+		/* Update index register */
+		error = vie_update_register(ctx, vcpu, idxreg, index, addrsize);
+		assert(error == 0);
+
+		/*
+		 * Update count register only if the instruction had a repeat
+		 * prefix.
+		 */
+		if (rep) {
+			error = vie_update_register(ctx, vcpu, VM_REG_GUEST_RCX,
+			    count, addrsize);
+			assert(error == 0);
+		}
+
+		/* Restart the instruction if more iterations remain */
+		if (retval == 0 && count != 0) {
+			error = vm_restart_instruction(ctx, vcpu);
+			assert(error == 0);
+		}
+	} else {
+		eax = vmexit->u.inout.eax;
+		val = eax & vie_size2mask(bytes);
+		retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
+		if (retval == 0 && in) {
+			/* Replace only the low 'bytes' bytes of eax. */
+			eax &= ~vie_size2mask(bytes);
+			eax |= val & vie_size2mask(bytes);
+			error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX,
+			    eax);
+			assert(error == 0);
+		}
+	}
+	return (retval);
+}
+
+/*
+ * Initialise the port dispatch table: first every port gets the default
+ * handler, then each entry of the 'inout_port_set' linker set overwrites
+ * the slot of its own port.  Only the single slot at iop->port is filled
+ * in, and its argument pointer is set to NULL.
+ */
+void
+init_inout(void)
+{
+	struct inout_port **iopp;
+	int port;
+
+	/* Set up the default handler for all ports */
+	register_default_iohandler(0, MAX_IOPORTS);
+
+	/* Overwrite with specified handlers */
+	SET_FOREACH(iopp, inout_port_set) {
+		port = (*iopp)->port;
+		assert(port < MAX_IOPORTS);
+
+		inout_handlers[port].name = (*iopp)->name;
+		inout_handlers[port].flags = (*iopp)->flags;
+		inout_handlers[port].handler = (*iopp)->handler;
+		inout_handlers[port].arg = NULL;
+	}
+}
+
+/*
+ * Install 'iop' as the handler for ports [iop->port, iop->port + size).
+ *
+ * Unless the registration itself carries IOPORT_F_DEFAULT, every port in
+ * the range must currently be owned by the default handler; otherwise
+ * nothing is changed and -1 is returned.  Returns 0 on success.  The
+ * range is validated with VERIFY_IOPORT, which asserts.
+ */
+int
+register_inout(struct inout_port *iop)
+{
+	int first, end, p;
+
+	VERIFY_IOPORT(iop->port, iop->size);
+
+	first = iop->port;
+	end = iop->port + iop->size;
+
+	/*
+	 * Verify that the new registration is not overwriting an already
+	 * allocated i/o range.
+	 */
+	if (!(iop->flags & IOPORT_F_DEFAULT)) {
+		for (p = first; p < end; p++) {
+			if (!(inout_handlers[p].flags & IOPORT_F_DEFAULT))
+				return (-1);
+		}
+	}
+
+	for (p = first; p < end; p++) {
+		inout_handlers[p].name = iop->name;
+		inout_handlers[p].flags = iop->flags;
+		inout_handlers[p].handler = iop->handler;
+		inout_handlers[p].arg = iop->arg;
+	}
+
+	return (0);
+}
+
+/*
+ * Release the port range described by 'iop' by handing it back to the
+ * default handler.  Asserts that the range is valid and that the first
+ * port is currently registered under the same name pointer.  Always
+ * returns 0.
+ */
+int
+unregister_inout(struct inout_port *iop)
+{
+
+	VERIFY_IOPORT(iop->port, iop->size);
+	assert(inout_handlers[iop->port].name == iop->name);
+
+	register_default_iohandler(iop->port, iop->size);
+
+	return (0);
+}


Property changes on: trunk/usr.sbin/bhyve/inout.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/inout.h
===================================================================
--- trunk/usr.sbin/bhyve/inout.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/inout.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,80 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/inout.h 270159 2014-08-19 01:20:24Z grehan $
+ */
+
+#ifndef _INOUT_H_
+#define	_INOUT_H_
+
+#include <sys/linker_set.h>
+
+struct vmctx;
+struct vm_exit;
+
+/*
+ * inout emulation handlers return 0 on success and -1 on failure.
+ */
+typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port,
+			    int bytes, uint32_t *eax, void *arg);
+
+/* Description of a contiguous range of I/O ports and its handler. */
+struct inout_port {
+	const char 	*name;		/* label for the range */
+	int		port;		/* first port number */
+	int		size;		/* number of consecutive ports */
+	int		flags;		/* IOPORT_F_* direction flags */
+	inout_func_t	handler;	/* emulation callback */
+	void		*arg;		/* opaque pointer passed to handler */
+};
+#define	IOPORT_F_IN		0x1
+#define	IOPORT_F_OUT		0x2
+#define	IOPORT_F_INOUT		(IOPORT_F_IN | IOPORT_F_OUT)
+
+/*
+ * The following flags are used internally and must not be used by
+ * device models.
+ */
+#define	IOPORT_F_DEFAULT	0x80000000	/* claimed by default handler */
+
+/*
+ * Statically declare a single-port handler (size 1, arg 0) and add it to
+ * the 'inout_port_set' linker set.  The variable name is derived from
+ * __LINE__, so at most one INOUT_PORT may appear per source line.
+ */
+#define	INOUT_PORT(name, port, flags, handler)				\
+	static struct inout_port __CONCAT(__inout_port, __LINE__) = {	\
+		#name,							\
+		(port),							\
+		1,							\
+		(flags),						\
+		(handler),						\
+		0							\
+	};								\
+	DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__))
+	
+void	init_inout(void);
+int	emulate_inout(struct vmctx *, int vcpu, struct vm_exit *vmexit,
+		      int strict);
+int	register_inout(struct inout_port *iop);
+int	unregister_inout(struct inout_port *iop);
+void	init_bvmcons(void);
+
+#endif	/* _INOUT_H_ */


Property changes on: trunk/usr.sbin/bhyve/inout.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/ioapic.c
===================================================================
--- trunk/usr.sbin/bhyve/ioapic.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/ioapic.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,75 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Hudson River Trading LLC
+ * Written by: John H. Baldwin <jhb at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/ioapic.c 283927 2015-06-02 19:20:39Z jhb $");
+
+#include <sys/types.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "ioapic.h"
+
+/*
+ * Assign PCI INTx interrupts to I/O APIC pins in a round-robin
+ * fashion.  Note that we have no idea what the HPET is using, but the
+ * HPET is also programmable whereas this is intended for hardwired
+ * PCI interrupts.
+ *
+ * This assumes a single I/O APIC where pins >= 16 are permitted for
+ * PCI devices.
+ */
+static int pci_pins;
+
+void
+ioapic_init(struct vmctx *ctx)
+{
+	int total = 0;
+
+	/*
+	 * Only pins beyond the first 16 are handed out to PCI devices, so
+	 * the pool stays empty when the pin count cannot be queried or is
+	 * 16 or fewer.
+	 */
+	pci_pins = 0;
+	if (vm_ioapic_pincount(ctx, &total) < 0)
+		return;
+	if (total > 16)
+		pci_pins = total - 16;
+}
+
+int
+ioapic_pci_alloc_irq(void)
+{
+	static int last_pin;	/* number of pins handed out so far */
+
+	if (pci_pins == 0)	/* no I/O APIC pins are available for PCI */
+		return (-1);
+	return (16 + (last_pin++ % pci_pins));	/* round-robin from pin 16 up */
+}


Property changes on: trunk/usr.sbin/bhyve/ioapic.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/ioapic.h
===================================================================
--- trunk/usr.sbin/bhyve/ioapic.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/ioapic.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,40 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Hudson River Trading LLC
+ * Written by: John H. Baldwin <jhb at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/ioapic.h 283927 2015-06-02 19:20:39Z jhb $
+ */
+
+#ifndef _IOAPIC_H_
+#define	_IOAPIC_H_
+
+/*
+ * Allocate a PCI IRQ from the I/O APIC.
+ */
+void	ioapic_init(struct vmctx *ctx);
+int	ioapic_pci_alloc_irq(void);
+
+#endif


Property changes on: trunk/usr.sbin/bhyve/ioapic.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/mem.c
===================================================================
--- trunk/usr.sbin/bhyve/mem.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/mem.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,292 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/mem.c 270159 2014-08-19 01:20:24Z grehan $
+ */
+
+/*
+ * Memory ranges are represented with an RB tree. On insertion, the range
+ * is checked for overlaps. On lookup, the key has the same base and limit
+ * so it can be searched within the range.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/mem.c 270159 2014-08-19 01:20:24Z grehan $");
+
+#include <sys/types.h>
+#include <sys/tree.h>
+#include <sys/errno.h>
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include "mem.h"
+
+struct mmio_rb_range {
+	RB_ENTRY(mmio_rb_range)	mr_link;	/* RB tree links */
+	struct mem_range	mr_param;	/* copy of the registered range */
+	uint64_t                mr_base;	/* first address covered */
+	uint64_t                mr_end;		/* last address covered (inclusive) */
+};
+
+struct mmio_rb_tree;
+RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+
+RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback;
+
+/*
+ * Per-vCPU cache. Since most accesses from a vCPU will be to
+ * consecutive addresses in a range, it makes sense to cache the
+ * result of a lookup.
+ */
+static struct mmio_rb_range	*mmio_hint[VM_MAXCPU];
+
+static pthread_rwlock_t mmio_rwlock;
+
+static int
+mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
+{
+	if (a->mr_end < b->mr_base)		/* a lies entirely below b */
+		return (-1);
+	else if (a->mr_base > b->mr_end)	/* a lies entirely above b */
+		return (1);
+	return (0);				/* ranges overlap: treated as equal */
+}
+
+static int
+mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr,
+    struct mmio_rb_range **entry)
+{
+	struct mmio_rb_range key, *match;
+
+	/*
+	 * A one-address key [addr, addr] compares equal to whichever range
+	 * in the tree contains addr.  '*entry' is written only on success.
+	 */
+	key.mr_base = key.mr_end = addr;
+	match = RB_FIND(mmio_rb_tree, rbt, &key);
+	if (match == NULL)
+		return (ENOENT);
+	*entry = match;
+	return (0);
+}
+
+static int
+mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new)
+{
+	struct mmio_rb_range *overlap;
+
+	/* RB_INSERT() hands back the colliding node instead of inserting. */
+	overlap = RB_INSERT(mmio_rb_tree, rbt, new);
+	if (overlap != NULL) {
+#ifdef RB_DEBUG
+		printf("overlap detected: new %lx:%lx, tree %lx:%lx\n",
+		       new->mr_base, new->mr_end,
+		       overlap->mr_base, overlap->mr_end);
+#endif
+
+		return (EEXIST);	/* 'new' was not linked into the tree */
+	}
+
+	return (0);
+}
+
+#if 0
+static void
+mmio_rb_dump(struct mmio_rb_tree *rbt)
+{
+	struct mmio_rb_range *np;
+
+	pthread_rwlock_rdlock(&mmio_rwlock);
+	RB_FOREACH(np, mmio_rb_tree, rbt) {
+		printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
+		       np->mr_param.name);
+	}
+	pthread_rwlock_unlock(&mmio_rwlock);
+}
+#endif
+
+RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+
+/* Read callback for the instruction emulator: forwards to the range's handler. */
+static int
+mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg)
+{
+	struct mem_range *range;
+
+	range = arg;
+	return (range->handler(ctx, vcpu, MEM_F_READ, gpa, size, rval,
+	    range->arg1, range->arg2));
+}
+
+/* Write callback for the instruction emulator: forwards to the range's handler. */
+static int
+mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg)
+{
+	struct mem_range *range;
+
+	range = arg;
+	return (range->handler(ctx, vcpu, MEM_F_WRITE, gpa, size, &wval,
+	    range->arg1, range->arg2));
+}
+
+/* Emulate a guest access to 'paddr'; ESRCH if no range covers it. */
+int
+emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
+    struct vm_guest_paging *paging)
+{
+	struct mmio_rb_range *entry;
+	int err, immutable;
+	
+	pthread_rwlock_rdlock(&mmio_rwlock);
+	/*
+	 * First check the per-vCPU cache
+	 */
+	if (mmio_hint[vcpu] &&
+	    paddr >= mmio_hint[vcpu]->mr_base &&
+	    paddr <= mmio_hint[vcpu]->mr_end) {
+		entry = mmio_hint[vcpu];
+	} else
+		entry = NULL;
+
+	if (entry == NULL) {
+		if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
+			/* Update the per-vCPU cache (primary-tree hits only) */
+			mmio_hint[vcpu] = entry;
+		} else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
+			pthread_rwlock_unlock(&mmio_rwlock);
+			return (ESRCH);
+		}
+	}
+
+	assert(entry != NULL);
+
+	/*
+	 * An 'immutable' memory range is guaranteed to be never removed
+	 * so there is no need to hold 'mmio_rwlock' while calling the
+	 * handler.
+	 *
+	 * XXX writes to the PCIR_COMMAND register can cause register_mem()
+	 * to be called. If the guest is using PCI extended config space
+	 * to modify the PCIR_COMMAND register then register_mem() can
+	 * deadlock on 'mmio_rwlock'. However by registering the extended
+	 * config space window as 'immutable' the deadlock can be avoided.
+	 */
+	immutable = (entry->mr_param.flags & MEM_F_IMMUTABLE);
+	if (immutable)
+		pthread_rwlock_unlock(&mmio_rwlock);
+
+	/* mem_read()/mem_write() receive '&entry->mr_param' as their 'arg'. */
+	err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging,
+				      mem_read, mem_write, &entry->mr_param);
+	if (!immutable)
+		pthread_rwlock_unlock(&mmio_rwlock);
+
+	return (err);
+}
+
+/*
+ * Register a copy of '*memp' in 'rbt'; returns 0, ENOMEM or EEXIST.  When
+ * 'memp->base' is already covered, 0 is returned and the copy is freed.
+ */
+static int
+register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
+{
+	struct mmio_rb_range *entry, *mrp;
+	int vacant, err = 0;
+
+	mrp = malloc(sizeof(*mrp));
+	if (mrp == NULL)
+		return (ENOMEM);
+	mrp->mr_param = *memp;
+	mrp->mr_base = memp->base;
+	mrp->mr_end = memp->base + memp->size - 1;
+	pthread_rwlock_wrlock(&mmio_rwlock);
+	vacant = (mmio_rb_lookup(rbt, memp->base, &entry) != 0);
+	if (vacant)
+		err = mmio_rb_add(rbt, mrp);
+	pthread_rwlock_unlock(&mmio_rwlock);
+	if (!vacant || err != 0)
+		free(mrp);	/* not inserted, so still owned here */
+	return (err);
+}
+
+int
+register_mem(struct mem_range *memp)
+{
+	/* Add the range to the primary tree, which emulate_mem() searches first. */
+	return (register_mem_int(&mmio_rb_root, memp));
+}
+
+int
+register_mem_fallback(struct mem_range *memp)
+{
+	/* Add the range to the tree consulted only when the primary tree misses. */
+	return (register_mem_int(&mmio_rb_fallback, memp));
+}
+
+/* Remove the primary-tree range at 'memp->base'; returns ENOENT if absent. */
+int 
+unregister_mem(struct mem_range *memp)
+{
+	struct mem_range *mr;
+	struct mmio_rb_range *entry = NULL;
+	int err, i;
+	
+	pthread_rwlock_wrlock(&mmio_rwlock);
+	err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
+	if (err == 0) {
+		mr = &entry->mr_param;
+		assert(mr->name == memp->name);	/* pointer identity, not strcmp() */
+		assert(mr->base == memp->base && mr->size == memp->size); 
+		assert((mr->flags & MEM_F_IMMUTABLE) == 0);
+		RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);
+
+		/* drop every per-vCPU hint that still points at the entry */
+		for (i=0; i < VM_MAXCPU; i++) {
+			if (mmio_hint[i] == entry)
+				mmio_hint[i] = NULL;
+		}
+	}
+	pthread_rwlock_unlock(&mmio_rwlock);
+
+	if (entry)	/* still NULL when the lookup failed */
+		free(entry);
+	return (err);
+}
+
+void
+init_mem(void)
+{
+	/* Start with both range trees empty and initialize the lock guarding them. */
+	RB_INIT(&mmio_rb_root);
+	RB_INIT(&mmio_rb_fallback);
+	pthread_rwlock_init(&mmio_rwlock, NULL);
+}


Property changes on: trunk/usr.sbin/bhyve/mem.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/mem.h
===================================================================
--- trunk/usr.sbin/bhyve/mem.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/mem.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,62 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/mem.h 270159 2014-08-19 01:20:24Z grehan $
+ */
+
+#ifndef _MEM_H_
+#define	_MEM_H_
+
+#include <sys/linker_set.h>
+
+struct vmctx;
+
+typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+			  int size, uint64_t *val, void *arg1, long arg2);
+
+struct mem_range {
+	const char 	*name;		/* descriptive label for the range */
+	int		flags;		/* MEM_F_* bits defined below */
+	mem_func_t	handler;	/* called with dir MEM_F_READ or MEM_F_WRITE */
+	void		*arg1;		/* handed to handler unchanged */
+	long		arg2;		/* handed to handler unchanged */
+	uint64_t  	base;		/* first address covered */
+	uint64_t  	size;		/* length; the range ends at base + size - 1 */
+};
+#define	MEM_F_READ		0x1	/* 'dir' value for a read access */
+#define	MEM_F_WRITE		0x2	/* 'dir' value for a write access */
+#define	MEM_F_RW		0x3	/* MEM_F_READ | MEM_F_WRITE */
+#define	MEM_F_IMMUTABLE		0x4	/* mem_range cannot be unregistered */
+
+void	init_mem(void);
+int     emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie,
+		    struct vm_guest_paging *paging);
+		    
+int	register_mem(struct mem_range *memp);
+int	register_mem_fallback(struct mem_range *memp);
+int	unregister_mem(struct mem_range *memp);
+
+#endif	/* _MEM_H_ */


Property changes on: trunk/usr.sbin/bhyve/mem.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/mevent.c
===================================================================
--- trunk/usr.sbin/bhyve/mevent.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/mevent.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,457 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/mevent.c 268953 2014-07-21 19:08:02Z jhb $
+ */
+
+/*
+ * Micro event library for FreeBSD, designed for a single i/o thread 
+ * using kqueue, and having events be persistent by default.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/mevent.c 268953 2014-07-21 19:08:02Z jhb $");
+
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/time.h>
+
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include "mevent.h"
+
+#define	MEVENT_MAX	64
+
+#define	MEV_ADD		1
+#define	MEV_ENABLE	2
+#define	MEV_DISABLE	3
+#define	MEV_DEL_PENDING	4
+
+extern char *vmname;
+
+static pthread_t mevent_tid;
+static int mevent_timid = 43;
+static int mevent_pipefd[2];
+static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
+
+struct mevent {	
+	void	(*me_func)(int, enum ev_type, void *);	/* event callback */
+#define me_msecs me_fd	/* EVF_TIMER events keep their interval in me_fd */
+	int	me_fd;		/* descriptor (timer interval for EVF_TIMER) */
+	int	me_timid;	/* kevent ident used for EVF_TIMER events */
+	enum ev_type me_type;
+	void    *me_param;	/* opaque argument handed to me_func */
+	int	me_cq;		/* non-zero while queued on change_head */
+	int	me_state;	/* MEV_* state still to be applied */
+	int	me_closefd;	/* close me_fd instead of issuing a kevent change */
+	LIST_ENTRY(mevent) me_list;	/* links on global_head or change_head */
+};
+
+static LIST_HEAD(listhead, mevent) global_head, change_head;
+
+static void
+mevent_qlock(void)
+{
+	pthread_mutex_lock(&mevent_lmutex);	/* taken around event-list changes */
+}
+
+static void
+mevent_qunlock(void)
+{
+	pthread_mutex_unlock(&mevent_lmutex);	/* pairs with mevent_qlock() */
+}
+
+static void
+mevent_pipe_read(int fd, enum ev_type type, void *param)
+{
+	char buf[MEVENT_MAX];
+	int status;
+
+	/*
+	 * Drain the pipe read side; the bytes themselves are discarded.
+	 * NOTE(review): nothing in this file sets O_NONBLOCK on fd - confirm.
+	 */
+	do {
+		status = read(fd, buf, sizeof(buf));
+	} while (status == MEVENT_MAX);	/* a full buffer may mean more data */
+}
+
+static void
+mevent_notify(void)
+{
+	char c = '\0';	/* wake-up byte; mevent_pipe_read() discards the value */
+
+	/*
+	 * If calling from outside the i/o thread, write a byte on the
+	 * pipe to force the i/o thread to exit the blocking kevent call.
+	 */
+	if (mevent_pipefd[1] != 0 &&
+	    !pthread_equal(pthread_self(), mevent_tid))
+		(void)write(mevent_pipefd[1], &c, 1);	/* best effort */
+}
+
+/*
+ * Translate an mevent type into the matching kqueue filter.  Values
+ * outside enum ev_type map to 0.
+ */
+static int
+mevent_kq_filter(struct mevent *mevp)
+{
+
+	switch (mevp->me_type) {
+	case EVF_READ:
+		return (EVFILT_READ);
+	case EVF_WRITE:
+		return (EVFILT_WRITE);
+	case EVF_TIMER:
+		return (EVFILT_TIMER);
+	case EVF_SIGNAL:
+		return (EVFILT_SIGNAL);
+	default:
+		return (0);
+	}
+}
+
+/* Map the pending MEV_* state of an event to the kevent flags that apply it. */
+static int
+mevent_kq_flags(struct mevent *mevp)
+{
+	int ret = 0;	/* stays defined if assert() is compiled out (NDEBUG) */
+
+	switch (mevp->me_state) {
+	case MEV_ADD:
+		ret = EV_ADD;		/* implicitly enabled */
+		break;
+	case MEV_ENABLE:
+		ret = EV_ENABLE;
+		break;
+	case MEV_DISABLE:
+		ret = EV_DISABLE;
+		break;
+	case MEV_DEL_PENDING:
+		ret = EV_DELETE;
+		break;
+	default:
+		assert(0);		/* not a valid MEV_* state */
+		break;
+	}
+	return (ret);
+}
+
+static int
+mevent_kq_fflags(struct mevent *mevp)
+{
+	/* XXX nothing yet, perhaps EV_EOF for reads ? */
+	return (0);
+}
+
+/* Convert the change list into kevent entries in 'kev'; returns the count. */
+static int
+mevent_build(int mfd, struct kevent *kev)
+{
+	struct mevent *mevp, *tmpp;
+	int i;
+
+	i = 0;
+	mevent_qlock();
+
+	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
+		if (mevp->me_closefd) {
+			/*
+			 * A close of the file descriptor will remove the
+			 * event
+			 */
+			close(mevp->me_fd);
+		} else {
+			if (mevp->me_type == EVF_TIMER) {
+				kev[i].ident = mevp->me_timid;
+				kev[i].data = mevp->me_msecs;
+			} else {
+				kev[i].ident = mevp->me_fd;
+				kev[i].data = 0;
+			}
+			kev[i].filter = mevent_kq_filter(mevp);
+			kev[i].flags = mevent_kq_flags(mevp);
+			kev[i].fflags = mevent_kq_fflags(mevp);
+			kev[i].udata = mevp;	/* read back by mevent_handle() */
+			i++;
+		}
+
+		mevp->me_cq = 0;	/* no longer on the change list */
+		LIST_REMOVE(mevp, me_list);
+
+		if (mevp->me_state == MEV_DEL_PENDING) {
+			free(mevp);
+		} else {
+			LIST_INSERT_HEAD(&global_head, mevp, me_list);
+		}
+
+		assert(i < MEVENT_MAX);	/* the caller's array has MEVENT_MAX slots */
+	}
+
+	mevent_qunlock();
+
+	return (i);
+}
+
+/* Invoke the callback of every event in kev[0..numev-1]. */
+static void
+mevent_handle(struct kevent *kev, int numev)
+{
+	struct mevent *mevp;
+	int i;
+
+	for (i = 0; i < numev; i++) {
+		mevp = kev[i].udata;	/* stored by mevent_build() */
+		/* XXX check for EV_ERROR ? */
+
+		(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
+	}
+}
+
+/* Register 'func' for fd (or timer interval) 'tfd'; returns NULL on failure. */
+struct mevent *
+mevent_add(int tfd, enum ev_type type,
+	   void (*func)(int, enum ev_type, void *), void *param)
+{
+	struct mevent *lp, *mevp;
+
+	if (tfd < 0 || func == NULL) {
+		return (NULL);
+	}
+
+	mevp = NULL;
+	mevent_qlock();
+	/*
+	 * Verify that the fd/type tuple is not present in any list.
+	 * Timers are exempt from this check; a duplicate returns NULL.
+	 */
+	LIST_FOREACH(lp, &global_head, me_list) {
+		if (type != EVF_TIMER && lp->me_fd == tfd &&
+		    lp->me_type == type) {
+			goto exit;
+		}
+	}
+
+	LIST_FOREACH(lp, &change_head, me_list) {
+		if (type != EVF_TIMER && lp->me_fd == tfd &&
+		    lp->me_type == type) {
+			goto exit;
+		}
+	}
+
+	/*
+	 * Allocate an entry, populate it, and add it to the change list.
+	 */
+	mevp = calloc(1, sizeof(struct mevent));
+	if (mevp == NULL) {
+		goto exit;
+	}
+
+	if (type == EVF_TIMER) {
+		mevp->me_msecs = tfd;	/* for timers 'tfd' is the interval */
+		mevp->me_timid = mevent_timid++;	/* fresh ident per timer */
+	} else
+		mevp->me_fd = tfd;
+	mevp->me_type = type;
+	mevp->me_func = func;
+	mevp->me_param = param;
+
+	LIST_INSERT_HEAD(&change_head, mevp, me_list);
+	mevp->me_cq = 1;
+	mevp->me_state = MEV_ADD;
+	mevent_notify();
+
+exit:
+	mevent_qunlock();
+
+	return (mevp);
+}
+
+/*
+ * Move an event to 'newstate' (MEV_ENABLE or MEV_DISABLE).  Returns EINVAL
+ * for an event already marked for deletion, 0 otherwise.
+ */
+static int
+mevent_update(struct mevent *evp, int newstate)
+{
+	int error = 0;
+
+	/*
+	 * me_state is changed by other threads under the queue lock, so it
+	 * must also be examined with the lock held.
+	 */
+	mevent_qlock();
+	if (evp->me_state == MEV_DEL_PENDING) {
+		/* It's not possible to enable/disable a deleted event. */
+		error = EINVAL;
+	} else if (evp->me_state != newstate) {
+		evp->me_state = newstate;
+		/*
+		 * Place the entry onto the changed list if not already there.
+		 */
+		if (evp->me_cq == 0) {
+			evp->me_cq = 1;
+			LIST_REMOVE(evp, me_list);
+			LIST_INSERT_HEAD(&change_head, evp, me_list);
+			mevent_notify();
+		}
+	}
+	mevent_qunlock();
+
+	return (error);
+}
+
+int
+mevent_enable(struct mevent *evp)
+{
+	/* Queue a MEV_ENABLE state change; the result is mevent_update()'s. */
+	return (mevent_update(evp, MEV_ENABLE));
+}
+
+int
+mevent_disable(struct mevent *evp)
+{
+	/* Queue a MEV_DISABLE state change; the result is mevent_update()'s. */
+	return (mevent_update(evp, MEV_DISABLE));
+}
+
+/* Mark 'evp' for deletion, optionally closing its descriptor; returns 0. */
+static int
+mevent_delete_event(struct mevent *evp, int closefd)
+{
+	mevent_qlock();
+
+	/*
+	 * Place the entry onto the changed list if not already there, and
+	 * mark as to be deleted.
+	 */
+	if (evp->me_cq == 0) {
+		evp->me_cq = 1;
+		LIST_REMOVE(evp, me_list);
+		LIST_INSERT_HEAD(&change_head, evp, me_list);
+		mevent_notify();
+	}
+	evp->me_state = MEV_DEL_PENDING;	/* mevent_build() frees the entry */
+
+	if (closefd)
+		evp->me_closefd = 1;	/* mevent_build() then close()s me_fd */
+	mevent_qunlock();
+
+	return (0);
+}
+
+int
+mevent_delete(struct mevent *evp)
+{
+	/* Queue the event for removal, leaving its descriptor open. */
+	return (mevent_delete_event(evp, 0));
+}
+
+int
+mevent_delete_close(struct mevent *evp)
+{
+	/* Queue the event for removal and have its descriptor closed as well. */
+	return (mevent_delete_event(evp, 1));
+}
+
+static void
+mevent_set_name(void)
+{
+	/* Label the thread recorded in mevent_tid as "mevent". */
+	pthread_set_name_np(mevent_tid, "mevent");
+}
+
+/*
+ * Event loop of the i/o thread: applies queued changes to a private kqueue,
+ * waits for events and runs their callbacks.  This function never returns.
+ */
+void
+mevent_dispatch(void)
+{
+	struct kevent changelist[MEVENT_MAX];
+	struct kevent eventlist[MEVENT_MAX];
+	struct mevent *pipev;
+	int mfd;
+	int numev;
+	int ret;
+
+	mevent_tid = pthread_self();
+	mevent_set_name();
+
+	mfd = kqueue();
+	assert(mfd > 0);
+
+	/*
+	 * Open the pipe that will be used for other threads to force
+	 * the blocking kqueue call to exit by writing to it.  A failure
+	 * here ends the process with a non-zero exit status.
+	 */
+	ret = pipe(mevent_pipefd);
+	if (ret < 0) {
+		perror("pipe");
+		exit(1);
+	}
+
+	/* Add the internal event handler for the pipe read fd. */
+	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
+	assert(pipev != NULL);
+
+	for (;;) {
+		/*
+		 * Build changelist if required.
+		 * XXX the changelist can be put into the blocking call
+		 * to eliminate the extra syscall. Currently better for
+		 * debug.
+		 */
+		numev = mevent_build(mfd, changelist);
+		if (numev) {
+			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
+			if (ret == -1) {
+				perror("Error return from kevent change");
+			}
+		}
+
+		/*
+		 * Block awaiting events; -1 makes mevent_handle() a no-op.
+		 */
+		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
+		if (ret == -1 && errno != EINTR) {
+			perror("Error return from kevent monitor");
+		}
+
+		/* Handle reported events. */
+		mevent_handle(eventlist, ret);
+	}
+}


Property changes on: trunk/usr.sbin/bhyve/mevent.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/mevent.h
===================================================================
--- trunk/usr.sbin/bhyve/mevent.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/mevent.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,52 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/mevent.h 261090 2014-01-23 20:35:32Z jhb $
+ */
+
+#ifndef	_MEVENT_H_
+#define	_MEVENT_H_
+
+enum ev_type {
+	EVF_READ,	/* monitored with EVFILT_READ */
+	EVF_WRITE,	/* monitored with EVFILT_WRITE */
+	EVF_TIMER,	/* EVFILT_TIMER; mevent_add()'s 'fd' is the interval */
+	EVF_SIGNAL	/* monitored with EVFILT_SIGNAL */
+};
+
+struct mevent;
+
+struct mevent *mevent_add(int fd, enum ev_type type, 
+			  void (*func)(int, enum ev_type, void *),
+			  void *param);
+int	mevent_enable(struct mevent *evp);
+int	mevent_disable(struct mevent *evp);
+int	mevent_delete(struct mevent *evp);
+int	mevent_delete_close(struct mevent *evp);
+
+void	mevent_dispatch(void);
+
+#endif	/* _MEVENT_H_ */


Property changes on: trunk/usr.sbin/bhyve/mevent.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/mevent_test.c
===================================================================
--- trunk/usr.sbin/bhyve/mevent_test.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/mevent_test.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,257 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/mevent_test.c 255690 2013-09-19 04:48:26Z grehan $
+ */
+
+/*
+ * Test program for the micro event library. Set up a simple TCP echo
+ * service.
+ *
+ *  cc mevent_test.c mevent.c -lpthread
+ */
+
+#include <sys/types.h>
+#include <sys/stdint.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <machine/cpufunc.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <unistd.h>
+
+#include "mevent.h"
+
+#define TEST_PORT	4321
+
+/*
+ * Mutex/condvar pair used by acceptor_callback() to wake the acceptor
+ * thread when the listening socket's event fires.
+ */
+static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER;
+
+/* Handle of the timer event; set by acceptor(), deleted by timer_callback(). */
+static struct mevent *tevp;
+
+/*
+ * Not referenced in this file.
+ * NOTE(review): presumably satisfies an external reference from mevent.c -
+ * confirm.
+ */
+char *vmname = "test vm";
+
+
+/* Selects the echoer() variant that is driven by mevent read callbacks. */
+#define MEVENT_ECHO
+
+/* Number of timer events to capture */
+#define TEVSZ	4096
+/* One rdtsc() sample per timer callback, filled in by timer_callback(). */
+uint64_t tevbuf[TEVSZ];
+
+/*
+ * Print the minimum, maximum and mean interval, in microseconds, between
+ * consecutive samples in tevbuf[].  Samples are TSC values, so the TSC
+ * frequency is read from the machdep.tsc_freq sysctl to convert them.
+ *
+ * If the frequency cannot be read (or is reported as zero) nothing is
+ * computed: dividing by an uninitialised or zero frequency would be
+ * undefined.
+ */
+static void
+timer_print(void)
+{
+	uint64_t min, max, diff, sum, tsc_freq;
+	size_t len;
+	int j;
+
+	min = UINT64_MAX;
+	max = 0;
+	sum = 0;
+
+	tsc_freq = 0;
+	len = sizeof(tsc_freq);
+	if (sysctlbyname("machdep.tsc_freq", &tsc_freq, &len, NULL, 0) != 0) {
+		perror("sysctlbyname(machdep.tsc_freq)");
+		return;
+	}
+	if (tsc_freq == 0) {
+		fprintf(stderr, "machdep.tsc_freq is zero\n");
+		return;
+	}
+
+	for (j = 1; j < TEVSZ; j++) {
+		/* Convert a tsc diff into microseconds */
+		diff = (tevbuf[j] - tevbuf[j-1]) * 1000000 / tsc_freq;
+		sum += diff;
+		if (min > diff)
+			min = diff;
+		if (max < diff)
+			max = diff;
+	}
+
+	/* uint64_t has no portable printf length modifier; widen to uintmax_t. */
+	printf("timers done: usecs, min %ju, max %ju, mean %ju\n",
+	    (uintmax_t)min, (uintmax_t)max, (uintmax_t)(sum/(TEVSZ - 1)));
+}
+
+/*
+ * Timer event callback: store one TSC sample per invocation.  Once the
+ * buffer is full the timer event is deleted and the statistics are printed.
+ * Being called again after the buffer filled is treated as fatal.
+ */
+static void
+timer_callback(int fd, enum ev_type type, void *param)
+{
+	static int nsamples;
+
+	if (nsamples >= TEVSZ)
+		abort();
+
+	tevbuf[nsamples] = rdtsc();
+	nsamples++;
+
+	if (nsamples != TEVSZ)
+		return;
+
+	/* Buffer is full: stop the timer and report. */
+	mevent_delete(tevp);
+	timer_print();
+}
+
+
+#ifdef MEVENT_ECHO
+/*
+ * Per-connection wakeup channel: echoer_callback() signals e_cond (holding
+ * e_mt) and the echoer thread waits on it.
+ */
+struct esync {
+	pthread_mutex_t	e_mt;		/* protects the condvar hand-off */
+	pthread_cond_t	e_cond;		/* signalled by echoer_callback() */
+};
+
+/*
+ * Read-event callback for an echo connection.  'param' is the connection's
+ * struct esync; signal its condvar so the waiting echoer thread wakes up.
+ */
+static void
+echoer_callback(int fd, enum ev_type type, void *param)
+{
+	struct esync *es;
+
+	es = param;
+
+	pthread_mutex_lock(&es->e_mt);
+	pthread_cond_signal(&es->e_cond);
+	pthread_mutex_unlock(&es->e_mt);
+}
+
+/*
+ * Per-connection thread body (mevent-driven variant).  'param' carries the
+ * connected socket descriptor.  A read event is registered on the socket;
+ * each time echoer_callback() signals, one chunk is read, echoed back to the
+ * peer and copied to standard output.  The loop ends on EOF or read error,
+ * after which the event is deleted via mevent_delete_close().
+ *
+ * Always returns NULL.
+ */
+static void *
+echoer(void *param)
+{
+	struct esync sync;
+	struct mevent *mev;
+	char buf[128];
+	int fd = (int)(uintptr_t) param;
+	ssize_t len;	/* read() returns ssize_t, not int */
+
+	pthread_mutex_init(&sync.e_mt, NULL);
+	pthread_cond_init(&sync.e_cond, NULL);
+
+	/* Hold the mutex before registering so no wakeup can be missed. */
+	pthread_mutex_lock(&sync.e_mt);
+
+	mev = mevent_add(fd, EVF_READ, echoer_callback, &sync);
+	if (mev == NULL) {
+		printf("Could not allocate echoer event\n");
+		exit(1);
+	}
+
+	while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) {
+		len = read(fd, buf, sizeof(buf));
+		if (len > 0) {
+			/* Best-effort echo; short writes are not retried. */
+			(void)write(fd, buf, len);
+			/*
+			 * The local copy goes to standard output; the
+			 * original wrote to descriptor 0 (standard input).
+			 */
+			(void)write(STDOUT_FILENO, buf, len);
+		} else {
+			break;
+		}
+	}
+
+	mevent_delete_close(mev);
+
+	pthread_mutex_unlock(&sync.e_mt);
+	pthread_mutex_destroy(&sync.e_mt);
+	pthread_cond_destroy(&sync.e_cond);
+
+	return (NULL);
+}
+
+#else
+
+/*
+ * Per-connection thread body (plain blocking variant).  'param' carries the
+ * connected socket descriptor; everything read from it is copied to
+ * descriptor 1 until EOF or a read error.  Always returns NULL.
+ */
+static void *
+echoer(void *param)
+{
+	char buf[128];
+	int fd = (int)(uintptr_t) param;
+	int len;
+
+	for (;;) {
+		len = read(fd, buf, sizeof(buf));
+		if (len <= 0)
+			break;
+		write(1, buf, len);
+	}
+
+	return (NULL);
+}
+#endif /* MEVENT_ECHO */
+
+/*
+ * Read-event callback for the listening socket: wake the acceptor thread,
+ * which is blocked on accept_condvar.  None of the arguments are used.
+ */
+static void
+acceptor_callback(int fd, enum ev_type type, void *param)
+{
+
+	pthread_mutex_lock(&accept_mutex);
+	/* Signal while holding the mutex the waiter sleeps on. */
+	pthread_cond_signal(&accept_condvar);
+	pthread_mutex_unlock(&accept_mutex);
+}
+
+/*
+ * Acceptor thread body.  Creates a TCP socket listening on TEST_PORT,
+ * registers a read event for it and then, each time acceptor_callback()
+ * signals, accepts one connection and spawns an echoer thread for it.  The
+ * first accepted connection also starts the timer event whose handle is
+ * kept in 'tevp'.
+ *
+ * Socket setup failures and failure to register the listen event terminate
+ * the process.  'param' is unused.  Returns NULL if the condvar wait fails.
+ */
+static void *
+acceptor(void *param)
+{
+	/* Zero everything (including sin_zero) before handing it to bind(). */
+	struct sockaddr_in sin = { 0 };
+	pthread_t tid;
+	int error;
+	int news;
+	int s;
+
+	if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+		perror("socket");
+		exit(1);
+	}
+
+	sin.sin_len = sizeof(sin);
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = htonl(INADDR_ANY);
+	sin.sin_port = htons(TEST_PORT);
+
+	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
+		perror("bind");
+		exit(1);
+	}
+
+	if (listen(s, 1) < 0) {
+		perror("listen");
+		exit(1);
+	}
+
+	/* Without the listen event this thread would never be woken. */
+	if (mevent_add(s, EVF_READ, acceptor_callback, NULL) == NULL) {
+		printf("Could not allocate acceptor event\n");
+		exit(1);
+	}
+
+	pthread_mutex_lock(&accept_mutex);
+
+	while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) {
+		news = accept(s, NULL, NULL);
+		if (news < 0) {
+			perror("accept error");
+		} else {
+			static int first = 1;
+
+			if (first) {
+				/*
+				 * Start a timer
+				 */
+				first = 0;
+				tevp = mevent_add(1, EVF_TIMER, timer_callback,
+						  NULL);
+			}
+
+			printf("incoming connection, spawning thread\n");
+			error = pthread_create(&tid, NULL, echoer,
+				       (void *)(uintptr_t)news);
+			if (error != 0) {
+				/* Nobody will service this socket; drop it. */
+				printf("pthread_create failed: %d\n", error);
+				close(news);
+			}
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Start the acceptor thread, then run the mevent dispatch loop on the
+ * main thread.
+ */
+int
+main(void)
+{
+	pthread_t tid;
+	int error;
+
+	error = pthread_create(&tid, NULL, acceptor, NULL);
+	if (error != 0) {
+		fprintf(stderr, "pthread_create failed: %d\n", error);
+		exit(1);
+	}
+
+	mevent_dispatch();
+
+	return (0);
+}


Property changes on: trunk/usr.sbin/bhyve/mevent_test.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/mptbl.c
===================================================================
--- trunk/usr.sbin/bhyve/mptbl.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/mptbl.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,378 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/mptbl.c 268972 2014-07-22 03:14:37Z jhb $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/mptbl.c 268972 2014-07-22 03:14:37Z jhb $");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <x86/mptable.h>
+
+#include <stdio.h>
+#include <string.h>
+
+#include "acpi.h"
+#include "bhyverun.h"
+#include "mptbl.h"
+#include "pci_emul.h"
+
+#define MPTABLE_BASE		0xF0000
+
+/* floating pointer length + maximum length of configuration table */
+#define	MPTABLE_MAX_LENGTH	(65536 + 16)
+
+#define LAPIC_PADDR		0xFEE00000
+#define LAPIC_VERSION 		16
+
+#define IOAPIC_PADDR		0xFEC00000
+#define IOAPIC_VERSION		0x11
+
+#define MP_SPECREV		4
+#define MPFP_SIG		"_MP_"
+
+/* Configuration header defines */
+#define MPCH_SIG		"PCMP"
+#define MPCH_OEMID		"BHyVe   "
+#define MPCH_OEMID_LEN          8
+#define MPCH_PRODID             "Hypervisor  "
+#define MPCH_PRODID_LEN         12
+
+/* Processor entry defines */
+#define MPEP_SIG_FAMILY		6	/* XXX bhyve should supply this */
+#define MPEP_SIG_MODEL		26
+#define MPEP_SIG_STEPPING	5
+#define MPEP_SIG		\
+	((MPEP_SIG_FAMILY << 8) | \
+	 (MPEP_SIG_MODEL << 4)	| \
+	 (MPEP_SIG_STEPPING))
+
+#define MPEP_FEATURES           (0xBFEBFBFF) /* XXX Intel i7 */
+
+/* Number of local intr entries */
+#define	MPEII_NUM_LOCAL_IRQ	2
+
+/* Bus entry defines */
+#define MPE_NUM_BUSES		2
+#define MPE_BUSNAME_LEN		6
+#define MPE_BUSNAME_ISA		"ISA   "
+#define MPE_BUSNAME_PCI		"PCI   "
+
+/*
+ * Optional OEM table recorded by mptable_add_oemtbl(); mptable_build()
+ * copies it behind the base table when oem_tbl_start is non-NULL.
+ */
+static void *oem_tbl_start;
+static int oem_tbl_size;
+
+/*
+ * Return the 8-bit value which, added to the modulo-256 sum of the 'len'
+ * bytes starting at 'base', gives zero.
+ */
+static uint8_t
+mpt_compute_checksum(void *base, size_t len)
+{
+	const uint8_t *data;
+	uint8_t total;
+	size_t idx;
+
+	data = base;
+	total = 0;
+	for (idx = 0; idx < len; idx++)
+		total += data[idx];
+
+	/* The conversion to uint8_t wraps 256 back to 0 for a zero sum. */
+	return (256 - total);
+}
+
+/*
+ * Fill in the MP floating pointer structure at 'mpfp'.  'gpa' is the guest
+ * physical address of the structure itself; the configuration table pointer
+ * is set to the address immediately following it.
+ */
+static void
+mpt_build_mpfp(mpfps_t mpfp, vm_paddr_t gpa)
+{
+
+	memset(mpfp, 0, sizeof(*mpfp));
+
+	mpfp->spec_rev = MP_SPECREV;
+	mpfp->length = 1;
+	mpfp->pap = gpa + sizeof(*mpfp);
+	memcpy(mpfp->signature, MPFP_SIG, 4);
+
+	/* Computed last, while the checksum field itself is still zero. */
+	mpfp->checksum = mpt_compute_checksum(mpfp, sizeof(*mpfp));
+}
+
+/*
+ * Initialise the MP configuration table header at 'mpch': signature, spec
+ * revision, OEM/product identifiers and the local APIC address.  Everything
+ * else is left zero.
+ */
+static void
+mpt_build_mpch(mpcth_t mpch)
+{
+
+	memset(mpch, 0, sizeof(*mpch));
+
+	mpch->apic_address = LAPIC_PADDR;
+	mpch->spec_rev = MP_SPECREV;
+	memcpy(mpch->signature, MPCH_SIG, 4);
+	memcpy(mpch->product_id, MPCH_PRODID, MPCH_PRODID_LEN);
+	memcpy(mpch->oem_id, MPCH_OEMID, MPCH_OEMID_LEN);
+}
+
+/*
+ * Write 'ncpu' consecutive processor entries starting at 'mpep'.  Entry n
+ * gets APIC id n (XXX); every CPU is marked enabled and CPU 0 is also
+ * flagged as the bootstrap processor.
+ */
+static void
+mpt_build_proc_entries(proc_entry_ptr mpep, int ncpu)
+{
+	proc_entry_ptr cpu;
+	int n;
+
+	for (n = 0; n < ncpu; n++) {
+		cpu = &mpep[n];
+
+		memset(cpu, 0, sizeof(*cpu));
+		cpu->type = MPCT_ENTRY_PROCESSOR;
+		cpu->apic_id = n;
+		cpu->apic_version = LAPIC_VERSION;
+		if (n == 0)
+			cpu->cpu_flags = PROCENTRY_FLAG_EN | PROCENTRY_FLAG_BP;
+		else
+			cpu->cpu_flags = PROCENTRY_FLAG_EN;
+		cpu->cpu_signature = MPEP_SIG;
+		cpu->feature_flags = MPEP_FEATURES;
+	}
+}
+
+/*
+ * Write the MPEII_NUM_LOCAL_IRQ local interrupt entries starting at 'mpie'.
+ * LINT0 is hardcoded as ExtINT and LINT1 as NMI; both target APIC id 0xff
+ * (all CPUs) with conforming polarity and trigger mode.
+ */
+static void
+mpt_build_localint_entries(int_entry_ptr mpie)
+{
+	int lint;
+
+	for (lint = 0; lint < MPEII_NUM_LOCAL_IRQ; lint++, mpie++) {
+		memset(mpie, 0, sizeof(*mpie));
+		mpie->type = MPCT_ENTRY_LOCAL_INT;
+		if (lint == 0)
+			mpie->int_type = INTENTRY_TYPE_EXTINT;
+		else
+			mpie->int_type = INTENTRY_TYPE_NMI;
+		mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM |
+		    INTENTRY_FLAGS_TRIGGER_CONFORM;
+		mpie->dst_apic_id = 0xff;
+		mpie->dst_apic_int = lint;
+	}
+}
+
+/*
+ * Write two bus entries starting at 'mpeb': bus id 0 named PCI, followed by
+ * bus id 1 named ISA.
+ */
+static void
+mpt_build_bus_entries(bus_entry_ptr mpeb)
+{
+	bus_entry_ptr pci, isa;
+
+	pci = &mpeb[0];
+	isa = &mpeb[1];
+
+	memset(pci, 0, sizeof(*pci));
+	pci->type = MPCT_ENTRY_BUS;
+	pci->bus_id = 0;
+	memcpy(pci->bus_type, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN);
+
+	memset(isa, 0, sizeof(*isa));
+	isa->type = MPCT_ENTRY_BUS;
+	isa->bus_id = 1;
+	memcpy(isa->bus_type, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN);
+}
+
+/*
+ * Write a single, enabled I/O APIC entry at 'mpei' with APIC id 'id',
+ * located at IOAPIC_PADDR.
+ */
+static void
+mpt_build_ioapic_entries(io_apic_entry_ptr mpei, int id)
+{
+
+	memset(mpei, 0, sizeof(*mpei));
+
+	mpei->type = MPCT_ENTRY_IOAPIC;
+	mpei->apic_address = IOAPIC_PADDR;
+	mpei->apic_flags = IOAPICENTRY_FLAG_EN;
+	mpei->apic_version = IOAPIC_VERSION;
+	mpei->apic_id = id;
+}
+
+/*
+ * Return the number of I/O interrupt entries mpt_build_ioint_entries() will
+ * emit: entries for the first 16 pins are always present, plus one entry
+ * for each active PCI INTx pin on every bus.
+ */
+static int
+mpt_count_ioint_entries(void)
+{
+	int bus, total;
+
+	total = 16;
+	for (bus = 0; bus <= PCI_BUSMAX; bus++)
+		total += pci_count_lintr(bus);
+
+	return (total);
+}
+
+/*
+ * pci_walk_lintr() callback: emit one I/O interrupt entry for a PCI INTx
+ * pin.  'arg' points at the caller's cursor (an int_entry_ptr), which is
+ * advanced past the entry written here.  'pirq_pin' is unused.
+ */
+static void
+mpt_generate_pci_int(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
+    void *arg)
+{
+	int_entry_ptr *cursor, ent;
+
+	cursor = arg;
+	ent = *cursor;
+
+	memset(ent, 0, sizeof(*ent));
+	ent->type = MPCT_ENTRY_INT;
+	ent->int_type = INTENTRY_TYPE_INT;
+	ent->src_bus_id = bus;
+	ent->src_bus_irq = slot << 2 | (pin - 1);
+
+	/*
+	 * This is always after another I/O interrupt entry, so cheat
+	 * and fetch the I/O APIC ID from the prior entry.
+	 */
+	ent->dst_apic_id = ent[-1].dst_apic_id;
+	ent->dst_apic_int = ioapic_irq;
+
+	*cursor = ent + 1;
+}
+
+/*
+ * Write the I/O interrupt entries starting at 'mpie', all targeting the
+ * I/O APIC with id 'id': first one entry for each of pins 0-15, then one
+ * entry per PCI INTx interrupt found by walking every PCI bus.
+ *
+ * The configuration for the first 16 pins is taken from kernel mptable.c
+ * mptable_parse_default_config_ints(...); for now just use the default
+ * config, tweak later if needed.
+ */
+static void
+mpt_build_ioint_entries(int_entry_ptr mpie, int id)
+{
+	int pin, bus;
+
+	/* First, generate the first 16 pins; all are sourced from bus id 1. */
+	for (pin = 0; pin < 16; pin++, mpie++) {
+		memset(mpie, 0, sizeof(*mpie));
+		mpie->type = MPCT_ENTRY_INT;
+		mpie->src_bus_id = 1;
+		mpie->dst_apic_id = id;
+		mpie->dst_apic_int = pin;
+
+		/* Default: an identity-mapped vectored interrupt. */
+		mpie->int_type = INTENTRY_TYPE_INT;
+		mpie->src_bus_irq = pin;
+
+		if (pin == 0) {
+			/* Pin 0 is an ExtINT pin. */
+			mpie->int_type = INTENTRY_TYPE_EXTINT;
+		} else if (pin == 2) {
+			/* IRQ 0 is routed to pin 2. */
+			mpie->src_bus_irq = 0;
+		} else if (pin == SCI_INT) {
+			/* ACPI SCI is level triggered and active-lo. */
+			mpie->int_flags = INTENTRY_FLAGS_POLARITY_ACTIVELO |
+			    INTENTRY_FLAGS_TRIGGER_LEVEL;
+		}
+	}
+
+	/* Next, generate entries for any PCI INTx interrupts. */
+	for (bus = 0; bus <= PCI_BUSMAX; bus++)
+		pci_walk_lintr(bus, mpt_generate_pci_int, &mpie);
+}
+
+/*
+ * Remember an OEM table ('tbl', 'tblsz' bytes) to be appended by a later
+ * mptable_build().  Only the pointer is stored; the data is not copied here.
+ */
+void
+mptable_add_oemtbl(void *tbl, int tblsz)
+{
+
+	oem_tbl_size = tblsz;
+	oem_tbl_start = tbl;
+}
+
+/*
+ * Build the MP table for a guest with 'ncpu' CPUs at guest physical address
+ * MPTABLE_BASE.  The layout written is, in order: floating pointer,
+ * configuration table header, processor entries, bus entries, one I/O APIC
+ * entry, I/O interrupt entries, local interrupt entries and, if one was
+ * registered with mptable_add_oemtbl(), the OEM table.
+ *
+ * Returns 0 on success, ENOMEM if the table area is not mapped guest
+ * memory, or EINVAL if any PCI bus other than bus 0 is configured.
+ */
+int
+mptable_build(struct vmctx *ctx, int ncpu)
+{
+	mpcth_t			mpch;
+	bus_entry_ptr		mpeb;
+	io_apic_entry_ptr	mpei;
+	proc_entry_ptr		mpep;
+	mpfps_t			mpfp;
+	int_entry_ptr		mpie;
+	int			ioints, bus;
+	char 			*curraddr;
+	char 			*startaddr;
+
+	/* Host mapping of the MPTABLE_MAX_LENGTH bytes at MPTABLE_BASE. */
+	startaddr = paddr_guest2host(ctx, MPTABLE_BASE, MPTABLE_MAX_LENGTH);
+	if (startaddr == NULL) {
+		fprintf(stderr, "mptable requires mapped mem\n");
+		return (ENOMEM);
+	}
+
+	/*
+	 * There is no way to advertise multiple PCI hierarchies via MPtable
+	 * so require that there is no PCI hierarchy with a non-zero bus
+	 * number.
+	 */
+	for (bus = 1; bus <= PCI_BUSMAX; bus++) {
+		if (pci_bus_configured(bus)) {
+			fprintf(stderr, "MPtable is incompatible with "
+			    "multiple PCI hierarchies.\r\n");
+			fprintf(stderr, "MPtable generation can be disabled "
+			    "by passing the -Y option to bhyve(8).\r\n");
+			return (EINVAL);
+		}
+	}
+
+	/* 'curraddr' is the write cursor; each section advances it. */
+	curraddr = startaddr;
+	mpfp = (mpfps_t)curraddr;
+	mpt_build_mpfp(mpfp, MPTABLE_BASE);
+	curraddr += sizeof(*mpfp);
+
+	/* The header zeroes entry_count; the sections below add to it. */
+	mpch = (mpcth_t)curraddr;
+	mpt_build_mpch(mpch);
+	curraddr += sizeof(*mpch);
+
+	mpep = (proc_entry_ptr)curraddr;
+	mpt_build_proc_entries(mpep, ncpu);
+	curraddr += sizeof(*mpep) * ncpu;
+	mpch->entry_count += ncpu;
+
+	mpeb = (bus_entry_ptr) curraddr;
+	mpt_build_bus_entries(mpeb);
+	curraddr += sizeof(*mpeb) * MPE_NUM_BUSES;
+	mpch->entry_count += MPE_NUM_BUSES;
+
+	mpei = (io_apic_entry_ptr)curraddr;
+	mpt_build_ioapic_entries(mpei, 0);
+	curraddr += sizeof(*mpei);
+	mpch->entry_count++;
+
+	/* The count must match what mpt_build_ioint_entries() writes. */
+	mpie = (int_entry_ptr) curraddr;
+	ioints = mpt_count_ioint_entries();
+	mpt_build_ioint_entries(mpie, 0);
+	curraddr += sizeof(*mpie) * ioints;
+	mpch->entry_count += ioints;
+
+	mpie = (int_entry_ptr)curraddr;
+	mpt_build_localint_entries(mpie);
+	curraddr += sizeof(*mpie) * MPEII_NUM_LOCAL_IRQ;
+	mpch->entry_count += MPEII_NUM_LOCAL_IRQ;
+
+	/*
+	 * The OEM table is copied right after the base table; 'curraddr' is
+	 * not advanced, so it is excluded from base_table_length below.
+	 * NOTE(review): oem_tbl_size is not checked against the space left
+	 * in the MPTABLE_MAX_LENGTH mapping - confirm callers bound it.
+	 */
+	if (oem_tbl_start) {
+		mpch->oem_table_pointer = curraddr - startaddr + MPTABLE_BASE;
+		mpch->oem_table_size = oem_tbl_size;
+		memcpy(curraddr, oem_tbl_start, oem_tbl_size);
+	}
+
+	/* Length and checksum cover the header plus all base entries. */
+	mpch->base_table_length = curraddr - (char *)mpch;
+	mpch->checksum = mpt_compute_checksum(mpch, mpch->base_table_length);
+
+	return (0);
+}


Property changes on: trunk/usr.sbin/bhyve/mptbl.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/mptbl.h
===================================================================
--- trunk/usr.sbin/bhyve/mptbl.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/mptbl.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,36 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/mptbl.h 259301 2013-12-13 06:59:18Z grehan $
+ */
+
+#ifndef _MPTBL_H_
+#define _MPTBL_H_
+
+/*
+ * Forward declaration so this header can be included without first
+ * pulling in the definition of struct vmctx.
+ */
+struct vmctx;
+
+/*
+ * Build the guest MP table for 'ncpu' CPUs.  Returns 0 on success or an
+ * errno value on failure.
+ */
+int	mptable_build(struct vmctx *ctx, int ncpu);
+
+/*
+ * Register an OEM table of 'tblsz' bytes to be appended by a later
+ * mptable_build().
+ */
+void	mptable_add_oemtbl(void *tbl, int tblsz);
+
+#endif /* _MPTBL_H_ */


Property changes on: trunk/usr.sbin/bhyve/mptbl.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_ahci.c
===================================================================
--- trunk/usr.sbin/bhyve/pci_ahci.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_ahci.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,2477 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013  Zhixiang Yu <zcore at freebsd.org>
+ * Copyright (c) 2015-2016 Alexander Motin <mav at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 317001 2017-04-16 06:00:14Z mav $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 317001 2017-04-16 06:00:14Z mav $");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/ioctl.h>
+#include <sys/disk.h>
+#include <sys/ata.h>
+#include <sys/endian.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <inttypes.h>
+#include <md5.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "ahci.h"
+#include "block_if.h"
+
+#define	DEF_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
+#define	MAX_PORTS	32	/* AHCI supports 32 ports */
+
+#define	PxSIG_ATA	0x00000101 /* ATA drive */
+#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
+
+/*
+ * SATA FIS type codes.  The FIS builders in this file store the code in
+ * byte 0 of the FIS, and ahci_write_fis() uses it to select the slot in
+ * the received-FIS area.
+ */
+enum sata_fis_type {
+	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
+	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
+	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
+	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
+	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
+	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
+	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
+	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
+};
+
+/*
+ * SCSI opcodes
+ */
+#define	TEST_UNIT_READY		0x00
+#define	REQUEST_SENSE		0x03
+#define	INQUIRY			0x12
+#define	START_STOP_UNIT		0x1B
+#define	PREVENT_ALLOW		0x1E
+#define	READ_CAPACITY		0x25
+#define	READ_10			0x28
+#define	POSITION_TO_ELEMENT	0x2B
+#define	READ_TOC		0x43
+#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
+#define	MODE_SENSE_10		0x5A
+#define	REPORT_LUNS		0xA0
+#define	READ_12			0xA8
+#define	READ_CD			0xBE
+
+/*
+ * SCSI mode page codes
+ */
+#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
+#define	MODEPAGE_CD_CAPABILITIES	0x2A
+
+/*
+ * ATA commands
+ */
+#define	ATA_SF_ENAB_SATA_SF		0x10
+#define		ATA_SATA_SF_AN		0x05
+#define	ATA_SF_DIS_SATA_SF		0x90
+
+/*
+ * Debug printf
+ */
+#ifdef AHCI_DEBUG
+static FILE *dbg;
+#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
+#else
+#define DPRINTF(format, arg...)
+#endif
+#define WPRINTF(format, arg...) printf(format, ##arg)
+
+/*
+ * One in-flight (or free) I/O request of a port.  A request sits either on
+ * the port's free list (iofhd, via io_flist) or on its busy list (iobhd,
+ * via io_blist).
+ */
+struct ahci_ioreq {
+	struct blockif_req io_req;	/* request handed to the blockif layer */
+	struct ahci_port *io_pr;	/* NOTE(review): presumably the owning port */
+	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* free-list linkage */
+	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* busy-list linkage */
+	uint8_t *cfis;			/* command FIS; byte 2 is the ATA command */
+	uint32_t len;			/* total transfer length in bytes */
+	uint32_t done;			/* bytes of 'len' already transferred */
+	int slot;			/* command slot number of this request */
+	int more;			/* NOTE(review): use not visible here */
+};
+
+/*
+ * Emulation state of a single AHCI port.
+ */
+struct ahci_port {
+	/* Backing block device; NULL means no device is attached. */
+	struct blockif_ctxt *bctx;
+	/* Controller this port belongs to. */
+	struct pci_ahci_softc *pr_sc;
+	/* NOTE(review): presumably the host mapping of the command list. */
+	uint8_t *cmd_lst;
+	/* Received-FIS area; ahci_write_fis() copies D2H FISes into it. */
+	uint8_t *rfis;
+	char ident[20 + 1];
+	/* Port index; also the port's bit position in the controller IS. */
+	int port;
+	/* Non-zero for an ATAPI device, zero for an ATA disk. */
+	int atapi;
+	int reset;
+	/* Set when a FIS with ATA_S_ERROR is posted; cleared once stopped. */
+	int waitforclear;
+	/* Reset to 128 and ATA_UDMA6 respectively by ahci_port_reset(). */
+	int mult_sectors;
+	uint8_t xfermode;
+	/* Details of the last failed command, saved by the FIS writers. */
+	uint8_t err_cfis[20];
+	uint8_t sense_key;
+	uint8_t asc;
+	u_int ccs;
+	/* Bitmask of command slots with an outstanding request. */
+	uint32_t pending;
+
+	/*
+	 * NOTE(review): the names below match the AHCI per-port register
+	 * set (PxCLB ... PxFBS) - confirm against the register handlers.
+	 */
+	uint32_t clb;
+	uint32_t clbu;
+	uint32_t fb;
+	uint32_t fbu;
+	uint32_t is;
+	uint32_t ie;
+	uint32_t cmd;
+	uint32_t unused0;
+	uint32_t tfd;
+	uint32_t sig;
+	uint32_t ssts;
+	uint32_t sctl;
+	uint32_t serr;
+	uint32_t sact;
+	uint32_t ci;
+	uint32_t sntf;
+	uint32_t fbs;
+
+	/*
+	 * i/o request info
+	 */
+	struct ahci_ioreq *ioreq;
+	int ioqsz;
+	/* Free and busy request lists (see struct ahci_ioreq). */
+	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
+	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
+};
+
+/*
+ * Command header layout.
+ * NOTE(review): presumably one entry of the guest's AHCI command list;
+ * field meanings are inferred from their names (prdtl = PRDT length,
+ * prdbc = PRD byte count, ctba = command table base address) - confirm.
+ */
+struct ahci_cmd_hdr {
+	uint16_t flags;
+	uint16_t prdtl;
+	uint32_t prdbc;
+	uint64_t ctba;
+	uint32_t reserved[4];
+};
+
+/*
+ * One physical region descriptor table entry.  ahci_build_iov() computes a
+ * region's size as (dbc & DBCMASK) + 1 bytes.
+ * NOTE(review): 'dba' is presumably the region's guest physical address.
+ */
+struct ahci_prdt_entry {
+	uint64_t dba;
+	uint32_t reserved;
+#define	DBCMASK		0x3fffff
+	uint32_t dbc;
+};
+
+/*
+ * Per-controller state.
+ */
+struct pci_ahci_softc {
+	struct pci_devinst *asc_pi;	/* PCI device instance of the HBA */
+	pthread_mutex_t	mtx;		/* ahci_port_stop() asserts it is held */
+	int ports;			/* number of entries of port[] in use */
+	/*
+	 * NOTE(review): the names below match the AHCI generic host
+	 * control registers - confirm against the register handlers.
+	 */
+	uint32_t cap;
+	uint32_t ghc;
+	uint32_t is;			/* bit n set: port n has an interrupt */
+	uint32_t pi;
+	uint32_t vs;
+	uint32_t ccc_ctl;
+	uint32_t ccc_pts;
+	uint32_t em_loc;
+	uint32_t em_ctl;
+	uint32_t cap2;
+	uint32_t bohc;
+	uint32_t lintr;			/* non-zero while legacy INTx is asserted */
+	struct ahci_port port[MAX_PORTS];
+};
+#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
+
+static void ahci_handle_port(struct ahci_port *p);
+
+/*
+ * Convert a logical block address to minute/second/frame form: 150 is
+ * added to 'lba', then the value is split at 75 frames per second and 60
+ * seconds per minute.  buf[0] = minutes, buf[1] = seconds, buf[2] = frames.
+ */
+static inline void
+lba_to_msf(uint8_t *buf, int lba)
+{
+	int frames, seconds;
+
+	frames = lba + 150;
+	seconds = frames / 75;
+
+	buf[0] = seconds / 60;
+	buf[1] = seconds % 60;
+	buf[2] = frames % 75;
+}
+
+/*
+ * Generate HBA interrupts on global IS register write.
+ *
+ * 'mask' is the set of port bits that were touched.  The global IS is first
+ * refreshed from every port's PxIS/PxIE.  If nothing is pending or
+ * interrupts are globally disabled, a previously asserted legacy interrupt
+ * is deasserted.  Otherwise either the legacy interrupt is asserted (no
+ * MSI) or the MSI messages covering the touched ports are sent; when there
+ * are fewer messages than ports, the last message is shared by all
+ * remaining ports.
+ */
+static void
+ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
+{
+	struct pci_devinst *pi = sc->asc_pi;
+	struct ahci_port *p;
+	int i, nmsg;
+	uint32_t mmask;
+
+	/* Update global IS from PxIS/PxIE. */
+	for (i = 0; i < sc->ports; i++) {
+		p = &sc->port[i];
+		/* Unsigned shift: bit 31 is valid with MAX_PORTS == 32. */
+		if (p->is & p->ie)
+			sc->is |= (1U << i);
+	}
+	DPRINTF("%s(%08x) %08x\n", __func__, mask, sc->is);
+
+	/* If there is nothing enabled -- clear legacy interrupt and exit. */
+	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
+		if (sc->lintr) {
+			pci_lintr_deassert(pi);
+			sc->lintr = 0;
+		}
+		return;
+	}
+
+	/* If there is anything and no MSI -- assert legacy interrupt. */
+	nmsg = pci_msi_maxmsgnum(pi);
+	if (nmsg == 0) {
+		if (!sc->lintr) {
+			sc->lintr = 1;
+			pci_lintr_assert(pi);
+		}
+		return;
+	}
+
+	/* Assert respective MSIs for ports that were touched. */
+	for (i = 0; i < nmsg; i++) {
+		if (sc->ports <= nmsg || i < nmsg - 1)
+			mmask = 1U << i;
+		else
+			mmask = 0xffffffff << i;
+		if ((sc->is & mask) && (mmask & mask))
+			pci_generate_msi(pi, i);
+	}
+}
+
+/*
+ * Generate HBA interrupt on specific port event.
+ *
+ * Nothing happens unless the port has an enabled interrupt pending
+ * (PxIS & PxIE).  A port with its own MSI message always raises it (if
+ * interrupts are globally enabled).  A port sharing the last MSI message,
+ * or using the legacy interrupt, raises it only when its bit in the global
+ * IS was not already set.
+ */
+static void
+ahci_port_intr(struct ahci_port *p)
+{
+	struct pci_ahci_softc *sc = p->pr_sc;
+	struct pci_devinst *pi = sc->asc_pi;
+	/* Unsigned shift: bit 31 is valid with MAX_PORTS == 32. */
+	uint32_t pbit = 1U << p->port;
+	int nmsg;
+
+	DPRINTF("%s(%d) %08x/%08x %08x\n", __func__,
+	    p->port, p->is, p->ie, sc->is);
+
+	/* If there is nothing enabled -- we are done. */
+	if ((p->is & p->ie) == 0)
+		return;
+
+	/* In case of non-shared MSI always generate interrupt. */
+	nmsg = pci_msi_maxmsgnum(pi);
+	if (sc->ports <= nmsg || p->port < nmsg - 1) {
+		sc->is |= pbit;
+		if ((sc->ghc & AHCI_GHC_IE) == 0)
+			return;
+		pci_generate_msi(pi, p->port);
+		return;
+	}
+
+	/* If IS for this port is already set -- do nothing. */
+	if (sc->is & pbit)
+		return;
+
+	sc->is |= pbit;
+
+	/* If interrupts are enabled -- generate one. */
+	if ((sc->ghc & AHCI_GHC_IE) == 0)
+		return;
+	if (nmsg > 0) {
+		pci_generate_msi(pi, nmsg - 1);
+	} else if (!sc->lintr) {
+		sc->lintr = 1;
+		pci_lintr_assert(pi);
+	}
+}
+
+/*
+ * Post the device-to-host FIS 'fis' of type 'ft' into the port's
+ * received-FIS area and raise the matching port interrupt.
+ *
+ * Nothing is done if there is no received-FIS area or FIS receive is not
+ * enabled (AHCI_P_CMD_FRE).  Only D2H register, set-device-bits and PIO
+ * setup FISes are supported.  The per-type interrupt is requested only if
+ * bit 6 of fis[1] is set; an error status in fis[2] additionally requests
+ * a task-file-error interrupt and sets 'waitforclear'.
+ */
+static void
+ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
+{
+	int offset, len, irq, want_intr;
+
+	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
+		return;
+
+	/* Bit 6 of byte 1 is the FIS's interrupt request flag. */
+	want_intr = (fis[1] & (1 << 6)) != 0;
+
+	switch (ft) {
+	case FIS_TYPE_REGD2H:
+		offset = 0x40;
+		len = 20;
+		irq = want_intr ? AHCI_P_IX_DHR : 0;
+		break;
+	case FIS_TYPE_SETDEVBITS:
+		offset = 0x58;
+		len = 8;
+		irq = want_intr ? AHCI_P_IX_SDB : 0;
+		break;
+	case FIS_TYPE_PIOSETUP:
+		offset = 0x20;
+		len = 20;
+		irq = want_intr ? AHCI_P_IX_PS : 0;
+		break;
+	default:
+		WPRINTF("unsupported fis type %d\n", ft);
+		return;
+	}
+
+	if (fis[2] & ATA_S_ERROR) {
+		p->waitforclear = 1;
+		irq |= AHCI_P_IX_TFE;
+	}
+
+	memcpy(p->rfis + offset, fis, len);
+
+	/* Interrupt only when a requested bit is not already pending. */
+	if (irq == 0 || (~p->is & irq) == 0)
+		return;
+	p->is |= irq;
+	ahci_port_intr(p);
+}
+
+/*
+ * Post a PIO setup FIS whose only non-zero byte is the FIS type.
+ */
+static void
+ahci_write_fis_piosetup(struct ahci_port *p)
+{
+	uint8_t fis[20] = { [0] = FIS_TYPE_PIOSETUP };
+
+	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
+}
+
+/*
+ * Post a set-device-bits FIS for command slot 'slot'.
+ *
+ * 'tfd' carries the status in bits 0-7 and the error code in bits 8-15;
+ * only status bits 0x77 are reported.  On error the failing command
+ * ('cfis') is saved in err_cfis; on success the slot's bit is reported in
+ * bytes 4-7 of the FIS and cleared from the port's SACT.  The FIS always
+ * requests an interrupt.
+ */
+static void
+ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
+{
+	uint8_t fis[8];
+	uint8_t error;
+	uint32_t slotbit;
+
+	error = (tfd >> 8) & 0xff;
+	tfd &= 0x77;
+	memset(fis, 0, sizeof(fis));
+	fis[0] = FIS_TYPE_SETDEVBITS;
+	fis[1] = (1 << 6);
+	fis[2] = tfd;
+	fis[3] = error;
+	if (fis[2] & ATA_S_ERROR) {
+		p->err_cfis[0] = slot;
+		p->err_cfis[2] = tfd;
+		p->err_cfis[3] = error;
+		memcpy(&p->err_cfis[4], cfis + 4, 16);
+	} else {
+		/* Unsigned shift: slot 31 must not overflow a signed int. */
+		slotbit = 1U << slot;
+		/*
+		 * Store via memcpy rather than a uint32_t pointer cast: the
+		 * byte array has no alignment or aliasing guarantee.
+		 */
+		memcpy(fis + 4, &slotbit, sizeof(slotbit));
+		p->sact &= ~slotbit;
+	}
+	p->tfd &= ~0x77;
+	p->tfd |= tfd;
+	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
+}
+
+/*
+ * Post a device-to-host register FIS completing command slot 'slot'.
+ *
+ * 'tfd' carries the status in bits 0-7 and the error code in bits 8-15.
+ * Bytes 4-13 of the command FIS 'cfis' are echoed back.  On error the
+ * failing command is saved in err_cfis and the slot stays set in CI; on
+ * success the slot's CI bit is cleared.  The port's TFD is replaced by
+ * 'tfd' and the FIS always requests an interrupt.
+ */
+static void
+ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
+{
+	uint8_t fis[20];
+	uint8_t error;
+
+	error = (tfd >> 8) & 0xff;
+	memset(fis, 0, sizeof(fis));
+	fis[0] = FIS_TYPE_REGD2H;
+	fis[1] = (1 << 6);
+	fis[2] = tfd & 0xff;
+	fis[3] = error;
+	/* Echo bytes 4..13 of the command FIS. */
+	memcpy(&fis[4], &cfis[4], 10);
+	if (fis[2] & ATA_S_ERROR) {
+		p->err_cfis[0] = 0x80;
+		p->err_cfis[2] = tfd & 0xff;
+		p->err_cfis[3] = error;
+		memcpy(&p->err_cfis[4], cfis + 4, 16);
+	} else {
+		/* Unsigned shift: slot 31 must not overflow a signed int. */
+		p->ci &= ~(1U << slot);
+	}
+	p->tfd = tfd;
+	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
+}
+
+/*
+ * Post the device-to-host register FIS that accepts a queued (NCQ) command
+ * in slot 'slot': status is set to ready, no error, no interrupt is
+ * requested, and the slot's CI bit is cleared.
+ */
+static void
+ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
+{
+	uint8_t fis[20];
+
+	p->tfd = ATA_S_READY | ATA_S_DSC;
+	memset(fis, 0, sizeof(fis));
+	fis[0] = FIS_TYPE_REGD2H;
+	fis[1] = 0;			/* No interrupt */
+	fis[2] = p->tfd;		/* Status */
+	fis[3] = 0;			/* No error */
+	/* Unsigned shift: slot 31 must not overflow a signed int. */
+	p->ci &= ~(1U << slot);
+	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
+}
+
+/*
+ * Post the device-to-host register FIS sent after a port reset.  Bytes 3,
+ * 4 and 12 are set to 1; an ATAPI device additionally reports 0x14/0xeb in
+ * bytes 5 and 6.  No interrupt is requested.
+ */
+static void
+ahci_write_reset_fis_d2h(struct ahci_port *p)
+{
+	uint8_t fis[20] = { 0 };
+
+	fis[0] = FIS_TYPE_REGD2H;
+	fis[3] = 1;
+	fis[4] = 1;
+	fis[12] = 1;
+	if (p->atapi) {
+		fis[5] = 0x14;
+		fis[6] = 0xeb;
+	}
+
+	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
+}
+
+/*
+ * If we are no longer processing the command list (AHCI_P_CMD_ST clear)
+ * and nothing is in-flight, clear the running bit, the current command
+ * slot, the command issue and active bits, and 'waitforclear'.
+ */
+static void
+ahci_check_stopped(struct ahci_port *p)
+{
+
+	/* Still started, or requests still outstanding: nothing to do. */
+	if ((p->cmd & AHCI_P_CMD_ST) != 0)
+		return;
+	if (p->pending != 0)
+		return;
+
+	p->ccs = 0;
+	p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
+	p->ci = 0;
+	p->sact = 0;
+	p->waitforclear = 0;
+}
+
+/*
+ * Stop a port: try to cancel every request on the busy list.  Each request
+ * that is cancelled has its slot cleared from SACT (queued commands) or CI
+ * (all others) and from 'pending', and is moved back to the free list.
+ * Requests that cannot be cancelled are left alone.  Finally the port's
+ * stopped state is re-evaluated.
+ *
+ * The controller mutex must be held by the caller.
+ */
+static void
+ahci_port_stop(struct ahci_port *p)
+{
+	struct ahci_ioreq *aior;
+	uint8_t *cfis;
+	int slot;
+	int error;
+
+	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
+
+	/*
+	 * NOTE(review): the current element is unlinked inside
+	 * TAILQ_FOREACH; this relies on TAILQ_REMOVE leaving the removed
+	 * element's next pointer intact.
+	 */
+	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
+		/*
+		 * Try to cancel the outstanding blockif request.
+		 */
+		error = blockif_cancel(p->bctx, &aior->io_req);
+		if (error != 0)
+			continue;
+
+		slot = aior->slot;
+		cfis = aior->cfis;
+
+		/*
+		 * Decide per request whether it is a queued (NCQ) command;
+		 * the previous code tested a flag that was never cleared
+		 * (and was uninitialised for the first non-NCQ request).
+		 */
+		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
+		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
+		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
+			p->sact &= ~(1U << slot);
+		else
+			p->ci &= ~(1U << slot);
+
+		/*
+		 * This command is now done.
+		 */
+		p->pending &= ~(1U << slot);
+
+		/*
+		 * Delete the blockif request from the busy list
+		 */
+		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
+
+		/*
+		 * Move the blockif request back to the free list
+		 */
+		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
+	}
+
+	ahci_check_stopped(p);
+}
+
+/*
+ * Reset a port's link and device state.  Without a backing device the port
+ * reports "no device" (signature 0xFFFFFFFF, TFD 0x7F).  Otherwise the
+ * link is reported online at the speed requested in SCTL (gen3 if none is
+ * requested), the signature and task file are set according to the device
+ * type (ATA or ATAPI), and the post-reset D2H register FIS is posted.
+ */
+static void
+ahci_port_reset(struct ahci_port *pr)
+{
+	uint32_t speed;
+
+	pr->serr = 0;
+	pr->sact = 0;
+	pr->xfermode = ATA_UDMA6;
+	pr->mult_sectors = 128;
+
+	if (pr->bctx == NULL) {
+		pr->ssts = ATA_SS_DET_NO_DEVICE;
+		pr->sig = 0xFFFFFFFF;
+		pr->tfd = 0x7F;
+		return;
+	}
+
+	/* Honour a speed limit in SCTL; otherwise advertise gen3. */
+	speed = pr->sctl & ATA_SC_SPD_MASK;
+	if (speed == 0)
+		speed = ATA_SS_SPD_GEN3;
+	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
+	pr->ssts |= speed;
+
+	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
+	if (pr->atapi) {
+		pr->sig = PxSIG_ATAPI;
+	} else {
+		pr->sig = PxSIG_ATA;
+		pr->tfd |= ATA_S_READY;
+	}
+
+	ahci_write_reset_fis_d2h(pr);
+}
+
+/*
+ * Reset the whole controller: GHC goes back to "AHCI enabled", the
+ * global interrupt status is cleared, a pending legacy interrupt is
+ * deasserted, and each port has its interrupt state, command register
+ * and PxSCTL reinitialised before being reset individually.
+ */
+static void
+ahci_reset(struct pci_ahci_softc *sc)
+{
+	struct ahci_port *pr;
+	int i;
+
+	sc->ghc = AHCI_GHC_AE;
+	sc->is = 0;
+
+	if (sc->lintr) {
+		pci_lintr_deassert(sc->asc_pi);
+		sc->lintr = 0;
+	}
+
+	for (i = 0; i < sc->ports; i++) {
+		pr = &sc->port[i];
+
+		pr->ie = 0;
+		pr->is = 0;
+		pr->cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
+		/* CPS is only reported when a backing store is attached. */
+		if (pr->bctx)
+			pr->cmd |= AHCI_P_CMD_CPS;
+		pr->sctl = 0;
+		ahci_port_reset(pr);
+	}
+}
+
+/*
+ * Copy 'src' into the 'len'-byte field 'dest', padding with spaces once
+ * the source string is exhausted.  Adjacent bytes are swapped pairwise
+ * (index i is stored at i ^ 1), so 'len' is expected to be even.
+ */
+static void
+ata_string(uint8_t *dest, const char *src, int len)
+{
+	int pos;
+
+	for (pos = 0; pos < len; pos++) {
+		uint8_t ch = ' ';
+
+		if (*src != '\0')
+			ch = *src++;
+		dest[pos ^ 1] = ch;
+	}
+}
+
+/*
+ * Copy 'src' into the 'len'-byte field 'dest' in natural byte order,
+ * padding with spaces once the source string is exhausted.  The result
+ * is not NUL-terminated.
+ */
+static void
+atapi_string(uint8_t *dest, const char *src, int len)
+{
+	int pos;
+
+	for (pos = 0; pos < len; pos++) {
+		uint8_t ch = ' ';
+
+		if (*src != '\0')
+			ch = *src++;
+		dest[pos] = ch;
+	}
+}
+
+/*
+ * Build up the iovec based on the PRDT, 'done' and 'len'.
+ *
+ * Walks at most 'prdtl' PRDT entries starting at 'prdt', skips the first
+ * aior->done bytes they describe, and maps up to BLOCKIF_IOV_MAX
+ * segments covering at most (aior->len - aior->done) bytes into
+ * aior->io_req.br_iov.  On return br_iovcnt and br_resid describe the
+ * mapped transfer, aior->done has been advanced by that many bytes, and
+ * aior->more is non-zero when bytes remain to be transferred and PRDT
+ * entries are still unvisited.
+ */
+static void
+ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
+    struct ahci_prdt_entry *prdt, uint16_t prdtl)
+{
+	struct blockif_req *breq = &aior->io_req;
+	int i, j, skip, todo, left, extra;
+	uint32_t dbcsz;
+
+	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
+	skip = aior->done;
+	left = aior->len - aior->done;
+	todo = 0;
+	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
+	    i++, prdt++) {
+		/* The byte count field stores the length minus one. */
+		dbcsz = (prdt->dbc & DBCMASK) + 1;
+		/* Skip already done part of the PRDT */
+		if (dbcsz <= skip) {
+			skip -= dbcsz;
+			continue;
+		}
+		/* Partially consumed entry: start 'skip' bytes into it. */
+		dbcsz -= skip;
+		if (dbcsz > left)
+			dbcsz = left;
+		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
+		    prdt->dba + skip, dbcsz);
+		breq->br_iov[j].iov_len = dbcsz;
+		todo += dbcsz;
+		left -= dbcsz;
+		skip = 0;
+		j++;
+	}
+
+	/* If we got limited by IOV length, round I/O down to sector size. */
+	if (j == BLOCKIF_IOV_MAX) {
+		extra = todo % blockif_sectsz(p->bctx);
+		todo -= extra;
+		assert(todo > 0);
+		/*
+		 * Trim 'extra' bytes off the tail of the iov, shortening the
+		 * last entry or dropping whole entries as needed.
+		 */
+		while (extra > 0) {
+			if (breq->br_iov[j - 1].iov_len > extra) {
+				breq->br_iov[j - 1].iov_len -= extra;
+				break;
+			}
+			extra -= breq->br_iov[j - 1].iov_len;
+			j--;
+		}
+	}
+
+	breq->br_iovcnt = j;
+	breq->br_resid = todo;
+	aior->done += todo;
+	aior->more = (aior->done < aior->len && i < prdtl);
+}
+
+/*
+ * Start an ATA read/write command, or continue one when 'done' is
+ * non-zero.  The LBA and sector count are decoded from the command FIS
+ * according to the opcode family (NCQ, 48-bit, or 28-bit), an
+ * ahci_ioreq is taken from the free list, its iovec is built from the
+ * PRDT, and the request is handed to blockif.
+ *
+ * 'done' is the number of bytes already transferred by earlier passes.
+ */
+static void
+ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
+{
+	struct ahci_ioreq *aior;
+	struct blockif_req *breq;
+	struct ahci_prdt_entry *prdt;
+	struct ahci_cmd_hdr *hdr;
+	uint64_t lba;
+	uint32_t len;
+	int err, first, ncq, readop;
+
+	/* PRDT entries are read from offset 0x80 past the command FIS. */
+	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+	ncq = 0;
+	readop = 1;
+	first = (done == 0);
+
+	/* Opcodes listed here are writes; anything else is issued as a read. */
+	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
+	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
+	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
+	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
+		readop = 0;
+
+	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
+	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
+		/* NCQ: 48-bit LBA, count taken from FIS bytes 11 and 3. */
+		lba = ((uint64_t)cfis[10] << 40) |
+			((uint64_t)cfis[9] << 32) |
+			((uint64_t)cfis[8] << 24) |
+			((uint64_t)cfis[6] << 16) |
+			((uint64_t)cfis[5] << 8) |
+			cfis[4];
+		len = cfis[11] << 8 | cfis[3];
+		/* A zero count is treated as 65536 sectors. */
+		if (!len)
+			len = 65536;
+		ncq = 1;
+	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
+	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
+	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
+		/* 48-bit: same LBA layout, count in FIS bytes 13 and 12. */
+		lba = ((uint64_t)cfis[10] << 40) |
+			((uint64_t)cfis[9] << 32) |
+			((uint64_t)cfis[8] << 24) |
+			((uint64_t)cfis[6] << 16) |
+			((uint64_t)cfis[5] << 8) |
+			cfis[4];
+		len = cfis[13] << 8 | cfis[12];
+		if (!len)
+			len = 65536;
+	} else {
+		/* 28-bit: top LBA nibble in byte 7, 8-bit count in byte 12. */
+		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
+			(cfis[5] << 8) | cfis[4];
+		len = cfis[12];
+		/* A zero count is treated as 256 sectors. */
+		if (!len)
+			len = 256;
+	}
+	/* Convert sector units to byte units. */
+	lba *= blockif_sectsz(p->bctx);
+	len *= blockif_sectsz(p->bctx);
+
+	/* Pull request off free list */
+	aior = STAILQ_FIRST(&p->iofhd);
+	assert(aior != NULL);
+	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
+
+	aior->cfis = cfis;
+	aior->slot = slot;
+	aior->len = len;
+	aior->done = done;
+	breq = &aior->io_req;
+	breq->br_offset = lba + done;
+	ahci_build_iov(p, aior, prdt, hdr->prdtl);
+
+	/* Mark this command in-flight. */
+	p->pending |= 1 << slot;
+
+	/* Stuff request onto busy list. */
+	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
+	/* On the first pass of an NCQ command, post the NCQ D2H FIS first. */
+	if (ncq && first)
+		ahci_write_fis_d2h_ncq(p, slot);
+
+	if (readop)
+		err = blockif_read(p->bctx, breq);
+	else
+		err = blockif_write(p->bctx, breq);
+	assert(err == 0);
+}
+
+/*
+ * Queue a cache flush for the command in 'slot'.  A request is taken
+ * from the free list, set up as a zero-length operation with nothing
+ * further to do, recorded as in-flight on the busy list, and handed to
+ * blockif_flush().
+ */
+static void
+ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	struct ahci_ioreq *aior;
+	int err;
+
+	/* Take a request structure from the free list. */
+	aior = STAILQ_FIRST(&p->iofhd);
+	assert(aior != NULL);
+	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
+
+	aior->cfis = cfis;
+	aior->slot = slot;
+	aior->len = 0;
+	aior->done = 0;
+	aior->more = 0;
+
+	/* The command is in flight from here on. */
+	p->pending |= 1 << slot;
+	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
+	err = blockif_flush(p->bctx, &aior->io_req);
+	assert(err == 0);
+}
+
+/*
+ * Gather up to 'size' bytes from the guest buffers described by the
+ * slot's PRDT into 'buf'.  Copying stops when 'size' bytes have been
+ * read or when the PRDT entries run out, whichever happens first; in
+ * the latter case the tail of 'buf' is left untouched.
+ */
+static inline void
+read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
+		void *buf, int size)
+{
+	struct ahci_cmd_hdr *hdr;
+	struct ahci_prdt_entry *prdt;
+	uint8_t *dst;
+	int idx, remaining;
+
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
+	dst = buf;
+	remaining = size;
+
+	for (idx = 0; idx < hdr->prdtl && remaining; idx++, prdt++) {
+		uint8_t *src;
+		uint32_t dbcsz;
+		int chunk;
+
+		/* The byte count field stores the length minus one. */
+		dbcsz = (prdt->dbc & DBCMASK) + 1;
+		src = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
+		if (remaining < dbcsz)
+			chunk = remaining;
+		else
+			chunk = dbcsz;
+		memcpy(dst, src, chunk);
+		remaining -= chunk;
+		dst += chunk;
+	}
+}
+
+/*
+ * Handle DATA SET MANAGEMENT / SEND FPDMA QUEUED (DSM) TRIM.
+ *
+ * The range list is fetched from the guest through the PRDT into a
+ * 512-byte buffer made of 8-byte entries: a 48-bit little-endian LBA
+ * followed by a 16-bit sector count.  'done' is the byte offset of the
+ * next entry to examine; entries with a zero count are skipped.  Each
+ * non-empty entry is submitted as one blockif_delete() request, and the
+ * completion callback re-enters this function with the updated 'done'
+ * until the list is exhausted, at which point the command is completed.
+ */
+static void
+ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
+{
+	struct ahci_ioreq *aior;
+	struct blockif_req *breq;
+	uint8_t *entry;
+	uint64_t elba;
+	uint32_t len, elen;
+	int err, first, ncq;
+	uint8_t buf[512];
+
+	first = (done == 0);
+	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
+		len = (uint16_t)cfis[13] << 8 | cfis[12];
+		len *= 512;
+		ncq = 0;
+	} else { /* ATA_SEND_FPDMA_QUEUED */
+		len = (uint16_t)cfis[11] << 8 | cfis[3];
+		len *= 512;
+		ncq = 1;
+	}
+
+	/*
+	 * read_prdt() copies fewer than sizeof(buf) bytes when the guest
+	 * PRDT is short.  Zero the buffer first so the uncopied tail reads
+	 * as empty (zero-length) ranges rather than stack contents.
+	 */
+	memset(buf, 0, sizeof(buf));
+	read_prdt(p, slot, cfis, buf, sizeof(buf));
+
+next:
+	entry = &buf[done];
+	elba = ((uint64_t)entry[5] << 40) |
+		((uint64_t)entry[4] << 32) |
+		((uint64_t)entry[3] << 24) |
+		((uint64_t)entry[2] << 16) |
+		((uint64_t)entry[1] << 8) |
+		entry[0];
+	elen = (uint16_t)entry[7] << 8 | entry[6];
+	done += 8;
+	if (elen == 0) {
+		if (done >= len) {
+			/* End of the list: complete the command. */
+			if (ncq) {
+				if (first)
+					ahci_write_fis_d2h_ncq(p, slot);
+				ahci_write_fis_sdb(p, slot, cfis,
+				    ATA_S_READY | ATA_S_DSC);
+			} else {
+				ahci_write_fis_d2h(p, slot, cfis,
+				    ATA_S_READY | ATA_S_DSC);
+			}
+			p->pending &= ~(1 << slot);
+			ahci_check_stopped(p);
+			if (!first)
+				ahci_handle_port(p);
+			return;
+		}
+		/* Empty entry with more to examine: move to the next one. */
+		goto next;
+	}
+
+	/*
+	 * Pull request off free list
+	 */
+	aior = STAILQ_FIRST(&p->iofhd);
+	assert(aior != NULL);
+	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
+	aior->cfis = cfis;
+	aior->slot = slot;
+	aior->len = len;
+	aior->done = done;
+	aior->more = (len != done);
+
+	breq = &aior->io_req;
+	breq->br_offset = elba * blockif_sectsz(p->bctx);
+	breq->br_resid = elen * blockif_sectsz(p->bctx);
+
+	/*
+	 * Mark this command in-flight.
+	 */
+	p->pending |= 1 << slot;
+
+	/*
+	 * Stuff request onto busy list
+	 */
+	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
+	if (ncq && first)
+		ahci_write_fis_d2h_ncq(p, slot);
+
+	err = blockif_delete(p->bctx, breq);
+	assert(err == 0);
+}
+
+/*
+ * Scatter up to 'size' bytes from 'buf' into the guest buffers
+ * described by the slot's PRDT.  Copying stops when 'size' bytes have
+ * been written or when the PRDT entries run out.  The number of bytes
+ * actually copied is stored in the command header's prdbc field.
+ */
+static inline void
+write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
+		void *buf, int size)
+{
+	struct ahci_cmd_hdr *hdr;
+	struct ahci_prdt_entry *prdt;
+	uint8_t *src;
+	int idx, remaining;
+
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
+	src = buf;
+	remaining = size;
+
+	for (idx = 0; idx < hdr->prdtl && remaining; idx++, prdt++) {
+		uint8_t *dst;
+		uint32_t dbcsz;
+		int chunk;
+
+		/* The byte count field stores the length minus one. */
+		dbcsz = (prdt->dbc & DBCMASK) + 1;
+		dst = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
+		if (remaining < dbcsz)
+			chunk = remaining;
+		else
+			chunk = dbcsz;
+		memcpy(dst, src, chunk);
+		remaining -= chunk;
+		src += chunk;
+	}
+
+	hdr->prdbc = size - remaining;
+}
+
+/*
+ * Store a checksum in the last byte of 'buf' so that the 8-bit sum of
+ * all 'size' bytes is zero.  The previous value of the last byte is
+ * ignored.
+ */
+static void
+ahci_checksum(uint8_t *buf, int size)
+{
+	const int last = size - 1;
+	uint8_t total = 0;
+	int idx;
+
+	for (idx = 0; idx < last; idx++)
+		total += buf[idx];
+	buf[last] = (uint8_t)(0x100 - total);
+}
+
+/*
+ * Handle READ LOG EXT / READ LOG DMA EXT.  The command is aborted for
+ * ATAPI ports, for an empty PRDT, and for requests with non-zero FIS
+ * bytes 5, 9 or 13 or with FIS byte 12 other than 1.  Three log
+ * addresses (FIS byte 4) are served as a 512-byte page: 0x00 (log
+ * directory), 0x10 (NCQ command error log) and 0x13 (SATA NCQ send and
+ * receive log).  Any other address is aborted.
+ */
+static void
+ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	struct ahci_cmd_hdr *hdr;
+	uint32_t buf[128];
+	uint8_t *buf8 = (uint8_t *)buf;
+	uint16_t *buf16 = (uint16_t *)buf;
+
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+	if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 ||
+	    cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
+		ahci_write_fis_d2h(p, slot, cfis,
+		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
+		return;
+	}
+
+	memset(buf, 0, sizeof(buf));
+	switch (cfis[4]) {
+	case 0x00:	/* Log directory */
+		buf16[0x00] = 1; /* Version -- 1 */
+		buf16[0x10] = 1; /* NCQ Command Error Log -- 1 page */
+		buf16[0x13] = 1; /* SATA NCQ Send and Receive Log -- 1 page */
+		break;
+	case 0x10:	/* NCQ Command Error Log */
+		memcpy(buf8, p->err_cfis, sizeof(p->err_cfis));
+		ahci_checksum(buf8, sizeof(buf));
+		break;
+	case 0x13:	/* SATA NCQ Send and Receive Log */
+		if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
+			buf[0x00] = 1;	/* SFQ DSM supported */
+			buf[0x01] = 1;	/* SFQ DSM TRIM supported */
+		}
+		break;
+	default:
+		ahci_write_fis_d2h(p, slot, cfis,
+		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
+		return;
+	}
+
+	/* Only the PIO variant of the command gets a PIO setup FIS. */
+	if (cfis[2] == ATA_READ_LOG_EXT)
+		ahci_write_fis_piosetup(p);
+	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
+	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
+}
+
+/*
+ * Handle ATA IDENTIFY DEVICE.  The command is aborted on ATAPI ports
+ * and when the PRDT is empty.  Otherwise a 256-word identify page is
+ * built from the backing store's geometry, size and capabilities,
+ * checksummed, and copied to the guest through the PRDT.
+ */
+static void
+handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	struct ahci_cmd_hdr *hdr;
+	uint16_t buf[256];
+	uint64_t sectors;
+	int sectsz, psectsz, psectoff, candelete, ro;
+	uint16_t cyl;
+	uint8_t sech, heads;
+
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+	if (p->atapi || hdr->prdtl == 0) {
+		ahci_write_fis_d2h(p, slot, cfis,
+		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
+		return;
+	}
+
+	/* Collect the properties of the backing store. */
+	ro = blockif_is_ro(p->bctx);
+	candelete = blockif_candelete(p->bctx);
+	sectsz = blockif_sectsz(p->bctx);
+	sectors = blockif_size(p->bctx) / sectsz;
+	blockif_chs(p->bctx, &cyl, &heads, &sech);
+	blockif_psectsz(p->bctx, &psectsz, &psectoff);
+
+	memset(buf, 0, sizeof(buf));
+
+	/* General configuration and CHS geometry. */
+	buf[0] = 0x0040;
+	buf[1] = cyl;
+	buf[3] = heads;
+	buf[6] = sech;
+
+	/* Identification strings (byte-swapped within each word). */
+	ata_string((uint8_t *)(buf+10), p->ident, 20);
+	ata_string((uint8_t *)(buf+23), "001", 8);
+	ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
+
+	buf[47] = (0x8000 | 128);
+	buf[48] = 0;
+	buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
+	buf[50] = (1 << 14);
+	buf[53] = (1 << 1 | 1 << 2);
+	if (p->mult_sectors)
+		buf[59] = (0x100 | p->mult_sectors);
+
+	/* 28-bit sector count, clamped to 0x0fffffff for larger disks. */
+	if (sectors <= 0x0fffffff) {
+		buf[60] = sectors;
+		buf[61] = (sectors >> 16);
+	} else {
+		buf[60] = 0xffff;
+		buf[61] = 0x0fff;
+	}
+
+	/* Transfer modes and timings. */
+	buf[63] = 0x7;
+	if (p->xfermode & ATA_WDMA0)
+		buf[63] |= (1 << ((p->xfermode & 7) + 8));
+	buf[64] = 0x3;
+	buf[65] = 120;
+	buf[66] = 120;
+	buf[67] = 120;
+	buf[68] = 120;
+	buf[69] = 0;
+	buf[75] = 31;
+
+	/* SATA capabilities; the current speed comes from PxSSTS. */
+	buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
+		   ATA_SUPPORT_NCQ);
+	buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
+		   (p->ssts & ATA_SS_SPD_MASK) >> 3);
+
+	/* Version and supported/enabled feature sets. */
+	buf[80] = 0x3f0;
+	buf[81] = 0x28;
+	buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
+		   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
+	buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
+		   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
+	buf[84] = (1 << 14);
+	buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
+		   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
+	buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
+		   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
+	buf[87] = (1 << 14);
+	buf[88] = 0x7f;
+	if (p->xfermode & ATA_UDMA0)
+		buf[88] |= (1 << ((p->xfermode & 7) + 8));
+
+	/* Full 64-bit sector count. */
+	buf[100] = sectors;
+	buf[101] = (sectors >> 16);
+	buf[102] = (sectors >> 32);
+	buf[103] = (sectors >> 48);
+
+	/* TRIM is only advertised for writable stores that can delete. */
+	if (candelete && !ro) {
+		buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
+		buf[105] = 1;
+		buf[169] = ATA_SUPPORT_DSM_TRIM;
+	}
+
+	/* Logical/physical sector size reporting. */
+	buf[106] = 0x4000;
+	buf[209] = 0x4000;
+	if (psectsz > sectsz) {
+		buf[106] |= 0x2000;
+		buf[106] |= ffsl(psectsz / sectsz) - 1;
+		buf[209] |= (psectoff / sectsz);
+	}
+	if (sectsz > 512) {
+		buf[106] |= 0x1000;
+		buf[117] = sectsz / 2;
+		buf[118] = ((sectsz / 2) >> 16);
+	}
+
+	buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
+	buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
+	buf[222] = 0x1020;
+	buf[255] = 0x00a5;
+
+	/* Checksum the page, then deliver it and complete the command. */
+	ahci_checksum((uint8_t *)buf, sizeof(buf));
+	ahci_write_fis_piosetup(p);
+	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
+	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
+}
+
+/*
+ * Handle ATA IDENTIFY PACKET DEVICE.  The command is aborted on
+ * non-ATAPI ports.  Otherwise a 256-word identify page describing the
+ * emulated DVD-ROM is built, checksummed, and copied to the guest
+ * through the PRDT.
+ */
+static void
+handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint16_t buf[256];
+
+	if (!p->atapi) {
+		ahci_write_fis_d2h(p, slot, cfis,
+		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
+		return;
+	}
+
+	memset(buf, 0, sizeof(buf));
+	buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
+
+	/* Identification strings (byte-swapped within each word). */
+	ata_string((uint8_t *)(buf+10), p->ident, 20);
+	ata_string((uint8_t *)(buf+23), "001", 8);
+	ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
+
+	buf[49] = (1 << 9 | 1 << 8);
+	buf[50] = (1 << 14 | 1);
+	buf[53] = (1 << 2 | 1 << 1);
+	buf[62] = 0x3f;
+
+	/* Transfer modes and timings. */
+	buf[63] = 7;
+	if (p->xfermode & ATA_WDMA0)
+		buf[63] |= (1 << ((p->xfermode & 7) + 8));
+	buf[64] = 3;
+	buf[65] = 120;
+	buf[66] = 120;
+	buf[67] = 120;
+	buf[68] = 120;
+
+	/* SATA capabilities; the current speed comes from PxSSTS. */
+	buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
+	buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
+	buf[78] = (1 << 5);
+
+	/* Version and supported/enabled feature sets. */
+	buf[80] = 0x3f0;
+	buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
+		   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
+	buf[83] = (1 << 14);
+	buf[84] = (1 << 14);
+	buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
+		   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
+	buf[87] = (1 << 14);
+	buf[88] = 0x7f;
+	if (p->xfermode & ATA_UDMA0)
+		buf[88] |= (1 << ((p->xfermode & 7) + 8));
+
+	buf[222] = 0x1020;
+	buf[255] = 0x00a5;
+
+	/* Checksum the page, then deliver it and complete the command. */
+	ahci_checksum((uint8_t *)buf, sizeof(buf));
+	ahci_write_fis_piosetup(p);
+	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
+	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
+}
+
+/*
+ * Handle the ATAPI INQUIRY packet.  With the VPD bit (CDB byte 1, bit 0)
+ * set, only page 0 is served; any other page fails with ILLEGAL REQUEST
+ * and ASC 0x24.  Without the bit a 36-byte standard inquiry response is
+ * returned.  The reply is truncated to the length in CDB byte 4.
+ */
+static void
+atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t buf[36];
+	uint8_t *acmd;
+	int len;
+	uint32_t tfd;
+
+	acmd = cfis + 0x40;
+
+	/* Reject every VPD page other than page 0 up front. */
+	if ((acmd[1] & 1) != 0 && acmd[2] != 0) {
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x24;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, tfd);
+		return;
+	}
+
+	if ((acmd[1] & 1) != 0) {
+		/* Supported VPD pages */
+		buf[0] = 0x05;
+		buf[1] = 0;
+		buf[2] = 0;
+		buf[3] = 1;
+		buf[4] = 0;
+		len = 4 + buf[3];
+	} else {
+		/* Standard inquiry data */
+		buf[0] = 0x05;
+		buf[1] = 0x80;
+		buf[2] = 0x00;
+		buf[3] = 0x21;
+		buf[4] = 31;
+		buf[5] = 0;
+		buf[6] = 0;
+		buf[7] = 0;
+		atapi_string(buf + 8, "BHYVE", 8);
+		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
+		atapi_string(buf + 32, "001", 4);
+		len = sizeof(buf);
+	}
+
+	/* Never return more than the guest asked for. */
+	if (len > acmd[4])
+		len = acmd[4];
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	write_prdt(p, slot, cfis, buf, len);
+	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+}
+
+/*
+ * Handle the ATAPI READ CAPACITY packet.  The 8-byte big-endian reply
+ * holds the last addressable block (backing size / 2048, minus one)
+ * followed by the block length, 2048.
+ */
+static void
+atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t reply[8];
+	uint64_t nblocks;
+
+	nblocks = blockif_size(p->bctx) / 2048;
+
+	be32enc(&reply[0], nblocks - 1);
+	be32enc(&reply[4], 2048);
+
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	write_prdt(p, slot, cfis, reply, sizeof(reply));
+	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+}
+
+/*
+ * Handle the ATAPI READ TOC packet.  The format is taken from the top
+ * two bits of CDB byte 9 and the allocation length from CDB bytes 7-8.
+ * Formats 0, 1 and 2 build a fixed single-track table of contents on
+ * the stack and copy at most the requested length to the guest; any
+ * other format fails with ILLEGAL REQUEST, ASC 0x24.
+ */
+static void
+atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t *acmd;
+	uint8_t format;
+	int len;
+
+	acmd = cfis + 0x40;
+
+	len = be16dec(acmd + 7);
+	format = acmd[9] >> 6;
+	switch (format) {
+	case 0:
+	{
+		int msf, size;
+		uint64_t sectors;
+		uint8_t start_track, buf[20], *bp;
+
+		/* CDB byte 1, bit 1 selects MSF instead of LBA addresses. */
+		msf = (acmd[1] >> 1) & 1;
+		start_track = acmd[6];
+		/* Only tracks 0/1 and the lead-out marker 0xaa are accepted. */
+		if (start_track > 1 && start_track != 0xaa) {
+			uint32_t tfd;
+			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+			p->asc = 0x24;
+			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+			ahci_write_fis_d2h(p, slot, cfis, tfd);
+			return;
+		}
+		/* Bytes 0-1 (data length) are filled in once the size is known. */
+		bp = buf + 2;
+		*bp++ = 1;
+		*bp++ = 1;
+		/* Descriptor for track 1, omitted when only lead-out is asked for. */
+		if (start_track <= 1) {
+			*bp++ = 0;
+			*bp++ = 0x14;
+			*bp++ = 1;
+			*bp++ = 0;
+			if (msf) {
+				*bp++ = 0;
+				lba_to_msf(bp, 0);
+				bp += 3;
+			} else {
+				*bp++ = 0;
+				*bp++ = 0;
+				*bp++ = 0;
+				*bp++ = 0;
+			}
+		}
+		/* Lead-out descriptor (track number 0xaa). */
+		*bp++ = 0;
+		*bp++ = 0x14;
+		*bp++ = 0xaa;
+		*bp++ = 0;
+		/* Backing-store sector count divided by four. */
+		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
+		sectors >>= 2;
+		if (msf) {
+			*bp++ = 0;
+			lba_to_msf(bp, sectors);
+			bp += 3;
+		} else {
+			be32enc(bp, sectors);
+			bp += 4;
+		}
+		size = bp - buf;
+		be16enc(buf, size - 2);
+		if (len > size)
+			len = size;
+		write_prdt(p, slot, cfis, buf, len);
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		break;
+	}
+	case 1:
+	{
+		uint8_t buf[12];
+
+		/* Fixed 12-byte reply; only bytes 1-3 are non-zero. */
+		memset(buf, 0, sizeof(buf));
+		buf[1] = 0xa;
+		buf[2] = 0x1;
+		buf[3] = 0x1;
+		if (len > sizeof(buf))
+			len = sizeof(buf);
+		write_prdt(p, slot, cfis, buf, len);
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		break;
+	}
+	case 2:
+	{
+		int msf, size;
+		uint64_t sectors;
+		uint8_t start_track, *bp, buf[50];
+
+		msf = (acmd[1] >> 1) & 1;
+		/* NOTE(review): start_track is assigned but never read here. */
+		start_track = acmd[6];
+		/* Bytes 0-1 (data length) are filled in once the size is known. */
+		bp = buf + 2;
+		*bp++ = 1;
+		*bp++ = 1;
+
+		/* 11-byte descriptor, point 0xa0. */
+		*bp++ = 1;
+		*bp++ = 0x14;
+		*bp++ = 0;
+		*bp++ = 0xa0;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 1;
+		*bp++ = 0;
+		*bp++ = 0;
+
+		/* 11-byte descriptor, point 0xa1. */
+		*bp++ = 1;
+		*bp++ = 0x14;
+		*bp++ = 0;
+		*bp++ = 0xa1;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 1;
+		*bp++ = 0;
+		*bp++ = 0;
+
+		/* Descriptor, point 0xa2, carrying the sector count / 4. */
+		*bp++ = 1;
+		*bp++ = 0x14;
+		*bp++ = 0;
+		*bp++ = 0xa2;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 0;
+		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
+		sectors >>= 2;
+		if (msf) {
+			*bp++ = 0;
+			lba_to_msf(bp, sectors);
+			bp += 3;
+		} else {
+			be32enc(bp, sectors);
+			bp += 4;
+		}
+
+		/* Descriptor, point 1, with a start address of zero. */
+		*bp++ = 1;
+		*bp++ = 0x14;
+		*bp++ = 0;
+		*bp++ = 1;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 0;
+		if (msf) {
+			*bp++ = 0;
+			lba_to_msf(bp, 0);
+			bp += 3;
+		} else {
+			*bp++ = 0;
+			*bp++ = 0;
+			*bp++ = 0;
+			*bp++ = 0;
+		}
+
+		size = bp - buf;
+		be16enc(buf, size - 2);
+		if (len > size)
+			len = size;
+		write_prdt(p, slot, cfis, buf, len);
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		break;
+	}
+	default:
+	{
+		uint32_t tfd;
+
+		/* Unsupported format: ILLEGAL REQUEST, ASC 0x24. */
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x24;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, tfd);
+		break;
+	}
+	}
+}
+
+/*
+ * Handle the ATAPI REPORT LUNS packet.  The fixed 16-byte reply is all
+ * zero apart from byte 3, which is 8.
+ */
+static void
+atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t reply[16] = { 0 };
+
+	reply[3] = 8;
+
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	write_prdt(p, slot, cfis, reply, sizeof(reply));
+	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+}
+
+/*
+ * Start an ATAPI READ(10)/READ(12), or continue one when 'done' is
+ * non-zero.  The LBA comes from CDB bytes 2-5 and the block count from
+ * bytes 7-8 (READ_10) or 6-9 (otherwise); both are in 2048-byte blocks.
+ * A request is taken from the free list, its iovec is built from the
+ * PRDT, and it is submitted to blockif_read().
+ *
+ * 'done' is the number of bytes already transferred by earlier passes.
+ */
+static void
+atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
+{
+	struct ahci_ioreq *aior;
+	struct ahci_cmd_hdr *hdr;
+	struct ahci_prdt_entry *prdt;
+	struct blockif_req *breq;
+	uint8_t *acmd;
+	uint64_t lba;
+	uint32_t len;
+	int err;
+
+	acmd = cfis + 0x40;
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
+
+	lba = be32dec(acmd + 2);
+	if (acmd[0] == READ_10)
+		len = be16dec(acmd + 7);
+	else
+		len = be32dec(acmd + 6);
+	if (len == 0) {
+		/*
+		 * Nothing to transfer: complete the command here and do not
+		 * queue an empty block request, whose completion callback
+		 * would otherwise complete the same slot a second time.
+		 */
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		return;
+	}
+	/* Convert 2048-byte blocks to bytes. */
+	lba *= 2048;
+	len *= 2048;
+
+	/*
+	 * Pull request off free list
+	 */
+	aior = STAILQ_FIRST(&p->iofhd);
+	assert(aior != NULL);
+	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
+	aior->cfis = cfis;
+	aior->slot = slot;
+	aior->len = len;
+	aior->done = done;
+	breq = &aior->io_req;
+	breq->br_offset = lba + done;
+	ahci_build_iov(p, aior, prdt, hdr->prdtl);
+
+	/* Mark this command in-flight. */
+	p->pending |= 1 << slot;
+
+	/* Stuff request onto busy list. */
+	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
+	err = blockif_read(p->bctx, breq);
+	assert(err == 0);
+}
+
+/*
+ * Handle the ATAPI REQUEST SENSE packet.  The reply is limited to the
+ * allocation length in CDB byte 4 and to 64 bytes, and reports the
+ * port's stored sense key and additional sense code.
+ */
+static void
+atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t sense[64];
+	uint8_t *acmd;
+	int len;
+
+	acmd = cfis + 0x40;
+	len = acmd[4] > sizeof(sense) ? sizeof(sense) : acmd[4];
+
+	/* Only the part that will be returned is cleared. */
+	memset(sense, 0, len);
+	sense[0] = 0x70 | (1 << 7);
+	sense[2] = p->sense_key;
+	sense[7] = 10;
+	sense[12] = p->asc;
+
+	write_prdt(p, slot, cfis, sense, len);
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+}
+
+/*
+ * Handle the ATAPI START STOP UNIT packet.  The low two bits of CDB
+ * byte 4 select the operation: value 2 (eject) is not implemented and
+ * fails with ILLEGAL REQUEST, ASC 0x53; every other value succeeds
+ * without doing anything.
+ */
+static void
+atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t *acmd = cfis + 0x40;
+	uint32_t tfd;
+
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	if ((acmd[4] & 3) == 2) {
+		/* TODO eject media */
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x53;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+	} else {
+		tfd = ATA_S_READY | ATA_S_DSC;
+	}
+	ahci_write_fis_d2h(p, slot, cfis, tfd);
+}
+
+/*
+ * Handle the ATAPI MODE SENSE(10) packet.  The page control field is
+ * the top two bits of CDB byte 2 and the page code its low six bits;
+ * the allocation length comes from CDB bytes 7-8.  Page control 0 serves
+ * the R/W error recovery and CD capabilities pages.  Page control 3
+ * fails with ASC 0x39.  Page controls 1 and 2, and any other page code,
+ * fail with ASC 0x24.
+ */
+static void
+atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t *acmd;
+	uint32_t tfd;
+	uint8_t pc, code;
+	int len;
+
+	acmd = cfis + 0x40;
+	len = be16dec(acmd + 7);
+	pc = acmd[2] >> 6;
+	code = acmd[2] & 0x3f;
+
+	switch (pc) {
+	case 0:
+		switch (code) {
+		case MODEPAGE_RW_ERROR_RECOVERY:
+		{
+			uint8_t buf[16];
+
+			/* Never return more than the page holds. */
+			if (len > sizeof(buf))
+				len = sizeof(buf);
+
+			memset(buf, 0, sizeof(buf));
+			be16enc(buf, 16 - 2);
+			buf[2] = 0x70;
+			buf[8] = 0x01;
+			buf[9] = 16 - 10;
+			buf[11] = 0x05;
+			write_prdt(p, slot, cfis, buf, len);
+			tfd = ATA_S_READY | ATA_S_DSC;
+			break;
+		}
+		case MODEPAGE_CD_CAPABILITIES:
+		{
+			uint8_t buf[30];
+
+			/* Never return more than the page holds. */
+			if (len > sizeof(buf))
+				len = sizeof(buf);
+
+			memset(buf, 0, sizeof(buf));
+			be16enc(buf, 30 - 2);
+			buf[2] = 0x70;
+			buf[8] = 0x2A;
+			buf[9] = 30 - 10;
+			buf[10] = 0x08;
+			buf[12] = 0x71;
+			be16enc(&buf[18], 2);
+			be16enc(&buf[20], 512);
+			write_prdt(p, slot, cfis, buf, len);
+			tfd = ATA_S_READY | ATA_S_DSC;
+			break;
+		}
+		default:
+			/* Unknown page: share the error path of cases 1 and 2. */
+			goto error;
+			break;
+		}
+		break;
+	case 3:
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x39;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+		break;
+	/* The label sits inside the outer switch, directly before case 1. */
+error:
+	case 1:
+	case 2:
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x24;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+		break;
+	}
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	ahci_write_fis_d2h(p, slot, cfis, tfd);
+}
+
+/*
+ * Handle the ATAPI GET EVENT STATUS NOTIFICATION packet.  Only requests
+ * with bit 0 of CDB byte 1 set are served; they receive a fixed 8-byte
+ * reply truncated to the allocation length in CDB bytes 7-8.  Requests
+ * without that bit fail with ILLEGAL REQUEST, ASC 0x24.
+ */
+static void
+atapi_get_event_status_notification(struct ahci_port *p, int slot,
+    uint8_t *cfis)
+{
+	uint8_t *acmd;
+	uint32_t tfd;
+
+	acmd = cfis + 0x40;
+
+	if (acmd[1] & 1) {
+		uint8_t reply[8];
+		int len;
+
+		len = be16dec(acmd + 7);
+		if (len > sizeof(reply))
+			len = sizeof(reply);
+
+		memset(reply, 0, sizeof(reply));
+		be16enc(reply, 8 - 2);
+		reply[2] = 0x04;
+		reply[3] = 0x10;
+		reply[5] = 0x02;
+		write_prdt(p, slot, cfis, reply, len);
+		tfd = ATA_S_READY | ATA_S_DSC;
+	} else {
+		/* we don't support asynchronous operation */
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x24;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+	}
+
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	ahci_write_fis_d2h(p, slot, cfis, tfd);
+}
+
+/*
+ * Dispatch an ATAPI packet command.  The opcode is the first byte of
+ * the CDB, located 0x40 bytes into the command FIS area.  Opcodes with
+ * no handler fail with ILLEGAL REQUEST, ASC 0x20.
+ */
+static void
+handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t *acmd;
+
+	acmd = cfis + 0x40;
+
+#ifdef AHCI_DEBUG
+	{
+		int i;
+		DPRINTF("ACMD:");
+		for (i = 0; i < 16; i++)
+			DPRINTF("%02x ", acmd[i]);
+		DPRINTF("\n");
+	}
+#endif
+
+	switch (acmd[0]) {
+	case TEST_UNIT_READY:
+	case PREVENT_ALLOW:	/* TODO */
+		/* Both simply report success. */
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		break;
+	case INQUIRY:
+		atapi_inquiry(p, slot, cfis);
+		break;
+	case READ_CAPACITY:
+		atapi_read_capacity(p, slot, cfis);
+		break;
+	case READ_TOC:
+		atapi_read_toc(p, slot, cfis);
+		break;
+	case REPORT_LUNS:
+		atapi_report_luns(p, slot, cfis);
+		break;
+	case READ_10:
+	case READ_12:
+		atapi_read(p, slot, cfis, 0);
+		break;
+	case REQUEST_SENSE:
+		atapi_request_sense(p, slot, cfis);
+		break;
+	case START_STOP_UNIT:
+		atapi_start_stop_unit(p, slot, cfis);
+		break;
+	case MODE_SENSE_10:
+		atapi_mode_sense(p, slot, cfis);
+		break;
+	case GET_EVENT_STATUS_NOTIFICATION:
+		atapi_get_event_status_notification(p, slot, cfis);
+		break;
+	default:
+		/* Unknown opcode. */
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x20;
+		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
+				ATA_S_READY | ATA_S_ERROR);
+		break;
+	}
+}
+
+/*
+ * Dispatch one ATA command.  The opcode is byte 2 of the command FIS.
+ * The port is marked busy first; each case then either completes the
+ * command immediately with a D2H FIS or hands it to a helper that
+ * queues asynchronous block I/O.  Unknown opcodes are logged and
+ * aborted.
+ */
+static void
+ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+
+	p->tfd |= ATA_S_BUSY;
+	switch (cfis[2]) {
+	case ATA_ATA_IDENTIFY:
+		handle_identify(p, slot, cfis);
+		break;
+	case ATA_SETFEATURES:
+	{
+		/* The subcommand is in FIS byte 3, its argument in byte 12. */
+		switch (cfis[3]) {
+		case ATA_SF_ENAB_SATA_SF:
+			switch (cfis[12]) {
+			case ATA_SATA_SF_AN:
+				p->tfd = ATA_S_DSC | ATA_S_READY;
+				break;
+			default:
+				p->tfd = ATA_S_ERROR | ATA_S_READY;
+				p->tfd |= (ATA_ERROR_ABORT << 8);
+				break;
+			}
+			break;
+		case ATA_SF_ENAB_WCACHE:
+		case ATA_SF_DIS_WCACHE:
+		case ATA_SF_ENAB_RCACHE:
+		case ATA_SF_DIS_RCACHE:
+			/* Accepted, but no state is changed. */
+			p->tfd = ATA_S_DSC | ATA_S_READY;
+			break;
+		case ATA_SF_SETXFER:
+		{
+			/* Only WDMA/UDMA selections update the stored mode. */
+			switch (cfis[12] & 0xf8) {
+			case ATA_PIO:
+			case ATA_PIO0:
+				break;
+			case ATA_WDMA0:
+			case ATA_UDMA0:
+				p->xfermode = (cfis[12] & 0x7);
+				break;
+			}
+			p->tfd = ATA_S_DSC | ATA_S_READY;
+			break;
+		}
+		default:
+			p->tfd = ATA_S_ERROR | ATA_S_READY;
+			p->tfd |= (ATA_ERROR_ABORT << 8);
+			break;
+		}
+		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
+		break;
+	}
+	case ATA_SET_MULTI:
+		/* Accept 0 or a power of two no larger than 128. */
+		if (cfis[12] != 0 &&
+			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
+			p->tfd = ATA_S_ERROR | ATA_S_READY;
+			p->tfd |= (ATA_ERROR_ABORT << 8);
+		} else {
+			p->mult_sectors = cfis[12];
+			p->tfd = ATA_S_DSC | ATA_S_READY;
+		}
+		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
+		break;
+	case ATA_READ:
+	case ATA_WRITE:
+	case ATA_READ48:
+	case ATA_WRITE48:
+	case ATA_READ_MUL:
+	case ATA_WRITE_MUL:
+	case ATA_READ_MUL48:
+	case ATA_WRITE_MUL48:
+	case ATA_READ_DMA:
+	case ATA_WRITE_DMA:
+	case ATA_READ_DMA48:
+	case ATA_WRITE_DMA48:
+	case ATA_READ_FPDMA_QUEUED:
+	case ATA_WRITE_FPDMA_QUEUED:
+		ahci_handle_rw(p, slot, cfis, 0);
+		break;
+	case ATA_FLUSHCACHE:
+	case ATA_FLUSHCACHE48:
+		ahci_handle_flush(p, slot, cfis);
+		break;
+	case ATA_DATA_SET_MANAGEMENT:
+		/* Only TRIM with a count of exactly one block is accepted. */
+		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
+		    cfis[13] == 0 && cfis[12] == 1) {
+			ahci_handle_dsm_trim(p, slot, cfis, 0);
+			break;
+		}
+		ahci_write_fis_d2h(p, slot, cfis,
+		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
+		break;
+	case ATA_SEND_FPDMA_QUEUED:
+		/* Only the DSM TRIM subcommand with a count of one block. */
+		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
+		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
+		    cfis[11] == 0 && cfis[3] == 1) {
+			ahci_handle_dsm_trim(p, slot, cfis, 0);
+			break;
+		}
+		ahci_write_fis_d2h(p, slot, cfis,
+		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
+		break;
+	case ATA_READ_LOG_EXT:
+	case ATA_READ_LOG_DMA_EXT:
+		ahci_handle_read_log(p, slot, cfis);
+		break;
+	case ATA_SECURITY_FREEZE_LOCK:
+	case ATA_SMART_CMD:
+	case ATA_NOP:
+		/* Recognised but deliberately aborted. */
+		ahci_write_fis_d2h(p, slot, cfis,
+		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
+		break;
+	case ATA_CHECK_POWER_MODE:
+		cfis[12] = 0xff;	/* always on */
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		break;
+	case ATA_STANDBY_CMD:
+	case ATA_STANDBY_IMMEDIATE:
+	case ATA_IDLE_CMD:
+	case ATA_IDLE_IMMEDIATE:
+	case ATA_SLEEP:
+	case ATA_READ_VERIFY:
+	case ATA_READ_VERIFY48:
+		/* Reported as successful without doing any work. */
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		break;
+	case ATA_ATAPI_IDENTIFY:
+		handle_atapi_identify(p, slot, cfis);
+		break;
+	case ATA_PACKET_CMD:
+		/* PACKET is only valid on ATAPI ports. */
+		if (!p->atapi) {
+			ahci_write_fis_d2h(p, slot, cfis,
+			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
+		} else
+			handle_packet_cmd(p, slot, cfis);
+		break;
+	default:
+		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
+		ahci_write_fis_d2h(p, slot, cfis,
+		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
+		break;
+	}
+}
+
+/*
+ * Process the command in 'slot'.  The slot's command header supplies
+ * the guest address of the command FIS and PRDT, which are mapped into
+ * host memory.  A FIS that is not host-to-device is logged and ignored.
+ * A FIS with bit 7 of byte 1 set is dispatched as a command; otherwise
+ * it is treated as a control update, which may latch or perform a port
+ * reset and always clears the slot's bit in PxCI.
+ */
+static void
+ahci_handle_slot(struct ahci_port *p, int slot)
+{
+	struct ahci_cmd_hdr *hdr;
+#ifdef AHCI_DEBUG
+	struct ahci_prdt_entry *prdt;
+#endif
+	struct pci_ahci_softc *sc;
+	uint8_t *cfis;
+#ifdef AHCI_DEBUG
+	int cfl, i;
+#endif
+
+	sc = p->pr_sc;
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+#ifdef AHCI_DEBUG
+	/* FIS length in bytes, from the low five bits of the header flags. */
+	cfl = (hdr->flags & 0x1f) * 4;
+#endif
+	/* Map the 0x80-byte FIS area plus all PRDT entries that follow it. */
+	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
+			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
+#ifdef AHCI_DEBUG
+	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
+
+	DPRINTF("\ncfis:");
+	for (i = 0; i < cfl; i++) {
+		if (i % 10 == 0)
+			DPRINTF("\n");
+		DPRINTF("%02x ", cfis[i]);
+	}
+	DPRINTF("\n");
+
+	for (i = 0; i < hdr->prdtl; i++) {
+		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
+		prdt++;
+	}
+#endif
+
+	if (cfis[0] != FIS_TYPE_REGH2D) {
+		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
+		return;
+	}
+
+	if (cfis[1] & 0x80) {
+		ahci_handle_cmd(p, slot, cfis);
+	} else {
+		/*
+		 * Bit 2 of byte 15 latches a reset request; the port reset
+		 * itself runs on a later control FIS that has the bit clear.
+		 * NOTE(review): presumably the SRST bit -- confirm.
+		 */
+		if (cfis[15] & (1 << 2))
+			p->reset = 1;
+		else if (p->reset) {
+			p->reset = 0;
+			ahci_port_reset(p);
+		}
+		p->ci &= ~(1 << slot);
+	}
+}
+
+/*
+ * Issue newly submitted commands on a port.  Nothing is done unless the
+ * port has been started (ST set).  Slots are visited round-robin from
+ * the current command slot, and the scan ends when no issued command is
+ * left that is not already in flight.
+ */
+static void
+ahci_handle_port(struct ahci_port *p)
+{
+
+	if ((p->cmd & AHCI_P_CMD_ST) == 0)
+		return;
+
+	/*
+	 * Look for commands that have been issued but are not yet in
+	 * flight.  Give up as soon as the device is busy, has data
+	 * requested, or is waiting for an error to be cleared.
+	 */
+	while ((p->ci & ~p->pending) != 0) {
+		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0 ||
+		    p->waitforclear)
+			break;
+
+		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
+			/* Publish the slot number in PxCMD, then run it. */
+			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
+			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
+			ahci_handle_slot(p, p->ccs);
+		}
+
+		p->ccs = ((p->ccs + 1) & 31);
+	}
+}
+
+/*
+ * Completion callback for ATA (disk) requests handed to blockif.  It is
+ * called from the blockif i/o thread, so the controller mutex is taken
+ * before any port state is modified.
+ *
+ * On success with data still outstanding (aior->more) the next chunk of
+ * the same command is started.  Otherwise the command is completed towards
+ * the guest - with a Set Device Bits FIS for the queued (FPDMA) commands
+ * and a D2H register FIS for everything else - and the port is rescanned
+ * for further work.
+ */
+static void
+ata_ioreq_cb(struct blockif_req *br, int err)
+{
+	struct ahci_ioreq *aior = br->br_param;
+	struct ahci_port *p = aior->io_pr;
+	struct pci_ahci_softc *sc = p->pr_sc;
+	uint8_t *cfis = aior->cfis;
+	int slot = aior->slot;
+	struct ahci_cmd_hdr *hdr;
+	uint32_t tfd;
+	int ncq, dsm;
+
+	DPRINTF("%s %d\n", __func__, err);
+
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+
+	/* Classify the command from the opcode in byte 2 of the FIS. */
+	ncq = (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
+	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
+	    cfis[2] == ATA_SEND_FPDMA_QUEUED);
+	dsm = (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
+	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
+	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM));
+
+	pthread_mutex_lock(&sc->mtx);
+
+	/* Take the request off the busy list and put it back on the free list. */
+	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
+	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
+
+	if (err == 0) {
+		hdr->prdbc = aior->done;
+
+		if (aior->more) {
+			/* More to transfer: continue the same command. */
+			if (dsm)
+				ahci_handle_dsm_trim(p, slot, cfis, aior->done);
+			else
+				ahci_handle_rw(p, slot, cfis, aior->done);
+			goto out;
+		}
+		tfd = ATA_S_READY | ATA_S_DSC;
+	} else {
+		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
+	}
+
+	if (ncq)
+		ahci_write_fis_sdb(p, slot, cfis, tfd);
+	else
+		ahci_write_fis_d2h(p, slot, cfis, tfd);
+
+	/* The command is finished; its slot is no longer in flight. */
+	p->pending &= ~(1 << slot);
+
+	ahci_check_stopped(p);
+	ahci_handle_port(p);
+out:
+	pthread_mutex_unlock(&sc->mtx);
+	DPRINTF("%s exit\n", __func__);
+}
+
+/*
+ * Completion callback for ATAPI (cd) requests handed to blockif.  Like
+ * ata_ioreq_cb() it takes the controller mutex before touching port state.
+ *
+ * A successful, still incomplete read is continued with atapi_read().
+ * Otherwise the command is completed with a D2H register FIS; on error the
+ * port's sense data is set to ILLEGAL REQUEST with asc 0x21 first.
+ */
+static void
+atapi_ioreq_cb(struct blockif_req *br, int err)
+{
+	struct ahci_ioreq *aior = br->br_param;
+	struct ahci_port *p = aior->io_pr;
+	struct pci_ahci_softc *sc = p->pr_sc;
+	uint8_t *cfis = aior->cfis;
+	int slot = aior->slot;
+	struct ahci_cmd_hdr *hdr;
+	uint32_t tfd;
+
+	DPRINTF("%s %d\n", __func__, err);
+
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
+
+	pthread_mutex_lock(&sc->mtx);
+
+	/* Take the request off the busy list and put it back on the free list. */
+	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
+	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
+
+	if (err == 0) {
+		hdr->prdbc = aior->done;
+
+		if (aior->more) {
+			/* More to transfer: continue the same read. */
+			atapi_read(p, slot, cfis, aior->done);
+			goto out;
+		}
+		tfd = ATA_S_READY | ATA_S_DSC;
+	} else {
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x21;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+	}
+
+	/* Replace the low three bits of byte 4 with the CMD and IN flags. */
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	ahci_write_fis_d2h(p, slot, cfis, tfd);
+
+	/* The command is finished; its slot is no longer in flight. */
+	p->pending &= ~(1 << slot);
+
+	ahci_check_stopped(p);
+	ahci_handle_port(p);
+out:
+	pthread_mutex_unlock(&sc->mtx);
+	DPRINTF("%s exit\n", __func__);
+}
+
+/*
+ * Allocate the port's pool of i/o request structures - one per entry of
+ * the blockif queue - and set up the free and busy lists.  Every request
+ * is pre-wired with the completion callback matching the port type and
+ * with a back pointer to itself as the callback parameter.
+ */
+static void
+pci_ahci_ioreq_init(struct ahci_port *pr)
+{
+	void (*done_cb)(struct blockif_req *, int);
+	struct ahci_ioreq *req;
+	int n;
+
+	pr->ioqsz = blockif_queuesz(pr->bctx);
+	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
+	STAILQ_INIT(&pr->iofhd);
+
+	/* All requests of one port share the same completion routine. */
+	done_cb = pr->atapi ? atapi_ioreq_cb : ata_ioreq_cb;
+
+	/* Every request starts out on the free queue. */
+	for (n = 0; n < pr->ioqsz; n++) {
+		req = &pr->ioreq[n];
+		req->io_pr = pr;
+		req->io_req.br_callback = done_cb;
+		req->io_req.br_param = req;
+		STAILQ_INSERT_TAIL(&pr->iofhd, req, io_flist);
+	}
+
+	TAILQ_INIT(&pr->iobhd);
+}
+
+/*
+ * Handle a guest write to a per-port register.
+ *
+ * 'offset' is the offset inside the BAR; the port number and the register
+ * offset within the port are derived from it.  Writes to the read-only
+ * TFD, SIG and SSTS registers are logged and dropped; SNTF, FBS and
+ * unknown offsets are silently ignored.
+ */
+static void
+pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
+{
+	/* Bits of the command register that the guest may change. */
+	const uint32_t cmd_rw = AHCI_P_CMD_ST | AHCI_P_CMD_SUD |
+	    AHCI_P_CMD_POD | AHCI_P_CMD_CLO | AHCI_P_CMD_FRE |
+	    AHCI_P_CMD_APSTE | AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE |
+	    AHCI_P_CMD_ALPE | AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK;
+	struct ahci_port *p;
+	uint64_t gpa;
+	int port;
+
+	port = (offset - AHCI_OFFSET) / AHCI_STEP;
+	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
+	p = &sc->port[port];
+
+	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
+		port, offset, value);
+
+	switch (offset) {
+	case AHCI_P_CLB:
+		p->clb = value;
+		break;
+	case AHCI_P_CLBU:
+		p->clbu = value;
+		break;
+	case AHCI_P_FB:
+		p->fb = value;
+		break;
+	case AHCI_P_FBU:
+		p->fbu = value;
+		break;
+	case AHCI_P_IS:
+		/* Write-one-to-clear. */
+		p->is &= ~value;
+		ahci_port_intr(p);
+		break;
+	case AHCI_P_IE:
+		p->ie = value & 0xFDC000FF;
+		ahci_port_intr(p);
+		break;
+	case AHCI_P_CMD:
+		/* Replace the writable bits, keep everything else. */
+		p->cmd &= ~cmd_rw;
+		p->cmd |= cmd_rw & value;
+
+		if ((value & AHCI_P_CMD_ST) != 0) {
+			/* Started: map the guest's command list. */
+			p->cmd |= AHCI_P_CMD_CR;
+			gpa = (uint64_t)p->clbu << 32 | p->clb;
+			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), gpa,
+					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
+		} else {
+			ahci_port_stop(p);
+		}
+
+		if ((value & AHCI_P_CMD_FRE) != 0) {
+			p->cmd |= AHCI_P_CMD_FR;
+			gpa = (uint64_t)p->fbu << 32 | p->fb;
+			/* we don't support FBSCP, so rfis size is 256Bytes */
+			p->rfis = paddr_guest2host(ahci_ctx(sc), gpa, 256);
+		} else {
+			p->cmd &= ~AHCI_P_CMD_FR;
+		}
+
+		if ((value & AHCI_P_CMD_CLO) != 0) {
+			/* Command list override: drop BSY/DRQ, self-clear. */
+			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
+			p->cmd &= ~AHCI_P_CMD_CLO;
+		}
+
+		/* A requested ICC value is never kept in the register. */
+		if ((value & AHCI_P_CMD_ICC_MASK) != 0)
+			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
+
+		ahci_handle_port(p);
+		break;
+	case AHCI_P_TFD:
+	case AHCI_P_SIG:
+	case AHCI_P_SSTS:
+		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
+		break;
+	case AHCI_P_SCTL:
+		p->sctl = value;
+		/* A DET reset is only honoured while the port is stopped. */
+		if ((p->cmd & AHCI_P_CMD_ST) == 0 &&
+		    (value & ATA_SC_DET_RESET) != 0)
+			ahci_port_reset(p);
+		break;
+	case AHCI_P_SERR:
+		/* Write-one-to-clear. */
+		p->serr &= ~value;
+		break;
+	case AHCI_P_SACT:
+		p->sact |= value;
+		break;
+	case AHCI_P_CI:
+		p->ci |= value;
+		ahci_handle_port(p);
+		break;
+	case AHCI_P_SNTF:
+	case AHCI_P_FBS:
+	default:
+		break;
+	}
+}
+
+/*
+ * Handle a guest write to one of the generic host control registers.
+ * CAP, PI, VS and CAP2 are read-only; GHC accepts the HBA reset and the
+ * interrupt enable bits; IS is write-one-to-clear.  Any other offset is
+ * ignored.
+ */
+static void
+pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
+{
+	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
+		offset, value);
+
+	switch (offset) {
+	case AHCI_CAP:
+	case AHCI_PI:
+	case AHCI_VS:
+	case AHCI_CAP2:
+		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
+		break;
+	case AHCI_GHC:
+		if ((value & AHCI_GHC_HR) != 0) {
+			/* A reset request takes precedence over the IE bit. */
+			ahci_reset(sc);
+		} else {
+			if ((value & AHCI_GHC_IE) != 0)
+				sc->ghc |= AHCI_GHC_IE;
+			else
+				sc->ghc &= ~AHCI_GHC_IE;
+			ahci_generate_intr(sc, 0xffffffff);
+		}
+		break;
+	case AHCI_IS:
+		sc->is &= ~value;
+		ahci_generate_intr(sc, value);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * BAR write handler.  Only aligned 32-bit writes to BAR 5 are expected
+ * (asserted).  With the controller mutex held, the write is routed to the
+ * generic host registers or to the addressed port; offsets beyond the last
+ * port are logged and dropped.
+ */
+static void
+pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+		int baridx, uint64_t offset, int size, uint64_t value)
+{
+	struct pci_ahci_softc *sc;
+
+	sc = pi->pi_arg;
+
+	assert(baridx == 5);
+	assert((offset % 4) == 0 && size == 4);
+
+	pthread_mutex_lock(&sc->mtx);
+
+	if (offset < AHCI_OFFSET) {
+		pci_ahci_host_write(sc, offset, value);
+	} else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP) {
+		pci_ahci_port_write(sc, offset, value);
+	} else {
+		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
+	}
+
+	pthread_mutex_unlock(&sc->mtx);
+}
+
+/*
+ * Read one of the generic host control registers.  The softc fields from
+ * 'cap' onwards are accessed as an array of 32-bit words indexed by the
+ * register offset, so this relies on those fields being laid out in
+ * register order.  Unknown offsets read as zero.
+ */
+static uint64_t
+pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
+{
+	const uint32_t *regs;
+	uint32_t value;
+
+	switch (offset) {
+	case AHCI_CAP:
+	case AHCI_GHC:
+	case AHCI_IS:
+	case AHCI_PI:
+	case AHCI_VS:
+	case AHCI_CCCC:
+	case AHCI_CCCP:
+	case AHCI_EM_LOC:
+	case AHCI_EM_CTL:
+	case AHCI_CAP2:
+		regs = &sc->cap;
+		value = regs[(offset - AHCI_CAP) / sizeof(uint32_t)];
+		break;
+	default:
+		value = 0;
+		break;
+	}
+	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
+		offset, value);
+
+	return (value);
+}
+
+/*
+ * Read a per-port register.  'offset' is the offset inside the BAR; the
+ * port number and the register offset within the port are derived from
+ * it.  The port fields from 'clb' onwards are accessed as an array of
+ * 32-bit words indexed by the register offset, so this relies on those
+ * fields being laid out in register order.  Unknown offsets read as zero.
+ */
+static uint64_t
+pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
+{
+	const uint32_t *regs;
+	uint32_t value;
+	int port;
+
+	port = (offset - AHCI_OFFSET) / AHCI_STEP;
+	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
+
+	switch (offset) {
+	case AHCI_P_CLB:
+	case AHCI_P_CLBU:
+	case AHCI_P_FB:
+	case AHCI_P_FBU:
+	case AHCI_P_IS:
+	case AHCI_P_IE:
+	case AHCI_P_CMD:
+	case AHCI_P_TFD:
+	case AHCI_P_SIG:
+	case AHCI_P_SSTS:
+	case AHCI_P_SCTL:
+	case AHCI_P_SERR:
+	case AHCI_P_SACT:
+	case AHCI_P_CI:
+	case AHCI_P_SNTF:
+	case AHCI_P_FBS:
+		regs = &sc->port[port].clb;
+		value = regs[(offset - AHCI_P_CLB) / sizeof(uint32_t)];
+		break;
+	default:
+		value = 0;
+		break;
+	}
+
+	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
+		port, offset, value);
+
+	return value;
+}
+
+/*
+ * BAR read handler.  Naturally aligned 1, 2 and 4 byte reads of BAR 5 are
+ * accepted (asserted).  The enclosing 32-bit register is read with the
+ * controller mutex held and then shifted right so that the requested bytes
+ * end up in the low bits of the result.  Offsets beyond the last port are
+ * logged and read as zero.
+ */
+static uint64_t
+pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
+    uint64_t regoff, int size)
+{
+	struct pci_ahci_softc *sc;
+	uint64_t offset;
+	uint32_t value;
+
+	sc = pi->pi_arg;
+
+	assert(baridx == 5);
+	assert(size == 1 || size == 2 || size == 4);
+	assert((regoff & (size - 1)) == 0);
+
+	pthread_mutex_lock(&sc->mtx);
+
+	/* Registers are 32 bits wide: address the one containing regoff. */
+	offset = regoff & ~0x3;
+	if (offset < AHCI_OFFSET) {
+		value = pci_ahci_host_read(sc, offset);
+	} else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP) {
+		value = pci_ahci_port_read(sc, offset);
+	} else {
+		value = 0;
+		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n",
+		    regoff);
+	}
+
+	/* Move the addressed byte(s) down to bit 0. */
+	value >>= 8 * (regoff & 0x3);
+
+	pthread_mutex_unlock(&sc->mtx);
+
+	return (value);
+}
+
+/*
+ * Common initialisation for the "ahci", "ahci-hd" and "ahci-cd" emulations.
+ *
+ * 'opts' describes the backing store of each port.  Port specifications
+ * are separated by ",hd:" or ",cd:"; a leading "hd:" or "cd:" selects the
+ * type of that port.  A specification without a prefix uses the current
+ * value of 'atapi', which starts out as the caller's argument.  The string
+ * is modified in place (separators are overwritten with NUL).
+ *
+ * Returns 0 on success.  Returns 1 if the softc cannot be allocated or a
+ * backing store cannot be opened; in that case every backing store opened
+ * so far is closed, the per-port request pools and the softc are freed,
+ * and pi->pi_arg is reset to NULL so that no stale pointer is left behind.
+ */
+static int
+pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
+{
+	char bident[sizeof("XX:XX:XX")];
+	struct blockif_ctxt *bctxt;
+	struct pci_ahci_softc *sc;
+	int ret, slots, p;
+	MD5_CTX mdctx;
+	u_char digest[16];
+	char *next, *next2;
+
+	ret = 0;
+
+#ifdef AHCI_DEBUG
+	dbg = fopen("/tmp/log", "w+");
+#endif
+
+	sc = calloc(1, sizeof(struct pci_ahci_softc));
+	if (sc == NULL)
+		return (1);
+	pi->pi_arg = sc;
+	sc->asc_pi = pi;
+	pthread_mutex_init(&sc->mtx, NULL);
+	sc->ports = 0;
+	sc->pi = 0;
+	slots = 32;
+
+	for (p = 0; p < MAX_PORTS && opts != NULL; p++, opts = next) {
+		/* Identify and cut off type of present port. */
+		if (strncmp(opts, "hd:", 3) == 0) {
+			atapi = 0;
+			opts += 3;
+		} else if (strncmp(opts, "cd:", 3) == 0) {
+			atapi = 1;
+			opts += 3;
+		}
+
+		/* Find and cut off the next port options. */
+		next = strstr(opts, ",hd:");
+		next2 = strstr(opts, ",cd:");
+		if (next == NULL || (next2 != NULL && next2 < next))
+			next = next2;
+		if (next != NULL) {
+			next[0] = 0;
+			next++;
+		}
+
+		/* An empty specification leaves this port unpopulated. */
+		if (opts[0] == 0)
+			continue;
+
+		/*
+		 * Attempt to open the backing image. Use the PCI slot/func
+		 * and the port number for the identifier string.
+		 */
+		snprintf(bident, sizeof(bident), "%d:%d:%d", pi->pi_slot,
+		    pi->pi_func, p);
+		bctxt = blockif_open(opts, bident);
+		if (bctxt == NULL) {
+			/* Only the ports below p need to be torn down. */
+			sc->ports = p;
+			ret = 1;
+			goto open_fail;
+		}
+		sc->port[p].bctx = bctxt;
+		sc->port[p].pr_sc = sc;
+		sc->port[p].port = p;
+		sc->port[p].atapi = atapi;
+
+		/*
+		 * Create an identifier for the backing file.
+		 * Use parts of the md5 sum of the filename
+		 */
+		MD5Init(&mdctx);
+		MD5Update(&mdctx, opts, strlen(opts));
+		MD5Final(digest, &mdctx);
+		sprintf(sc->port[p].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
+		    digest[0], digest[1], digest[2], digest[3], digest[4],
+		    digest[5]);
+
+		/*
+		 * Allocate blockif request structures and add them
+		 * to the free list
+		 */
+		pci_ahci_ioreq_init(&sc->port[p]);
+
+		sc->pi |= (1 << p);
+		/* The controller advertises the smallest queue of any port. */
+		if (sc->port[p].ioqsz < slots)
+			slots = sc->port[p].ioqsz;
+	}
+	sc->ports = p;
+
+	/* Intel ICH8 AHCI */
+	--slots;	/* the NCS field holds the slot count minus one */
+	if (sc->ports < DEF_PORTS)
+		sc->ports = DEF_PORTS;
+	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
+	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
+	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
+	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
+	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
+
+	sc->vs = 0x10300;
+	sc->cap2 = AHCI_CAP2_APST;
+	ahci_reset(sc);
+
+	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
+	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
+	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
+	/* Round the port count (capped at 16) up to a power of two. */
+	p = MIN(sc->ports, 16);
+	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
+	pci_emul_add_msicap(pi, 1 << p);
+	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
+	    AHCI_OFFSET + sc->ports * AHCI_STEP);
+
+	pci_lintr_request(pi);
+
+open_fail:
+	if (ret) {
+		for (p = 0; p < sc->ports; p++) {
+			if (sc->port[p].bctx != NULL)
+				blockif_close(sc->port[p].bctx);
+			/* Release the request pool; NULL for skipped ports. */
+			free(sc->port[p].ioreq);
+		}
+		pi->pi_arg = NULL;
+		free(sc);
+	}
+
+	return (ret);
+}
+
+/*
+ * pe_init entry point for the "ahci" and "ahci-hd" emulations: calls
+ * pci_ahci_init() with atapi = 0 and returns its result.
+ */
+static int
+pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+
+	return (pci_ahci_init(ctx, pi, opts, 0));
+}
+
+/*
+ * pe_init entry point for the "ahci-cd" emulation: calls pci_ahci_init()
+ * with atapi = 1 and returns its result.
+ */
+static int
+pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+
+	return (pci_ahci_init(ctx, pi, opts, 1));
+}
+
+/*
+ * Use separate emulation names to distinguish drive and atapi devices
+ *
+ * All three tables share the same BAR read/write handlers and differ only
+ * in name and init routine: "ahci" and "ahci-hd" both use
+ * pci_ahci_hd_init(), "ahci-cd" uses pci_ahci_atapi_init().
+ */
+struct pci_devemu pci_de_ahci = {
+	.pe_emu =	"ahci",
+	.pe_init =	pci_ahci_hd_init,
+	.pe_barwrite =	pci_ahci_write,
+	.pe_barread =	pci_ahci_read
+};
+PCI_EMUL_SET(pci_de_ahci);
+
+/* Same as "ahci". */
+struct pci_devemu pci_de_ahci_hd = {
+	.pe_emu =	"ahci-hd",
+	.pe_init =	pci_ahci_hd_init,
+	.pe_barwrite =	pci_ahci_write,
+	.pe_barread =	pci_ahci_read
+};
+PCI_EMUL_SET(pci_de_ahci_hd);
+
+/* Uses the atapi init routine instead. */
+struct pci_devemu pci_de_ahci_cd = {
+	.pe_emu =	"ahci-cd",
+	.pe_init =	pci_ahci_atapi_init,
+	.pe_barwrite =	pci_ahci_write,
+	.pe_barread =	pci_ahci_read
+};
+PCI_EMUL_SET(pci_de_ahci_cd);


Property changes on: trunk/usr.sbin/bhyve/pci_ahci.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_e82545.c
===================================================================
--- trunk/usr.sbin/bhyve/pci_e82545.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_e82545.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,2373 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2016 Alexander Motin <mav at FreeBSD.org>
+ * Copyright (c) 2015 Peter Grehan <grehan at freebsd.org>
+ * Copyright (c) 2013 Jeremiah Lott, Avere Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer
+ *    in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_e82545.c 304425 2016-08-18 11:56:07Z mav $");
+
+#include <sys/types.h>
+#include <sys/limits.h>
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <md5.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include "e1000_regs.h"
+#include "e1000_defines.h"
+#include "mii.h"
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "mevent.h"
+
+/* Hardware/register definitions XXX: move some to common code. */
+#define E82545_VENDOR_ID_INTEL			0x8086
+#define E82545_DEV_ID_82545EM_COPPER		0x100F
+#define E82545_SUBDEV_ID			0x1008
+
+#define E82545_REVISION_4			4
+
+#define E82545_MDIC_DATA_MASK			0x0000FFFF
+#define E82545_MDIC_OP_MASK			0x0c000000
+#define E82545_MDIC_IE				0x20000000
+
+#define E82545_EECD_FWE_DIS	0x00000010 /* Flash writes disabled */
+#define E82545_EECD_FWE_EN	0x00000020 /* Flash writes enabled */
+#define E82545_EECD_FWE_MASK	0x00000030 /* Flash writes mask */
+
+/* BAR indices and sizes */
+#define E82545_BAR_REGISTER			0
+#define E82545_BAR_REGISTER_LEN			(128*1024)
+#define E82545_BAR_FLASH			1
+#define E82545_BAR_FLASH_LEN			(64*1024)
+#define E82545_BAR_IO				2
+#define E82545_BAR_IO_LEN			8
+
+#define E82545_IOADDR				0x00000000
+#define E82545_IODATA				0x00000004
+#define E82545_IO_REGISTER_MAX			0x0001FFFF
+#define E82545_IO_FLASH_BASE			0x00080000
+#define E82545_IO_FLASH_MAX			0x000FFFFF
+
+/* Address of 32-bit entry 'offset' of the register array starting at 'reg'. */
+#define E82545_ARRAY_ENTRY(reg, offset)		(reg + (offset<<2))
+#define E82545_RAR_MAX				15
+#define E82545_MTA_MAX				127
+#define E82545_VFTA_MAX				127
+
+/* Slightly modified from the driver versions, hardcoded for 3 opcode bits,
+ * followed by 6 address bits.
+ * TODO: make opcode bits and addr bits configurable?
+ * NVM Commands - Microwire */
+#define E82545_NVM_OPCODE_BITS	3
+#define E82545_NVM_ADDR_BITS	6
+#define E82545_NVM_DATA_BITS	16
+#define E82545_NVM_OPADDR_BITS	(E82545_NVM_OPCODE_BITS + E82545_NVM_ADDR_BITS)
+#define E82545_NVM_ADDR_MASK	((1 << E82545_NVM_ADDR_BITS)-1)
+#define E82545_NVM_OPCODE_MASK	\
+    (((1 << E82545_NVM_OPCODE_BITS) - 1) << E82545_NVM_ADDR_BITS)
+#define E82545_NVM_OPCODE_READ	(0x6 << E82545_NVM_ADDR_BITS)	/* read */
+#define E82545_NVM_OPCODE_WRITE	(0x5 << E82545_NVM_ADDR_BITS)	/* write */
+#define E82545_NVM_OPCODE_ERASE	(0x7 << E82545_NVM_ADDR_BITS)	/* erase */
+#define	E82545_NVM_OPCODE_EWEN	(0x4 << E82545_NVM_ADDR_BITS)	/* wr-enable */
+
+#define	E82545_NVM_EEPROM_SIZE	64 /* 64 * 16-bit words == 128 bytes */
+
+#define E1000_ICR_SRPD		0x00010000
+
+/* This is an arbitrary number.  There is no hard limit on the chip. */
+#define I82545_MAX_TXSEGS	64
+
+/*
+ * Legacy receive descriptor (16 bytes with natural alignment).
+ */
+struct e1000_rx_desc {
+	uint64_t buffer_addr;	/* Address of the descriptor's data buffer */
+	uint16_t length;	/* Length of data DMAed into data buffer */
+	uint16_t csum;		/* Packet checksum */
+	uint8_t	 status;       	/* Descriptor status */
+	uint8_t  errors;	/* Descriptor Errors */
+	uint16_t special;
+};
+
+/*
+ * Transmit descriptor types: the DEXT command bit together with the
+ * bits selected by 0x00F00000 distinguish legacy (L), context (C) and
+ * data (D) descriptors.
+ */
+#define	E1000_TXD_MASK		(E1000_TXD_CMD_DEXT | 0x00F00000)
+#define E1000_TXD_TYP_L		(0)
+#define E1000_TXD_TYP_C		(E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_C)
+#define E1000_TXD_TYP_D		(E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)
+
+/*
+ * Legacy transmit descriptor.  Each of the two 32-bit words can be
+ * accessed either as a whole ('data') or through its individual fields.
+ */
+struct e1000_tx_desc {
+	uint64_t buffer_addr;   /* Address of the descriptor's data buffer */
+	union {
+		uint32_t data;
+		struct {
+			uint16_t length;  /* Data buffer length */
+			uint8_t  cso;  /* Checksum offset */
+			uint8_t  cmd;  /* Descriptor control */
+		} flags;
+	} lower;
+	union {
+		uint32_t data;
+		struct {
+			uint8_t status; /* Descriptor status */
+			uint8_t css;  /* Checksum start */
+			uint16_t special;
+		} fields;
+	} upper;
+};
+
+/*
+ * Context descriptor: carries checksum-offload and segmentation
+ * parameters (IP and TCP/UDP checksum positions, header length, MSS)
+ * instead of a data buffer.
+ */
+struct e1000_context_desc {
+	union {
+		uint32_t ip_config;
+		struct {
+			uint8_t ipcss;  /* IP checksum start */
+			uint8_t ipcso;  /* IP checksum offset */
+			uint16_t ipcse;  /* IP checksum end */
+		} ip_fields;
+	} lower_setup;
+	union {
+		uint32_t tcp_config;
+		struct {
+			uint8_t tucss;  /* TCP checksum start */
+			uint8_t tucso;  /* TCP checksum offset */
+			uint16_t tucse;  /* TCP checksum end */
+		} tcp_fields;
+	} upper_setup;
+	uint32_t cmd_and_length;
+	union {
+		uint32_t data;
+		struct {
+			uint8_t status;  /* Descriptor status */
+			uint8_t hdr_len;  /* Header length */
+			uint16_t mss;  /* Maximum segment size */
+		} fields;
+	} tcp_seg_setup;
+};
+
+/*
+ * Data descriptor.  Same overall shape as the legacy transmit
+ * descriptor, with different fields in the two 32-bit words.
+ */
+struct e1000_data_desc {
+	uint64_t buffer_addr;  /* Address of the descriptor's data buffer */
+	union {
+		uint32_t data;
+		struct {
+			uint16_t length;  /* Data buffer length */
+			uint8_t typ_len_ext;
+			uint8_t cmd;
+		} flags;
+	} lower;
+	union {
+		uint32_t data;
+		struct {
+			uint8_t status;  /* Descriptor status */
+			uint8_t popts;  /* Packet Options */
+			uint16_t special;
+		} fields;
+	} upper;
+};
+
+/* One transmit ring entry, viewed as any of the three descriptor kinds. */
+union e1000_tx_udesc {
+	struct e1000_tx_desc td;	/* legacy */
+	struct e1000_context_desc cd;	/* context */
+	struct e1000_data_desc dd;	/* data */
+};
+
+/* Tx checksum info for a packet. */
+struct ck_info {
+	int	ck_valid;	/* ck_info is valid */
+	uint8_t	ck_start;	/* start byte of cksum calculation */
+	uint8_t	ck_off;		/* offset of cksum insertion */
+	uint16_t ck_len;	/* length of cksum calc: 0 is to packet-end */
+};
+
+/*
+ * Debug printf
+ *
+ * DPRINTF only prints when e82545_debug is non-zero; WPRINTF always
+ * prints.  DPRINTF is wrapped in do { } while (0) so that it expands to a
+ * single statement: as a bare 'if' it would capture the 'else' of an
+ * enclosing unbraced if/else.
+ */
+static int e82545_debug = 0;
+#define DPRINTF(msg,params...)						\
+	do {								\
+		if (e82545_debug)					\
+			fprintf(stderr, "e82545: " msg, params);	\
+	} while (0)
+#define WPRINTF(msg,params...) fprintf(stderr, "e82545: " msg, params)
+
+#define	MIN(a,b) (((a)<(b))?(a):(b))
+#define	MAX(a,b) (((a)>(b))?(a):(b))
+
+/* s/w representation of the RAL/RAH regs (one unicast address filter) */
+struct  eth_uni {
+	int		eu_valid;
+	int		eu_addrsel;
+	struct ether_addr eu_eth;
+};
+
+
+/*
+ * Per-device state of the emulated 82545 NIC.
+ *
+ * Fields named esc_<REG> in capitals shadow the device register of the
+ * same name; the register offset is given in the trailing comment.  The
+ * remaining fields are emulation-internal state.
+ */
+struct e82545_softc {
+	struct pci_devinst *esc_pi;
+	struct vmctx	*esc_ctx;
+	struct mevent   *esc_mevp;
+	struct mevent   *esc_mevpitr;	/* interrupt throttling timer, if armed */
+	pthread_mutex_t	esc_mtx;
+	struct ether_addr esc_mac;
+	int		esc_tapfd;
+
+	/* General */
+	uint32_t	esc_CTRL;	/* x0000 device ctl */
+	uint32_t	esc_FCAL;	/* x0028 flow ctl addr lo */
+	uint32_t	esc_FCAH;	/* x002C flow ctl addr hi */
+	uint32_t	esc_FCT;	/* x0030 flow ctl type */
+	uint32_t	esc_VET;	/* x0038 VLAN eth type */
+	uint32_t	esc_FCTTV;	/* x0170 flow ctl tx timer */
+	uint32_t	esc_LEDCTL;	/* x0E00 LED control */
+	uint32_t	esc_PBA;	/* x1000 pkt buffer allocation */
+	
+	/* Interrupt control */
+	int		esc_irq_asserted;
+	uint32_t	esc_ICR;	/* x00C0 cause read/clear */
+	uint32_t	esc_ITR;	/* x00C4 intr throttling */
+	uint32_t	esc_ICS;	/* x00C8 cause set */
+	uint32_t	esc_IMS;	/* x00D0 mask set/read */
+	uint32_t	esc_IMC;	/* x00D8 mask clear */
+
+	/* Transmit */
+	union e1000_tx_udesc *esc_txdesc;
+	struct e1000_context_desc esc_txctx;
+	pthread_t	esc_tx_tid;
+	pthread_cond_t	esc_tx_cond;
+	int		esc_tx_enabled;
+	int		esc_tx_active;
+	uint32_t	esc_TXCW;	/* x0178 transmit config */
+	uint32_t	esc_TCTL;	/* x0400 transmit ctl */
+	uint32_t	esc_TIPG;	/* x0410 inter-packet gap */
+	uint16_t	esc_AIT;	/* x0458 Adaptive Interframe Throttle */
+	uint64_t	esc_tdba;      	/* verified 64-bit desc table addr */
+	uint32_t	esc_TDBAL;	/* x3800 desc table addr, low bits */
+	uint32_t	esc_TDBAH;	/* x3804 desc table addr, hi 32-bits */
+	uint32_t	esc_TDLEN;	/* x3808 # descriptors in bytes */
+	uint16_t	esc_TDH;	/* x3810 desc table head idx */
+	uint16_t	esc_TDHr;	/* internal read version of TDH */
+	uint16_t	esc_TDT;	/* x3818 desc table tail idx */
+	uint32_t	esc_TIDV;	/* x3820 intr delay */
+	uint32_t	esc_TXDCTL;	/* x3828 desc control */
+	uint32_t	esc_TADV;	/* x382C intr absolute delay */
+	
+	/* L2 frame acceptance */
+	struct eth_uni	esc_uni[16];	/* 16 x unicast MAC addresses */
+	uint32_t	esc_fmcast[128]; /* Multicast filter bit-match */
+	uint32_t	esc_fvlan[128]; /* VLAN 4096-bit filter */
+	
+	/* Receive */
+	struct e1000_rx_desc *esc_rxdesc;
+	pthread_cond_t	esc_rx_cond;
+	int		esc_rx_enabled;
+	int		esc_rx_active;
+	int		esc_rx_loopback;
+	uint32_t	esc_RCTL;	/* x0100 receive ctl */
+	uint32_t	esc_FCRTL;	/* x2160 flow cntl thresh, low */
+	uint32_t	esc_FCRTH;	/* x2168 flow cntl thresh, hi */
+	uint64_t	esc_rdba;	/* verified 64-bit desc table addr */
+	uint32_t	esc_RDBAL;	/* x2800 desc table addr, low bits */
+	uint32_t	esc_RDBAH;	/* x2804 desc table addr, hi 32-bits*/
+	uint32_t	esc_RDLEN;	/* x2808 #descriptors */
+	uint16_t	esc_RDH;	/* x2810 desc table head idx */
+	uint16_t	esc_RDT;	/* x2818 desc table tail idx */
+	uint32_t	esc_RDTR;	/* x2820 intr delay */
+	uint32_t	esc_RXDCTL;	/* x2828 desc control */
+	uint32_t	esc_RADV;	/* x282C intr absolute delay */
+	uint32_t	esc_RSRPD;	/* x2C00 recv small packet detect */
+	uint32_t	esc_RXCSUM;     /* x5000 receive cksum ctl */
+	
+	/* IO Port register access */
+	uint32_t io_addr;
+
+	/* Shadow copy of MDIC */
+	uint32_t mdi_control;
+	/* Shadow copy of EECD */
+	uint32_t eeprom_control;
+	/* Latest NVM in/out */
+	uint16_t nvm_data;
+	uint16_t nvm_opaddr;
+	/* stats */
+	uint32_t missed_pkt_count; /* dropped for no room in rx queue */
+	uint32_t pkt_rx_by_size[6];
+	uint32_t pkt_tx_by_size[6];
+	uint32_t good_pkt_rx_count;
+	uint32_t bcast_pkt_rx_count;
+	uint32_t mcast_pkt_rx_count;
+	uint32_t good_pkt_tx_count;
+	uint32_t bcast_pkt_tx_count;
+	uint32_t mcast_pkt_tx_count;
+	uint32_t oversize_rx_count;
+	uint32_t tso_tx_count;
+	uint64_t good_octets_rx;
+	uint64_t good_octets_tx;
+	uint64_t missed_octets; /* counts missed and oversized */
+
+	/* Microwire (EEPROM) state machine; see e82545_eecd_strobe() */
+	uint8_t nvm_bits:6; /* number of bits remaining in/out */
+	uint8_t nvm_mode:2;
+#define E82545_NVM_MODE_OPADDR  0x0
+#define E82545_NVM_MODE_DATAIN  0x1
+#define E82545_NVM_MODE_DATAOUT 0x2
+        /* EEPROM data */
+        uint16_t eeprom_data[E82545_NVM_EEPROM_SIZE];
+};
+
+static void e82545_reset(struct e82545_softc *sc, int dev);
+static void e82545_rx_enable(struct e82545_softc *sc);
+static void e82545_rx_disable(struct e82545_softc *sc);
+static void e82545_tap_callback(int fd, enum ev_type type, void *param);
+static void e82545_tx_start(struct e82545_softc *sc);
+static void e82545_tx_enable(struct e82545_softc *sc);
+static void e82545_tx_disable(struct e82545_softc *sc);
+
+/*
+ * Map a frame size in bytes to one of six size buckets (matching the
+ * six-entry pkt_rx_by_size[] / pkt_tx_by_size[] arrays in the softc):
+ *
+ *   0: <= 64    1: 65-127    2: 128-255
+ *   3: 256-511  4: 512-1023  5: >= 1024
+ *
+ * The bucket follows from the most significant set bit of the size.
+ * ffs(), which finds the least significant set bit, must not be used
+ * for this: it only gives the right answer for exact powers of two and
+ * yields a wrong - usually negative - index for every other size.
+ */
+static inline int
+e82545_size_stat_index(uint32_t size)
+{
+	uint32_t limit;
+	int idx;
+
+	if (size <= 64)
+		return (0);
+	if (size >= 1024)
+		return (5);
+
+	/* 65..1023: one bucket per power of two, starting at 128. */
+	idx = 1;
+	for (limit = 128; size >= limit; limit <<= 1)
+		idx++;
+	return (idx);
+}
+
+/*
+ * Fill in the emulated EEPROM: the MAC address (three little-endian
+ * 16-bit words), the PCI ids, and finally the checksum word, chosen so
+ * that all words up to and including it add up to NVM_SUM.
+ */
+static void
+e82545_init_eeprom(struct e82545_softc *sc)
+{
+	uint16_t checksum, i;
+
+	/* mac addr: two octets per word, low octet first */
+	for (i = 0; i < 3; i++) {
+		sc->eeprom_data[NVM_MAC_ADDR + i] =
+		    ((uint16_t)sc->esc_mac.octet[2 * i]) |
+		    (((uint16_t)sc->esc_mac.octet[2 * i + 1]) << 8);
+	}
+
+	/* pci ids */
+	sc->eeprom_data[NVM_SUB_DEV_ID] = E82545_SUBDEV_ID;
+	sc->eeprom_data[NVM_SUB_VEN_ID] = E82545_VENDOR_ID_INTEL;
+	sc->eeprom_data[NVM_DEV_ID] = E82545_DEV_ID_82545EM_COPPER;
+	sc->eeprom_data[NVM_VEN_ID] = E82545_VENDOR_ID_INTEL;
+
+	/* fill in the checksum */
+	for (checksum = 0, i = 0; i < NVM_CHECKSUM_REG; i++)
+		checksum += sc->eeprom_data[i];
+	checksum = NVM_SUM - checksum;
+	sc->eeprom_data[NVM_CHECKSUM_REG] = checksum;
+	DPRINTF("eeprom checksum: 0x%x\r\n", checksum);
+}
+
+/*
+ * MDI (PHY register) write.  The written value is not stored anywhere:
+ * the call only emits a debug message.
+ */
+static void
+e82545_write_mdi(struct e82545_softc *sc, uint8_t reg_addr,
+			uint8_t phy_addr, uint32_t data)
+{
+	DPRINTF("Write mdi reg:0x%x phy:0x%x data: 0x%x\r\n", reg_addr, phy_addr, data);
+}
+
+/*
+ * MDI (PHY register) read.  A small set of registers returns fixed
+ * values: status reports link up with autonegotiation complete, and the
+ * id registers are derived from M88E1011_I_PHY_ID.  Any other register
+ * is logged and reads as 0.  phy_addr is only used in the log message.
+ */
+static uint32_t
+e82545_read_mdi(struct e82545_softc *sc, uint8_t reg_addr,
+			uint8_t phy_addr)
+{
+	uint32_t val;
+
+	switch (reg_addr) {
+	case PHY_STATUS:
+		val = MII_SR_LINK_STATUS | MII_SR_AUTONEG_CAPS |
+		    MII_SR_AUTONEG_COMPLETE;
+		break;
+	case PHY_AUTONEG_ADV:
+		val = NWAY_AR_SELECTOR_FIELD;
+		break;
+	case PHY_LP_ABILITY:
+		val = 0;
+		break;
+	case PHY_1000T_STATUS:
+		val = SR_1000T_LP_FD_CAPS | SR_1000T_REMOTE_RX_STATUS |
+		    SR_1000T_LOCAL_RX_STATUS;
+		break;
+	case PHY_ID1:
+		/* Upper half of the PHY id. */
+		val = (M88E1011_I_PHY_ID >> 16) & 0xFFFF;
+		break;
+	case PHY_ID2:
+		/* Lower half of the PHY id, with the revision or'ed in. */
+		val = (M88E1011_I_PHY_ID | E82545_REVISION_4) & 0xFFFF;
+		break;
+	default:
+		DPRINTF("Unknown mdi read reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr);
+		val = 0;
+		break;
+	}
+
+	return (val);
+}
+
+/*
+ * Return the Microwire state machine to its idle state, in which it
+ * collects the opcode and address bits of the next command.
+ */
+static void
+e82545_nvm_await_opaddr(struct e82545_softc *sc)
+{
+	sc->nvm_opaddr = 0;
+	sc->nvm_mode = E82545_NVM_MODE_OPADDR;
+	sc->nvm_bits = E82545_NVM_OPADDR_BITS;
+}
+
+/*
+ * Advance the emulated Microwire EEPROM by one bit (presumably called
+ * once per clock strobe of EECD - the caller is not part of this code).
+ *
+ * sc->nvm_bits counts the bits left in the current phase and
+ * sc->nvm_mode selects the phase:
+ *   OPADDR  - shift the DI bit of sc->eeprom_control into nvm_opaddr.
+ *             After the last bit the opcode is decoded: READ loads the
+ *             addressed word into nvm_data and enters DATAOUT, WRITE
+ *             enters DATAIN, everything else returns to OPADDR.
+ *   DATAOUT - present the top bit of nvm_data on the DO bit of
+ *             sc->eeprom_control and shift nvm_data left.
+ *   DATAIN  - shift the DI bit into nvm_data; after the last bit the
+ *             word is stored in eeprom_data[] if the address is valid.
+ */
+static void
+e82545_eecd_strobe(struct e82545_softc *sc)
+{
+	uint16_t op, addr;
+
+	/* Microwire state machine */
+	if (sc->nvm_bits == 0) {
+		DPRINTF("eeprom state machine not expecting data! "
+			"0x%x 0x%x 0x%x 0x%x\r\n",
+			sc->nvm_mode, sc->nvm_bits,
+			sc->nvm_opaddr, sc->nvm_data);
+		return;
+	}
+	sc->nvm_bits--;
+	if (sc->nvm_mode == E82545_NVM_MODE_DATAOUT) {
+		/* shifting out */
+		if (sc->nvm_data & 0x8000) {
+			sc->eeprom_control |= E1000_EECD_DO;
+		} else {
+			sc->eeprom_control &= ~E1000_EECD_DO;
+		}
+		sc->nvm_data <<= 1;
+		if (sc->nvm_bits == 0) {
+			/* read done, back to opcode mode. */
+			e82545_nvm_await_opaddr(sc);
+		}
+	} else if (sc->nvm_mode == E82545_NVM_MODE_DATAIN) {
+		/* shifting in */
+		sc->nvm_data <<= 1;
+		if (sc->eeprom_control & E1000_EECD_DI) {
+			sc->nvm_data |= 1;
+		}
+		if (sc->nvm_bits == 0) {
+			/* eeprom write */
+			op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK;
+			addr = sc->nvm_opaddr & E82545_NVM_ADDR_MASK;
+			if (op != E82545_NVM_OPCODE_WRITE) {
+				DPRINTF("Illegal eeprom write op 0x%x\r\n",
+					sc->nvm_opaddr);
+			} else if (addr >= E82545_NVM_EEPROM_SIZE) {
+				DPRINTF("Illegal eeprom write addr 0x%x\r\n",
+					sc->nvm_opaddr);
+			} else {
+				DPRINTF("eeprom write eeprom[0x%x] = 0x%x\r\n",
+				addr, sc->nvm_data);
+				sc->eeprom_data[addr] = sc->nvm_data;
+			}
+			/* back to opcode mode */
+			e82545_nvm_await_opaddr(sc);
+		}
+	} else if (sc->nvm_mode == E82545_NVM_MODE_OPADDR) {
+		sc->nvm_opaddr <<= 1;
+		if (sc->eeprom_control & E1000_EECD_DI) {
+			sc->nvm_opaddr |= 1;
+		}
+		if (sc->nvm_bits == 0) {
+			op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK;
+			switch (op) {
+			case E82545_NVM_OPCODE_EWEN:
+				DPRINTF("eeprom write enable: 0x%x\r\n",
+					sc->nvm_opaddr);
+				/* back to opcode mode */
+				e82545_nvm_await_opaddr(sc);
+				break;
+			case E82545_NVM_OPCODE_READ:
+				addr = sc->nvm_opaddr & E82545_NVM_ADDR_MASK;
+				sc->nvm_mode = E82545_NVM_MODE_DATAOUT;
+				sc->nvm_bits = E82545_NVM_DATA_BITS;
+				if (addr < E82545_NVM_EEPROM_SIZE) {
+					sc->nvm_data = sc->eeprom_data[addr];
+					DPRINTF("eeprom read: eeprom[0x%x] = 0x%x\r\n",
+						addr, sc->nvm_data);
+				} else {
+					/* Out of range reads return zero. */
+					DPRINTF("eeprom illegal read: 0x%x\r\n",
+						sc->nvm_opaddr);
+					sc->nvm_data = 0;
+				}
+				break;
+			case E82545_NVM_OPCODE_WRITE:
+				sc->nvm_mode = E82545_NVM_MODE_DATAIN;
+				sc->nvm_bits = E82545_NVM_DATA_BITS;
+				sc->nvm_data = 0;
+				break;
+			default:
+				DPRINTF("eeprom unknown op: 0x%x\r\n",
+					sc->nvm_opaddr);
+				/* back to opcode mode */
+				e82545_nvm_await_opaddr(sc);
+				break;
+			}
+		}
+	} else {
+		DPRINTF("eeprom state machine wrong state! "
+			"0x%x 0x%x 0x%x 0x%x\r\n",
+			sc->nvm_mode, sc->nvm_bits,
+			sc->nvm_opaddr, sc->nvm_data);
+	}
+}
+
+/*
+ * Interrupt throttling timer callback.  If an unmasked cause is pending
+ * and the interrupt line is not asserted, assert it now (the timer is
+ * left in place).  In every other case the throttling timer is deleted.
+ */
+static void
+e82545_itr_callback(int fd, enum ev_type type, void *param)
+{
+	struct e82545_softc *sc = param;
+	uint32_t pending;
+
+	pthread_mutex_lock(&sc->esc_mtx);
+
+	pending = sc->esc_ICR & sc->esc_IMS;
+	if (pending == 0 || sc->esc_irq_asserted) {
+		/* Nothing to deliver: stop throttling. */
+		mevent_delete(sc->esc_mevpitr);
+		sc->esc_mevpitr = NULL;
+	} else {
+		DPRINTF("itr callback: lintr assert %x\r\n", pending);
+		sc->esc_irq_asserted = 1;
+		pci_lintr_assert(sc->esc_pi);
+	}
+
+	pthread_mutex_unlock(&sc->esc_mtx);
+}
+
+/*
+ * Latch interrupt cause 'bits' into ICR and raise the legacy interrupt
+ * if appropriate.
+ *
+ * An interrupt is raised only when at least one of the bits is newly
+ * set and unmasked, no throttle timer is running, and the line is not
+ * already asserted.  When ITR is non-zero a throttle timer is armed
+ * after asserting the line.
+ */
+static void
+e82545_icr_assert(struct e82545_softc *sc, uint32_t bits)
+{
+	uint32_t fresh;
+
+	DPRINTF("icr assert: 0x%x\r\n", bits);
+
+	/* Newly-set, unmasked causes; computed before ICR is updated. */
+	fresh = bits & ~sc->esc_ICR & sc->esc_IMS;
+	sc->esc_ICR |= bits;
+
+	if (fresh == 0) {
+		DPRINTF("icr assert: masked %x, ims %x\r\n", fresh, sc->esc_IMS);
+		return;
+	}
+	if (sc->esc_mevpitr != NULL) {
+		DPRINTF("icr assert: throttled %x, ims %x\r\n", fresh, sc->esc_IMS);
+		return;
+	}
+	if (sc->esc_irq_asserted) {
+		return;
+	}
+
+	DPRINTF("icr assert: lintr assert %x\r\n", fresh);
+	sc->esc_irq_asserted = 1;
+	pci_lintr_assert(sc->esc_pi);
+
+	/* ITR counts 256ns units; round up to whole milliseconds. */
+	if (sc->esc_ITR != 0) {
+		sc->esc_mevpitr = mevent_add(
+		    (sc->esc_ITR + 3905) / 3906,  /* 256ns -> 1ms */
+		    EVF_TIMER, e82545_itr_callback, sc);
+	}
+}
+
+/*
+ * Handle a write to the interrupt mask set register: enable the causes
+ * in 'bits'.
+ *
+ * Unmasking may expose causes already latched in ICR; if so, and no
+ * throttle timer is running and the line is not already asserted, the
+ * interrupt is raised (and a throttle timer armed when ITR != 0).
+ */
+static void
+e82545_ims_change(struct e82545_softc *sc, uint32_t bits)
+{
+	uint32_t exposed;
+
+	/* Latched causes that become unmasked by this write. */
+	exposed = bits & sc->esc_ICR & ~sc->esc_IMS;
+	sc->esc_IMS |= bits;
+
+	if (exposed == 0) {
+		DPRINTF("ims change: masked %x, ims %x\r\n", exposed, sc->esc_IMS);
+		return;
+	}
+	if (sc->esc_mevpitr != NULL) {
+		DPRINTF("ims change: throttled %x, ims %x\r\n", exposed, sc->esc_IMS);
+		return;
+	}
+	if (sc->esc_irq_asserted) {
+		return;
+	}
+
+	DPRINTF("ims change: lintr assert %x\n\r", exposed);
+	sc->esc_irq_asserted = 1;
+	pci_lintr_assert(sc->esc_pi);
+
+	/* ITR counts 256ns units; round up to whole milliseconds. */
+	if (sc->esc_ITR != 0) {
+		sc->esc_mevpitr = mevent_add(
+		    (sc->esc_ITR + 3905) / 3906,  /* 256ns -> 1ms */
+		    EVF_TIMER, e82545_itr_callback, sc);
+	}
+}
+
+/*
+ * Clear interrupt cause 'bits' from ICR and drop the legacy interrupt
+ * line if it was asserted and no unmasked cause remains.
+ */
+static void
+e82545_icr_deassert(struct e82545_softc *sc, uint32_t bits)
+{
+
+	DPRINTF("icr deassert: 0x%x\r\n", bits);
+	sc->esc_ICR &= ~bits;
+
+	if (!sc->esc_irq_asserted) {
+		return;
+	}
+	if ((sc->esc_ICR & sc->esc_IMS) != 0) {
+		/* Some unmasked source is still pending. */
+		return;
+	}
+
+	DPRINTF("icr deassert: lintr deassert %x\r\n", bits);
+	pci_lintr_deassert(sc->esc_pi);
+	sc->esc_irq_asserted = 0;
+}
+
+/*
+ * Handle a guest write to one of the interrupt registers.
+ *
+ *   ICR - write-one-to-clear the given causes
+ *   ITR - store the throttle interval
+ *   ICS - software-set the given causes
+ *   IMS - unmask the given causes
+ *   IMC - mask the given causes
+ *
+ * Any other offset is ignored.
+ */
+static void
+e82545_intr_write(struct e82545_softc *sc, uint32_t offset, uint32_t value)
+{
+
+	DPRINTF("intr_write: off %x, val %x\n\r", offset, value);
+	
+	switch (offset) {
+	case E1000_ICR:
+		e82545_icr_deassert(sc, value);
+		break;
+	case E1000_ITR:
+		sc->esc_ITR = value;
+		break;
+	case E1000_ICS:
+		sc->esc_ICS = value;	/* not used: store for debug */
+		e82545_icr_assert(sc, value);
+		break;
+	case E1000_IMS:
+		e82545_ims_change(sc, value);
+		break;
+	case E1000_IMC:
+		sc->esc_IMC = value;	/* for debug */
+		sc->esc_IMS &= ~value;
+		/*
+		 * If masking removed the last unmasked pending cause while
+		 * the line was asserted, drop the line; otherwise the guest
+		 * keeps seeing a level interrupt it has just masked.
+		 */
+		if (sc->esc_irq_asserted && !(sc->esc_ICR & sc->esc_IMS)) {
+			DPRINTF("imc change: lintr deassert %x\r\n", value);
+			pci_lintr_deassert(sc->esc_pi);
+			sc->esc_irq_asserted = 0;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Handle a guest read of one of the interrupt registers and return the
+ * value to present.
+ *
+ * Reading ICR returns the latched causes and clears them (dropping the
+ * interrupt line if it was asserted).  ITR and IMS read back their
+ * stored values.  ICS and IMC are write-only and, like any other
+ * offset, read as zero.
+ */
+static uint32_t
+e82545_intr_read(struct e82545_softc *sc, uint32_t offset)
+{
+	uint32_t value;
+
+	DPRINTF("intr_read: off %x\n\r", offset);
+
+	switch (offset) {
+	case E1000_ICR:
+		/* Read-to-clear. */
+		value = sc->esc_ICR;
+		sc->esc_ICR = 0;
+		e82545_icr_deassert(sc, ~0);
+		return (value);
+	case E1000_ITR:
+		return (sc->esc_ITR);
+	case E1000_IMS:
+		return (sc->esc_IMS);
+	case E1000_ICS:		/* write-only register */
+	case E1000_IMC:		/* write-only register */
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Handle a write to the device control register.  The value is stored
+ * with the self-clearing RST bit stripped; if RST was set, a software
+ * reset is performed afterwards.
+ */
+static void
+e82545_devctl(struct e82545_softc *sc, uint32_t val)
+{
+	int reset;
+
+	reset = (val & E1000_CTRL_RST) != 0;
+	sc->esc_CTRL = val & ~E1000_CTRL_RST;
+
+	/* XXX check for phy reset ? */
+	if (!reset) {
+		return;
+	}
+
+	DPRINTF("e1k: s/w reset, ctl %x\n", val);
+	e82545_reset(sc, 1);
+}
+
+/*
+ * Recompute the 64-bit receive descriptor ring base from RDBAH:RDBAL
+ * and cache the host mapping of the guest descriptor array (RDLEN
+ * bytes long).
+ */
+static void
+e82545_rx_update_rdba(struct e82545_softc *sc)
+{
+	uint64_t hi, lo;
+
+	/* XXX verify desc base/len within phys mem range */
+	hi = (uint64_t)sc->esc_RDBAH << 32;
+	lo = sc->esc_RDBAL;
+	sc->esc_rdba = hi | lo;
+
+	sc->esc_rxdesc = paddr_guest2host(sc->esc_ctx, sc->esc_rdba,
+	    sc->esc_RDLEN);
+}
+
+/*
+ * Handle a write to the receive control register.
+ *
+ * The value is always saved (minus reserved bits).  When the enable
+ * bit changes state the receiver is started -- latching loopback mode
+ * and refreshing the cached descriptor ring mapping -- or stopped, in
+ * which case the cached ring state is cleared.
+ */
+static void
+e82545_rx_ctl(struct e82545_softc *sc, uint32_t val)
+{
+	int enable;
+
+	enable = ((val & E1000_RCTL_EN) == E1000_RCTL_EN);
+
+	/* Save RCTL after stripping reserved bits 31:27,24,21,14,11:10,0 */
+	sc->esc_RCTL = val & ~0xF9204c01;
+
+	DPRINTF("rx_ctl - %s RCTL %x, val %x\n",
+		enable ? "on" : "off", sc->esc_RCTL, val);
+
+	/* Nothing more to do unless a state change was requested. */
+	if (enable == sc->esc_rx_enabled) {
+		return;
+	}
+
+	if (!enable) {
+		e82545_rx_disable(sc);
+		sc->esc_rx_loopback = 0;
+		sc->esc_rdba = 0;
+		sc->esc_rxdesc = NULL;
+		return;
+	}
+
+	sc->esc_rx_loopback = (sc->esc_RCTL & E1000_RCTL_LBM_TCVR) ? 1 : 0;
+	e82545_rx_update_rdba(sc);
+	e82545_rx_enable(sc);
+}
+
+/*
+ * Recompute the 64-bit transmit descriptor ring base from TDBAH:TDBAL
+ * and cache the host mapping of the guest descriptor array (TDLEN
+ * bytes long).
+ */
+static void
+e82545_tx_update_tdba(struct e82545_softc *sc)
+{
+	uint64_t hi, lo;
+
+	/* XXX verify desc base/len within phys mem range */
+	hi = (uint64_t)sc->esc_TDBAH << 32;
+	lo = sc->esc_TDBAL;
+	sc->esc_tdba = hi | lo;
+
+	sc->esc_txdesc = paddr_guest2host(sc->esc_ctx, sc->esc_tdba,
+	    sc->esc_TDLEN);
+}
+
+/*
+ * Handle a write to the transmit control register.
+ *
+ * When the enable bit changes state the transmitter is started (after
+ * refreshing the cached descriptor ring mapping) or stopped (clearing
+ * the cached ring state).  The register value is saved, minus reserved
+ * bits, on every write so that a guest read-back reflects updates to
+ * the other fields even when TCTL.EN is unchanged; this matches how
+ * e82545_rx_ctl() treats RCTL.
+ */
+static void
+e82545_tx_ctl(struct e82545_softc *sc, uint32_t val)
+{
+	int on;
+
+	on = ((val & E1000_TCTL_EN) == E1000_TCTL_EN);
+
+	/* Only act on TCTL_EN settings that change state. */
+	if (on != sc->esc_tx_enabled) {
+		if (on) {
+			e82545_tx_update_tdba(sc);
+			e82545_tx_enable(sc);
+		} else {
+			e82545_tx_disable(sc);
+			sc->esc_tdba = 0;
+			sc->esc_txdesc = NULL;
+		}
+	}
+
+	/* Save TCTL value after stripping reserved bits 31:25,23,2,0 */
+	sc->esc_TCTL = val & ~0xFE800005;
+}
+
+/*
+ * Decode the receive buffer size, in bytes, selected by the BSEX and
+ * BSIZE fields of an RCTL value.  Encodings not listed below are
+ * forbidden and yield 256.
+ */
+int
+e82545_bufsz(uint32_t rctl)
+{
+	int bytes;
+
+	/* E1000_RCTL_SZ_256 has both BSIZE bits set, so it doubles as mask. */
+	switch (rctl & (E1000_RCTL_BSEX | E1000_RCTL_SZ_256)) {
+	case (E1000_RCTL_SZ_2048):
+		bytes = 2048;
+		break;
+	case (E1000_RCTL_SZ_1024):
+		bytes = 1024;
+		break;
+	case (E1000_RCTL_SZ_512):
+		bytes = 512;
+		break;
+	case (E1000_RCTL_SZ_256):
+		bytes = 256;
+		break;
+	case (E1000_RCTL_BSEX|E1000_RCTL_SZ_16384):
+		bytes = 16384;
+		break;
+	case (E1000_RCTL_BSEX|E1000_RCTL_SZ_8192):
+		bytes = 8192;
+		break;
+	case (E1000_RCTL_BSEX|E1000_RCTL_SZ_4096):
+		bytes = 4096;
+		break;
+	default:
+		bytes = 256;	/* Forbidden value. */
+		break;
+	}
+
+	return (bytes);
+}
+
+/* Scratch buffer used only to drain and discard packets from the tap. */
+static uint8_t dummybuf[2048];
+
+/*
+ * Tap read-event handler: move packets from the tap device into the
+ * guest's receive descriptor ring.
+ *
+ * Packets are drained and dropped when the receiver is disabled or in
+ * loopback, when no usable descriptor ring is configured, or when fewer
+ * free descriptors remain than a maximum-sized packet could need.
+ * Otherwise packets are read directly into the guest buffers named by
+ * the descriptors at the head of the ring, the descriptors are
+ * completed, and the accumulated interrupt causes are asserted.
+ *
+ * The softc mutex is dropped while packets are copied; esc_rx_active
+ * marks that window so e82545_rx_disable() can wait for it to end.
+ * 'fd' and 'type' are unused.
+ *
+ * XXX one packet at a time until this is debugged
+ */
+static void
+e82545_tap_callback(int fd, enum ev_type type, void *param)
+{
+	struct e82545_softc *sc = param;
+	struct e1000_rx_desc *rxd;
+	struct iovec vec[64];
+	int left, len, lim, maxpktsz, maxpktdesc, bufsz, i, n, size;
+	uint32_t cause = 0;
+	uint16_t *tp, tag, head;
+
+	pthread_mutex_lock(&sc->esc_mtx);
+	DPRINTF("rx_run: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT);
+
+	if (!sc->esc_rx_enabled || sc->esc_rx_loopback) {
+		DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped\r\n",
+		    sc->esc_rx_enabled, sc->esc_rx_loopback);
+		while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) {
+		}
+		goto done1;
+	}
+	bufsz = e82545_bufsz(sc->esc_RCTL);
+	maxpktsz = (sc->esc_RCTL & E1000_RCTL_LPE) ? 16384 : 1522;
+	maxpktdesc = (maxpktsz + bufsz - 1) / bufsz;
+	size = sc->esc_RDLEN / 16;
+	/*
+	 * A guest may enable the receiver without programming a ring
+	 * (RDLEN == 0) or with a ring that has no host mapping.  Treat
+	 * that as "no descriptors available" rather than dividing by
+	 * zero or dereferencing a NULL ring below.
+	 */
+	if (size == 0 || sc->esc_rxdesc == NULL) {
+		DPRINTF("rx ring not usable (RDLEN %x) -- packet(s) dropped\r\n",
+		    sc->esc_RDLEN);
+		while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) {
+		}
+		goto done1;
+	}
+	head = sc->esc_RDH;
+	left = (size + sc->esc_RDT - head) % size;
+	if (left < maxpktdesc) {
+		DPRINTF("rx overflow (%d < %d) -- packet(s) dropped\r\n",
+		    left, maxpktdesc);
+		while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) {
+		}
+		goto done1;
+	}
+
+	sc->esc_rx_active = 1;
+	pthread_mutex_unlock(&sc->esc_mtx);
+
+	/* Process at most a quarter of the ring per invocation. */
+	for (lim = size / 4; lim > 0 && left >= maxpktdesc; lim -= n) {
+
+		/* Grab rx descriptor pointed to by the head pointer */
+		for (i = 0; i < maxpktdesc; i++) {
+			rxd = &sc->esc_rxdesc[(head + i) % size];
+			vec[i].iov_base = paddr_guest2host(sc->esc_ctx,
+			    rxd->buffer_addr, bufsz);
+			vec[i].iov_len = bufsz;
+		}
+		len = readv(sc->esc_tapfd, vec, maxpktdesc);
+		if (len <= 0) {
+			DPRINTF("tap: readv() returned %d\n", len);
+			goto done;
+		}
+
+		/*
+		 * Adjust the packet length based on whether the CRC needs
+		 * to be stripped or if the packet is less than the minimum
+		 * eth packet size.
+		 */
+		if (len < ETHER_MIN_LEN - ETHER_CRC_LEN)
+			len = ETHER_MIN_LEN - ETHER_CRC_LEN;
+		if (!(sc->esc_RCTL & E1000_RCTL_SECRC))
+			len += ETHER_CRC_LEN;
+		n = (len + bufsz - 1) / bufsz;
+
+		DPRINTF("packet read %d bytes, %d segs, head %d\r\n",
+		    len, n, head);
+
+		/* Apply VLAN filter. */
+		tp = (uint16_t *)vec[0].iov_base + 6;
+		if ((sc->esc_RCTL & E1000_RCTL_VFE) &&
+		    (ntohs(tp[0]) == sc->esc_VET)) {
+			tag = ntohs(tp[1]) & 0x0fff;
+			if ((sc->esc_fvlan[tag >> 5] &
+			    (1 << (tag & 0x1f))) != 0) {
+				DPRINTF("known VLAN %d\r\n", tag);
+			} else {
+				DPRINTF("unknown VLAN %d\r\n", tag);
+				/* Drop: consume no descriptors. */
+				n = 0;
+				continue;
+			}
+		}
+
+		/* Update all consumed descriptors. */
+		for (i = 0; i < n - 1; i++) {
+			rxd = &sc->esc_rxdesc[(head + i) % size];
+			rxd->length = bufsz;
+			rxd->csum = 0;
+			rxd->errors = 0;
+			rxd->special = 0;
+			rxd->status = E1000_RXD_STAT_DD;
+		}
+		rxd = &sc->esc_rxdesc[(head + i) % size];
+		/*
+		 * The last descriptor holds whatever the previous n - 1
+		 * full buffers did not.  (Using len % bufsz here would
+		 * report 0 bytes when len is an exact multiple of bufsz.)
+		 */
+		rxd->length = len - i * bufsz;
+		rxd->csum = 0;
+		rxd->errors = 0;
+		rxd->special = 0;
+		/* XXX signal no checksum for now */
+		rxd->status = E1000_RXD_STAT_PIF | E1000_RXD_STAT_IXSM |
+		    E1000_RXD_STAT_EOP | E1000_RXD_STAT_DD;
+
+		/* Schedule receive interrupts. */
+		if (len <= sc->esc_RSRPD) {
+			cause |= E1000_ICR_SRPD | E1000_ICR_RXT0;
+		} else {
+			/* XXX: RDRT and RADV timers should be here. */
+			cause |= E1000_ICR_RXT0;
+		}
+
+		head = (head + n) % size;
+		left -= n;
+	}
+
+done:
+	pthread_mutex_lock(&sc->esc_mtx);
+	sc->esc_rx_active = 0;
+	/* Wake a disabler that is waiting for the receive path to go idle. */
+	if (sc->esc_rx_enabled == 0)
+		pthread_cond_signal(&sc->esc_rx_cond);
+
+	sc->esc_RDH = head;
+	/* Respect E1000_RCTL_RDMTS */
+	left = (size + sc->esc_RDT - head) % size;
+	if (left < (size >> (((sc->esc_RCTL >> 8) & 3) + 1)))
+		cause |= E1000_ICR_RXDMT0;
+	/* Assert all accumulated interrupts. */
+	if (cause != 0)
+		e82545_icr_assert(sc, cause);
+done1:
+	DPRINTF("rx_run done: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT);
+	pthread_mutex_unlock(&sc->esc_mtx);
+}
+
+/*
+ * Fold a 32-bit one's-complement accumulator down to 16 bits by adding
+ * the carries (upper 16 bits) back into the lower 16 bits until none
+ * remain.
+ */
+static uint16_t
+e82545_carry(uint32_t sum)
+{
+
+	while ((sum >> 16) != 0)
+		sum = (sum & 0xFFFF) + (sum >> 16);
+	return (sum);
+}
+
+/*
+ * Compute the folded 16-bit one's-complement sum of 'len' bytes at
+ * 'buf'.  Byte pairs are summed as 16-bit words in memory order; the
+ * result is not inverted.
+ *
+ * 'buf' may have any alignment (callers pass pointers at arbitrary byte
+ * offsets into packet data), so each word is fetched with memcpy()
+ * instead of dereferencing a cast pointer.  A non-positive 'len' sums
+ * nothing.
+ */
+static uint16_t
+e82545_buf_checksum(uint8_t *buf, int len)
+{
+	int i;
+	uint32_t sum = 0;
+	uint16_t word;
+
+	/* Checksum all the pairs of bytes first... */
+	for (i = 0; i + 1 < len; i += 2) {
+		memcpy(&word, buf + i, sizeof(word));
+		sum += word;
+	}
+
+	/*
+	 * If there's a single byte left over, checksum it, too.
+	 * Network byte order is big-endian, so the remaining byte is
+	 * the high byte.
+	 */
+	if (i < len)
+		sum += htons(buf[i] << 8);
+
+	return (e82545_carry(sum));
+}
+
+/*
+ * Compute the folded 16-bit one's-complement sum over 'len' bytes of
+ * the scatter/gather list 'iov', starting 'off' bytes into it.  The sum
+ * stops early if the vectors run out.
+ */
+static uint16_t
+e82545_iov_checksum(struct iovec *iov, int iovcnt, int off, int len)
+{
+	uint32_t total, part;
+	int chunk, shifted;
+
+	total = 0;
+
+	/* Skip completely unneeded vectors. */
+	for (; iovcnt > 0 && iov->iov_len <= off && off > 0; iov++, iovcnt--)
+		off -= iov->iov_len;
+
+	/*
+	 * Calculate checksum of requested range.  Once an odd number of
+	 * bytes has been consumed, the partial sums of following pieces
+	 * are shifted by one byte before being accumulated.
+	 */
+	shifted = 0;
+	for (; len > 0 && iovcnt > 0; iov++, iovcnt--) {
+		chunk = MIN(len, iov->iov_len - off);
+		part = e82545_buf_checksum(iov->iov_base + off, chunk);
+		if (shifted)
+			total += part << 8;
+		else
+			total += part;
+		shifted ^= (chunk & 1);
+		len -= chunk;
+		off = 0;
+	}
+
+	return (e82545_carry(total));
+}
+
+/*
+ * Return the transmit descriptor type encoded in the descriptor's
+ * 'lower' dword: 0 when the DEXT (extension) bit is clear, otherwise
+ * the bits selected by E1000_TXD_MASK.
+ */
+int
+e82545_txdesc_type(uint32_t lower)
+{
+
+	if ((lower & E1000_TXD_CMD_DEXT) == 0)
+		return (0);
+
+	return (lower & E1000_TXD_MASK);
+}
+
+/*
+ * Compute one offloaded checksum described by 'ck' over the packet in
+ * 'iov' and store its complement at byte offset ck_off of the first
+ * vector.  A ck_len of zero means "sum to the end of the packet";
+ * otherwise ck_len is treated as the inclusive end offset of the range.
+ */
+static void
+e82545_transmit_checksum(struct iovec *iov, int iovcnt, struct ck_info *ck)
+{
+	uint8_t *field;
+	uint16_t sum;
+	int span;
+
+	DPRINTF("tx cksum: iovcnt/s/off/len %d/%d/%d/%d\r\n",
+	    iovcnt, ck->ck_start, ck->ck_off, ck->ck_len);
+
+	if (ck->ck_len)
+		span = ck->ck_len - ck->ck_start + 1;
+	else
+		span = INT_MAX;
+
+	sum = e82545_iov_checksum(iov, iovcnt, ck->ck_start, span);
+
+	field = (uint8_t *)iov[0].iov_base + ck->ck_off;
+	*(uint16_t *)field = ~sum;
+}
+
+/*
+ * Hand a fully-formed packet to the tap device, if one is attached.
+ * The result of the write is deliberately ignored.
+ */
+static void
+e82545_transmit_backend(struct e82545_softc *sc, struct iovec *iov, int iovcnt)
+{
+
+	if (sc->esc_tapfd != -1)
+		(void) writev(sc->esc_tapfd, iov, iovcnt);
+}
+
+/*
+ * Complete the transmit descriptors in [head, tail) of a ring with
+ * 'dsize' entries: every descriptor that requested status reporting
+ * (RS) gets its DD bit set, and *tdwb is set to 1 if any did.
+ */
+static void
+e82545_transmit_done(struct e82545_softc *sc, uint16_t head, uint16_t tail,
+    uint16_t dsize, int *tdwb)
+{
+	union e1000_tx_udesc *d;
+
+	while (head != tail) {
+		d = &sc->esc_txdesc[head];
+		if ((d->td.lower.data & E1000_TXD_CMD_RS) != 0) {
+			d->td.upper.data |= E1000_TXD_STAT_DD;
+			*tdwb = 1;
+		}
+		head = (head + 1) % dsize;
+	}
+}
+
+/*
+ * Process one transmit unit starting at ring index 'head': either a
+ * single context descriptor (saved into sc->esc_txctx) or all the
+ * descriptors of one packet up to and including the one with EOP set.
+ *
+ * For a packet, the guest buffers are gathered into an iovec, an
+ * optional writable copy of the headers is prepended (needed for VLAN
+ * tag insertion, checksum offload and TSO), and the packet -- or, for
+ * TSO, each generated segment -- is passed to the backend.
+ *
+ * All offsets and lengths used to build the writable header come from
+ * guest-controlled descriptors, so they are validated against the
+ * actual packet length before any header byte is touched; a packet
+ * failing validation is dropped (its descriptors are still completed).
+ *
+ * Returns 0, with *rhead = head, if the ring ran out before EOP was
+ * seen (nothing is consumed).  Otherwise returns the number of
+ * descriptors consumed, sets *rhead to the next index to process and
+ * sets *tdwb if any consumed descriptor requested status write-back.
+ */
+static int
+e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
+    uint16_t dsize, uint16_t *rhead, int *tdwb)
+{
+	uint8_t *hdr, *hdrp;
+	struct iovec iovb[I82545_MAX_TXSEGS + 2];
+	struct iovec tiov[I82545_MAX_TXSEGS + 2];
+	struct e1000_context_desc *cd;
+	struct ck_info ckinfo[2];
+	struct iovec *iov;
+	union  e1000_tx_udesc *dsc;
+	int desc, dtype, len, ntype, iovcnt, tlen, hdrlen, vlen, tcp, tso;
+	int mss, paylen, seg, tiovcnt, left, now, nleft, nnow, pv, pvoff;
+	uint32_t tcpsum, tcpseq;
+	uint16_t ipcs, tcpcs, ipid, ohead;
+
+	ckinfo[0].ck_valid = ckinfo[1].ck_valid = 0;
+	iovcnt = 0;
+	tlen = 0;
+	ntype = 0;
+	tso = 0;
+	hdr = NULL;
+	ohead = head;
+
+	/* iovb[0/1] may be used for writable copy of headers. */
+	iov = &iovb[2];
+
+	for (desc = 0; ; desc++, head = (head + 1) % dsize) {
+		if (head == tail) {
+			*rhead = head;
+			return (0);
+		}
+		dsc = &sc->esc_txdesc[head];
+		dtype = e82545_txdesc_type(dsc->td.lower.data);
+
+		if (desc == 0) {
+			switch (dtype) {
+			case E1000_TXD_TYP_C:
+				DPRINTF("tx ctxt desc idx %d: %016jx "
+				    "%08x%08x\r\n",
+				    head, dsc->td.buffer_addr,
+				    dsc->td.upper.data, dsc->td.lower.data);
+				/* Save context and return */
+				sc->esc_txctx = dsc->cd;
+				goto done;
+			case E1000_TXD_TYP_L:
+				DPRINTF("tx legacy desc idx %d: %08x%08x\r\n",
+				    head, dsc->td.upper.data, dsc->td.lower.data);
+				/*
+				 * legacy cksum start valid in first descriptor
+				 */
+				ntype = dtype;
+				ckinfo[0].ck_start = dsc->td.upper.fields.css;
+				break;
+			case E1000_TXD_TYP_D:
+				DPRINTF("tx data desc idx %d: %08x%08x\r\n",
+				    head, dsc->td.upper.data, dsc->td.lower.data);
+				ntype = dtype;
+				break;
+			default:
+				break;
+			}
+		} else {
+			/* Descriptor type must be consistent */
+			assert(dtype == ntype);
+			DPRINTF("tx next desc idx %d: %08x%08x\r\n",
+			    head, dsc->td.upper.data, dsc->td.lower.data);
+		}
+
+		len = (dtype == E1000_TXD_TYP_L) ? dsc->td.lower.flags.length :
+		    dsc->dd.lower.data & 0xFFFFF;
+
+		if (len > 0) {
+			/* Strip checksum supplied by guest. */
+			if ((dsc->td.lower.data & E1000_TXD_CMD_EOP) != 0 &&
+			    (dsc->td.lower.data & E1000_TXD_CMD_IFCS) == 0) {
+				/*
+				 * Stripping must leave a positive length;
+				 * otherwise iov_len would become zero or
+				 * wrap to a huge value.
+				 */
+				if (len <= 2) {
+					WPRINTF("tx final descriptor too short "
+					    "(%d) -- dropped\r\n", len);
+					goto done;
+				}
+				len -= 2;
+			}
+			tlen += len;
+			if (iovcnt < I82545_MAX_TXSEGS) {
+				iov[iovcnt].iov_base = paddr_guest2host(
+				    sc->esc_ctx, dsc->td.buffer_addr, len);
+				iov[iovcnt].iov_len = len;
+			}
+			iovcnt++;
+		}
+
+		/*
+		 * Pull out info that is valid in the final descriptor
+		 * and exit descriptor loop.
+		 */
+		if (dsc->td.lower.data & E1000_TXD_CMD_EOP) {
+			if (dtype == E1000_TXD_TYP_L) {
+				if (dsc->td.lower.data & E1000_TXD_CMD_IC) {
+					ckinfo[0].ck_valid = 1;
+					ckinfo[0].ck_off =
+					    dsc->td.lower.flags.cso;
+					ckinfo[0].ck_len = 0;
+				}
+			} else {
+				cd = &sc->esc_txctx;
+				if (dsc->dd.lower.data & E1000_TXD_CMD_TSE)
+					tso = 1;
+				if (dsc->dd.upper.fields.popts &
+				    E1000_TXD_POPTS_IXSM)
+					ckinfo[0].ck_valid = 1;
+				if (dsc->dd.upper.fields.popts &
+				    E1000_TXD_POPTS_IXSM || tso) {
+					ckinfo[0].ck_start =
+					    cd->lower_setup.ip_fields.ipcss;
+					ckinfo[0].ck_off =
+					    cd->lower_setup.ip_fields.ipcso;
+					ckinfo[0].ck_len =
+					    cd->lower_setup.ip_fields.ipcse;
+				}
+				if (dsc->dd.upper.fields.popts &
+				    E1000_TXD_POPTS_TXSM)
+					ckinfo[1].ck_valid = 1;
+				if (dsc->dd.upper.fields.popts &
+				    E1000_TXD_POPTS_TXSM || tso) {
+					ckinfo[1].ck_start =
+					    cd->upper_setup.tcp_fields.tucss;
+					ckinfo[1].ck_off =
+					    cd->upper_setup.tcp_fields.tucso;
+					ckinfo[1].ck_len =
+					    cd->upper_setup.tcp_fields.tucse;
+				}
+			}
+			break;
+		}
+	}
+
+	if (iovcnt > I82545_MAX_TXSEGS) {
+		WPRINTF("tx too many descriptors (%d > %d) -- dropped\r\n",
+		    iovcnt, I82545_MAX_TXSEGS);
+		goto done;
+	}
+
+	hdrlen = vlen = 0;
+	/* Estimate writable space for VLAN header insertion. */
+	if ((sc->esc_CTRL & E1000_CTRL_VME) &&
+	    (dsc->td.lower.data & E1000_TXD_CMD_VLE)) {
+		hdrlen = ETHER_ADDR_LEN*2;
+		vlen = ETHER_VLAN_ENCAP_LEN;
+	}
+	if (!tso) {
+		/* Estimate required writable space for checksums. */
+		if (ckinfo[0].ck_valid)
+			hdrlen = MAX(hdrlen, ckinfo[0].ck_off + 2);
+		if (ckinfo[1].ck_valid)
+			hdrlen = MAX(hdrlen, ckinfo[1].ck_off + 2);
+		/* Round up writable space to the first vector. */
+		if (hdrlen != 0 && iovcnt > 0 && iov[0].iov_len > hdrlen &&
+		    iov[0].iov_len < hdrlen + 100)
+			hdrlen = iov[0].iov_len;
+	} else {
+		/* In case of TSO header length provided by software. */
+		hdrlen = sc->esc_txctx.tcp_seg_setup.fields.hdr_len;
+
+		/*
+		 * The segmentation code below reads and rewrites header
+		 * fields at guest-supplied offsets.  Make sure all of them
+		 * lie inside the writable header copy.  The checks are
+		 * done before VLAN insertion; that step shifts the header
+		 * length and every offset by the same amount.
+		 */
+		if (vlen != 0 && hdrlen < ETHER_ADDR_LEN*2) {
+			WPRINTF("tx TSO header too short for VLAN insertion "
+			    "(%d) -- dropped\r\n", hdrlen);
+			goto done;
+		}
+		/* IP: length/ID at start+2..5, checksum at ck_off. */
+		if (hdrlen < ckinfo[0].ck_start + 6 ||
+		    hdrlen < ckinfo[0].ck_off + 2) {
+			WPRINTF("tx TSO header too short for IP fields "
+			    "(%d) -- dropped\r\n", hdrlen);
+			goto done;
+		}
+		/* TCP: sequence at +4..7, flags at +13.  UDP: 8 byte header. */
+		if (hdrlen < ckinfo[1].ck_start +
+		    ((sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_TCP) ?
+		    14 : 8) ||
+		    (ckinfo[1].ck_valid && hdrlen < ckinfo[1].ck_off + 2)) {
+			WPRINTF("tx TSO header too short for TCP/UDP fields "
+			    "(%d) -- dropped\r\n", hdrlen);
+			goto done;
+		}
+	}
+
+	/*
+	 * The header copy below consumes hdrlen bytes from the gathered
+	 * vectors; refuse packets that do not contain that many bytes
+	 * instead of walking past the end of the iovec array.
+	 */
+	if (tlen < hdrlen) {
+		WPRINTF("tx packet shorter than header (%d < %d) -- dropped\r\n",
+		    tlen, hdrlen);
+		goto done;
+	}
+
+	/* Allocate, fill and prepend writable header vector. */
+	if (hdrlen != 0) {
+		hdr = __builtin_alloca(hdrlen + vlen);
+		hdr += vlen;
+		for (left = hdrlen, hdrp = hdr; left > 0;
+		    left -= now, hdrp += now) {
+			now = MIN(left, iov->iov_len);
+			memcpy(hdrp, iov->iov_base, now);
+			iov->iov_base += now;
+			iov->iov_len -= now;
+			if (iov->iov_len == 0) {
+				iov++;
+				iovcnt--;
+			}
+		}
+		iov--;
+		iovcnt++;
+		iov->iov_base = hdr;
+		iov->iov_len = hdrlen;
+	}
+
+	/* Insert VLAN tag. */
+	if (vlen != 0) {
+		hdr -= ETHER_VLAN_ENCAP_LEN;
+		memmove(hdr, hdr + ETHER_VLAN_ENCAP_LEN, ETHER_ADDR_LEN*2);
+		hdrlen += ETHER_VLAN_ENCAP_LEN;
+		hdr[ETHER_ADDR_LEN*2 + 0] = sc->esc_VET >> 8;
+		hdr[ETHER_ADDR_LEN*2 + 1] = sc->esc_VET & 0xff;
+		hdr[ETHER_ADDR_LEN*2 + 2] = dsc->td.upper.fields.special >> 8;
+		hdr[ETHER_ADDR_LEN*2 + 3] = dsc->td.upper.fields.special & 0xff;
+		iov->iov_base = hdr;
+		iov->iov_len += ETHER_VLAN_ENCAP_LEN;
+		/* Correct checksum offsets after VLAN tag insertion. */
+		ckinfo[0].ck_start += ETHER_VLAN_ENCAP_LEN;
+		ckinfo[0].ck_off += ETHER_VLAN_ENCAP_LEN;
+		if (ckinfo[0].ck_len != 0)
+			ckinfo[0].ck_len += ETHER_VLAN_ENCAP_LEN;
+		ckinfo[1].ck_start += ETHER_VLAN_ENCAP_LEN;
+		ckinfo[1].ck_off += ETHER_VLAN_ENCAP_LEN;
+		if (ckinfo[1].ck_len != 0)
+			ckinfo[1].ck_len += ETHER_VLAN_ENCAP_LEN;
+	}
+
+	/* Simple non-TSO case. */
+	if (!tso) {
+		/* Calculate checksums and transmit. */
+		if (ckinfo[0].ck_valid)
+			e82545_transmit_checksum(iov, iovcnt, &ckinfo[0]);
+		if (ckinfo[1].ck_valid)
+			e82545_transmit_checksum(iov, iovcnt, &ckinfo[1]);
+		e82545_transmit_backend(sc, iov, iovcnt);
+		goto done;
+	}
+
+	/* Doing TSO. */
+	tcp = (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_TCP) != 0;
+	mss = sc->esc_txctx.tcp_seg_setup.fields.mss;
+	paylen = (sc->esc_txctx.cmd_and_length & 0x000fffff);
+	DPRINTF("tx %s segmentation offload %d+%d/%d bytes %d iovs\r\n",
+	    tcp ? "TCP" : "UDP", hdrlen, paylen, mss, iovcnt);
+	/* A zero MSS would never consume payload: the loop would not end. */
+	if (mss == 0) {
+		WPRINTF("tx TSO with zero MSS (payload %d) -- dropped\r\n",
+		    paylen);
+		goto done;
+	}
+	ipid = ntohs(*(uint16_t *)&hdr[ckinfo[0].ck_start + 4]);
+	tcpseq = ntohl(*(uint32_t *)&hdr[ckinfo[1].ck_start + 4]);
+	ipcs = *(uint16_t *)&hdr[ckinfo[0].ck_off];
+	tcpcs = 0;
+	if (ckinfo[1].ck_valid)	/* Save partial pseudo-header checksum. */
+		tcpcs = *(uint16_t *)&hdr[ckinfo[1].ck_off];
+	/* iov[0] is the header copy; payload starts at iov[1]. */
+	pv = 1;
+	pvoff = 0;
+	for (seg = 0, left = paylen; left > 0; seg++, left -= now) {
+		now = MIN(left, mss);
+
+		/* Construct IOVs for the segment. */
+		/* Include whole original header. */
+		tiov[0].iov_base = hdr;
+		tiov[0].iov_len = hdrlen;
+		tiovcnt = 1;
+		/* Include respective part of payload IOV. */
+		for (nleft = now; pv < iovcnt && nleft > 0; nleft -= nnow) {
+			nnow = MIN(nleft, iov[pv].iov_len - pvoff);
+			tiov[tiovcnt].iov_base = iov[pv].iov_base + pvoff;
+			tiov[tiovcnt++].iov_len = nnow;
+			if (pvoff + nnow == iov[pv].iov_len) {
+				pv++;
+				pvoff = 0;
+			} else
+				pvoff += nnow;
+		}
+		DPRINTF("tx segment %d %d+%d bytes %d iovs\r\n",
+		    seg, hdrlen, now, tiovcnt);
+
+		/* Update IP header. */
+		if (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_IP) {
+			/* IPv4 -- set length and ID */
+			*(uint16_t *)&hdr[ckinfo[0].ck_start + 2] =
+			    htons(hdrlen - ckinfo[0].ck_start + now);
+			*(uint16_t *)&hdr[ckinfo[0].ck_start + 4] =
+			    htons(ipid + seg);
+		} else {
+			/* IPv6 -- set length */
+			*(uint16_t *)&hdr[ckinfo[0].ck_start + 4] =
+			    htons(hdrlen - ckinfo[0].ck_start - 40 +
+				  now);
+		}
+
+		/* Update pseudo-header checksum. */
+		tcpsum = tcpcs;
+		tcpsum += htons(hdrlen - ckinfo[1].ck_start + now);
+
+		/* Update TCP/UDP headers. */
+		if (tcp) {
+			/* Update sequence number and FIN/PUSH flags. */
+			*(uint32_t *)&hdr[ckinfo[1].ck_start + 4] =
+			    htonl(tcpseq + paylen - left);
+			if (now < left) {
+				hdr[ckinfo[1].ck_start + 13] &=
+				    ~(TH_FIN | TH_PUSH);
+			}
+		} else {
+			/*
+			 * Update the UDP length: a 16-bit field in network
+			 * byte order at offset 4 of the UDP header.  (A
+			 * 32-bit host-order store here would also clobber
+			 * the checksum field that follows it.)
+			 */
+			*(uint16_t *)&hdr[ckinfo[1].ck_start + 4] =
+			    htons(hdrlen - ckinfo[1].ck_start + now);
+		}
+
+		/* Calculate checksums and transmit. */
+		if (ckinfo[0].ck_valid) {
+			*(uint16_t *)&hdr[ckinfo[0].ck_off] = ipcs;
+			e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[0]);
+		}
+		if (ckinfo[1].ck_valid) {
+			*(uint16_t *)&hdr[ckinfo[1].ck_off] =
+			    e82545_carry(tcpsum);
+			e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[1]);
+		}
+		e82545_transmit_backend(sc, tiov, tiovcnt);
+	}
+
+done:
+	/* Step past the last descriptor examined and complete the batch. */
+	head = (head + 1) % dsize;
+	e82545_transmit_done(sc, ohead, head, dsize, tdwb);
+
+	*rhead = head;
+	return (desc + 1);
+}
+
+/*
+ * Process a bounded batch of transmit descriptors (at most a quarter
+ * of the ring) and post the resulting interrupt causes.
+ *
+ * Called with the softc mutex held; the mutex is released while
+ * descriptors are processed and re-acquired before the head pointers
+ * and interrupt state are updated.
+ */
+static void
+e82545_tx_run(struct e82545_softc *sc)
+{
+	uint32_t cause;
+	uint16_t head, rhead, tail, size;
+	int budget, lim, tdwb, sent;
+
+	head = sc->esc_TDH;
+	tail = sc->esc_TDT;
+	size = sc->esc_TDLEN / 16;
+	DPRINTF("tx_run: head %x, rhead %x, tail %x\r\n",
+	    sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT);
+
+	pthread_mutex_unlock(&sc->esc_mtx);
+	rhead = head;
+	tdwb = 0;
+	budget = size / 4;
+	lim = budget;
+	while (sc->esc_tx_enabled && lim > 0) {
+		sent = e82545_transmit(sc, head, tail, size, &rhead, &tdwb);
+		if (sent == 0)
+			break;
+		head = rhead;
+		lim -= sent;
+	}
+	pthread_mutex_lock(&sc->esc_mtx);
+
+	sc->esc_TDH = head;
+	sc->esc_TDHr = rhead;
+
+	cause = 0;
+	/* Some descriptor requested a status write-back. */
+	if (tdwb)
+		cause |= E1000_ICR_TXDW;
+	/* Made progress and the queue is now empty. */
+	if (lim != budget && sc->esc_TDH == sc->esc_TDT)
+		cause |= E1000_ICR_TXQE;
+	if (cause)
+		e82545_icr_assert(sc, cause);
+
+	DPRINTF("tx_run done: head %x, rhead %x, tail %x\r\n",
+	    sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT);
+}
+
+/*
+ * Transmit worker thread body ('param' is the softc).  Never returns.
+ *
+ * Sleeps on esc_tx_cond while the transmitter is disabled or the ring
+ * has nothing to process, marking itself inactive first and signalling
+ * the condition when disabled so that a waiter in e82545_tx_disable()
+ * can proceed.  Otherwise it marks itself active and processes a batch
+ * of descriptors.
+ */
+static void *
+e82545_tx_thread(void *param)
+{
+	struct e82545_softc *sc;
+
+	sc = param;
+	pthread_mutex_lock(&sc->esc_mtx);
+	for (;;) {
+		while (!sc->esc_tx_enabled || sc->esc_TDHr == sc->esc_TDT) {
+			sc->esc_tx_active = 0;
+			if (!sc->esc_tx_enabled)
+				pthread_cond_signal(&sc->esc_tx_cond);
+			pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx);
+		}
+		sc->esc_tx_active = 1;
+
+		/* Process some tx descriptors.  Lock dropped inside. */
+		e82545_tx_run(sc);
+	}
+}
+
+/*
+ * Kick the transmit thread: signal esc_tx_cond unless the thread is
+ * already marked active.
+ */
+static void
+e82545_tx_start(struct e82545_softc *sc)
+{
+
+	if (sc->esc_tx_active != 0)
+		return;
+
+	pthread_cond_signal(&sc->esc_tx_cond);
+}
+
+/* Mark the transmitter as enabled. */
+static void
+e82545_tx_enable(struct e82545_softc *sc)
+{
+
+	sc->esc_tx_enabled = 1;
+}
+
+/*
+ * Mark the transmitter as disabled and wait on esc_tx_cond (releasing
+ * esc_mtx while asleep) until the transmit thread is no longer active.
+ */
+static void
+e82545_tx_disable(struct e82545_softc *sc)
+{
+
+	sc->esc_tx_enabled = 0;
+	while (sc->esc_tx_active != 0) {
+		pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx);
+	}
+}
+
+/* Mark the receiver as enabled. */
+static void
+e82545_rx_enable(struct e82545_softc *sc)
+{
+
+	sc->esc_rx_enabled = 1;
+}
+
+/*
+ * Mark the receiver as disabled and wait on esc_rx_cond (releasing
+ * esc_mtx while asleep) until the receive path is no longer active.
+ */
+static void
+e82545_rx_disable(struct e82545_softc *sc)
+{
+
+	sc->esc_rx_enabled = 0;
+	while (sc->esc_rx_active != 0) {
+		pthread_cond_wait(&sc->esc_rx_cond, &sc->esc_mtx);
+	}
+}
+
+/*
+ * Handle a write to the receive address table.
+ *
+ * 'reg' is the 32-bit register index relative to RAL(0): even indices
+ * are RAL (address bytes 0-3), odd indices are RAH (address bytes 4-5,
+ * address select and the valid bit).
+ *
+ * The register dispatch forwards indices for entries 0..15, but only
+ * entries 0..14 were ever accepted here (the original code asserted
+ * idx < 15, letting a guest abort the process by touching entry 15).
+ * Out-of-range accesses are now logged and ignored.
+ * NOTE(review): the limit is kept at 15 because the size of esc_uni[]
+ * is not visible from here -- confirm before raising it to 16.
+ */
+static void
+e82545_write_ra(struct e82545_softc *sc, int reg, uint32_t wval)
+{
+	struct eth_uni *eu;
+	int idx;
+
+	idx = reg >> 1;
+	if (idx < 0 || idx >= 15) {
+		DPRINTF("receive address write out of range: reg %d\r\n", reg);
+		return;
+	}
+
+	eu = &sc->esc_uni[idx];
+
+	if (reg & 0x1) {
+		/* RAH */
+		eu->eu_valid = ((wval & E1000_RAH_AV) == E1000_RAH_AV);
+		eu->eu_addrsel = (wval >> 16) & 0x3;
+		eu->eu_eth.octet[5] = wval >> 8;
+		eu->eu_eth.octet[4] = wval;
+	} else {
+		/* RAL */
+		eu->eu_eth.octet[3] = wval >> 24;
+		eu->eu_eth.octet[2] = wval >> 16;
+		eu->eu_eth.octet[1] = wval >> 8;
+		eu->eu_eth.octet[0] = wval;
+	}
+}
+
+/*
+ * Handle a read from the receive address table and return the register
+ * value.
+ *
+ * 'reg' is the 32-bit register index relative to RAL(0): even indices
+ * are RAL, odd indices are RAH.  Accesses beyond entry 14 are logged
+ * and read as zero instead of aborting the process (see the matching
+ * limit in e82545_write_ra()).
+ *
+ * Fields are widened to uint32_t before shifting so that values with
+ * the top bit set are not shifted into the sign bit of an int.
+ */
+static uint32_t
+e82545_read_ra(struct e82545_softc *sc, int reg)
+{
+	struct eth_uni *eu;
+	uint32_t retval;
+	int idx;
+
+	idx = reg >> 1;
+	if (idx < 0 || idx >= 15) {
+		DPRINTF("receive address read out of range: reg %d\r\n", reg);
+		return (0);
+	}
+
+	eu = &sc->esc_uni[idx];
+
+	if (reg & 0x1) {
+		/* RAH */
+		retval = ((uint32_t)eu->eu_valid << 31) |
+			 ((uint32_t)eu->eu_addrsel << 16) |
+			 ((uint32_t)eu->eu_eth.octet[5] << 8) |
+			 (uint32_t)eu->eu_eth.octet[4];
+	} else {
+		/* RAL */
+		retval = ((uint32_t)eu->eu_eth.octet[3] << 24) |
+			 ((uint32_t)eu->eu_eth.octet[2] << 16) |
+			 ((uint32_t)eu->eu_eth.octet[1] << 8) |
+			 (uint32_t)eu->eu_eth.octet[0];
+	}
+
+	return (retval);
+}
+
+/*
+ * Dispatch a guest write of 'value' to the device register at byte
+ * 'offset'.
+ *
+ * Writes to offsets that are not 4-byte aligned are logged and
+ * ignored, as are writes to registers not handled below.  Most
+ * registers are simply stored after masking off bits the emulation
+ * does not keep; control, interrupt, ring, EEPROM (EECD) and MDI
+ * (MDIC) registers trigger the corresponding emulation logic.
+ *
+ * NOTE(review): the RDBAH and RDLEN cases assert that the receiver is
+ * disabled, so a guest writing them while RCTL.EN is set aborts the
+ * process; the equivalent transmit-side asserts are commented out.
+ */
+static void
+e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value)
+{
+	int ridx;
+	
+	if (offset & 0x3) {
+		DPRINTF("Unaligned register write offset:0x%x value:0x%x\r\n", offset, value);
+		return;
+	}
+	DPRINTF("Register write: 0x%x value: 0x%x\r\n", offset, value);
+
+	switch (offset) {
+	case E1000_CTRL:
+	case E1000_CTRL_DUP:
+		e82545_devctl(sc, value);
+		break;
+	case E1000_FCAL:
+		sc->esc_FCAL = value;
+		break;
+	case E1000_FCAH:
+		sc->esc_FCAH = value & ~0xFFFF0000;
+		break;
+	case E1000_FCT:
+		sc->esc_FCT = value & ~0xFFFF0000;
+		break;
+	case E1000_VET:
+		sc->esc_VET = value & ~0xFFFF0000;
+		break;
+	case E1000_FCTTV:
+		sc->esc_FCTTV = value & ~0xFFFF0000;
+		break;
+	case E1000_LEDCTL:
+		sc->esc_LEDCTL = value & ~0x30303000;
+		break;
+	case E1000_PBA:
+		sc->esc_PBA = value & 0x0000FF80;
+		break;
+	/* Interrupt registers share one handler. */
+	case E1000_ICR:
+	case E1000_ITR:
+	case E1000_ICS:
+	case E1000_IMS:
+	case E1000_IMC:
+		e82545_intr_write(sc, offset, value);
+		break;
+	case E1000_RCTL:
+		e82545_rx_ctl(sc, value);
+		break;
+	case E1000_FCRTL:
+		sc->esc_FCRTL = value & ~0xFFFF0007;
+		break;
+	case E1000_FCRTH:
+		sc->esc_FCRTH = value & ~0xFFFF0007;
+		break;
+	case E1000_RDBAL(0):
+		/* The low four bits of the ring base are not stored. */
+		sc->esc_RDBAL = value & ~0xF;
+		if (sc->esc_rx_enabled) {
+			/* Apparently legal: update cached address */
+			e82545_rx_update_rdba(sc);
+		}
+		break;
+	case E1000_RDBAH(0):
+		assert(!sc->esc_rx_enabled);
+		sc->esc_RDBAH = value;
+		break;
+	case E1000_RDLEN(0):
+		assert(!sc->esc_rx_enabled);
+		sc->esc_RDLEN = value & ~0xFFF0007F;
+		break;
+	case E1000_RDH(0):
+		/* XXX should only ever be zero ? Range check ? */
+		sc->esc_RDH = value;
+		break;
+	case E1000_RDT(0):
+		/* XXX if this opens up the rx ring, do something ? */
+		sc->esc_RDT = value;
+		break;
+	case E1000_RDTR:
+		/* ignore FPD bit 31 */
+		sc->esc_RDTR = value & ~0xFFFF0000;
+		break;
+	case E1000_RXDCTL(0):
+		sc->esc_RXDCTL = value & ~0xFEC0C0C0;
+		break;
+	case E1000_RADV:
+		sc->esc_RADV = value & ~0xFFFF0000;
+		break;
+	case E1000_RSRPD:
+		sc->esc_RSRPD = value & ~0xFFFFF000;
+		break;
+	case E1000_RXCSUM:
+		sc->esc_RXCSUM = value & ~0xFFFFF800;
+		break;
+	case E1000_TXCW:
+		sc->esc_TXCW = value & ~0x3FFF0000;
+		break;
+	case E1000_TCTL:
+		e82545_tx_ctl(sc, value);
+		break;
+	case E1000_TIPG:
+		sc->esc_TIPG = value;
+		break;
+	case E1000_AIT:
+		sc->esc_AIT = value;
+		break;
+	case E1000_TDBAL(0):
+		/* The low four bits of the ring base are not stored. */
+		sc->esc_TDBAL = value & ~0xF;
+		if (sc->esc_tx_enabled) {
+			/* Apparently legal */
+			e82545_tx_update_tdba(sc);
+		}
+		break;
+	case E1000_TDBAH(0):
+		//assert(!sc->esc_tx_enabled);		
+		sc->esc_TDBAH = value;
+		break;
+	case E1000_TDLEN(0):
+		//assert(!sc->esc_tx_enabled);
+		sc->esc_TDLEN = value & ~0xFFF0007F;
+		break;
+	case E1000_TDH(0):
+		//assert(!sc->esc_tx_enabled);
+		/* XXX should only ever be zero ? Range check ? */
+		sc->esc_TDHr = sc->esc_TDH = value;
+		break;
+	case E1000_TDT(0):
+		/* XXX range check ? */
+		sc->esc_TDT = value;
+		/* Moving the tail may have queued work: kick the tx thread. */
+		if (sc->esc_tx_enabled)
+			e82545_tx_start(sc);
+		break;
+	case E1000_TIDV:
+		sc->esc_TIDV = value & ~0xFFFF0000;
+		break;
+	case E1000_TXDCTL(0):
+		//assert(!sc->esc_tx_enabled);
+		sc->esc_TXDCTL = value & ~0xC0C0C0;
+		break;
+	case E1000_TADV:
+		sc->esc_TADV = value & ~0xFFFF0000;
+		break;
+	case E1000_RAL(0) ... E1000_RAH(15):
+		/* convert to u32 offset */
+		ridx = (offset - E1000_RAL(0)) >> 2;
+		e82545_write_ra(sc, ridx, value);
+		break;
+	/* Multicast and VLAN filter tables: 128 dwords each. */
+	case E1000_MTA ... (E1000_MTA + (127*4)):
+		sc->esc_fmcast[(offset - E1000_MTA) >> 2] = value;
+		break;
+	case E1000_VFTA ... (E1000_VFTA + (127*4)):
+		sc->esc_fvlan[(offset - E1000_VFTA) >> 2] = value;
+		break;		
+	case E1000_EECD:
+	{
+		//DPRINTF("EECD write 0x%x -> 0x%x\r\n", sc->eeprom_control, value);
+		/* edge triggered low->high */
+		uint32_t eecd_strobe = ((sc->eeprom_control & E1000_EECD_SK) ?
+			0 : (value & E1000_EECD_SK));
+		/* Only these bits are taken from the guest's value. */
+		uint32_t eecd_mask = (E1000_EECD_SK|E1000_EECD_CS|
+					E1000_EECD_DI|E1000_EECD_REQ);
+		sc->eeprom_control &= ~eecd_mask;
+		sc->eeprom_control |= (value & eecd_mask);
+		/* grant/revoke immediately */
+		if (value & E1000_EECD_REQ) {
+			sc->eeprom_control |= E1000_EECD_GNT;
+		} else {
+                        sc->eeprom_control &= ~E1000_EECD_GNT;
+		}
+		/* Clock the EEPROM state machine on a rising SK with CS set. */
+		if (eecd_strobe && (sc->eeprom_control & E1000_EECD_CS)) {
+			e82545_eecd_strobe(sc);
+		}
+		return;
+	}
+	case E1000_MDIC:
+	{
+		uint8_t reg_addr = (uint8_t)((value & E1000_MDIC_REG_MASK) >>
+						E1000_MDIC_REG_SHIFT);
+		uint8_t phy_addr = (uint8_t)((value & E1000_MDIC_PHY_MASK) >>
+						E1000_MDIC_PHY_SHIFT);
+		sc->mdi_control =
+			(value & ~(E1000_MDIC_ERROR|E1000_MDIC_DEST));
+		/* A command written with READY already set is not executed. */
+		if ((value & E1000_MDIC_READY) != 0) {
+			DPRINTF("Incorrect MDIC ready bit: 0x%x\r\n", value);
+			return;
+		}
+		switch (value & E82545_MDIC_OP_MASK) {
+		case E1000_MDIC_OP_READ:
+			sc->mdi_control &= ~E82545_MDIC_DATA_MASK;
+			sc->mdi_control |= e82545_read_mdi(sc, reg_addr, phy_addr);
+			break;
+		case E1000_MDIC_OP_WRITE:
+			e82545_write_mdi(sc, reg_addr, phy_addr,
+				value & E82545_MDIC_DATA_MASK);
+			break;
+		default:
+			DPRINTF("Unknown MDIC op: 0x%x\r\n", value);
+			return;
+		}
+		/* TODO: barrier? */
+		/* The operation completes immediately. */
+		sc->mdi_control |= E1000_MDIC_READY;
+		if (value & E82545_MDIC_IE) {
+			// TODO: generate interrupt
+		}
+		return;
+	}
+	/* Writes to these registers are accepted and discarded. */
+	case E1000_MANC:
+	case E1000_STATUS: 
+		return;
+	default:
+		DPRINTF("Unknown write register: 0x%x value:%x\r\n", offset, value);
+		return;
+	}
+}
+
+/*
+ * Return the current value of the 32-bit device register at 'offset'.
+ *
+ * Offsets that are not a multiple of 4, and offsets not handled by the
+ * switch below, read as 0 (with a debug message).  Most registers are
+ * returned straight from the copy kept in the softc; the interrupt
+ * registers are delegated to e82545_intr_read() and the receive-address
+ * array to e82545_read_ra().
+ *
+ * e82545_read() calls this with sc->esc_mtx held.
+ */
+static uint32_t
+e82545_read_register(struct e82545_softc *sc, uint32_t offset)
+{
+	uint32_t retval;
+	int ridx;
+	
+	if (offset & 0x3) {
+		DPRINTF("Unaligned register read offset:0x%x\r\n", offset);
+		return 0;
+	}
+		
+	DPRINTF("Register read: 0x%x\r\n", offset);
+
+	switch (offset) {
+	case E1000_CTRL:
+		retval = sc->esc_CTRL;
+		break;
+	case E1000_STATUS:
+		/* fixed value; not derived from any softc state */
+		retval = E1000_STATUS_FD | E1000_STATUS_LU |
+		    E1000_STATUS_SPEED_1000;
+		break;
+	case E1000_FCAL:
+		retval = sc->esc_FCAL;
+		break;
+	case E1000_FCAH:
+		retval = sc->esc_FCAH;
+		break;
+	case E1000_FCT:
+		retval = sc->esc_FCT;
+		break;
+	case E1000_VET:
+		retval = sc->esc_VET;
+		break;
+	case E1000_FCTTV:
+		retval = sc->esc_FCTTV;
+		break;
+	case E1000_LEDCTL:
+		retval = sc->esc_LEDCTL;
+		break;
+	case E1000_PBA:
+		retval = sc->esc_PBA;
+		break;
+	/* all interrupt registers share one read helper */
+	case E1000_ICR:
+	case E1000_ITR:
+	case E1000_ICS:
+	case E1000_IMS:
+	case E1000_IMC:
+		retval = e82545_intr_read(sc, offset);
+		break;
+	case E1000_RCTL:
+		retval = sc->esc_RCTL;
+		break;
+	case E1000_FCRTL:
+		retval = sc->esc_FCRTL;
+		break;
+	case E1000_FCRTH:
+		retval = sc->esc_FCRTH;
+		break;
+	case E1000_RDBAL(0):
+		retval = sc->esc_RDBAL;
+		break;
+	case E1000_RDBAH(0):
+		retval = sc->esc_RDBAH;
+		break;
+	case E1000_RDLEN(0):
+		retval = sc->esc_RDLEN;
+		break;
+	case E1000_RDH(0):
+		retval = sc->esc_RDH;
+		break;
+	case E1000_RDT(0):
+		retval = sc->esc_RDT;
+		break;
+	case E1000_RDTR:
+		retval = sc->esc_RDTR;
+		break;
+	case E1000_RXDCTL(0):
+		retval = sc->esc_RXDCTL;
+		break;
+	case E1000_RADV:
+		retval = sc->esc_RADV;
+		break;
+	case E1000_RSRPD:
+		retval = sc->esc_RSRPD;
+		break;
+	case E1000_RXCSUM:	       
+		retval = sc->esc_RXCSUM;
+		break;
+	case E1000_TXCW:
+		retval = sc->esc_TXCW;
+		break;
+	case E1000_TCTL:
+		retval = sc->esc_TCTL;
+		break;
+	case E1000_TIPG:
+		retval = sc->esc_TIPG;
+		break;
+	case E1000_AIT:
+		retval = sc->esc_AIT;
+		break;
+	case E1000_TDBAL(0):
+		retval = sc->esc_TDBAL;
+		break;
+	case E1000_TDBAH(0):
+		retval = sc->esc_TDBAH;
+		break;
+	case E1000_TDLEN(0):
+		retval = sc->esc_TDLEN;
+		break;
+	case E1000_TDH(0):
+		retval = sc->esc_TDH;
+		break;
+	case E1000_TDT(0):
+		retval = sc->esc_TDT;
+		break;
+	case E1000_TIDV:
+		retval = sc->esc_TIDV;
+		break;
+	case E1000_TXDCTL(0):
+		retval = sc->esc_TXDCTL;
+		break;
+	case E1000_TADV:
+		retval = sc->esc_TADV;
+		break;
+	/* case ranges ("lo ... hi") are a GNU C extension */
+	case E1000_RAL(0) ... E1000_RAH(15):
+		/* convert to u32 offset */
+		ridx = (offset - E1000_RAL(0)) >> 2;
+		retval = e82545_read_ra(sc, ridx);
+		break;
+	case E1000_MTA ... (E1000_MTA + (127*4)):
+		/* 128 dwords, indexed by dword offset into the table */
+		retval = sc->esc_fmcast[(offset - E1000_MTA) >> 2];
+		break;
+	case E1000_VFTA ... (E1000_VFTA + (127*4)):
+		/* 128 dwords, indexed by dword offset into the table */
+		retval = sc->esc_fvlan[(offset - E1000_VFTA) >> 2];
+		break;		
+	case E1000_EECD:
+		//DPRINTF("EECD read %x\r\n", sc->eeprom_control);
+		retval = sc->eeprom_control;
+		break;
+	case E1000_MDIC:
+		retval = sc->mdi_control;
+		break;
+	case E1000_MANC:
+		retval = 0;
+		break;
+	/* stats that we emulate. */
+	case E1000_MPC:
+		retval = sc->missed_pkt_count;
+		break;
+	case E1000_PRC64:
+		retval = sc->pkt_rx_by_size[0];
+		break;
+	case E1000_PRC127:
+		retval = sc->pkt_rx_by_size[1];
+		break;
+	case E1000_PRC255:
+		retval = sc->pkt_rx_by_size[2];
+		break;
+	case E1000_PRC511:
+		retval = sc->pkt_rx_by_size[3];
+		break;
+	case E1000_PRC1023:
+		retval = sc->pkt_rx_by_size[4];
+		break;
+	case E1000_PRC1522:
+		retval = sc->pkt_rx_by_size[5];
+		break;
+	case E1000_GPRC:
+		retval = sc->good_pkt_rx_count;
+		break;
+	case E1000_BPRC:
+		retval = sc->bcast_pkt_rx_count;
+		break;
+	case E1000_MPRC:
+		retval = sc->mcast_pkt_rx_count;
+		break;
+	case E1000_GPTC:
+	case E1000_TPT:
+		retval = sc->good_pkt_tx_count;
+		break;
+	/* 64-bit octet counters are exposed as low/high 32-bit halves */
+	case E1000_GORCL:
+		retval = (uint32_t)sc->good_octets_rx;
+		break;
+	case E1000_GORCH:
+		retval = (uint32_t)(sc->good_octets_rx >> 32);
+		break;
+	case E1000_TOTL:
+	case E1000_GOTCL:
+		retval = (uint32_t)sc->good_octets_tx;
+		break;
+	case E1000_TOTH:
+	case E1000_GOTCH:
+		retval = (uint32_t)(sc->good_octets_tx >> 32);
+		break;
+	case E1000_ROC:
+		retval = sc->oversize_rx_count;
+		break;
+	case E1000_TORL:
+		retval = (uint32_t)(sc->good_octets_rx + sc->missed_octets);
+		break;
+	case E1000_TORH:
+		retval = (uint32_t)((sc->good_octets_rx +
+		    sc->missed_octets) >> 32);
+		break;
+	case E1000_TPR:
+		retval = sc->good_pkt_rx_count + sc->missed_pkt_count +
+		    sc->oversize_rx_count;
+		break;
+	case E1000_PTC64:
+		retval = sc->pkt_tx_by_size[0];
+		break;
+	case E1000_PTC127:
+		retval = sc->pkt_tx_by_size[1];
+		break;
+	case E1000_PTC255:
+		retval = sc->pkt_tx_by_size[2];
+		break;
+	case E1000_PTC511:
+		retval = sc->pkt_tx_by_size[3];
+		break;
+	case E1000_PTC1023:
+		retval = sc->pkt_tx_by_size[4];
+		break;
+	case E1000_PTC1522:
+		retval = sc->pkt_tx_by_size[5];
+		break;
+	case E1000_MPTC:
+		retval = sc->mcast_pkt_tx_count;
+		break;
+	case E1000_BPTC:
+		retval = sc->bcast_pkt_tx_count;
+		break;
+	case E1000_TSCTC:
+		retval = sc->tso_tx_count;
+		break;
+	/* stats that are always 0. */
+	case E1000_CRCERRS:
+	case E1000_ALGNERRC:
+	case E1000_SYMERRS:
+	case E1000_RXERRC:
+	case E1000_SCC:
+	case E1000_ECOL:
+	case E1000_MCC:
+	case E1000_LATECOL:
+	case E1000_COLC:
+	case E1000_DC:
+	case E1000_TNCRS:
+	case E1000_SEC:
+	case E1000_CEXTERR:
+	case E1000_RLEC:
+	case E1000_XONRXC:
+	case E1000_XONTXC:
+	case E1000_XOFFRXC:
+	case E1000_XOFFTXC:
+	case E1000_FCRUC:
+	case E1000_RNBC:
+	case E1000_RUC:
+	case E1000_RFC:
+	case E1000_RJC:
+	case E1000_MGTPRC:
+	case E1000_MGTPDC:
+	case E1000_MGTPTC:
+	case E1000_TSCTFC:
+		retval = 0;
+		break;
+	default:
+		/* unhandled registers read as 0 rather than failing */
+		DPRINTF("Unknown read register: 0x%x\r\n", offset);
+		retval = 0;
+		break;
+	}
+
+	return (retval);
+}
+
+/*
+ * BAR write callback for the device.
+ *
+ * Takes sc->esc_mtx for the duration of the access.  Only 4-byte writes
+ * are acted upon:
+ *  - register BAR: forwarded to e82545_write_register() at 'offset';
+ *  - I/O BAR, IOADDR: latches the register offset in sc->io_addr;
+ *  - I/O BAR, IODATA: forwarded to e82545_write_register() at the
+ *    latched sc->io_addr, provided it does not exceed
+ *    E82545_IO_REGISTER_MAX.
+ * Everything else is dropped with a debug message.
+ */
+static void
+e82545_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
+	     uint64_t offset, int size, uint64_t value)
+{
+	struct e82545_softc *sc = pi->pi_arg;
+
+	//DPRINTF("Write bar:%d offset:0x%lx value:0x%lx size:%d\r\n", baridx, offset, value, size);
+
+	pthread_mutex_lock(&sc->esc_mtx);
+
+	if (baridx == E82545_BAR_REGISTER) {
+		/* memory-mapped register window */
+		if (size == 4) {
+			e82545_write_register(sc, (uint32_t)offset,
+					      (uint32_t)value);
+		} else {
+			DPRINTF("Wrong register write size:%d offset:0x%lx value:0x%lx\r\n", size, offset, value);
+		}
+	} else if (baridx != E82545_BAR_IO) {
+		DPRINTF("Unknown write bar:%d off:0x%lx val:0x%lx size:%d\r\n",
+			baridx, offset, value, size);
+	} else if (offset == E82545_IOADDR) {
+		/* select the register a later IODATA access will hit */
+		if (size == 4) {
+			sc->io_addr = (uint32_t)value;
+		} else {
+			DPRINTF("Wrong io addr write sz:%d value:0x%lx\r\n", size, value);
+		}
+	} else if (offset == E82545_IODATA) {
+		/* indirect write to the previously selected register */
+		if (size != 4) {
+			DPRINTF("Wrong io data write size:%d value:0x%lx\r\n", size, value);
+		} else if (sc->io_addr <= E82545_IO_REGISTER_MAX) {
+			e82545_write_register(sc, sc->io_addr,
+					      (uint32_t)value);
+		} else {
+			DPRINTF("Non-register io write addr:0x%x value:0x%lx\r\n", sc->io_addr, value);
+		}
+	} else {
+		DPRINTF("Unknown io bar write offset:0x%lx value:0x%lx size:%d\r\n", offset, value, size);
+	}
+
+	pthread_mutex_unlock(&sc->esc_mtx);
+}
+
+/*
+ * BAR read callback for the device.
+ *
+ * Takes sc->esc_mtx for the duration of the access and returns 0 for any
+ * access that is not serviced.  Only 4-byte reads are serviced:
+ *  - register BAR: e82545_read_register() at 'offset';
+ *  - I/O BAR, IOADDR: the latched sc->io_addr;
+ *  - I/O BAR, IODATA: e82545_read_register() at the latched sc->io_addr,
+ *    provided it does not exceed E82545_IO_REGISTER_MAX.
+ */
+static uint64_t
+e82545_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
+	    uint64_t offset, int size)
+{
+	struct e82545_softc *sc;
+	uint64_t retval;
+
+	//DPRINTF("Read  bar:%d offset:0x%lx size:%d\r\n", baridx, offset, size);
+	sc = pi->pi_arg;
+	retval = 0;
+
+	pthread_mutex_lock(&sc->esc_mtx);
+
+	switch (baridx) {
+	case E82545_BAR_IO:
+		switch (offset) {
+		case E82545_IOADDR:
+			if (size != 4) {
+				DPRINTF("Wrong io addr read sz:%d\r\n", size);
+			} else
+				retval = sc->io_addr;
+			break;
+		case E82545_IODATA:
+			/*
+			 * A wrongly sized access must not reach the register
+			 * read (which goes through e82545_intr_read() for the
+			 * interrupt registers); this mirrors the write path
+			 * in e82545_write().
+			 */
+			if (size != 4) {
+				DPRINTF("Wrong io data read sz:%d\r\n", size);
+			} else if (sc->io_addr > E82545_IO_REGISTER_MAX) {
+				DPRINTF("Non-register io read addr:0x%x\r\n",
+					sc->io_addr);
+			} else
+				retval = e82545_read_register(sc, sc->io_addr);
+			break;
+		default:
+			DPRINTF("Unknown io bar read offset:0x%lx size:%d\r\n",
+				offset, size);
+			break;
+		}
+		break;
+	case E82545_BAR_REGISTER:
+		if (size != 4) {
+			DPRINTF("Wrong register read size:%d offset:0x%lx\r\n",
+				size, offset);
+		} else
+			retval = e82545_read_register(sc, (uint32_t)offset);
+		break;
+	default:
+		DPRINTF("Unknown read bar:%d offset:0x%lx size:%d\r\n",
+			baridx, offset, size);
+		break;
+	}
+
+	pthread_mutex_unlock(&sc->esc_mtx);
+
+	return (retval);
+}
+
+/*
+ * Put the emulated device back into its post-reset state.
+ *
+ * 'drvr' selects how much state is cleared.  e82545_init() passes 0 for
+ * the initial (hardware) reset, which additionally zeroes the flow
+ * control registers, the VLAN/multicast/unicast filter tables and the
+ * descriptor ring base registers, and installs the device MAC in unicast
+ * slot 0.  With a non-zero 'drvr' those registers are preserved and only
+ * the valid flags of the 16 unicast entries are cleared.
+ */
+static void
+e82545_reset(struct e82545_softc *sc, int drvr)
+{
+	int i;
+
+	/* stop both data paths before touching ring state */
+	e82545_rx_disable(sc);
+	e82545_tx_disable(sc);
+
+	/* clear outstanding interrupts */
+	if (sc->esc_irq_asserted)
+		pci_lintr_deassert(sc->esc_pi);
+
+	/* misc */
+	if (!drvr) {
+		sc->esc_FCAL = 0;
+		sc->esc_FCAH = 0;
+		sc->esc_FCT = 0;
+		sc->esc_VET = 0;
+		sc->esc_FCTTV = 0;
+	}
+	sc->esc_LEDCTL = 0x07061302;
+	sc->esc_PBA = 0x00100030;
+	
+	/* start nvm in opcode mode. */
+	sc->nvm_opaddr = 0;
+	sc->nvm_mode = E82545_NVM_MODE_OPADDR;
+	sc->nvm_bits = E82545_NVM_OPADDR_BITS;
+	sc->eeprom_control = E1000_EECD_PRES | E82545_EECD_FWE_EN;
+	e82545_init_eeprom(sc);
+
+	/* interrupt */
+	sc->esc_ICR = 0;
+	sc->esc_ITR = 250;
+	sc->esc_ICS = 0;
+	sc->esc_IMS = 0;
+	sc->esc_IMC = 0;
+		
+	/* L2 filters */
+	if (!drvr) {
+		memset(sc->esc_fvlan, 0, sizeof(sc->esc_fvlan));
+		memset(sc->esc_fmcast, 0, sizeof(sc->esc_fmcast));
+		memset(sc->esc_uni, 0, sizeof(sc->esc_uni));
+
+		/* XXX not necessary on 82545 ?? */
+		sc->esc_uni[0].eu_valid = 1;
+		memcpy(sc->esc_uni[0].eu_eth.octet, sc->esc_mac.octet,
+		    ETHER_ADDR_LEN);
+	} else {
+		/* Clear RAH valid bits */
+		for (i = 0; i < 16; i++)
+			sc->esc_uni[i].eu_valid = 0;
+	}
+	
+	/* receive */
+	if (!drvr) {
+		sc->esc_RDBAL = 0;
+		sc->esc_RDBAH = 0;
+	}
+	sc->esc_RCTL = 0;
+	sc->esc_FCRTL = 0;
+	sc->esc_FCRTH = 0;
+	sc->esc_RDLEN = 0;
+	sc->esc_RDH = 0;
+	sc->esc_RDT = 0;
+	sc->esc_RDTR = 0;
+	sc->esc_RXDCTL = (1 << 24) | (1 << 16); /* default GRAN/WTHRESH */
+	sc->esc_RADV = 0;
+	sc->esc_RXCSUM = 0;
+
+	/* transmit */
+	if (!drvr) {
+		sc->esc_TDBAL = 0;
+		sc->esc_TDBAH = 0;
+		sc->esc_TIPG = 0;
+		sc->esc_AIT = 0;
+		sc->esc_TIDV = 0;
+		sc->esc_TADV = 0;
+	}
+	/* cached ring address and mapping are dropped on every reset */
+	sc->esc_tdba = 0;
+	sc->esc_txdesc = NULL;
+	sc->esc_TXCW = 0;
+	sc->esc_TCTL = 0;
+	sc->esc_TDLEN = 0;
+	sc->esc_TDT = 0;
+	sc->esc_TDHr = sc->esc_TDH = 0;
+	sc->esc_TXDCTL = 0;
+}
+
+/*
+ * Open the tap(4)-style backend named by 'opts' (a device name relative
+ * to /dev, e.g. "tap0"), switch it to non-blocking mode and register
+ * e82545_tap_callback() for read events.
+ *
+ * On return sc->esc_tapfd is the open descriptor, or -1 when 'opts' is
+ * NULL or when non-blocking mode / event registration could not be set
+ * up.  Failure to open the device itself terminates the process.
+ */
+static void
+e82545_open_tap(struct e82545_softc *sc, char *opts)
+{
+	char tbuf[80];
+	int opt;
+
+	if (opts == NULL) {
+		sc->esc_tapfd = -1;
+		return;
+	}
+
+	strcpy(tbuf, "/dev/");
+	strlcat(tbuf, opts, sizeof(tbuf));
+
+	sc->esc_tapfd = open(tbuf, O_RDWR);
+	if (sc->esc_tapfd == -1) {
+		DPRINTF("unable to open tap device %s\n", opts);
+		exit(1);
+	}
+
+	/*
+	 * Set non-blocking and register for read
+	 * notifications with the event loop
+	 */
+	opt = 1;
+	if (ioctl(sc->esc_tapfd, FIONBIO, &opt) < 0) {
+		WPRINTF("tap device O_NONBLOCK failed: %d\n", errno);
+		close(sc->esc_tapfd);
+		sc->esc_tapfd = -1;
+		/*
+		 * The descriptor is gone: do not hand -1 to mevent_add()
+		 * or close it a second time below.
+		 */
+		return;
+	}
+
+	sc->esc_mevp = mevent_add(sc->esc_tapfd,
+				  EVF_READ,
+				  e82545_tap_callback,
+				  sc);
+	if (sc->esc_mevp == NULL) {
+		DPRINTF("Could not register mevent %d\n", EVF_READ);
+		close(sc->esc_tapfd);
+		sc->esc_tapfd = -1;
+	}
+}
+
+/*
+ * Parse a "mac=<address>" option.
+ *
+ * 'mac_str' is split in place at the first '='.  If the text is not of
+ * the form "mac=..." nothing is parsed, 'mac_addr' is left untouched and
+ * 0 is returned.  Otherwise the address is converted with ether_aton();
+ * a valid unicast, non-zero address is copied to 'mac_addr' (returns 0),
+ * anything else is reported on stderr (returns 1).
+ */
+static int
+e82545_parsemac(char *mac_str, uint8_t *mac_addr)
+{
+	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
+	struct ether_addr *parsed;
+	char *key;
+
+	key = strsep(&mac_str, "=");
+	if (mac_str == NULL || strcmp(key, "mac") != 0)
+		return (0);
+
+	parsed = ether_aton(mac_str);
+	if (parsed != NULL && !ETHER_IS_MULTICAST(parsed->octet) &&
+	    memcmp(parsed->octet, zero_addr, ETHER_ADDR_LEN) != 0) {
+		memcpy(mac_addr, parsed->octet, ETHER_ADDR_LEN);
+		return (0);
+	}
+
+	fprintf(stderr, "Invalid MAC %s\n", mac_str);
+	return (1);
+}
+
+/*
+ * Device-model init callback: allocate the softc, start the transmit
+ * thread, fill in PCI config space and BARs, parse the optional
+ * "<backend>[,mac=<address>]" option string and perform the initial
+ * hardware reset.
+ *
+ * Returns 0 on success and non-zero on failure (out of memory or an
+ * invalid MAC address).
+ */
+static int
+e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	DPRINTF("Loading with options: %s\r\n", opts);
+
+	static const uint8_t zero_mac[ETHER_ADDR_LEN];
+	MD5_CTX mdctx;
+	unsigned char digest[16];
+	char nstr[80];
+	struct e82545_softc *sc;
+	char *devname;
+	char *vtopts;
+	int mac_provided;
+
+	/* Setup our softc */
+	sc = calloc(1, sizeof(*sc));
+	if (sc == NULL)
+		return (ENOMEM);
+
+	pi->pi_arg = sc;
+	sc->esc_pi = pi;
+	sc->esc_ctx = ctx;
+
+	pthread_mutex_init(&sc->esc_mtx, NULL);
+	pthread_cond_init(&sc->esc_rx_cond, NULL);
+	pthread_cond_init(&sc->esc_tx_cond, NULL);
+	pthread_create(&sc->esc_tx_tid, NULL, e82545_tx_thread, sc);
+	snprintf(nstr, sizeof(nstr), "e82545-%d:%d tx", pi->pi_slot,
+	    pi->pi_func);
+	pthread_set_name_np(sc->esc_tx_tid, nstr);
+
+	pci_set_cfgdata16(pi, PCIR_DEVICE, E82545_DEV_ID_82545EM_COPPER);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, E82545_VENDOR_ID_INTEL);
+	pci_set_cfgdata8(pi,  PCIR_CLASS, PCIC_NETWORK);
+	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_NETWORK_ETHERNET);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, E82545_SUBDEV_ID);
+	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, E82545_VENDOR_ID_INTEL);
+
+	pci_set_cfgdata8(pi,  PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL);
+	pci_set_cfgdata8(pi,  PCIR_INTPIN, 0x1);
+
+	/* TODO: this card also supports msi, but the freebsd driver for it
+	 * does not, so I have not implemented it. */
+	pci_lintr_request(pi);
+
+	pci_emul_alloc_bar(pi, E82545_BAR_REGISTER, PCIBAR_MEM32,
+		E82545_BAR_REGISTER_LEN);
+	pci_emul_alloc_bar(pi, E82545_BAR_FLASH, PCIBAR_MEM32,
+		E82545_BAR_FLASH_LEN);
+	pci_emul_alloc_bar(pi, E82545_BAR_IO, PCIBAR_IO,
+		E82545_BAR_IO_LEN);
+
+	/*
+	 * Attempt to open the tap device and read the MAC address
+	 * if specified.  Copied from virtio-net, slightly modified.
+	 */
+	mac_provided = 0;
+	sc->esc_tapfd = -1;
+	if (opts != NULL) {
+		int err;
+
+		/* work on a copy: strsep() modifies the string in place */
+		devname = vtopts = strdup(opts);
+		if (devname == NULL)
+			return (ENOMEM);
+		(void) strsep(&vtopts, ",");
+
+		if (vtopts != NULL) {
+			err = e82545_parsemac(vtopts, sc->esc_mac.octet);
+			if (err != 0) {
+				free(devname);
+				return (err);
+			}
+			/*
+			 * e82545_parsemac() also returns 0 when the option
+			 * is not a "mac=" key, in which case it stores
+			 * nothing.  The softc was zero-filled and a zero
+			 * address is rejected by the parser, so a non-zero
+			 * MAC here means one was really supplied; otherwise
+			 * fall back to the generated default below.
+			 */
+			mac_provided = memcmp(sc->esc_mac.octet, zero_mac,
+			    ETHER_ADDR_LEN) != 0;
+		}
+
+		if (strncmp(devname, "tap", 3) == 0 ||
+		    strncmp(devname, "vmnet", 5) == 0)
+			e82545_open_tap(sc, devname);
+
+		free(devname);
+	}
+
+	/*
+	 * The default MAC address is the standard NetApp OUI of 00-a0-98,
+	 * followed by an MD5 of the PCI slot/func number and VM name
+	 */
+	if (!mac_provided) {
+		snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
+		    pi->pi_func, vmname);
+
+		MD5Init(&mdctx);
+		MD5Update(&mdctx, nstr, strlen(nstr));
+		MD5Final(digest, &mdctx);
+
+		sc->esc_mac.octet[0] = 0x00;
+		sc->esc_mac.octet[1] = 0xa0;
+		sc->esc_mac.octet[2] = 0x98;
+		sc->esc_mac.octet[3] = digest[0];
+		sc->esc_mac.octet[4] = digest[1];
+		sc->esc_mac.octet[5] = digest[2];
+	}
+
+	/* H/w initiated reset */
+	e82545_reset(sc, 0);
+
+	return (0);
+}
+
+/*
+ * Device-model descriptor for this emulation.  pe_emu is the name that
+ * is compared against the <emul> field of a PCI slot option.
+ * PCI_EMUL_SET presumably adds the descriptor to the pci_devemu linker
+ * set -- the macro is defined outside this file.
+ */
+struct pci_devemu pci_de_e82545 = {
+	.pe_emu = 	"e1000",
+	.pe_init =	e82545_init,
+	.pe_barwrite =	e82545_write,
+	.pe_barread =	e82545_read
+};
+PCI_EMUL_SET(pci_de_e82545);
+


Property changes on: trunk/usr.sbin/bhyve/pci_e82545.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_emul.c
===================================================================
--- trunk/usr.sbin/bhyve/pci_emul.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_emul.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,2103 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/pci_emul.c 302705 2016-07-13 06:09:34Z ngie $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_emul.c 302705 2016-07-13 06:09:34Z ngie $");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdbool.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "bhyverun.h"
+#include "inout.h"
+#include "ioapic.h"
+#include "mem.h"
+#include "pci_emul.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+
+#define CONF1_ADDR_PORT    0x0cf8
+#define CONF1_DATA_PORT    0x0cfc
+
+#define CONF1_ENABLE	   0x80000000ul
+
+#define	MAXBUSES	(PCI_BUSMAX + 1)
+#define MAXSLOTS	(PCI_SLOTMAX + 1)
+#define	MAXFUNCS	(PCI_FUNCMAX + 1)
+
+/* Per-function configuration recorded by pci_parse_slot(). */
+struct funcinfo {
+	char	*fi_name;	/* <emul> field of the slot option */
+	char	*fi_param;	/* optional <config> field, or NULL */
+	struct pci_devinst *fi_devi;	/* set by pci_emul_init() on success */
+};
+
+/*
+ * Per-INTx-pin bookkeeping for a slot (see slotinfo.si_intpins).
+ * NOTE(review): field meanings below are inferred from their names; the
+ * code that fills them in is outside this excerpt -- confirm.
+ */
+struct intxinfo {
+	int	ii_count;	/* presumably the number of users of this pin */
+	int	ii_pirq_pin;	/* presumably the PIRQ pin routed to */
+	int	ii_ioapic_irq;	/* presumably the I/O APIC IRQ routed to */
+};
+
+/* State for one PCI slot: four interrupt pins and MAXFUNCS functions. */
+struct slotinfo {
+	struct intxinfo si_intpins[4];
+	struct funcinfo si_funcs[MAXFUNCS];	/* indexed by function number */
+};
+
+/*
+ * State for one PCI bus: its resource windows and its slots.  Instances
+ * are allocated on demand by pci_parse_slot() and stored in pci_businfo[].
+ */
+struct businfo {
+	uint16_t iobase, iolimit;		/* I/O window */
+	uint32_t membase32, memlimit32;		/* mmio window below 4GB */
+	uint64_t membase64, memlimit64;		/* mmio window above 4GB */
+	struct slotinfo slotinfo[MAXSLOTS];	/* indexed by slot number */
+};
+
+static struct businfo *pci_businfo[MAXBUSES];
+
+SET_DECLARE(pci_devemu_set, struct pci_devemu);
+
+/*
+ * Allocation cursors for the three BAR address spaces.
+ * pci_emul_alloc_pbar() passes one of these to pci_emul_alloc_resource(),
+ * which advances it past every BAR it places.
+ */
+static uint64_t pci_emul_iobase;
+static uint64_t pci_emul_membase32;
+static uint64_t pci_emul_membase64;
+
+/*
+ * The *LIMIT* values are the exclusive upper bounds that
+ * pci_emul_alloc_pbar() hands to the allocator.  The *BASE values are
+ * presumably the initial cursor values; the code that applies them is
+ * outside this excerpt.
+ */
+#define	PCI_EMUL_IOBASE		0x2000
+#define	PCI_EMUL_IOLIMIT	0x10000
+
+#define	PCI_EMUL_ECFG_BASE	0xE0000000		    /* 3.5GB */
+#define	PCI_EMUL_ECFG_SIZE	(MAXBUSES * 1024 * 1024)    /* 1MB per bus */
+SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
+
+/* 32-bit MMIO BARs are allocated below the extended config window */
+#define	PCI_EMUL_MEMLIMIT32	PCI_EMUL_ECFG_BASE
+
+#define	PCI_EMUL_MEMBASE64	0xD000000000UL
+#define	PCI_EMUL_MEMLIMIT64	0xFD00000000UL
+
+static struct pci_devemu *pci_emul_finddev(char *name);
+static void pci_lintr_route(struct pci_devinst *pi);
+static void pci_lintr_update(struct pci_devinst *pi);
+static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot,
+    int func, int coff, int bytes, uint32_t *val);
+
+/*
+ * Store 'val' at config-space offset 'coff' using an access width of
+ * 'bytes'.  Widths other than 1 and 2 are treated as 4-byte accesses.
+ */
+static __inline void
+CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes)
+{
+
+	switch (bytes) {
+	case 1:
+		pci_set_cfgdata8(pi, coff, val);
+		break;
+	case 2:
+		pci_set_cfgdata16(pi, coff, val);
+		break;
+	default:
+		pci_set_cfgdata32(pi, coff, val);
+		break;
+	}
+}
+
+/*
+ * Fetch the value at config-space offset 'coff' using an access width of
+ * 'bytes'.  Widths other than 1 and 2 are treated as 4-byte accesses.
+ */
+static __inline uint32_t
+CFGREAD(struct pci_devinst *pi, int coff, int bytes)
+{
+
+	switch (bytes) {
+	case 1:
+		return (pci_get_cfgdata8(pi, coff));
+	case 2:
+		return (pci_get_cfgdata16(pi, coff));
+	default:
+		return (pci_get_cfgdata32(pi, coff));
+	}
+}
+
+/*
+ * I/O access
+ */
+
+/*
+ * Slot options are in the form:
+ *
+ *  <bus>:<slot>:<func>,<emul>[,<config>]
+ *  <slot>[:<func>],<emul>[,<config>]
+ *
+ *  slot is 0..31
+ *  func is 0..7
+ *  emul is a string describing the type of PCI device e.g. virtio-net
+ *  config is an optional string, depending on the device, that can be
+ *  used for configuration.
+ *   Examples are:
+ *     1,virtio-net,tap0
+ *     3:0,dummy
+ */
+/* Report on stderr that the slot option 'aopt' could not be parsed. */
+static void
+pci_parse_slot_usage(char *aopt)
+{
+
+	fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt);
+}
+
+/*
+ * Parse one PCI slot option of the form
+ * "[<bus>:]<slot>[:<func>],<emul>[,<config>]" and record the emulation
+ * name and configuration string for that bus/slot/function.
+ *
+ * Returns 0 on success.  Returns -1, after printing a diagnostic, if the
+ * option is malformed, a number is out of range, the function is already
+ * occupied, the emulation name is unknown or memory cannot be allocated.
+ *
+ * On success the private copy of 'opt' is deliberately not freed:
+ * fi_name and fi_param point into it.
+ */
+int
+pci_parse_slot(char *opt)
+{
+	struct businfo *bi;
+	struct slotinfo *si;
+	char *emul, *config, *str, *cp;
+	int error, bnum, snum, fnum;
+
+	error = -1;
+	str = strdup(opt);
+	if (str == NULL) {
+		fprintf(stderr, "pci slot \"%s\": out of memory\n", opt);
+		return (error);
+	}
+
+	emul = config = NULL;
+	if ((cp = strchr(str, ',')) != NULL) {
+		*cp = '\0';
+		emul = cp + 1;
+		if ((cp = strchr(emul, ',')) != NULL) {
+			*cp = '\0';
+			config = cp + 1;
+		}
+	} else {
+		pci_parse_slot_usage(opt);
+		goto done;
+	}
+
+	/* <bus>:<slot>:<func> */
+	if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) {
+		bnum = 0;
+		/* <slot>:<func> */
+		if (sscanf(str, "%d:%d", &snum, &fnum) != 2) {
+			fnum = 0;
+			/* <slot> */
+			if (sscanf(str, "%d", &snum) != 1) {
+				snum = -1;
+			}
+		}
+	}
+
+	if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS ||
+	    fnum < 0 || fnum >= MAXFUNCS) {
+		pci_parse_slot_usage(opt);
+		goto done;
+	}
+
+	/* bus state is created the first time a bus is mentioned */
+	if (pci_businfo[bnum] == NULL) {
+		pci_businfo[bnum] = calloc(1, sizeof(struct businfo));
+		if (pci_businfo[bnum] == NULL) {
+			fprintf(stderr, "pci slot %d:%d: out of memory\n",
+				snum, fnum);
+			goto done;
+		}
+	}
+
+	bi = pci_businfo[bnum];
+	si = &bi->slotinfo[snum];
+
+	if (si->si_funcs[fnum].fi_name != NULL) {
+		fprintf(stderr, "pci slot %d:%d already occupied!\n",
+			snum, fnum);
+		goto done;
+	}
+
+	if (pci_emul_finddev(emul) == NULL) {
+		fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n",
+			snum, fnum, emul);
+		goto done;
+	}
+
+	error = 0;
+	si->si_funcs[fnum].fi_name = emul;
+	si->si_funcs[fnum].fi_param = config;
+
+done:
+	/* only the failure paths give the copy back */
+	if (error)
+		free(str);
+
+	return (error);
+}
+
+/*
+ * Return 1 if 'offset' lies inside [pba_offset, pba_offset + pba_size)
+ * of the device's MSI-X state, 0 otherwise.
+ */
+static int
+pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset)
+{
+
+	return (offset >= pi->pi_msix.pba_offset &&
+	    offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size);
+}
+
+/*
+ * Write 'value' ('size' bytes) to the MSI-X table at byte 'offset' from
+ * the start of the table.
+ *
+ * Only 4- and 8-byte accesses that are naturally aligned within a table
+ * entry and fall inside the first table_count entries are accepted.
+ * Returns 0 on success and -1 if the access is rejected.
+ */
+int
+pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
+		     uint64_t value)
+{
+	int msix_entry_offset;
+	uint64_t tab_index;
+	char *dest;
+
+	/* support only 4 or 8 byte writes */
+	if (size != 4 && size != 8)
+		return (-1);
+
+	/*
+	 * Return if table index is beyond what device supports.
+	 * The index is kept 64 bits wide so that a large offset cannot be
+	 * truncated to a small or negative value and pass this check.
+	 */
+	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
+	if (pi->pi_msix.table_count <= 0 ||
+	    tab_index >= (uint64_t)pi->pi_msix.table_count)
+		return (-1);
+
+	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
+
+	/* support only aligned writes */
+	if ((msix_entry_offset % size) != 0)
+		return (-1);
+
+	dest = (char *)(pi->pi_msix.table + tab_index);
+	dest += msix_entry_offset;
+
+	if (size == 4)
+		*((uint32_t *)dest) = value;
+	else
+		*((uint64_t *)dest) = value;
+
+	return (0);
+}
+
+/*
+ * Read 'size' bytes from the MSI-X table at byte 'offset' from the start
+ * of the table.
+ *
+ * Returns the stored value for an access inside the first table_count
+ * entries, 0 for an access that pci_valid_pba_offset() accepts, and
+ * all-ones for anything else (unsupported size, misaligned access or an
+ * offset outside both regions).
+ */
+uint64_t
+pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size)
+{
+	char *dest;
+	int msix_entry_offset;
+	uint64_t tab_index;
+	uint64_t retval = ~0;
+
+	/*
+	 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X
+	 * table but we also allow 1 byte access to accommodate reads from
+	 * ddb.
+	 */
+	if (size != 1 && size != 4 && size != 8)
+		return (retval);
+
+	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
+
+	/* support only aligned reads */
+	if ((msix_entry_offset % size) != 0) {
+		return (retval);
+	}
+
+	/*
+	 * Keep the index 64 bits wide so that a large offset cannot be
+	 * truncated to a small or negative value and pass the bounds check.
+	 */
+	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
+
+	if (pi->pi_msix.table_count > 0 &&
+	    tab_index < (uint64_t)pi->pi_msix.table_count) {
+		/* valid MSI-X Table access */
+		dest = (char *)(pi->pi_msix.table + tab_index);
+		dest += msix_entry_offset;
+
+		if (size == 1)
+			retval = *((uint8_t *)dest);
+		else if (size == 4)
+			retval = *((uint32_t *)dest);
+		else
+			retval = *((uint64_t *)dest);
+	} else if (pci_valid_pba_offset(pi, offset)) {
+		/* return 0 for PBA access */
+		retval = 0;
+	}
+
+	return (retval);
+}
+
+/*
+ * Return the BAR index recorded for the MSI-X table, or -1 if the device
+ * has no MSI-X table allocated.
+ */
+int
+pci_msix_table_bar(struct pci_devinst *pi)
+{
+
+	if (pi->pi_msix.table == NULL)
+		return (-1);
+
+	return (pi->pi_msix.table_bar);
+}
+
+/*
+ * Return the BAR index recorded for the MSI-X PBA, or -1 if the device
+ * has no MSI-X table allocated.
+ */
+int
+pci_msix_pba_bar(struct pci_devinst *pi)
+{
+
+	if (pi->pi_msix.table == NULL)
+		return (-1);
+
+	return (pi->pi_msix.pba_bar);
+}
+
+/*
+ * I/O port handler installed by modify_bar_registration() for I/O BARs.
+ *
+ * Finds the I/O BAR of the device ('arg') that fully contains the access
+ * [port, port + bytes) and forwards it to the device model's BAR read or
+ * write callback with a BAR-relative offset.  Returns 0 if a BAR
+ * serviced the access and -1 if none matched.
+ */
+static int
+pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		    uint32_t *eax, void *arg)
+{
+	struct pci_devinst *pdi = arg;
+	struct pci_devemu *pe = pdi->pi_d;
+	uint64_t offset;
+	int i;
+
+	for (i = 0; i <= PCI_BARMAX; i++) {
+		if (pdi->pi_bar[i].type != PCIBAR_IO)
+			continue;
+		/* the whole access must fall inside this BAR */
+		if (!(port >= pdi->pi_bar[i].addr &&
+		    port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size))
+			continue;
+
+		offset = port - pdi->pi_bar[i].addr;
+		if (!in) {
+			(*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset,
+					   bytes, *eax);
+		} else {
+			*eax = (*pe->pe_barread)(ctx, vcpu, pdi, i,
+						 offset, bytes);
+		}
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * MMIO handler installed by modify_bar_registration() for memory BARs.
+ *
+ * 'arg1' is the device instance and 'arg2' the BAR index.  The access is
+ * forwarded to the device model's BAR callbacks with a BAR-relative
+ * offset; 8-byte accesses are split into two 4-byte accesses, low dword
+ * first.  Always returns 0.
+ */
+static int
+pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+		     int size, uint64_t *val, void *arg1, long arg2)
+{
+	struct pci_devinst *pdi = arg1;
+	struct pci_devemu *pe = pdi->pi_d;
+	uint64_t offset;
+	int bidx = (int) arg2;
+
+	assert(bidx <= PCI_BARMAX);
+	assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
+	       pdi->pi_bar[bidx].type == PCIBAR_MEM64);
+	assert(addr >= pdi->pi_bar[bidx].addr &&
+	       addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);
+
+	offset = addr - pdi->pi_bar[bidx].addr;
+
+	if (dir != MEM_F_WRITE) {
+		/* read */
+		if (size != 8) {
+			*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
+						 offset, size);
+			return (0);
+		}
+		*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, 4);
+		*val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
+					  offset + 4, 4) << 32;
+		return (0);
+	}
+
+	/* write */
+	if (size != 8) {
+		(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val);
+		return (0);
+	}
+	(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset,
+			   4, *val & 0xffffffff);
+	(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4,
+			   4, *val >> 32);
+
+	return (0);
+}
+
+
+/*
+ * Carve 'size' bytes (a power of 2) out of the region tracked by
+ * '*baseptr', aligned to 'size' and ending no later than 'limit'.
+ *
+ * On success stores the start in '*addr', advances '*baseptr' past the
+ * allocation and returns 0.  Returns -1, leaving both untouched, if the
+ * request does not fit.
+ */
+static int
+pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
+			uint64_t *addr)
+{
+	uint64_t start;
+
+	assert((size & (size - 1)) == 0);	/* must be a power of 2 */
+
+	start = roundup2(*baseptr, size);
+	if (start + size > limit)
+		return (-1);
+
+	*addr = start;
+	*baseptr = start + size;
+
+	return (0);
+}
+
+/*
+ * Allocate and register BAR 'idx' of the given type and size.
+ * Convenience wrapper around pci_emul_alloc_pbar() with a host base of 0;
+ * returns whatever that function returns.
+ */
+int
+pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
+		   uint64_t size)
+{
+
+	return (pci_emul_alloc_pbar(pdi, idx, 0, type, size));
+}
+
+/*
+ * Register (or unregister) the MMIO or I/O region associated with the BAR
+ * register 'idx' of an emulated pci device.
+ */
+static void
+modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
+{
+	struct inout_port iop;
+	struct mem_range mr;
+	int error;
+
+	switch (pi->pi_bar[idx].type) {
+	case PCIBAR_IO:
+		/* describe the port range currently decoded by the BAR */
+		bzero(&iop, sizeof(struct inout_port));
+		iop.name = pi->pi_name;
+		iop.port = pi->pi_bar[idx].addr;
+		iop.size = pi->pi_bar[idx].size;
+		if (!registration) {
+			error = unregister_inout(&iop);
+			break;
+		}
+		iop.flags = IOPORT_F_INOUT;
+		iop.handler = pci_emul_io_handler;
+		iop.arg = pi;
+		error = register_inout(&iop);
+		break;
+	case PCIBAR_MEM32:
+	case PCIBAR_MEM64:
+		/* describe the MMIO range currently decoded by the BAR */
+		bzero(&mr, sizeof(struct mem_range));
+		mr.name = pi->pi_name;
+		mr.base = pi->pi_bar[idx].addr;
+		mr.size = pi->pi_bar[idx].size;
+		if (!registration) {
+			error = unregister_mem(&mr);
+			break;
+		}
+		mr.flags = MEM_F_RW;
+		mr.handler = pci_emul_mem_handler;
+		mr.arg1 = pi;
+		mr.arg2 = idx;
+		error = register_mem(&mr);
+		break;
+	default:
+		/* any other BAR type cannot be (un)registered */
+		error = EINVAL;
+		break;
+	}
+
+	/* a failed (un)registration is treated as a fatal internal error */
+	assert(error == 0);
+}
+
+/* Stop intercepting the address range currently decoded by BAR 'idx'. */
+static void
+unregister_bar(struct pci_devinst *pi, int idx)
+{
+
+	modify_bar_registration(pi, idx, 0);
+}
+
+/* Start intercepting the address range currently decoded by BAR 'idx'. */
+static void
+register_bar(struct pci_devinst *pi, int idx)
+{
+
+	modify_bar_registration(pi, idx, 1);
+}
+
+/* Are we decoding i/o port accesses for the emulated pci device? */
+static int
+porten(struct pci_devinst *pi)
+{
+
+	/* non-zero iff the PORTEN bit is set in the command register */
+	return (pci_get_cfgdata16(pi, PCIR_COMMAND) & PCIM_CMD_PORTEN);
+}
+
+/* Are we decoding memory accesses for the emulated pci device? */
+static int
+memen(struct pci_devinst *pi)
+{
+
+	/* non-zero iff the MEMEN bit is set in the command register */
+	return (pci_get_cfgdata16(pi, PCIR_COMMAND) & PCIM_CMD_MEMEN);
+}
+
+/*
+ * Update the MMIO or I/O address that is decoded by the BAR register.
+ *
+ * If the pci device has enabled the address space decoding then intercept
+ * the address range decoded by the BAR register.
+ */
+static void
+update_bar_address(struct  pci_devinst *pi, uint64_t addr, int idx, int type)
+{
+	int decode;
+
+	/* which enable bit applies depends on the BAR's own type */
+	if (pi->pi_bar[idx].type == PCIBAR_IO)
+		decode = porten(pi);
+	else
+		decode = memen(pi);
+
+	/* drop the old range before the address changes underneath it */
+	if (decode)
+		unregister_bar(pi, idx);
+
+	switch (type) {
+	case PCIBAR_IO:
+	case PCIBAR_MEM32:
+		pi->pi_bar[idx].addr = addr;
+		break;
+	case PCIBAR_MEM64:
+		/* replace the low 32 bits, keep the high 32 bits */
+		pi->pi_bar[idx].addr &= ~0xffffffffUL;
+		pi->pi_bar[idx].addr |= addr;
+		break;
+	case PCIBAR_MEMHI64:
+		/*
+		 * keep the low 32 bits; 'addr' is OR-ed in unshifted, so the
+		 * caller is expected to pass it already in the high half
+		 */
+		pi->pi_bar[idx].addr &= 0xffffffff;
+		pi->pi_bar[idx].addr |= addr;
+		break;
+	default:
+		assert(0);
+	}
+
+	/* intercept the new range */
+	if (decode)
+		register_bar(pi, idx);
+}
+
+/*
+ * Allocate guest address space for BAR 'idx' of 'pdi', initialise the BAR
+ * register(s) in config space and start intercepting the range.
+ *
+ * 'size' is rounded up to a power of 2 and to the minimum BAR size (4
+ * bytes for I/O, 16 bytes otherwise).  'hostbase' is only used as the
+ * allocation base for 4GB 64-bit BARs.  A PCIBAR_NONE request records an
+ * empty BAR and registers nothing.
+ *
+ * Returns 0 on success and -1 if the address space is exhausted or the
+ * BAR type is invalid.
+ */
+int
+pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
+		    enum pcibar_type type, uint64_t size)
+{
+	int error;
+	uint64_t *baseptr, limit, addr, mask, lobits, bar;
+
+	assert(idx >= 0 && idx <= PCI_BARMAX);
+
+	if ((size & (size - 1)) != 0)
+		size = 1UL << flsl(size);	/* round up to a power of 2 */
+
+	/* Enforce minimum BAR sizes required by the PCI standard */
+	if (type == PCIBAR_IO) {
+		if (size < 4)
+			size = 4;
+	} else {
+		if (size < 16)
+			size = 16;
+	}
+
+	limit = 0;
+	switch (type) {
+	case PCIBAR_NONE:
+		baseptr = NULL;
+		addr = mask = lobits = 0;
+		break;
+	case PCIBAR_IO:
+		baseptr = &pci_emul_iobase;
+		limit = PCI_EMUL_IOLIMIT;
+		mask = PCIM_BAR_IO_BASE;
+		lobits = PCIM_BAR_IO_SPACE;
+		break;
+	case PCIBAR_MEM64:
+		/*
+		 * XXX
+		 * Some drivers do not work well if the 64-bit BAR is allocated
+		 * above 4GB. Allow for this by allocating small requests under
+		 * 4GB unless the allocation size is larger than some arbitrary
+		 * number (32MB currently).
+		 */
+		if (size > 32 * 1024 * 1024) {
+			/*
+			 * XXX special case for device requiring peer-peer DMA
+			 */
+			if (size == 0x100000000UL)
+				baseptr = &hostbase;
+			else
+				baseptr = &pci_emul_membase64;
+			limit = PCI_EMUL_MEMLIMIT64;
+			mask = PCIM_BAR_MEM_BASE;
+			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
+				 PCIM_BAR_MEM_PREFETCH;
+		} else {
+			baseptr = &pci_emul_membase32;
+			limit = PCI_EMUL_MEMLIMIT32;
+			mask = PCIM_BAR_MEM_BASE;
+			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
+		}
+		break;
+	case PCIBAR_MEM32:
+		baseptr = &pci_emul_membase32;
+		limit = PCI_EMUL_MEMLIMIT32;
+		mask = PCIM_BAR_MEM_BASE;
+		lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
+		break;
+	default:
+		printf("pci_emul_alloc_pbar: invalid bar type %d\n", type);
+		assert(0);
+		/* not reached unless assertions are compiled out */
+		return (-1);
+	}
+
+	if (baseptr != NULL) {
+		error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
+		if (error != 0)
+			return (error);
+	}
+
+	pdi->pi_bar[idx].type = type;
+	pdi->pi_bar[idx].addr = addr;
+	pdi->pi_bar[idx].size = size;
+
+	/* Initialize the BAR register in config space */
+	bar = (addr & mask) | lobits;
+	pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);
+
+	if (type == PCIBAR_MEM64) {
+		/* a 64-bit BAR also consumes the next BAR register */
+		assert(idx + 1 <= PCI_BARMAX);
+		pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
+		pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
+	}
+
+	/*
+	 * modify_bar_registration() has no handler for PCIBAR_NONE and
+	 * asserts on it, so there is nothing to register for an empty BAR.
+	 */
+	if (type != PCIBAR_NONE)
+		register_bar(pdi, idx);
+
+	return (0);
+}
+
+#define	CAP_START_OFFSET	0x40
+/*
+ * Append a capability of 'caplen' bytes ('capdata') to the device's
+ * capability list in config space.
+ *
+ * The first capability is placed at CAP_START_OFFSET; later ones follow
+ * the previous capability, each padded to a multiple of 4 bytes.
+ * Returns 0 on success and -1 if the capability does not fit below
+ * PCI_REGMAX + 1.
+ */
+static int
+pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
+{
+	int i, capoff, reallen;
+	uint16_t sts;
+
+	assert(caplen > 0);
+
+	reallen = roundup2(caplen, 4);		/* dword aligned */
+
+	/* the CAPPRESENT status bit tells whether a list exists already */
+	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
+	if ((sts & PCIM_STATUS_CAPPRESENT) == 0)
+		capoff = CAP_START_OFFSET;
+	else
+		capoff = pi->pi_capend + 1;
+
+	/* Check if we have enough space */
+	if (capoff + reallen > PCI_REGMAX + 1)
+		return (-1);
+
+	/* Set the previous capability pointer */
+	if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
+		pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
+		pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
+	} else
+		pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff);
+
+	/* Copy the capability */
+	for (i = 0; i < caplen; i++)
+		pci_set_cfgdata8(pi, capoff + i, capdata[i]);
+
+	/* Set the next capability pointer */
+	pci_set_cfgdata8(pi, capoff + 1, 0);
+
+	/* remember where this capability starts and ends for the next call */
+	pi->pi_prevcap = capoff;
+	pi->pi_capend = capoff + reallen - 1;
+	return (0);
+}
+
+/*
+ * Look up a device model by its pe_emu name in the pci_devemu_set linker
+ * set.  Returns the matching descriptor, or NULL if there is none.
+ */
+static struct pci_devemu *
+pci_emul_finddev(char *name)
+{
+	struct pci_devemu **entry;
+
+	SET_FOREACH(entry, pci_devemu_set) {
+		if (strcmp((*entry)->pe_emu, name) == 0)
+			return (*entry);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Create the device instance for bus/slot/func, give it default config
+ * space contents and run the device model's pe_init callback with the
+ * configuration string recorded in 'fi'.
+ *
+ * On success the instance is stored in fi->fi_devi and 0 is returned.
+ * On failure the instance is freed and the non-zero error is returned
+ * (ENOMEM if the instance could not be allocated).
+ */
+static int
+pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot,
+    int func, struct funcinfo *fi)
+{
+	struct pci_devinst *pdi;
+	int err;
+
+	pdi = calloc(1, sizeof(struct pci_devinst));
+	if (pdi == NULL)
+		return (ENOMEM);
+
+	pdi->pi_vmctx = ctx;
+	pdi->pi_bus = bus;
+	pdi->pi_slot = slot;
+	pdi->pi_func = func;
+	pthread_mutex_init(&pdi->pi_lintr.lock, NULL);
+	pdi->pi_lintr.pin = 0;
+	pdi->pi_lintr.state = IDLE;
+	pdi->pi_lintr.pirq_pin = 0;
+	pdi->pi_lintr.ioapic_irq = 0;
+	pdi->pi_d = pde;
+	snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot);
+
+	/* Disable legacy interrupts */
+	pci_set_cfgdata8(pdi, PCIR_INTLINE, 255);
+	pci_set_cfgdata8(pdi, PCIR_INTPIN, 0);
+
+	/* decoding is enabled up front so BARs get registered immediately */
+	pci_set_cfgdata8(pdi, PCIR_COMMAND,
+		    PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
+
+	err = (*pde->pe_init)(ctx, pdi, fi->fi_param);
+	if (err == 0)
+		fi->fi_devi = pdi;
+	else
+		free(pdi);
+
+	return (err);
+}
+
+/*
+ * Fill in an MSI capability structure advertising 'msgnum' messages and
+ * 64-bit addressing.  'nextptr' is stored as the next-capability pointer.
+ */
+void
+pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
+{
+	int log2msgs;
+
+	/* Number of msi messages must be a power of 2 between 1 and 32 */
+	assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
+
+	/* The message count is stored as its base-2 logarithm. */
+	log2msgs = ffs(msgnum) - 1;
+
+	bzero(msicap, sizeof(*msicap));
+	msicap->msgctrl = PCIM_MSICTRL_64BIT | (log2msgs << 1);
+	msicap->nextptr = nextptr;
+	msicap->capid = PCIY_MSI;
+}
+
+/*
+ * Add an MSI capability supporting 'msgnum' messages to the device.
+ * Returns the result of pci_emul_add_capability().
+ */
+int
+pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
+{
+	struct msicap cap;
+	int error;
+
+	pci_populate_msicap(&cap, msgnum, 0);
+	error = pci_emul_add_capability(pi, (u_char *)&cap, sizeof(cap));
+
+	return (error);
+}
+
+/*
+ * Fill in an MSI-X capability structure for a table of 'msgnum' entries
+ * located in BAR 'barnum'.  'msix_tab_size' is the size reserved for the
+ * table and must be a multiple of 4096.
+ */
+static void
+pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum,
+		     uint32_t msix_tab_size)
+{
+	uint32_t bir;
+
+	assert(msix_tab_size % 4096 == 0);
+
+	bir = barnum & PCIM_MSIX_BIR_MASK;
+
+	bzero(msixcap, sizeof(*msixcap));
+	msixcap->capid = PCIY_MSIX;
+
+	/*
+	 * Message Control: everything is zero except the Table Size field,
+	 * which holds the number of entries minus one.
+	 */
+	msixcap->msgctrl = msgnum - 1;
+
+	/*
+	 * Both structures live in the same BAR: the MSI-X table at offset 0
+	 * and the PBA at the 4K aligned offset right after the table.
+	 */
+	msixcap->table_info = bir;
+	msixcap->pba_info = msix_tab_size | bir;
+}
+
+/*
+ * Allocate the MSI-X table for 'table_entries' vectors and start with
+ * every vector masked.
+ *
+ * The allocation is asserted to succeed, matching how the rest of this
+ * file treats unrecoverable setup errors, instead of writing through a
+ * NULL table pointer.
+ */
+static void
+pci_msix_table_init(struct pci_devinst *pi, int table_entries)
+{
+	int i;
+
+	assert(table_entries > 0);
+	assert(table_entries <= MAX_MSIX_TABLE_ENTRIES);
+
+	pi->pi_msix.table = calloc(table_entries, MSIX_TABLE_ENTRY_SIZE);
+	assert(pi->pi_msix.table != NULL);
+
+	/* set mask bit of vector control register */
+	for (i = 0; i < table_entries; i++)
+		pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK;
+}
+
+/*
+ * Add an MSI-X capability with 'msgnum' vectors to the device, backing the
+ * MSI-X table and PBA with a 32-bit memory BAR at index 'barnum'.
+ *
+ * The table starts at offset 0 of the BAR and the PBA follows at the next
+ * 4K boundary.
+ *
+ * Returns 0 on success.  If the BAR cannot be allocated the table is
+ * released and the error from pci_emul_alloc_bar() is returned; otherwise
+ * the result of pci_emul_add_capability() is returned.
+ */
+int
+pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum)
+{
+	uint32_t tab_size;
+	struct msixcap msixcap;
+	int error;
+
+	assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES);
+	assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0);
+
+	tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE;
+
+	/* Align table size to nearest 4K */
+	tab_size = roundup2(tab_size, 4096);
+
+	pi->pi_msix.table_bar = barnum;
+	pi->pi_msix.pba_bar   = barnum;
+	pi->pi_msix.table_offset = 0;
+	pi->pi_msix.table_count = msgnum;
+	pi->pi_msix.pba_offset = tab_size;
+	pi->pi_msix.pba_size = PBA_SIZE(msgnum);
+
+	pci_msix_table_init(pi, msgnum);
+
+	pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size);
+
+	/* allocate memory for MSI-X Table and PBA */
+	error = pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32,
+				tab_size + pi->pi_msix.pba_size);
+	if (error != 0) {
+		/* Do not advertise MSI-X without a BAR to back it. */
+		free(pi->pi_msix.table);
+		pi->pi_msix.table = NULL;
+		return (error);
+	}
+
+	return (pci_emul_add_capability(pi, (u_char *)&msixcap,
+					sizeof(msixcap)));
+}
+
+/*
+ * Config space write handler for the MSI-X capability located at 'capoff'.
+ *
+ * A 2-byte write to the Message Control register may only change the
+ * MSI-X Enable and Function Mask bits; the cached state in 'pi' is
+ * refreshed and the legacy interrupt state re-evaluated.  Every write is
+ * finally stored in config space.
+ */
+void
+msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
+		 int bytes, uint32_t val)
+{
+	uint16_t ctrl, writable;
+
+	/* Message Control Register */
+	if (bytes == 2 && offset - capoff == 2) {
+		writable = PCIM_MSIXCTRL_MSIX_ENABLE |
+		    PCIM_MSIXCTRL_FUNCTION_MASK;
+
+		/* Merge the writable bits into the current contents. */
+		ctrl = pci_get_cfgdata16(pi, offset);
+		ctrl = (ctrl & ~writable) | (val & writable);
+		val = ctrl;
+
+		pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE;
+		pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK;
+		pci_lintr_update(pi);
+	}
+
+	CFGWRITE(pi, offset, val, bytes);
+}
+
+/*
+ * Config space write handler for the MSI capability located at 'capoff'.
+ *
+ * A 2-byte write to the Message Control register may only change the
+ * Multiple Message Enable field and the MSI Enable bit.  The cached MSI
+ * state in 'pi' is then rebuilt from config space and the legacy
+ * interrupt state re-evaluated.  Every write is finally stored in config
+ * space.
+ */
+void
+msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
+		int bytes, uint32_t val)
+{
+	uint16_t ctrl, writable, data, mme;
+	uint32_t addr;
+
+	if (bytes == 2 && (offset - capoff) == 2) {
+		/* Keep the read-only fields of the register intact. */
+		writable = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
+		ctrl = pci_get_cfgdata16(pi, offset);
+		ctrl = (ctrl & ~writable) | (val & writable);
+		val = ctrl;
+
+		/* The data register offset depends on the address width. */
+		addr = pci_get_cfgdata32(pi, capoff + 4);
+		data = pci_get_cfgdata16(pi,
+		    (ctrl & PCIM_MSICTRL_64BIT) ? capoff + 12 : capoff + 8);
+
+		mme = ctrl & PCIM_MSICTRL_MME_MASK;
+		if (ctrl & PCIM_MSICTRL_MSI_ENABLE) {
+			pi->pi_msi.enabled = 1;
+			pi->pi_msi.addr = addr;
+			pi->pi_msi.msg_data = data;
+			pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
+		} else {
+			pi->pi_msi.enabled = 0;
+			pi->pi_msi.maxmsgnum = 0;
+		}
+		pci_lintr_update(pi);
+	}
+
+	CFGWRITE(pi, offset, val, bytes);
+}
+
+/*
+ * Config space write handler for the PCI Express capability.
+ * The value is stored as-is; 'capoff' is currently unused.
+ */
+void
+pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
+		 int bytes, uint32_t val)
+{
+
+	/* XXX don't write to the readonly parts */
+	CFGWRITE(pi, offset, val, bytes);
+}
+
+#define	PCIECAP_VERSION	0x2
+/*
+ * Add a PCI Express capability to the device.  Only the root port type is
+ * supported; any other 'type' yields -1.  Otherwise the result of
+ * pci_emul_add_capability() is returned.
+ */
+int
+pci_emul_add_pciecap(struct pci_devinst *pi, int type)
+{
+	struct pciecap cap;
+
+	if (type != PCIEM_TYPE_ROOT_PORT)
+		return (-1);
+
+	bzero(&cap, sizeof(cap));
+	cap.capid = PCIY_EXPRESS;
+	cap.pcie_capabilities = PCIECAP_VERSION | PCIEM_TYPE_ROOT_PORT;
+	cap.link_capabilities = 0x411;		/* gen1, x1 */
+	cap.link_status = 0x11;			/* gen1, x1 */
+
+	return (pci_emul_add_capability(pi, (u_char *)&cap, sizeof(cap)));
+}
+
+/*
+ * Route a config space write to the handler of the capability that
+ * contains it.  Writes to capabilities without a handler are dropped.
+ *
+ * This function assumes that 'offset' is in the capabilities region of the
+ * config space.
+ */
+static void
+pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val)
+{
+	int capid;
+	uint8_t capoff, nextoff;
+
+	/* Do not allow un-aligned writes */
+	if ((offset & (bytes - 1)) != 0)
+		return;
+
+	/*
+	 * Find the capability that we want to update: walk the list from
+	 * the first capability and stop either at the entry whose range
+	 * [capoff, nextoff) contains 'offset' or at the last entry, whose
+	 * next pointer is zero.
+	 */
+	capoff = CAP_START_OFFSET;
+	while (1) {
+		nextoff = pci_get_cfgdata8(pi, capoff + 1);
+		if (nextoff == 0)
+			break;
+		if (offset >= capoff && offset < nextoff)
+			break;
+
+		capoff = nextoff;
+	}
+	assert(offset >= capoff);
+
+	/*
+	 * Capability ID and Next Capability Pointer are readonly.
+	 * However, some o/s's do 4-byte writes that include these.
+	 * For this case, trim the write back to 2 bytes and adjust
+	 * the data.
+	 */
+	if (offset == capoff || offset == capoff + 1) {
+		if (offset == capoff && bytes == 4) {
+			bytes = 2;
+			offset += 2;
+			val >>= 16;
+		} else
+			return;
+	}
+
+	/* Dispatch on the capability ID stored in the first byte. */
+	capid = pci_get_cfgdata8(pi, capoff);
+	switch (capid) {
+	case PCIY_MSI:
+		msicap_cfgwrite(pi, capoff, offset, bytes, val);
+		break;
+	case PCIY_MSIX:
+		msixcap_cfgwrite(pi, capoff, offset, bytes, val);
+		break;
+	case PCIY_EXPRESS:
+		pciecap_cfgwrite(pi, capoff, offset, bytes, val);
+		break;
+	default:
+		/* No handler: the write is ignored. */
+		break;
+	}
+}
+
+/*
+ * Return 1 if 'offset' falls inside the device's capability region
+ * (CAP_START_OFFSET through pi_capend, inclusive) and the device has a
+ * capability list at all; return 0 otherwise.
+ */
+static int
+pci_emul_iscap(struct pci_devinst *pi, int offset)
+{
+
+	if ((pci_get_cfgdata16(pi, PCIR_STATUS) & PCIM_STATUS_CAPPRESENT) == 0)
+		return (0);
+
+	return (offset >= CAP_START_OFFSET && offset <= pi->pi_capend);
+}
+
+/*
+ * Memory handler for addresses not claimed by anything else: reads return
+ * all ones and writes are discarded.  Always returns 0.
+ */
+static int
+pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+			  int size, uint64_t *val, void *arg1, long arg2)
+{
+
+	/*
+	 * Only reads need any work.  The mem read code takes care of
+	 * truncating the value to the access size.
+	 */
+	if (dir == MEM_F_READ)
+		*val = ~(uint64_t)0;
+
+	return (0);
+}
+
+/*
+ * Memory handler for the PCI extended config window.
+ *
+ * The bus/slot/function and register offset are decoded from the address
+ * and the access is forwarded to pci_cfgrw().  pci_cfgrw() works on a
+ * 32-bit value, so a local 32-bit temporary is used rather than aliasing
+ * the 64-bit '*val' through a 'uint32_t *'.  On reads the upper 32 bits of
+ * '*val' are all ones.  Always returns 0.
+ */
+static int
+pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+    int bytes, uint64_t *val, void *arg1, long arg2)
+{
+	int bus, slot, func, coff, in;
+	uint32_t data;
+
+	coff = addr & 0xfff;
+	func = (addr >> 12) & 0x7;
+	slot = (addr >> 15) & 0x1f;
+	bus = (addr >> 20) & 0xff;
+	in = (dir == MEM_F_READ);
+
+	/* Reads default to all ones; writes pass the low 32 bits. */
+	data = in ? 0xffffffff : (uint32_t)*val;
+	pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, &data);
+	if (in)
+		*val = (~(uint64_t)0 << 32) | data;
+	return (0);
+}
+
+/*
+ * Return the guest physical base address of the PCI extended config
+ * window (PCI_EMUL_ECFG_BASE).
+ */
+uint64_t
+pci_ecfg_base(void)
+{
+
+	return (PCI_EMUL_ECFG_BASE);
+}
+
+/* Granularity used when rounding up the per-bus I/O and memory windows. */
+#define	BUSIO_ROUNDUP		32
+#define	BUSMEM_ROUNDUP		(1024 * 1024)
+
+/*
+ * Instantiate every configured PCI function and set up the PCI related
+ * address space.
+ *
+ * For each configured bus the emulation of every function with a name is
+ * looked up and initialized, and the I/O and memory ranges used by the bus
+ * are recorded.  Afterwards INTx interrupts are routed, and the fallback
+ * handler for unclaimed memory below 4GB and the handler for the extended
+ * config window are registered.
+ *
+ * Returns 0 on success or the error of the first device that failed to
+ * initialize.
+ */
+int
+init_pci(struct vmctx *ctx)
+{
+	struct mem_range mr;
+	struct pci_devemu *pde;
+	struct businfo *bi;
+	struct slotinfo *si;
+	struct funcinfo *fi;
+	size_t lowmem;
+	int bus, slot, func;
+	int error;
+
+	/* Starting points of the I/O, 32-bit and 64-bit BAR allocators. */
+	pci_emul_iobase = PCI_EMUL_IOBASE;
+	pci_emul_membase32 = vm_get_lowmem_limit(ctx);
+	pci_emul_membase64 = PCI_EMUL_MEMBASE64;
+
+	for (bus = 0; bus < MAXBUSES; bus++) {
+		if ((bi = pci_businfo[bus]) == NULL)
+			continue;
+		/*
+		 * Keep track of the i/o and memory resources allocated to
+		 * this bus.
+		 */
+		bi->iobase = pci_emul_iobase;
+		bi->membase32 = pci_emul_membase32;
+		bi->membase64 = pci_emul_membase64;
+
+		for (slot = 0; slot < MAXSLOTS; slot++) {
+			si = &bi->slotinfo[slot];
+			for (func = 0; func < MAXFUNCS; func++) {
+				fi = &si->si_funcs[func];
+				/* Skip functions that were not configured. */
+				if (fi->fi_name == NULL)
+					continue;
+				pde = pci_emul_finddev(fi->fi_name);
+				assert(pde != NULL);
+				error = pci_emul_init(ctx, pde, bus, slot,
+				    func, fi);
+				if (error)
+					return (error);
+			}
+		}
+
+		/*
+		 * Add some slop to the I/O and memory resources decoded by
+		 * this bus to give a guest some flexibility if it wants to
+		 * reprogram the BARs.
+		 */
+		pci_emul_iobase += BUSIO_ROUNDUP;
+		pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
+		bi->iolimit = pci_emul_iobase;
+
+		pci_emul_membase32 += BUSMEM_ROUNDUP;
+		pci_emul_membase32 = roundup2(pci_emul_membase32,
+		    BUSMEM_ROUNDUP);
+		bi->memlimit32 = pci_emul_membase32;
+
+		pci_emul_membase64 += BUSMEM_ROUNDUP;
+		pci_emul_membase64 = roundup2(pci_emul_membase64,
+		    BUSMEM_ROUNDUP);
+		bi->memlimit64 = pci_emul_membase64;
+	}
+
+	/*
+	 * PCI backends are initialized before routing INTx interrupts
+	 * so that LPC devices are able to reserve ISA IRQs before
+	 * routing PIRQ pins.
+	 */
+	for (bus = 0; bus < MAXBUSES; bus++) {
+		if ((bi = pci_businfo[bus]) == NULL)
+			continue;
+
+		for (slot = 0; slot < MAXSLOTS; slot++) {
+			si = &bi->slotinfo[slot];
+			for (func = 0; func < MAXFUNCS; func++) {
+				fi = &si->si_funcs[func];
+				if (fi->fi_devi == NULL)
+					continue;
+				pci_lintr_route(fi->fi_devi);
+			}
+		}
+	}
+	lpc_pirq_routed();
+
+	/*
+	 * The guest physical memory map looks like the following:
+	 * [0,		    lowmem)		guest system memory
+	 * [lowmem,	    lowmem_limit)	memory hole (may be absent)
+	 * [lowmem_limit,   0xE0000000)		PCI hole (32-bit BAR allocation)
+	 * [0xE0000000,	    0xF0000000)		PCI extended config window
+	 * [0xF0000000,	    4GB)		LAPIC, IOAPIC, HPET, firmware
+	 * [4GB,	    4GB + highmem)
+	 */
+
+	/*
+	 * Accesses to memory addresses that are not allocated to system
+	 * memory or PCI devices return 0xff's.
+	 */
+	lowmem = vm_get_lowmem_size(ctx);
+	bzero(&mr, sizeof(struct mem_range));
+	mr.name = "PCI hole";
+	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
+	mr.base = lowmem;
+	mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem;
+	mr.handler = pci_emul_fallback_handler;
+	error = register_mem_fallback(&mr);
+	assert(error == 0);
+
+	/* PCI extended config space */
+	bzero(&mr, sizeof(struct mem_range));
+	mr.name = "PCI ECFG";
+	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
+	mr.base = PCI_EMUL_ECFG_BASE;
+	mr.size = PCI_EMUL_ECFG_SIZE;
+	mr.handler = pci_emul_ecfg_handler;
+	error = register_mem(&mr);
+	assert(error == 0);
+
+	return (0);
+}
+
+/*
+ * pci_walk_lintr() callback: emit one interrupt routing package that maps
+ * the given slot/pin to an I/O APIC interrupt number.
+ */
+static void
+pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
+    void *arg)
+{
+	int address, intpin;
+
+	address = slot << 16 | 0xffff;
+	intpin = pin - 1;		/* 'pin' is 1-based */
+
+	dsdt_line("  Package ()");
+	dsdt_line("  {");
+	dsdt_line("    0x%X,", address);
+	dsdt_line("    0x%02X,", intpin);
+	dsdt_line("    Zero,");
+	dsdt_line("    0x%X", ioapic_irq);
+	dsdt_line("  },");
+}
+
+/*
+ * pci_walk_lintr() callback: emit one interrupt routing package that maps
+ * the given slot/pin to the named PIRQ link.  Nothing is emitted when
+ * lpc_pirq_name() has no name for the PIRQ pin.
+ */
+static void
+pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
+    void *arg)
+{
+	char *link;
+	int address, intpin;
+
+	link = lpc_pirq_name(pirq_pin);
+	if (link == NULL)
+		return;
+
+	address = slot << 16 | 0xffff;
+	intpin = pin - 1;		/* 'pin' is 1-based */
+
+	dsdt_line("  Package ()");
+	dsdt_line("  {");
+	dsdt_line("    0x%X,", address);
+	dsdt_line("    0x%02X,", intpin);
+	dsdt_line("    %s,", link);
+	dsdt_line("    0x00");
+	dsdt_line("  },");
+
+	/* The name is released here once it has been emitted. */
+	free(link);
+}
+
+/*
+ * A bhyve virtual machine has a flat PCI hierarchy with a root port
+ * corresponding to each PCI bus.
+ *
+ * Emit the DSDT device entry for 'bus': its bus number, the I/O and memory
+ * windows it decodes, the interrupt routing tables (when any INTx pin is in
+ * use) and whatever each device on the bus adds through pe_write_dsdt.
+ */
+static void
+pci_bus_write_dsdt(int bus)
+{
+	struct businfo *bi;
+	struct slotinfo *si;
+	struct pci_devinst *pi;
+	int count, func, slot;
+
+	/*
+	 * If there are no devices on this 'bus' then just return.
+	 */
+	if ((bi = pci_businfo[bus]) == NULL) {
+		/*
+		 * Bus 0 is special because it decodes the I/O ports used
+		 * for PCI config space access even if there are no devices
+		 * on it.
+		 */
+		if (bus != 0)
+			return;
+	}
+
+	dsdt_line("  Device (PC%02X)", bus);
+	dsdt_line("  {");
+	dsdt_line("    Name (_HID, EisaId (\"PNP0A03\"))");
+	dsdt_line("    Name (_ADR, Zero)");
+
+	dsdt_line("    Method (_BBN, 0, NotSerialized)");
+	dsdt_line("    {");
+	dsdt_line("        Return (0x%08X)", bus);
+	dsdt_line("    }");
+	dsdt_line("    Name (_CRS, ResourceTemplate ()");
+	dsdt_line("    {");
+	dsdt_line("      WordBusNumber (ResourceProducer, MinFixed, "
+	    "MaxFixed, PosDecode,");
+	dsdt_line("        0x0000,             // Granularity");
+	dsdt_line("        0x%04X,             // Range Minimum", bus);
+	dsdt_line("        0x%04X,             // Range Maximum", bus);
+	dsdt_line("        0x0000,             // Translation Offset");
+	dsdt_line("        0x0001,             // Length");
+	dsdt_line("        ,, )");
+
+	if (bus == 0) {
+		/* Config address/data ports. */
+		dsdt_indent(3);
+		dsdt_fixed_ioport(0xCF8, 8);
+		dsdt_unindent(3);
+
+		/* I/O ports below the config ports. */
+		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
+		    "PosDecode, EntireRange,");
+		dsdt_line("        0x0000,             // Granularity");
+		dsdt_line("        0x0000,             // Range Minimum");
+		dsdt_line("        0x0CF7,             // Range Maximum");
+		dsdt_line("        0x0000,             // Translation Offset");
+		dsdt_line("        0x0CF8,             // Length");
+		dsdt_line("        ,, , TypeStatic)");
+
+		/* I/O ports from 0x0D00 up to the start of BAR allocation. */
+		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
+		    "PosDecode, EntireRange,");
+		dsdt_line("        0x0000,             // Granularity");
+		dsdt_line("        0x0D00,             // Range Minimum");
+		dsdt_line("        0x%04X,             // Range Maximum",
+		    PCI_EMUL_IOBASE - 1);
+		dsdt_line("        0x0000,             // Translation Offset");
+		dsdt_line("        0x%04X,             // Length",
+		    PCI_EMUL_IOBASE - 0x0D00);
+		dsdt_line("        ,, , TypeStatic)");
+
+		/* Bus 0 without devices: close the template and the device. */
+		if (bi == NULL) {
+			dsdt_line("    })");
+			goto done;
+		}
+	}
+	assert(bi != NULL);
+
+	/* i/o window */
+	dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
+	    "PosDecode, EntireRange,");
+	dsdt_line("        0x0000,             // Granularity");
+	dsdt_line("        0x%04X,             // Range Minimum", bi->iobase);
+	dsdt_line("        0x%04X,             // Range Maximum",
+	    bi->iolimit - 1);
+	dsdt_line("        0x0000,             // Translation Offset");
+	dsdt_line("        0x%04X,             // Length",
+	    bi->iolimit - bi->iobase);
+	dsdt_line("        ,, , TypeStatic)");
+
+	/* mmio window (32-bit) */
+	dsdt_line("      DWordMemory (ResourceProducer, PosDecode, "
+	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
+	dsdt_line("        0x00000000,         // Granularity");
+	dsdt_line("        0x%08X,         // Range Minimum\n", bi->membase32);
+	dsdt_line("        0x%08X,         // Range Maximum\n",
+	    bi->memlimit32 - 1);
+	dsdt_line("        0x00000000,         // Translation Offset");
+	dsdt_line("        0x%08X,         // Length\n",
+	    bi->memlimit32 - bi->membase32);
+	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");
+
+	/* mmio window (64-bit) */
+	dsdt_line("      QWordMemory (ResourceProducer, PosDecode, "
+	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
+	dsdt_line("        0x0000000000000000, // Granularity");
+	dsdt_line("        0x%016lX, // Range Minimum\n", bi->membase64);
+	dsdt_line("        0x%016lX, // Range Maximum\n",
+	    bi->memlimit64 - 1);
+	dsdt_line("        0x0000000000000000, // Translation Offset");
+	dsdt_line("        0x%016lX, // Length\n",
+	    bi->memlimit64 - bi->membase64);
+	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");
+	dsdt_line("    })");
+
+	/*
+	 * Interrupt routing: one table using PIRQ links (PPRT) and one
+	 * using I/O APIC interrupts (APRT); _PRT returns one of them
+	 * depending on PICM.
+	 */
+	count = pci_count_lintr(bus);
+	if (count != 0) {
+		dsdt_indent(2);
+		dsdt_line("Name (PPRT, Package ()");
+		dsdt_line("{");
+		pci_walk_lintr(bus, pci_pirq_prt_entry, NULL);
+ 		dsdt_line("})");
+		dsdt_line("Name (APRT, Package ()");
+		dsdt_line("{");
+		pci_walk_lintr(bus, pci_apic_prt_entry, NULL);
+ 		dsdt_line("})");
+		dsdt_line("Method (_PRT, 0, NotSerialized)");
+		dsdt_line("{");
+		dsdt_line("  If (PICM)");
+		dsdt_line("  {");
+		dsdt_line("    Return (APRT)");
+		dsdt_line("  }");
+		dsdt_line("  Else");
+		dsdt_line("  {");
+		dsdt_line("    Return (PPRT)");
+		dsdt_line("  }");
+		dsdt_line("}");
+		dsdt_unindent(2);
+	}
+
+	/* Let every device that provides pe_write_dsdt add its own entries. */
+	dsdt_indent(2);
+	for (slot = 0; slot < MAXSLOTS; slot++) {
+		si = &bi->slotinfo[slot];
+		for (func = 0; func < MAXFUNCS; func++) {
+			pi = si->si_funcs[func].fi_devi;
+			if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
+				pi->pi_d->pe_write_dsdt(pi);
+		}
+	}
+	dsdt_unindent(2);
+done:
+	dsdt_line("  }");
+}
+
+/*
+ * Emit the PCI part of the DSDT: the PICM variable with its _PIC method,
+ * followed by the _SB scope holding one entry per PCI bus.
+ */
+void
+pci_write_dsdt(void)
+{
+	int bus;
+
+	dsdt_indent(1);
+
+	/* _PIC stores its argument in PICM. */
+	dsdt_line("Name (PICM, 0x00)");
+	dsdt_line("Method (_PIC, 1, NotSerialized)");
+	dsdt_line("{");
+	dsdt_line("  Store (Arg0, PICM)");
+	dsdt_line("}");
+	dsdt_line("");
+
+	dsdt_line("Scope (_SB)");
+	dsdt_line("{");
+	bus = 0;
+	while (bus < MAXBUSES) {
+		pci_bus_write_dsdt(bus);
+		bus++;
+	}
+	dsdt_line("}");
+
+	dsdt_unindent(1);
+}
+
+/*
+ * Return 1 if 'bus' has a businfo entry, 0 otherwise.
+ * 'bus' must be in the range [0, MAXBUSES).
+ */
+int
+pci_bus_configured(int bus)
+{
+	assert(bus >= 0 && bus < MAXBUSES);
+
+	if (pci_businfo[bus] == NULL)
+		return (0);
+	return (1);
+}
+
+/*
+ * Return the cached MSI enable state of the device (non-zero when MSI is
+ * enabled).
+ */
+int
+pci_msi_enabled(struct pci_devinst *pi)
+{
+	return (pi->pi_msi.enabled);
+}
+
+/*
+ * Return the number of MSI messages currently configured for the device,
+ * or 0 when MSI is not enabled.
+ */
+int
+pci_msi_maxmsgnum(struct pci_devinst *pi)
+{
+	return (pi->pi_msi.enabled ? pi->pi_msi.maxmsgnum : 0);
+}
+
+/*
+ * Return 1 if MSI-X is enabled and MSI is not; return 0 otherwise.
+ */
+int
+pci_msix_enabled(struct pci_devinst *pi)
+{
+
+	if (pi->pi_msi.enabled)
+		return (0);
+	return (pi->pi_msix.enabled != 0);
+}
+
+/*
+ * Deliver MSI-X vector 'index' to the guest.
+ *
+ * Nothing is delivered when MSI-X is not enabled, when the function mask is
+ * set, when 'index' is outside the MSI-X table (negative, or not below the
+ * table count), or when the vector itself is masked.
+ */
+void
+pci_generate_msix(struct pci_devinst *pi, int index)
+{
+	struct msix_table_entry *mte;
+
+	if (!pci_msix_enabled(pi))
+		return;
+
+	if (pi->pi_msix.function_mask)
+		return;
+
+	/* 'index' is signed: reject both ends to stay inside the table. */
+	if (index < 0 || index >= pi->pi_msix.table_count)
+		return;
+
+	mte = &pi->pi_msix.table[index];
+	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
+		/* XXX Set PBA bit if interrupt is disabled */
+		vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
+	}
+}
+
+/*
+ * Deliver MSI message 'index' to the guest.  Nothing is delivered when
+ * MSI is not enabled or 'index' is not below the configured message
+ * count.  The message data sent is the programmed data plus 'index'.
+ */
+void
+pci_generate_msi(struct pci_devinst *pi, int index)
+{
+
+	if (!pci_msi_enabled(pi))
+		return;
+	if (index >= pci_msi_maxmsgnum(pi))
+		return;
+
+	vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
+		     pi->pi_msi.msg_data + index);
+}
+
+/*
+ * Return true if the device may currently assert its legacy interrupt:
+ * neither MSI nor MSI-X is enabled and INTx is not disabled in the
+ * command register.
+ */
+static bool
+pci_lintr_permitted(struct pci_devinst *pi)
+{
+	uint16_t command;
+
+	command = pci_get_cfgdata16(pi, PCIR_COMMAND);
+	if (pi->pi_msi.enabled || pi->pi_msix.enabled)
+		return (false);
+
+	return ((command & PCIM_CMD_INTxDIS) == 0);
+}
+
+/*
+ * Request a legacy interrupt pin for the device.
+ *
+ * The least used of the four pins of the device's slot is chosen (the
+ * lowest numbered one on a tie), its use count is incremented, and the
+ * 1-based pin number is stored in the device and in the interrupt pin
+ * register.  IRQs are assigned to the pin later, when interrupts are
+ * routed.
+ */
+void
+pci_lintr_request(struct pci_devinst *pi)
+{
+	struct businfo *bi;
+	struct slotinfo *si;
+	int candidate, chosen;
+
+	bi = pci_businfo[pi->pi_bus];
+	assert(bi != NULL);
+
+	si = &bi->slotinfo[pi->pi_slot];
+
+	/* Strict comparison keeps the earliest pin among equals. */
+	chosen = 0;
+	for (candidate = 1; candidate < 4; candidate++) {
+		if (si->si_intpins[candidate].ii_count <
+		    si->si_intpins[chosen].ii_count)
+			chosen = candidate;
+	}
+
+	si->si_intpins[chosen].ii_count++;
+	pi->pi_lintr.pin = chosen + 1;
+	pci_set_cfgdata8(pi, PCIR_INTPIN, chosen + 1);
+}
+
+/*
+ * Assign an I/O APIC interrupt and a PIRQ pin to the device's legacy
+ * interrupt pin.  Both are stored per slot/pin and only allocated the
+ * first time they are needed.  Devices without a pin are left alone.
+ */
+static void
+pci_lintr_route(struct pci_devinst *pi)
+{
+	struct businfo *bi;
+	struct intxinfo *intx;
+
+	if (pi->pi_lintr.pin == 0)
+		return;
+
+	bi = pci_businfo[pi->pi_bus];
+	assert(bi != NULL);
+
+	/* 'pin' is 1-based while the per-slot array is 0-based. */
+	intx = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1];
+
+	/* Allocate an I/O APIC pin unless one was assigned earlier. */
+	if (intx->ii_ioapic_irq == 0)
+		intx->ii_ioapic_irq = ioapic_pci_alloc_irq();
+	assert(intx->ii_ioapic_irq > 0);
+
+	/* Likewise for the PIRQ pin. */
+	if (intx->ii_pirq_pin == 0)
+		intx->ii_pirq_pin = pirq_alloc_pin(pi->pi_vmctx);
+	assert(intx->ii_pirq_pin > 0);
+
+	pi->pi_lintr.ioapic_irq = intx->ii_ioapic_irq;
+	pi->pi_lintr.pirq_pin = intx->ii_pirq_pin;
+	pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(intx->ii_pirq_pin));
+}
+
+/*
+ * Assert the device's legacy interrupt.
+ *
+ * Only an idle interrupt changes state: it is asserted right away when
+ * legacy interrupts are permitted and recorded as pending otherwise.
+ */
+void
+pci_lintr_assert(struct pci_devinst *pi)
+{
+
+	assert(pi->pi_lintr.pin > 0);
+
+	pthread_mutex_lock(&pi->pi_lintr.lock);
+	if (pi->pi_lintr.state == IDLE) {
+		if (!pci_lintr_permitted(pi)) {
+			pi->pi_lintr.state = PENDING;
+		} else {
+			pi->pi_lintr.state = ASSERTED;
+			pci_irq_assert(pi);
+		}
+	}
+	pthread_mutex_unlock(&pi->pi_lintr.lock);
+}
+
+/*
+ * Deassert the device's legacy interrupt.  An asserted interrupt is
+ * deasserted and a pending one is dropped; both end up idle.
+ */
+void
+pci_lintr_deassert(struct pci_devinst *pi)
+{
+
+	assert(pi->pi_lintr.pin > 0);
+
+	pthread_mutex_lock(&pi->pi_lintr.lock);
+	switch (pi->pi_lintr.state) {
+	case ASSERTED:
+		pi->pi_lintr.state = IDLE;
+		pci_irq_deassert(pi);
+		break;
+	case PENDING:
+		pi->pi_lintr.state = IDLE;
+		break;
+	default:
+		break;
+	}
+	pthread_mutex_unlock(&pi->pi_lintr.lock);
+}
+
+/*
+ * Re-evaluate the legacy interrupt after a change that may affect whether
+ * it is permitted: an asserted interrupt that is no longer permitted is
+ * deasserted and kept pending, and a pending interrupt that has become
+ * permitted is asserted.
+ */
+static void
+pci_lintr_update(struct pci_devinst *pi)
+{
+	bool permitted;
+
+	pthread_mutex_lock(&pi->pi_lintr.lock);
+	permitted = pci_lintr_permitted(pi);
+	if (pi->pi_lintr.state == ASSERTED) {
+		if (!permitted) {
+			pci_irq_deassert(pi);
+			pi->pi_lintr.state = PENDING;
+		}
+	} else if (pi->pi_lintr.state == PENDING) {
+		if (permitted) {
+			pi->pi_lintr.state = ASSERTED;
+			pci_irq_assert(pi);
+		}
+	}
+	pthread_mutex_unlock(&pi->pi_lintr.lock);
+}
+
+/*
+ * Return the number of slot/pin combinations on 'bus' that have at least
+ * one user.  An unconfigured bus yields 0.
+ */
+int
+pci_count_lintr(int bus)
+{
+	struct businfo *bi;
+	int slot, pin, total;
+
+	bi = pci_businfo[bus];
+	if (bi == NULL)
+		return (0);
+
+	total = 0;
+	for (slot = 0; slot < MAXSLOTS; slot++) {
+		for (pin = 0; pin < 4; pin++) {
+			if (bi->slotinfo[slot].si_intpins[pin].ii_count != 0)
+				total++;
+		}
+	}
+	return (total);
+}
+
+/*
+ * Invoke 'cb' for every slot/pin combination on 'bus' that has at least
+ * one user.  The callback receives the bus, the slot, the 1-based pin
+ * number, the PIRQ pin, the I/O APIC interrupt and 'arg'.
+ */
+void
+pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
+{
+	struct businfo *bi;
+	struct intxinfo *intx;
+	int slot, pin;
+
+	bi = pci_businfo[bus];
+	if (bi == NULL)
+		return;
+
+	for (slot = 0; slot < MAXSLOTS; slot++) {
+		for (pin = 0; pin < 4; pin++) {
+			intx = &bi->slotinfo[slot].si_intpins[pin];
+			if (intx->ii_count == 0)
+				continue;
+			cb(bus, slot, pin + 1, intx->ii_pirq_pin,
+			    intx->ii_ioapic_irq, arg);
+		}
+	}
+}
+
+/*
+ * Return 1 if more than one function is being emulated in 'slot' of
+ * 'bus', i.e. the slot holds a multi-function device.
+ * Return 0 otherwise, including when the bus is not configured.
+ */
+static int
+pci_emul_is_mfdev(int bus, int slot)
+{
+	struct businfo *bi;
+	struct slotinfo *si;
+	int func, present;
+
+	bi = pci_businfo[bus];
+	if (bi == NULL)
+		return (0);
+
+	si = &bi->slotinfo[slot];
+	present = 0;
+	for (func = 0; func < MAXFUNCS; func++) {
+		if (si->si_funcs[func].fi_devi != NULL)
+			present++;
+	}
+	return (present > 1);
+}
+
+/*
+ * Ensure that the PCIM_MFDEV bit is properly set (or cleared) in a config
+ * read result, depending on whether or not a multi-function device is
+ * being emulated in the pci 'slot'.  Only reads that cover the header type
+ * register are touched.
+ */
+static void
+pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
+{
+	uint32_t mfbit;
+
+	/* Nothing to do unless the access includes the header type. */
+	if (off > PCIR_HDRTYPE || off + bytes <= PCIR_HDRTYPE)
+		return;
+
+	/* Locate the bit within the value being returned. */
+	if (bytes == 1 || bytes == 2)
+		mfbit = PCIM_MFDEV;
+	else if (bytes == 4)
+		mfbit = PCIM_MFDEV << 16;
+	else
+		return;
+
+	if (pci_emul_is_mfdev(bus, slot))
+		*rv |= mfbit;
+	else
+		*rv &= ~mfbit;
+}
+
+/*
+ * Handle a config write of 'bytes' bytes at offset 'coff' that touches the
+ * command/status registers.
+ *
+ * Read-only bits keep their current value.  If the write changed the I/O
+ * or memory decoding enables, the affected BARs are registered or
+ * unregistered accordingly.  Finally the legacy interrupt state is
+ * re-evaluated.
+ */
+static void
+pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes)
+{
+	uint32_t before, after, delta, prev, romask;
+	int bar;
+
+	/* Remember the command register to detect changes afterwards. */
+	before = pci_get_cfgdata16(pi, PCIR_COMMAND);
+
+	/*
+	 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3.
+	 *
+	 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are
+	 * 'write 1 to clear'. However these bits are not set to '1' by
+	 * any device emulation so it is simpler to treat them as readonly.
+	 *
+	 * The mask is shifted to line up with the byte being written.
+	 */
+	romask = 0xFFFFF880 >> ((coff & 0x3) * 8);
+
+	prev = CFGREAD(pi, coff, bytes);
+	new = (new & ~romask) | (prev & romask);
+	CFGWRITE(pi, coff, new, bytes);
+
+	after = pci_get_cfgdata16(pi, PCIR_COMMAND);
+	delta = before ^ after;
+
+	/*
+	 * If the MMIO or I/O address space decoding has changed then
+	 * register/unregister all BARs that decode that address space.
+	 */
+	for (bar = 0; bar <= PCI_BARMAX; bar++) {
+		switch (pi->pi_bar[bar].type) {
+		case PCIBAR_NONE:
+		case PCIBAR_MEMHI64:
+			break;
+		case PCIBAR_IO:
+			if ((delta & PCIM_CMD_PORTEN) == 0)
+				break;
+			if (porten(pi))
+				register_bar(pi, bar);
+			else
+				unregister_bar(pi, bar);
+			break;
+		case PCIBAR_MEM32:
+		case PCIBAR_MEM64:
+			if ((delta & PCIM_CMD_MEMEN) == 0)
+				break;
+			if (memen(pi))
+				register_bar(pi, bar);
+			else
+				unregister_bar(pi, bar);
+			break;
+		default:
+			assert(0);
+		}
+	}
+
+	/*
+	 * If INTx has been unmasked and is pending, assert the
+	 * interrupt.
+	 */
+	pci_lintr_update(pi);
+}
+
+/*
+ * Perform a config space access of 'bytes' bytes at offset 'coff' of the
+ * function bus/slot/func.  'in' is non-zero for reads; '*eax' receives the
+ * value read or supplies the value to write.
+ *
+ * Reads return all ones when there is no such device or the access is
+ * malformed.  Writes are dispatched to BAR, capability or command/status
+ * handling as appropriate, after giving the device emulation a chance to
+ * handle the access itself.
+ */
+static void
+pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func,
+    int coff, int bytes, uint32_t *eax)
+{
+	struct businfo *bi;
+	struct slotinfo *si;
+	struct pci_devinst *pi;
+	struct pci_devemu *pe;
+	int idx, needcfg;
+	uint64_t addr, bar, mask;
+
+	if ((bi = pci_businfo[bus]) != NULL) {
+		si = &bi->slotinfo[slot];
+		pi = si->si_funcs[func].fi_devi;
+	} else
+		pi = NULL;
+
+	/*
+	 * Just return if there is no device at this slot:func or if the
+	 * guest is doing an un-aligned access.
+	 */
+	if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) ||
+	    (coff & (bytes - 1)) != 0) {
+		if (in)
+			*eax = 0xffffffff;
+		return;
+	}
+
+	/*
+	 * Ignore all writes beyond the standard config space and return all
+	 * ones on reads.
+	 */
+	if (coff >= PCI_REGMAX + 1) {
+		if (in) {
+			*eax = 0xffffffff;
+			/*
+			 * Extended capabilities begin at offset 256 in config
+			 * space. Absence of extended capabilities is signaled
+			 * with all 0s in the extended capability header at
+			 * offset 256.
+			 */
+			if (coff <= PCI_REGMAX + 4)
+				*eax = 0x00000000;
+		}
+		return;
+	}
+
+	pe = pi->pi_d;
+
+	/*
+	 * Config read
+	 */
+	if (in) {
+		/* Let the device emulation override the default handler */
+		if (pe->pe_cfgread != NULL) {
+			needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes,
+			    eax);
+		} else {
+			needcfg = 1;
+		}
+
+		/* Fall back to the stored config space contents. */
+		if (needcfg)
+			*eax = CFGREAD(pi, coff, bytes);
+
+		pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax);
+	} else {
+		/* Let the device emulation override the default handler */
+		if (pe->pe_cfgwrite != NULL &&
+		    (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0)
+			return;
+
+		/*
+		 * Special handling for write to BAR registers
+		 */
+		if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
+			/*
+			 * Ignore writes to BAR registers that are not
+			 * 4-byte aligned.
+			 */
+			if (bytes != 4 || (coff & 0x3) != 0)
+				return;
+			idx = (coff - PCIR_BAR(0)) / 4;
+			/* Address bits below the BAR size are not writable. */
+			mask = ~(pi->pi_bar[idx].size - 1);
+			switch (pi->pi_bar[idx].type) {
+			case PCIBAR_NONE:
+				pi->pi_bar[idx].addr = bar = 0;
+				break;
+			case PCIBAR_IO:
+				addr = *eax & mask;
+				addr &= 0xffff;
+				bar = addr | PCIM_BAR_IO_SPACE;
+				/*
+				 * Register the new BAR value for interception
+				 */
+				if (addr != pi->pi_bar[idx].addr) {
+					update_bar_address(pi, addr, idx,
+							   PCIBAR_IO);
+				}
+				break;
+			case PCIBAR_MEM32:
+				addr = bar = *eax & mask;
+				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
+				if (addr != pi->pi_bar[idx].addr) {
+					update_bar_address(pi, addr, idx,
+							   PCIBAR_MEM32);
+				}
+				break;
+			case PCIBAR_MEM64:
+				/* Low half of a 64-bit BAR. */
+				addr = bar = *eax & mask;
+				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
+				       PCIM_BAR_MEM_PREFETCH;
+				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
+					update_bar_address(pi, addr, idx,
+							   PCIBAR_MEM64);
+				}
+				break;
+			case PCIBAR_MEMHI64:
+				/*
+				 * High half of a 64-bit BAR: size and address
+				 * are kept in the preceding BAR entry.
+				 */
+				mask = ~(pi->pi_bar[idx - 1].size - 1);
+				addr = ((uint64_t)*eax << 32) & mask;
+				bar = addr >> 32;
+				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
+					update_bar_address(pi, addr, idx - 1,
+							   PCIBAR_MEMHI64);
+				}
+				break;
+			default:
+				assert(0);
+			}
+			pci_set_cfgdata32(pi, coff, bar);
+
+		} else if (pci_emul_iscap(pi, coff)) {
+			pci_emul_capwrite(pi, coff, bytes, *eax);
+		} else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) {
+			pci_emul_cmdsts_write(pi, coff, *eax, bytes);
+		} else {
+			CFGWRITE(pi, coff, *eax, bytes);
+		}
+	}
+}
+
+/* Decoded contents of the last value written to the config address port. */
+static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff;
+
+/*
+ * I/O handler for the PCI config address port.
+ *
+ * Only 4-byte accesses are honoured; narrower reads return all ones and
+ * narrower writes are ignored.  A write is decoded into the enable flag
+ * and the bus/slot/function/offset fields; a read reassembles them.
+ * Always returns 0.
+ */
+static int
+pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		 uint32_t *eax, void *arg)
+{
+	uint32_t reg;
+
+	if (bytes != 4) {
+		if (in)
+			*eax = (bytes == 2) ? 0xffff : 0xff;
+		return (0);
+	}
+
+	if (!in) {
+		reg = *eax;
+		cfgbus = (reg >> 16) & PCI_BUSMAX;
+		cfgslot = (reg >> 11) & PCI_SLOTMAX;
+		cfgfunc = (reg >> 8) & PCI_FUNCMAX;
+		cfgoff = reg & PCI_REGMAX;
+		cfgenable = (reg & CONF1_ENABLE) == CONF1_ENABLE;
+		return (0);
+	}
+
+	reg = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff;
+	if (cfgenable)
+		reg |= CONF1_ENABLE;
+	*eax = reg;
+
+	return (0);
+}
+INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);
+
+/*
+ * I/O handler for the PCI config data ports.
+ *
+ * When config access is enabled through the address port, the access is
+ * forwarded to pci_cfgrw() for the selected function; the register offset
+ * is the selected offset plus the distance of 'port' from the first data
+ * port.  Otherwise reads return all ones and writes are ignored.
+ * Always returns 0.
+ */
+static int
+pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		 uint32_t *eax, void *arg)
+{
+	int coff;
+
+	assert(bytes == 1 || bytes == 2 || bytes == 4);
+
+	if (!cfgenable) {
+		/* Ignore accesses to cfgdata if not enabled by cfgaddr */
+		if (in)
+			*eax = 0xffffffff;
+		return (0);
+	}
+
+	coff = cfgoff + (port - CONF1_DATA_PORT);
+	pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, eax);
+	return (0);
+}
+
+INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
+INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
+INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
+INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
+
+#define PCI_EMUL_TEST
+#ifdef PCI_EMUL_TEST
+/*
+ * Define a dummy test device
+ */
+#define DIOSZ	8		/* size of the I/O BAR (BAR 0) */
+#define DMEMSZ	4096		/* size of each memory BAR (BARs 1 and 2) */
+
+/* Per-instance state: plain backing store for the three BARs. */
+struct pci_emul_dsoftc {
+	uint8_t   ioregs[DIOSZ];
+	uint8_t	  memregs[2][DMEMSZ];
+};
+
+/* Number of MSI messages advertised by the dummy device. */
+#define	PCI_EMUL_MSI_MSGS	 4
+/* NOTE(review): not referenced by the code visible here -- confirm use. */
+#define	PCI_EMUL_MSIX_MSGS	16
+
+/*
+ * pe_init callback of the dummy test device.
+ *
+ * Allocates the per-instance state, fills in the device identification,
+ * adds an MSI capability and allocates one I/O BAR and two 32-bit memory
+ * BARs.  'opts' is unused.
+ *
+ * Returns 0 on success or -1 if the per-instance state cannot be
+ * allocated.
+ */
+static int
+pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	int error;
+	struct pci_emul_dsoftc *sc;
+
+	sc = calloc(1, sizeof(struct pci_emul_dsoftc));
+	if (sc == NULL)
+		return (-1);
+
+	pi->pi_arg = sc;
+
+	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
+	pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);
+
+	error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS);
+	assert(error == 0);
+
+	error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ);
+	assert(error == 0);
+
+	error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
+	assert(error == 0);
+
+	error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ);
+	assert(error == 0);
+
+	return (0);
+}
+
+/*
+ * pe_barwrite callback of the dummy test device.
+ *
+ * Writes to BAR 0 land in 'ioregs' and may additionally trigger MSI
+ * messages; writes to BARs 1 and 2 land in the matching 'memregs' bank.
+ * Accesses that run past the end of a BAR are reported and dropped.
+ */
+static void
+pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
+	      uint64_t offset, int size, uint64_t value)
+{
+	struct pci_emul_dsoftc *sc = pi->pi_arg;
+	int msg, bank;
+
+	switch (baridx) {
+	case 0:
+		if (offset + size > DIOSZ) {
+			printf("diow: iow too large, offset %ld size %d\n",
+			       offset, size);
+			return;
+		}
+
+		switch (size) {
+		case 1:
+			sc->ioregs[offset] = value & 0xff;
+			break;
+		case 2:
+			*(uint16_t *)&sc->ioregs[offset] = value & 0xffff;
+			break;
+		case 4:
+			*(uint32_t *)&sc->ioregs[offset] = value;
+			break;
+		default:
+			printf("diow: iow unknown size %d\n", size);
+			break;
+		}
+
+		/*
+		 * Special magic value to generate an interrupt
+		 */
+		if (offset == 4 && size == 4 && pci_msi_enabled(pi))
+			pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));
+
+		/* This value fires every configured MSI message. */
+		if (value == 0xabcdef) {
+			for (msg = 0; msg < pci_msi_maxmsgnum(pi); msg++)
+				pci_generate_msi(pi, msg);
+		}
+		break;
+
+	case 1:
+	case 2:
+		if (offset + size > DMEMSZ) {
+			printf("diow: memw too large, offset %ld size %d\n",
+			       offset, size);
+			return;
+		}
+
+		bank = baridx - 1;	/* 'memregs' index */
+
+		switch (size) {
+		case 1:
+			sc->memregs[bank][offset] = value;
+			break;
+		case 2:
+			*(uint16_t *)&sc->memregs[bank][offset] = value;
+			break;
+		case 4:
+			*(uint32_t *)&sc->memregs[bank][offset] = value;
+			break;
+		case 8:
+			*(uint64_t *)&sc->memregs[bank][offset] = value;
+			break;
+		default:
+			printf("diow: memw unknown size %d\n", size);
+			break;
+		}
+
+		/*
+		 * magic interrupt ??
+		 */
+		break;
+
+	default:
+		if (baridx > 2)
+			printf("diow: unknown bar idx %d\n", baridx);
+		break;
+	}
+}
+
+/*
+ * pe_barread callback of the dummy test device.
+ *
+ * Reads from BAR 0 come from 'ioregs' and reads from BARs 1 and 2 from the
+ * matching 'memregs' bank.  Accesses that run past the end of a BAR,
+ * accesses of an unsupported size and accesses to unknown BARs are
+ * reported and return 0.
+ *
+ * The result is accumulated in a 64-bit variable so that 8-byte reads are
+ * not truncated, and it starts out as 0 so that no path returns an
+ * uninitialized value.
+ */
+static uint64_t
+pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
+	      uint64_t offset, int size)
+{
+	struct pci_emul_dsoftc *sc = pi->pi_arg;
+	uint64_t value;
+	int i;
+
+	value = 0;
+
+	if (baridx == 0) {
+		if (offset + size > DIOSZ) {
+			printf("dior: ior too large, offset %ld size %d\n",
+			       offset, size);
+			return (0);
+		}
+
+		if (size == 1) {
+			value = sc->ioregs[offset];
+		} else if (size == 2) {
+			value = *(uint16_t *) &sc->ioregs[offset];
+		} else if (size == 4) {
+			value = *(uint32_t *) &sc->ioregs[offset];
+		} else {
+			printf("dior: ior unknown size %d\n", size);
+		}
+	}
+
+	if (baridx == 1 || baridx == 2) {
+		if (offset + size > DMEMSZ) {
+			printf("dior: memr too large, offset %ld size %d\n",
+			       offset, size);
+			return (0);
+		}
+
+		i = baridx - 1;		/* 'memregs' index */
+
+		if (size == 1) {
+			value = sc->memregs[i][offset];
+		} else if (size == 2) {
+			value = *(uint16_t *) &sc->memregs[i][offset];
+		} else if (size == 4) {
+			value = *(uint32_t *) &sc->memregs[i][offset];
+		} else if (size == 8) {
+			value = *(uint64_t *) &sc->memregs[i][offset];
+		} else {
+			printf("dior: ior unknown size %d\n", size);
+		}
+	}
+
+
+	if (baridx > 2) {
+		printf("dior: unknown bar idx %d\n", baridx);
+		return (0);
+	}
+
+	return (value);
+}
+
+/*
+ * Device emulation descriptor of the dummy test device, selected by the
+ * name "dummy" and added to the set of PCI device emulations.
+ */
+struct pci_devemu pci_dummy = {
+	.pe_emu = "dummy",
+	.pe_init = pci_emul_dinit,
+	.pe_barwrite = pci_emul_diow,
+	.pe_barread = pci_emul_dior
+};
+PCI_EMUL_SET(pci_dummy);
+
+#endif /* PCI_EMUL_TEST */


Property changes on: trunk/usr.sbin/bhyve/pci_emul.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_emul.h
===================================================================
--- trunk/usr.sbin/bhyve/pci_emul.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_emul.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,289 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/pci_emul.h 304421 2016-08-18 11:45:46Z mav $
+ */
+
+#ifndef _PCI_EMUL_H_
+#define _PCI_EMUL_H_
+
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/kernel.h>
+#include <sys/_pthreadtypes.h>
+
+#include <dev/pci/pcireg.h>
+
+#include <assert.h>
+
+#define	PCI_BARMAX	PCIR_MAX_BAR_0	/* BAR registers in a Type 0 header */
+
+struct vmctx;
+struct pci_devinst;
+struct memory_region;
+
+struct pci_devemu {
+	char      *pe_emu;		/* Name of device emulation */
+
+	/* instance creation */
+	int       (*pe_init)(struct vmctx *, struct pci_devinst *,
+			     char *opts);
+
+	/* ACPI DSDT enumeration */
+	void	(*pe_write_dsdt)(struct pci_devinst *);
+
+	/* config space read/write callbacks */
+	int	(*pe_cfgwrite)(struct vmctx *ctx, int vcpu,
+			       struct pci_devinst *pi, int offset,
+			       int bytes, uint32_t val);
+	int	(*pe_cfgread)(struct vmctx *ctx, int vcpu,
+			      struct pci_devinst *pi, int offset,
+			      int bytes, uint32_t *retval);
+
+	/* BAR read/write callbacks */
+	void      (*pe_barwrite)(struct vmctx *ctx, int vcpu,
+				 struct pci_devinst *pi, int baridx,
+				 uint64_t offset, int size, uint64_t value);
+	uint64_t  (*pe_barread)(struct vmctx *ctx, int vcpu,
+				struct pci_devinst *pi, int baridx,
+				uint64_t offset, int size);
+};
+#define PCI_EMUL_SET(x)   DATA_SET(pci_devemu_set, x);
+
+enum pcibar_type {
+	PCIBAR_NONE,
+	PCIBAR_IO,
+	PCIBAR_MEM32,
+	PCIBAR_MEM64,
+	PCIBAR_MEMHI64
+};
+
+struct pcibar {
+	enum pcibar_type	type;		/* io or memory */
+	uint64_t		size;
+	uint64_t		addr;
+};
+
+#define PI_NAMESZ	40
+
+struct msix_table_entry {
+	uint64_t	addr;
+	uint32_t	msg_data;
+	uint32_t	vector_control;
+} __packed;
+
+/* 
+ * In case the structure is modified to hold extra information, use a define
+ * for the size that should be emulated.
+ */
+#define	MSIX_TABLE_ENTRY_SIZE	16
+#define MAX_MSIX_TABLE_ENTRIES	2048
+#define	PBA_SIZE(msgnum)	(roundup2((msgnum), 64) / 8)
+
+enum lintr_stat {
+	IDLE,
+	ASSERTED,
+	PENDING
+};
+
+struct pci_devinst {
+	struct pci_devemu *pi_d;
+	struct vmctx *pi_vmctx;
+	uint8_t	  pi_bus, pi_slot, pi_func;
+	char	  pi_name[PI_NAMESZ];
+	int	  pi_bar_getsize;
+	int	  pi_prevcap;
+	int	  pi_capend;
+
+	struct {
+		int8_t    	pin;
+		enum lintr_stat	state;
+		int		pirq_pin;
+		int	  	ioapic_irq;
+		pthread_mutex_t	lock;
+	} pi_lintr;
+
+	struct {
+		int		enabled;
+		uint64_t	addr;
+		uint64_t	msg_data;
+		int		maxmsgnum;
+	} pi_msi;
+
+	struct {
+		int	enabled;
+		int	table_bar;
+		int	pba_bar;
+		uint32_t table_offset;
+		int	table_count;
+		uint32_t pba_offset;
+		int	pba_size;
+		int	function_mask; 	
+		struct msix_table_entry *table;	/* allocated at runtime */
+		void	*pba_page;
+		int	pba_page_offset;
+	} pi_msix;
+
+	void      *pi_arg;		/* devemu-private data */
+
+	u_char	  pi_cfgdata[PCI_REGMAX + 1];
+	struct pcibar pi_bar[PCI_BARMAX + 1];
+};
+
+struct msicap {
+	uint8_t		capid;
+	uint8_t		nextptr;
+	uint16_t	msgctrl;
+	uint32_t	addrlo;
+	uint32_t	addrhi;
+	uint16_t	msgdata;
+} __packed;
+static_assert(sizeof(struct msicap) == 14, "compile-time assertion failed");
+
+struct msixcap {
+	uint8_t		capid;
+	uint8_t		nextptr;
+	uint16_t	msgctrl;
+	uint32_t	table_info;	/* bar index and offset within it */
+	uint32_t	pba_info;	/* bar index and offset within it */
+} __packed;
+static_assert(sizeof(struct msixcap) == 12, "compile-time assertion failed");
+
+struct pciecap {
+	uint8_t		capid;
+	uint8_t		nextptr;
+	uint16_t	pcie_capabilities;
+
+	uint32_t	dev_capabilities;	/* all devices */
+	uint16_t	dev_control;
+	uint16_t	dev_status;
+
+	uint32_t	link_capabilities;	/* devices with links */
+	uint16_t	link_control;
+	uint16_t	link_status;
+
+	uint32_t	slot_capabilities;	/* ports with slots */
+	uint16_t	slot_control;
+	uint16_t	slot_status;
+
+	uint16_t	root_control;		/* root ports */
+	uint16_t	root_capabilities;
+	uint32_t	root_status;
+
+	uint32_t	dev_capabilities2;	/* all devices */
+	uint16_t	dev_control2;
+	uint16_t	dev_status2;
+
+	uint32_t	link_capabilities2;	/* devices with links */
+	uint16_t	link_control2;
+	uint16_t	link_status2;
+
+	uint32_t	slot_capabilities2;	/* ports with slots */
+	uint16_t	slot_control2;
+	uint16_t	slot_status2;
+} __packed;
+static_assert(sizeof(struct pciecap) == 60, "compile-time assertion failed");
+
+typedef void (*pci_lintr_cb)(int b, int s, int pin, int pirq_pin,
+    int ioapic_irq, void *arg);
+
+int	init_pci(struct vmctx *ctx);
+void	msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
+	    int bytes, uint32_t val);
+void	msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
+	    int bytes, uint32_t val);
+void	pci_callback(void);
+int	pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
+	    enum pcibar_type type, uint64_t size);
+int	pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx,
+	    uint64_t hostbase, enum pcibar_type type, uint64_t size);
+int	pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
+int	pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type);
+void	pci_generate_msi(struct pci_devinst *pi, int msgnum);
+void	pci_generate_msix(struct pci_devinst *pi, int msgnum);
+void	pci_lintr_assert(struct pci_devinst *pi);
+void	pci_lintr_deassert(struct pci_devinst *pi);
+void	pci_lintr_request(struct pci_devinst *pi);
+int	pci_msi_enabled(struct pci_devinst *pi);
+int	pci_msix_enabled(struct pci_devinst *pi);
+int	pci_msix_table_bar(struct pci_devinst *pi);
+int	pci_msix_pba_bar(struct pci_devinst *pi);
+int	pci_msi_maxmsgnum(struct pci_devinst *pi);
+int	pci_parse_slot(char *opt);
+void	pci_populate_msicap(struct msicap *cap, int msgs, int nextptr);
+int	pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum);
+int	pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
+			     uint64_t value);
+uint64_t pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size);
+int	pci_count_lintr(int bus);
+void	pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg);
+void	pci_write_dsdt(void);
+uint64_t pci_ecfg_base(void);
+int	pci_bus_configured(int bus);
+
+static __inline void
+pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val)
+{
+	assert(offset >= 0 && offset <= PCI_REGMAX);	/* within pi_cfgdata */
+	pi->pi_cfgdata[offset] = val;			/* one config byte */
+}
+
+static __inline void
+pci_set_cfgdata16(struct pci_devinst *pi, int offset, uint16_t val)
+{
+	assert(offset >= 0 && offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
+	*(uint16_t *)(pi->pi_cfgdata + offset) = val;	/* host byte order */
+}
+
+static __inline void
+pci_set_cfgdata32(struct pci_devinst *pi, int offset, uint32_t val)
+{
+	assert(offset >= 0 && offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
+	*(uint32_t *)(pi->pi_cfgdata + offset) = val;	/* host byte order */
+}
+
+static __inline uint8_t
+pci_get_cfgdata8(struct pci_devinst *pi, int offset)
+{
+	assert(offset >= 0 && offset <= PCI_REGMAX);	/* within pi_cfgdata */
+	return (pi->pi_cfgdata[offset]);		/* one config byte */
+}
+
+static __inline uint16_t
+pci_get_cfgdata16(struct pci_devinst *pi, int offset)
+{
+	assert(offset >= 0 && offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
+	return (*(uint16_t *)(pi->pi_cfgdata + offset)); /* host order */
+}
+
+static __inline uint32_t
+pci_get_cfgdata32(struct pci_devinst *pi, int offset)
+{
+	assert(offset >= 0 && offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
+	return (*(uint32_t *)(pi->pi_cfgdata + offset)); /* host order */
+}
+
+#endif /* _PCI_EMUL_H_ */


Property changes on: trunk/usr.sbin/bhyve/pci_emul.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_hostbridge.c
===================================================================
--- trunk/usr.sbin/bhyve/pci_hostbridge.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_hostbridge.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,71 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/pci_hostbridge.c 284900 2015-06-28 03:22:26Z neel $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_hostbridge.c 284900 2015-06-28 03:22:26Z neel $");
+
+#include "pci_emul.h"
+
+/* pe_init handler of the generic "hostbridge" emulation. */
+static int
+pci_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+
+	/* config space */
+	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275);	/* NetApp */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1275);	/* NetApp */
+	pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
+	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_HOST);
+
+	/* Hand a failure to add the PCIe capability back to the caller. */
+	return (pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_PORT));
+}
+
+static int
+pci_amd_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	/* Generic host bridge setup, then override the IDs with AMD's. */
+	int error = pci_hostbridge_init(ctx, pi, opts);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1022);	/* AMD */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x7432);	/* made up */
+	return (error);		/* status of the generic setup */
+}
+
+struct pci_devemu pci_de_amd_hostbridge = {
+	.pe_emu = "amd_hostbridge",	/* host bridge reporting AMD IDs */
+	.pe_init = pci_amd_hostbridge_init,
+};
+PCI_EMUL_SET(pci_de_amd_hostbridge);
+
+struct pci_devemu pci_de_hostbridge = {
+	.pe_emu = "hostbridge",		/* host bridge reporting NetApp IDs */
+	.pe_init = pci_hostbridge_init,
+};
+PCI_EMUL_SET(pci_de_hostbridge);


Property changes on: trunk/usr.sbin/bhyve/pci_hostbridge.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_irq.c
===================================================================
--- trunk/usr.sbin/bhyve/pci_irq.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_irq.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,347 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Hudson River Trading LLC
+ * Written by: John H. Baldwin <jhb at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_irq.c 283927 2015-06-02 19:20:39Z jhb $");
+
+#include <sys/param.h>
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "inout.h"
+#include "pci_emul.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+
+/*
+ * Implement an 8 pin PCI interrupt router compatible with the router
+ * present on Intel's ICH10 chip.
+ */
+
+/* Fields in each PIRQ register. */
+#define	PIRQ_DIS	0x80
+#define	PIRQ_IRQ	0x0f
+
+/* Only IRQs 3-7, 9-12, and 14-15 are permitted. */
+#define	PERMITTED_IRQS	0xdef8
+#define	IRQ_PERMITTED(irq)	(((1U << (irq)) & PERMITTED_IRQS) != 0)
+
+/* irq_counts[] value that marks an IRQ as unavailable for routing. */
+#define	IRQ_DISABLED	0xff
+
+static struct pirq {
+	uint8_t	reg;		/* PIRQ_DIS and PIRQ_IRQ bits of the link */
+	int	use_count;	/* times returned by pirq_alloc_pin() */
+	int	active_count;	/* pci_irq_assert() minus deassert() calls */
+	pthread_mutex_t lock;	/* taken by pirq_write/pci_irq_(de)assert */
+} pirqs[8];
+
+static u_char irq_counts[16];
+static int pirq_cold = 1;
+
+/*
+ * Returns true if this pin is enabled with a valid IRQ.  Setting the
+ * register to a reserved IRQ causes interrupts to not be asserted as
+ * if the pin was disabled.
+ */
+static bool
+pirq_valid_irq(int reg)
+{
+	const int irq = reg & PIRQ_IRQ;
+
+	/* Usable only if the disable bit is clear and the IRQ is permitted. */
+	return ((reg & PIRQ_DIS) == 0 && IRQ_PERMITTED(irq));
+}
+
+uint8_t
+pirq_read(int pin)
+{
+	/* Return the raw PIRQ register; pins are 1-based, pirqs[] 0-based. */
+	assert(pin > 0 && pin <= nitems(pirqs));
+	return (pirqs[pin - 1].reg);
+}
+
+void
+pirq_write(struct vmctx *ctx, int pin, uint8_t val)
+{
+	const uint8_t newreg = val & (PIRQ_DIS | PIRQ_IRQ);	/* masked */
+	struct pirq *link;
+	assert(pin > 0 && pin <= nitems(pirqs));
+	link = &pirqs[pin - 1];
+	pthread_mutex_lock(&link->lock);
+	if (link->reg != newreg) {	/* reroute, moving an asserted line */
+		if (link->active_count != 0 && pirq_valid_irq(link->reg))
+			vm_isa_deassert_irq(ctx, link->reg & PIRQ_IRQ, -1);
+		link->reg = newreg;
+		if (link->active_count != 0 && pirq_valid_irq(newreg))
+			vm_isa_assert_irq(ctx, newreg & PIRQ_IRQ, -1);
+	}
+	pthread_mutex_unlock(&link->lock);
+}
+
+void
+pci_irq_reserve(int irq)
+{
+	/* Bar irq from PIRQ routing; must precede any pirq_alloc_pin(). */
+	assert(irq >= 0 && irq < nitems(irq_counts));
+	assert(pirq_cold);
+	assert(irq_counts[irq] == 0 || irq_counts[irq] == IRQ_DISABLED);
+	irq_counts[irq] = IRQ_DISABLED;
+}
+
+void
+pci_irq_use(int irq)
+{
+	/* Note one more user of irq; pirq_alloc_pin() favours lower counts. */
+	assert(irq >= 0 && irq < nitems(irq_counts));
+	assert(pirq_cold);
+	assert(irq_counts[irq] != IRQ_DISABLED);
+	irq_counts[irq]++;
+}
+
+void
+pci_irq_init(struct vmctx *ctx)
+{
+	struct pirq *link;
+	int irq;
+
+	/* Every link starts out disabled, unused and deasserted. */
+	for (link = pirqs; link < &pirqs[nitems(pirqs)]; link++) {
+		link->reg = PIRQ_DIS;
+		link->use_count = 0;
+		link->active_count = 0;
+		pthread_mutex_init(&link->lock, NULL);
+	}
+
+	/* Only the permitted IRQs take part in PIRQ routing. */
+	for (irq = 0; irq < nitems(irq_counts); irq++)
+		irq_counts[irq] = IRQ_PERMITTED(irq) ? 0 : IRQ_DISABLED;
+}
+
+void
+pci_irq_assert(struct pci_devinst *pi)
+{
+	struct pirq *link;
+	bool isa = false;	/* raised through the ISA IRQ path? */
+
+	if (pi->pi_lintr.pirq_pin > 0) {
+		assert(pi->pi_lintr.pirq_pin <= nitems(pirqs));
+		link = &pirqs[pi->pi_lintr.pirq_pin - 1];
+		pthread_mutex_lock(&link->lock);
+		link->active_count++;
+		isa = link->active_count == 1 && pirq_valid_irq(link->reg);
+		if (isa)
+			vm_isa_assert_irq(pi->pi_vmctx, link->reg & PIRQ_IRQ,
+			    pi->pi_lintr.ioapic_irq);
+		pthread_mutex_unlock(&link->lock);
+	}
+	if (!isa)	/* otherwise drive the I/O APIC pin directly */
+		vm_ioapic_assert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
+}
+
+void
+pci_irq_deassert(struct pci_devinst *pi)
+{
+	struct pirq *link;
+	bool isa = false;	/* lowered through the ISA IRQ path? */
+
+	if (pi->pi_lintr.pirq_pin > 0) {
+		assert(pi->pi_lintr.pirq_pin <= nitems(pirqs));
+		link = &pirqs[pi->pi_lintr.pirq_pin - 1];
+		pthread_mutex_lock(&link->lock);
+		link->active_count--;
+		isa = link->active_count == 0 && pirq_valid_irq(link->reg);
+		if (isa)
+			vm_isa_deassert_irq(pi->pi_vmctx, link->reg & PIRQ_IRQ,
+			    pi->pi_lintr.ioapic_irq);
+		pthread_mutex_unlock(&link->lock);
+	}
+	if (!isa)	/* otherwise drive the I/O APIC pin directly */
+		vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
+}
+
+int
+pirq_alloc_pin(struct vmctx *ctx)
+{
+	struct pirq *link;
+	int best_irq, fewest, irq, pin;
+
+	pirq_cold = 0;
+
+	/* Step 1: take the least-used link (lowest pin wins a tie). */
+	link = &pirqs[0];
+	for (pin = 1; pin < nitems(pirqs); pin++) {
+		if (pirqs[pin].use_count < link->use_count)
+			link = &pirqs[pin];
+	}
+	link->use_count++;
+
+	/*
+	 * Step 2: route a still-disabled link to the least-counted usable IRQ.
+	 */
+	if (link->reg == PIRQ_DIS) {
+		best_irq = -1;
+		fewest = 0;
+		for (irq = 0; irq < nitems(irq_counts); irq++) {
+			if (irq_counts[irq] == IRQ_DISABLED)
+				continue;
+			if (best_irq != -1 && irq_counts[irq] >= fewest)
+				continue;
+			best_irq = irq;
+			fewest = irq_counts[irq];
+		}
+		assert(best_irq >= 0);
+		irq_counts[best_irq]++;
+		link->reg = best_irq;
+		vm_isa_set_irq_trigger(ctx, best_irq, LEVEL_TRIGGER);
+	}
+
+	return ((int)(link - pirqs) + 1);
+}
+
+int
+pirq_irq(int pin)
+{
+	assert(pin > 0 && pin <= nitems(pirqs));	/* pins are 1-based */
+	return (pirqs[pin - 1].reg & PIRQ_IRQ);	/* IRQ bits only */
+}
+
+/* XXX: Generate $PIR table. */
+
+/* Emit the PIRV helper method and the LNKA-LNKH link devices via dsdt_line(). */
+static void
+pirq_dsdt(void)
+{
+	char *irq_prs, *old;
+	int irq, pin;
+
+	irq_prs = NULL;
+	for (irq = 0; irq < nitems(irq_counts); irq++) {
+		if (!IRQ_PERMITTED(irq))
+			continue;
+		/* Append to the comma-separated IRQ list used by _PRS. */
+		old = irq_prs;
+		if (asprintf(&irq_prs, "%s%s%d", old != NULL ? old : "",
+		    old != NULL ? "," : "", irq) < 0)
+			abort();	/* out of memory */
+		free(old);
+	}
+
+	/*
+	 * A helper method to validate a link register's value.  This
+	 * duplicates pirq_valid_irq().
+	 */
+	dsdt_line("");
+	dsdt_line("Method (PIRV, 1, NotSerialized)");
+	dsdt_line("{");
+	dsdt_line("  If (And (Arg0, 0x%02X))", PIRQ_DIS);
+	dsdt_line("  {");
+	dsdt_line("    Return (0x00)");
+	dsdt_line("  }");
+	dsdt_line("  And (Arg0, 0x%02X, Local0)", PIRQ_IRQ);
+	dsdt_line("  If (LLess (Local0, 0x03))");
+	dsdt_line("  {");
+	dsdt_line("    Return (0x00)");
+	dsdt_line("  }");
+	dsdt_line("  If (LEqual (Local0, 0x08))");
+	dsdt_line("  {");
+	dsdt_line("    Return (0x00)");
+	dsdt_line("  }");
+	dsdt_line("  If (LEqual (Local0, 0x0D))");
+	dsdt_line("  {");
+	dsdt_line("    Return (0x00)");
+	dsdt_line("  }");
+	dsdt_line("  Return (0x01)");
+	dsdt_line("}");
+
+	for (pin = 0; pin < nitems(pirqs); pin++) {
+		dsdt_line("");
+		dsdt_line("Device (LNK%c)", 'A' + pin);
+		dsdt_line("{");
+		dsdt_line("  Name (_HID, EisaId (\"PNP0C0F\"))");
+		dsdt_line("  Name (_UID, 0x%02X)", pin + 1);
+		dsdt_line("  Method (_STA, 0, NotSerialized)");
+		dsdt_line("  {");
+		dsdt_line("    If (PIRV (PIR%c))", 'A' + pin);
+		dsdt_line("    {");
+		dsdt_line("       Return (0x0B)");
+		dsdt_line("    }");
+		dsdt_line("    Else");
+		dsdt_line("    {");
+		dsdt_line("       Return (0x09)");
+		dsdt_line("    }");
+		dsdt_line("  }");
+		dsdt_line("  Name (_PRS, ResourceTemplate ()");
+		dsdt_line("  {");
+		dsdt_line("    IRQ (Level, ActiveLow, Shared, )");
+		dsdt_line("      {%s}", irq_prs);
+		dsdt_line("  })");
+		dsdt_line("  Name (CB%02X, ResourceTemplate ()", pin + 1);
+		dsdt_line("  {");
+		dsdt_line("    IRQ (Level, ActiveLow, Shared, )");
+		dsdt_line("      {}");
+		dsdt_line("  })");
+		dsdt_line("  CreateWordField (CB%02X, 0x01, CIR%c)",
+		    pin + 1, 'A' + pin);
+		dsdt_line("  Method (_CRS, 0, NotSerialized)");
+		dsdt_line("  {");
+		dsdt_line("    And (PIR%c, 0x%02X, Local0)", 'A' + pin,
+		    PIRQ_DIS | PIRQ_IRQ);
+		dsdt_line("    If (PIRV (Local0))");
+		dsdt_line("    {");
+		dsdt_line("      ShiftLeft (0x01, Local0, CIR%c)", 'A' + pin);
+		dsdt_line("    }");
+		dsdt_line("    Else");
+		dsdt_line("    {");
+		dsdt_line("      Store (0x00, CIR%c)", 'A' + pin);
+		dsdt_line("    }");
+		dsdt_line("    Return (CB%02X)", pin + 1);
+		dsdt_line("  }");
+		dsdt_line("  Method (_DIS, 0, NotSerialized)");
+		dsdt_line("  {");
+		dsdt_line("    Store (0x80, PIR%c)", 'A' + pin);
+		dsdt_line("  }");
+		dsdt_line("  Method (_SRS, 1, NotSerialized)");
+		dsdt_line("  {");
+		dsdt_line("    CreateWordField (Arg0, 0x01, SIR%c)", 'A' + pin);
+		dsdt_line("    FindSetRightBit (SIR%c, Local0)", 'A' + pin);
+		dsdt_line("    Store (Decrement (Local0), PIR%c)", 'A' + pin);
+		dsdt_line("  }");
+		dsdt_line("}");
+	}
+	free(irq_prs);
+}
+LPC_DSDT(pirq_dsdt);


Property changes on: trunk/usr.sbin/bhyve/pci_irq.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_irq.h
===================================================================
--- trunk/usr.sbin/bhyve/pci_irq.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_irq.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,46 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Hudson River Trading LLC
+ * Written by: John H. Baldwin <jhb at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/pci_irq.h 283927 2015-06-02 19:20:39Z jhb $
+ */
+
+#ifndef __PCI_IRQ_H__
+#define	__PCI_IRQ_H__
+
+struct pci_devinst;
+struct vmctx;		/* named in the prototypes below; needs file scope */
+void	pci_irq_assert(struct pci_devinst *pi);
+void	pci_irq_deassert(struct pci_devinst *pi);
+void	pci_irq_init(struct vmctx *ctx);
+void	pci_irq_reserve(int irq);
+void	pci_irq_use(int irq);
+int	pirq_alloc_pin(struct vmctx *ctx);
+int	pirq_irq(int pin);
+uint8_t	pirq_read(int pin);
+void	pirq_write(struct vmctx *ctx, int pin, uint8_t val);
+
+#endif


Property changes on: trunk/usr.sbin/bhyve/pci_irq.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_lpc.c
===================================================================
--- trunk/usr.sbin/bhyve/pci_lpc.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_lpc.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,451 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013 Neel Natu <neel at freebsd.org>
+ * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale at pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/pci_lpc.c 295124 2016-02-01 14:56:11Z grehan $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_lpc.c 295124 2016-02-01 14:56:11Z grehan $");
+
+#include <sys/types.h>
+#include <machine/vmm.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "bootrom.h"
+#include "inout.h"
+#include "pci_emul.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+#include "uart_emul.h"
+
+#define	IO_ICU1		0x20
+#define	IO_ICU2		0xA0
+
+SET_DECLARE(lpc_dsdt_set, struct lpc_dsdt);
+SET_DECLARE(lpc_sysres_set, struct lpc_sysres);
+
+#define	ELCR_PORT	0x4d0
+SYSRES_IO(ELCR_PORT, 2);
+
+#define	IO_TIMER1_PORT	0x40
+
+#define	NMISC_PORT	0x61
+SYSRES_IO(NMISC_PORT, 1);
+
+static struct pci_devinst *lpc_bridge;
+
+static const char *romfile;
+
+#define	LPC_UART_NUM	2
+static struct lpc_uart_softc {
+	struct uart_softc *uart_softc;	/* from uart_init() in lpc_init() */
+	const char *opts;		/* set by lpc_device_parse() */
+	int	iobase;			/* set by uart_legacy_alloc() */
+	int	irq;			/* set by uart_legacy_alloc() */
+	int	enabled;		/* 1 after lpc_init() set it up */
+} lpc_uart_softc[LPC_UART_NUM];
+
+static const char *lpc_uart_names[LPC_UART_NUM] = { "COM1", "COM2" };
+
+/*
+ * LPC device configuration is in the following form:
+ * <lpc_device_name>[,<options>]
+ * For example: "com1,stdio" or "bootrom,/var/romfile"
+ */
+int
+lpc_device_parse(const char *opts)
+{
+	char *copy, *name, *rest;
+	int unit;
+
+	rest = copy = strdup(opts);
+	name = strsep(&rest, ",");
+	if (name != NULL) {
+		/*
+		 * 'rest' points into 'copy'; keep the copy on a match.
+		 */
+		if (strcasecmp(name, "bootrom") == 0) {
+			romfile = rest;
+			return (0);
+		}
+		for (unit = 0; unit < LPC_UART_NUM; unit++) {
+			if (strcasecmp(name, lpc_uart_names[unit]) != 0)
+				continue;
+			lpc_uart_softc[unit].opts = rest;
+			return (0);
+		}
+	}
+
+	/*
+	 * No match (or strdup() failed): nothing uses the copy, so free it.
+	 */
+	free(copy);
+	return (-1);
+}
+
+const char *
+lpc_bootrom(void)
+{
+	/* NULL unless a "bootrom,<file>" option was parsed. */
+	return (romfile);
+}
+
+static void
+lpc_uart_intr_assert(void *arg)
+{
+	struct lpc_uart_softc *sc = arg;
+
+	assert(sc->irq >= 0);
+	/* COM interrupts are edge triggered, so the IRQ is pulsed. */
+	vm_isa_pulse_irq(lpc_bridge->pi_vmctx, sc->irq, sc->irq);
+}
+
+static void
+lpc_uart_intr_deassert(void *arg)
+{
+	/*
+	 * The COM devices on the LPC bus generate edge triggered interrupts,
+	 * so there is nothing to lower here and 'arg' is unused.
+	 */
+}
+
+static int
+lpc_uart_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		    uint32_t *eax, void *arg)
+{
+	struct lpc_uart_softc *sc = arg;
+	const int off = port - sc->iobase;
+	uint32_t data;
+	int i;
+
+	/*
+	 * Only byte and word accesses are emulated.
+	 */
+	if (bytes != 1 && bytes != 2)
+		return (-1);
+
+	/*
+	 * A word access becomes two byte accesses to consecutive UART
+	 * registers, low byte (lower register) first.
+	 */
+	if (in) {
+		data = 0;
+		for (i = 0; i < bytes; i++)
+			data |= uart_read(sc->uart_softc, off + i) << (8 * i);
+		*eax = data;
+	} else {
+		for (i = 0; i < bytes; i++)
+			uart_write(sc->uart_softc, off + i, *eax >> (8 * i));
+	}
+
+	return (0);
+}
+
+static int
+lpc_init(struct vmctx *ctx)
+{
+	struct lpc_uart_softc *sc;
+	struct inout_port iop;
+	const char *name;
+	int unit, error;
+	/* Run bootrom_init() if a ROM was given, then set up both UARTs. */
+	if (romfile != NULL) {
+		error = bootrom_init(ctx, romfile);
+		if (error)
+			return (error);
+	}
+
+	/* COM1 and COM2 */
+	for (unit = 0; unit < LPC_UART_NUM; unit++) {
+		sc = &lpc_uart_softc[unit];
+		name = lpc_uart_names[unit];
+		/* Obtain the unit's I/O base and IRQ, then reserve that IRQ. */
+		if (uart_legacy_alloc(unit, &sc->iobase, &sc->irq) != 0) {
+			fprintf(stderr, "Unable to allocate resources for "
+			    "LPC device %s\n", name);
+			return (-1);
+		}
+		pci_irq_reserve(sc->irq);
+
+		sc->uart_softc = uart_init(lpc_uart_intr_assert,
+				    lpc_uart_intr_deassert, sc);
+
+		if (uart_set_backend(sc->uart_softc, sc->opts) != 0) {
+			fprintf(stderr, "Unable to initialize backend '%s' "
+			    "for LPC device %s\n", sc->opts, name);
+			return (-1);
+		}
+		/* Register lpc_uart_io_handler() for the unit's port range. */
+		bzero(&iop, sizeof(struct inout_port));
+		iop.name = name;
+		iop.port = sc->iobase;
+		iop.size = UART_IO_BAR_SIZE;
+		iop.flags = IOPORT_F_INOUT;
+		iop.handler = lpc_uart_io_handler;
+		iop.arg = sc;
+
+		error = register_inout(&iop);
+		assert(error == 0);
+		sc->enabled = 1;
+	}
+
+	return (0);
+}
+
+static void
+pci_lpc_write_dsdt(struct pci_devinst *pi)
+{
+	struct lpc_dsdt **ldpp, *ldp;
+	/* Emit Device (ISA): PIRA-PIRH fields, registered handlers, PIC, TIMR. */
+	dsdt_line("");
+	dsdt_line("Device (ISA)");
+	dsdt_line("{");
+	dsdt_line("  Name (_ADR, 0x%04X%04X)", pi->pi_slot, pi->pi_func);
+	dsdt_line("  OperationRegion (LPCR, PCI_Config, 0x00, 0x100)");
+	dsdt_line("  Field (LPCR, AnyAcc, NoLock, Preserve)");
+	dsdt_line("  {");
+	dsdt_line("    Offset (0x60),");
+	dsdt_line("    PIRA,   8,");
+	dsdt_line("    PIRB,   8,");
+	dsdt_line("    PIRC,   8,");
+	dsdt_line("    PIRD,   8,");
+	dsdt_line("    Offset (0x68),");
+	dsdt_line("    PIRE,   8,");
+	dsdt_line("    PIRF,   8,");
+	dsdt_line("    PIRG,   8,");
+	dsdt_line("    PIRH,   8");
+	dsdt_line("  }");
+	dsdt_line("");
+	/* Call every handler in lpc_dsdt_set while Device (ISA) is open. */
+	dsdt_indent(1);
+	SET_FOREACH(ldpp, lpc_dsdt_set) {
+		ldp = *ldpp;
+		ldp->handler();
+	}
+
+	dsdt_line("");
+	dsdt_line("Device (PIC)");
+	dsdt_line("{");
+	dsdt_line("  Name (_HID, EisaId (\"PNP0000\"))");
+	dsdt_line("  Name (_CRS, ResourceTemplate ()");
+	dsdt_line("  {");
+	dsdt_indent(2);
+	dsdt_fixed_ioport(IO_ICU1, 2);
+	dsdt_fixed_ioport(IO_ICU2, 2);
+	dsdt_fixed_irq(2);
+	dsdt_unindent(2);
+	dsdt_line("  })");
+	dsdt_line("}");
+
+	dsdt_line("");
+	dsdt_line("Device (TIMR)");
+	dsdt_line("{");
+	dsdt_line("  Name (_HID, EisaId (\"PNP0100\"))");
+	dsdt_line("  Name (_CRS, ResourceTemplate ()");
+	dsdt_line("  {");
+	dsdt_indent(2);
+	dsdt_fixed_ioport(IO_TIMER1_PORT, 4);
+	dsdt_fixed_irq(0);
+	dsdt_unindent(2);
+	dsdt_line("  })");
+	dsdt_line("}");
+	dsdt_unindent(1);
+
+	dsdt_line("}");
+}
+
+/*
+ * DSDT handler for the "SIO" system resource device (PNP0C02).
+ *
+ * Every entry registered in lpc_sysres_set is turned into one descriptor
+ * of the device's _CRS: a fixed I/O port range for LPC_SYSRES_IO entries
+ * and a fixed 32-bit memory range for LPC_SYSRES_MEM entries.
+ */
+static void
+pci_lpc_sysres_dsdt(void)
+{
+	struct lpc_sysres **iter, *res;
+
+	dsdt_line("");
+	dsdt_line("Device (SIO)");
+	dsdt_line("{");
+	dsdt_line("  Name (_HID, EisaId (\"PNP0C02\"))");
+	dsdt_line("  Name (_CRS, ResourceTemplate ()");
+	dsdt_line("  {");
+
+	dsdt_indent(2);
+	SET_FOREACH(iter, lpc_sysres_set) {
+		res = *iter;
+		if (res->type == LPC_SYSRES_IO)
+			dsdt_fixed_ioport(res->base, res->length);
+		else if (res->type == LPC_SYSRES_MEM)
+			dsdt_fixed_mem32(res->base, res->length);
+	}
+	dsdt_unindent(2);
+
+	dsdt_line("  })");
+	dsdt_line("}");
+}
+LPC_DSDT(pci_lpc_sysres_dsdt);
+
+/*
+ * DSDT handler for the LPC UARTs.
+ *
+ * Each enabled unit gets a PNP0501 device named after lpc_uart_names[],
+ * a 1-based _UID, and a _CRS holding its I/O port range and its IRQ.
+ * Units that are not enabled produce no output.
+ */
+static void
+pci_lpc_uart_dsdt(void)
+{
+	struct lpc_uart_softc *uart;
+	int i;
+
+	for (i = 0; i < LPC_UART_NUM; i++) {
+		uart = &lpc_uart_softc[i];
+		if (uart->enabled) {
+			dsdt_line("");
+			dsdt_line("Device (%s)", lpc_uart_names[i]);
+			dsdt_line("{");
+			dsdt_line("  Name (_HID, EisaId (\"PNP0501\"))");
+			dsdt_line("  Name (_UID, %d)", i + 1);
+			dsdt_line("  Name (_CRS, ResourceTemplate ()");
+			dsdt_line("  {");
+			dsdt_indent(2);
+			dsdt_fixed_ioport(uart->iobase, UART_IO_BAR_SIZE);
+			dsdt_fixed_irq(uart->irq);
+			dsdt_unindent(2);
+			dsdt_line("  })");
+			dsdt_line("}");
+		}
+	}
+}
+LPC_DSDT(pci_lpc_uart_dsdt);
+
+/*
+ * Config-space write hook for the LPC bridge.
+ *
+ * Only single-byte writes to the PIRQ routing registers (0x60-0x63 and
+ * 0x68-0x6b) are handled here: the value is passed to pirq_write() and
+ * the register is then refreshed from pirq_read().  Returns 0 when the
+ * write was handled and -1 for every other access.
+ */
+static int
+pci_lpc_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+		  int coff, int bytes, uint32_t val)
+{
+	int pin;
+
+	if (bytes != 1)
+		return (-1);
+
+	if (coff >= 0x60 && coff <= 0x63)
+		pin = coff - 0x60 + 1;		/* PIRA-PIRD: pins 1-4 */
+	else if (coff >= 0x68 && coff <= 0x6b)
+		pin = coff - 0x68 + 5;		/* PIRE-PIRH: pins 5-8 */
+	else
+		return (-1);
+
+	pirq_write(ctx, pin, val);
+	pci_set_cfgdata8(pi, coff, pirq_read(pin));
+	return (0);
+}
+
+/*
+ * BAR write hook for the LPC bridge.  Intentionally empty: the value
+ * written is discarded.
+ */
+static void
+pci_lpc_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+	       int baridx, uint64_t offset, int size, uint64_t value)
+{
+}
+
+/*
+ * BAR read hook for the LPC bridge.  Always returns 0 regardless of the
+ * BAR, offset or access size.
+ */
+static uint64_t
+pci_lpc_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+	      int baridx, uint64_t offset, int size)
+{
+	return (0);
+}
+
+#define	LPC_DEV		0x7000
+#define	LPC_VENDOR	0x8086
+
+/*
+ * Initialize the LPC bridge emulation for device instance 'pi'.
+ *
+ * Fails with -1 if a bridge has already been configured, if the instance
+ * is not on bus 0, or if lpc_init() fails.  On success the vendor/device
+ * IDs and the ISA-bridge class codes are written to config space, 'pi' is
+ * recorded in lpc_bridge, and 0 is returned.
+ */
+static int
+pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	int error;
+
+	/* A second LPC bridge instance is rejected. */
+	if (lpc_bridge != NULL) {
+		fprintf(stderr, "Only one LPC bridge is allowed.\n");
+		return (-1);
+	}
+
+	/*
+	 * The bridge must live on bus 0 so that the ACPI DSDT can provide
+	 * a single decode for all legacy i/o ports behind that bus.
+	 */
+	if (pi->pi_bus != 0) {
+		fprintf(stderr, "LPC bridge can be present only on bus 0.\n");
+		return (-1);
+	}
+
+	error = lpc_init(ctx);
+	if (error != 0)
+		return (-1);
+
+	/* Identify the device as an Intel PCI-ISA bridge in config space. */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, LPC_DEV);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, LPC_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
+	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA);
+
+	lpc_bridge = pi;
+
+	return (0);
+}
+
+/*
+ * Build the ACPI path of the PCI interrupt link device for PIRQ 'pin'.
+ * Pins are 1-based: pin 1 yields "...LNKA,", pin 2 "...LNKB,", and so on.
+ *
+ * Returns a string allocated by asprintf(3), or NULL when no LPC bridge
+ * has been configured or the allocation fails.  The caller presumably
+ * owns the returned string and must free(3) it -- confirm against the
+ * callers.
+ */
+char *
+lpc_pirq_name(int pin)
+{
+	char *name;
+
+	if (lpc_bridge == NULL)
+		return (NULL);
+	/*
+	 * The contents of 'name' are not portable after a failed
+	 * asprintf(), so report the failure explicitly.
+	 */
+	if (asprintf(&name, "\\_SB.PC00.ISA.LNK%c,", 'A' + pin - 1) < 0)
+		return (NULL);
+	return (name);
+}
+
+/*
+ * Refresh the eight PIRQ routing registers in the LPC bridge's config
+ * space from the current pirq_read() values.  Pins 1-4 live at offsets
+ * 0x60-0x63 and pins 5-8 at 0x68-0x6b.  Does nothing when no LPC bridge
+ * has been configured.
+ */
+void
+lpc_pirq_routed(void)
+{
+	int idx, reg;
+
+	if (lpc_bridge == NULL)
+		return;
+
+	for (idx = 0; idx < 8; idx++) {
+		reg = (idx < 4) ? 0x60 + idx : 0x68 + (idx - 4);
+		pci_set_cfgdata8(lpc_bridge, reg, pirq_read(idx + 1));
+	}
+}
+
+/*
+ * Device emulation descriptor for the LPC bridge ("lpc"), registered with
+ * the PCI emulation framework through PCI_EMUL_SET.  Config writes are
+ * intercepted for PIRQ routing; no config read hook is installed.
+ */
+struct pci_devemu pci_de_lpc = {
+	.pe_emu =	"lpc",
+	.pe_init =	pci_lpc_init,
+	.pe_write_dsdt = pci_lpc_write_dsdt,
+	.pe_cfgwrite =	pci_lpc_cfgwrite,
+	.pe_barwrite =	pci_lpc_write,
+	.pe_barread =	pci_lpc_read
+};
+PCI_EMUL_SET(pci_de_lpc);


Property changes on: trunk/usr.sbin/bhyve/pci_lpc.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_lpc.h
===================================================================
--- trunk/usr.sbin/bhyve/pci_lpc.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_lpc.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,74 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013 Neel Natu <neel at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/pci_lpc.h 295124 2016-02-01 14:56:11Z grehan $
+ */
+
+#ifndef _LPC_H_
+#define	_LPC_H_
+
+#include <sys/linker_set.h>
+
+/* Callback type for a routine that writes one LPC device's DSDT text. */
+typedef void (*lpc_write_dsdt_t)(void);
+
+/* One registered DSDT writer; instances are collected in lpc_dsdt_set. */
+struct lpc_dsdt {
+	lpc_write_dsdt_t handler;
+};
+
+/*
+ * Register 'handler' in the lpc_dsdt_set linker set.  The name of the
+ * backing object is derived from __LINE__, so two uses in one file must
+ * be on different source lines.
+ */
+#define	LPC_DSDT(handler)						\
+	static struct lpc_dsdt __CONCAT(__lpc_dsdt, __LINE__) = {	\
+		(handler),						\
+	};								\
+	DATA_SET(lpc_dsdt_set, __CONCAT(__lpc_dsdt, __LINE__))
+
+/* Kind of address range described by a struct lpc_sysres. */
+enum lpc_sysres_type {
+	LPC_SYSRES_IO,
+	LPC_SYSRES_MEM
+};
+
+/* One system resource range: its kind, start address and length. */
+struct lpc_sysres {
+	enum lpc_sysres_type type;
+	uint32_t base;
+	uint32_t length;
+};
+
+/*
+ * Register a system resource range in the lpc_sysres_set linker set.
+ * The name of the backing object is derived from __LINE__, so two uses
+ * in one file must be on different source lines.
+ */
+#define	LPC_SYSRES(type, base, length)					\
+	static struct lpc_sysres __CONCAT(__lpc_sysres, __LINE__) = {	\
+		(type),							\
+		(base),							\
+		(length)						\
+	};								\
+	DATA_SET(lpc_sysres_set, __CONCAT(__lpc_sysres, __LINE__))
+
+/* Shorthands for registering an I/O port range or a memory range. */
+#define	SYSRES_IO(base, length)		LPC_SYSRES(LPC_SYSRES_IO, base, length)
+#define	SYSRES_MEM(base, length)	LPC_SYSRES(LPC_SYSRES_MEM, base, length)
+
+int	lpc_device_parse(const char *opt);
+char	*lpc_pirq_name(int pin);
+void	lpc_pirq_routed(void);
+const char *lpc_bootrom(void);
+
+#endif


Property changes on: trunk/usr.sbin/bhyve/pci_lpc.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_passthru.c
===================================================================
--- trunk/usr.sbin/bhyve/pci_passthru.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_passthru.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,898 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/pci_passthru.c 302705 2016-07-13 06:09:34Z ngie $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_passthru.c 302705 2016-07-13 06:09:34Z ngie $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/pciio.h>
+#include <sys/ioctl.h>
+
+#include <dev/io/iodev.h>
+#include <dev/pci/pcireg.h>
+
+#include <machine/iodev.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+#include "pci_emul.h"
+#include "mem.h"
+
+#ifndef _PATH_DEVPCI
+#define	_PATH_DEVPCI	"/dev/pci"
+#endif
+
+#ifndef	_PATH_DEVIO
+#define	_PATH_DEVIO	"/dev/io"
+#endif
+
+#ifndef _PATH_MEM
+#define	_PATH_MEM	"/dev/mem"
+#endif
+
+#define	LEGACY_SUPPORT	1
+
+#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
+#define MSIX_CAPLEN 12
+
+static int pcifd = -1;
+static int iofd = -1;
+static int memfd = -1;
+
+/* Per-device state of one passed-through PCI function. */
+struct passthru_softc {
+	struct pci_devinst *psc_pi;	/* emulated device instance */
+	/* type, size and address of the "real" BARs, filled by cfginitbar() */
+	struct pcibar psc_bar[PCI_BARMAX + 1];
+	struct {
+		int		capoff;		/* config offset, 0 if none */
+		int		msgctrl;	/* cached message control */
+		int		emulated;	/* 1 if crafted by bhyve */
+	} psc_msi;
+	struct {
+		int		capoff;		/* config offset, 0 if none */
+	} psc_msix;
+	struct pcisel psc_sel;		/* bus/slot/function of the device */
+};
+
+/*
+ * Return the length in bytes of the MSI capability described by the
+ * message control word 'msgctrl'.
+ *
+ * The minimum capability is 10 bytes and 64-bit address support adds 4.
+ * The 'mask' and 'pending' bits are deliberately not counted: the guest
+ * is left to manipulate them directly.
+ */
+static int
+msi_caplen(int msgctrl)
+{
+
+	return ((msgctrl & PCIM_MSICTRL_64BIT) ? 10 + 4 : 10);
+}
+
+/*
+ * Read 'width' bytes at config offset 'reg' of the physical device 'sel'
+ * through the PCIOCREAD ioctl on pcifd.
+ *
+ * XXX a failed ioctl is reported as the value 0, which the caller cannot
+ * tell apart from a register that really contains 0.
+ */
+static uint32_t
+read_config(const struct pcisel *sel, long reg, int width)
+{
+	struct pci_io req;
+
+	memset(&req, 0, sizeof(req));
+	req.pi_sel = *sel;
+	req.pi_reg = reg;
+	req.pi_width = width;
+
+	if (ioctl(pcifd, PCIOCREAD, &req) < 0)
+		return (0);
+	return (req.pi_data);
+}
+
+/*
+ * Write the low 'width' bytes of 'data' to config offset 'reg' of the
+ * physical device 'sel' through the PCIOCWRITE ioctl on pcifd.
+ *
+ * XXX the result of the ioctl is ignored, so failures are silent.
+ */
+static void
+write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
+{
+	struct pci_io req;
+
+	memset(&req, 0, sizeof(req));
+	req.pi_sel = *sel;
+	req.pi_reg = reg;
+	req.pi_width = width;
+	req.pi_data = data;
+
+	(void)ioctl(pcifd, PCIOCWRITE, &req);
+}
+
+#ifdef LEGACY_SUPPORT
+/*
+ * Craft an MSI capability for 'msgnum' messages that links to 'nextptr'
+ * and store it in the emulated config space of 'pi'.  Returns the config
+ * offset at which the capability was placed.
+ */
+static int
+passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
+{
+	struct msicap cap;
+	const u_char *bytes;
+	size_t n;
+	int capoff;
+
+	pci_populate_msicap(&cap, msgnum, nextptr);
+
+	/*
+	 * XXX
+	 * The capability is placed at the very end of the 256-byte config
+	 * space.  This is wrong because it could shadow something useful
+	 * to the device.
+	 */
+	capoff = 256 - roundup(sizeof(cap), 4);
+	bytes = (const u_char *)&cap;
+	for (n = 0; n < sizeof(cap); n++)
+		pci_set_cfgdata8(pi, capoff + (int)n, bytes[n]);
+
+	return (capoff);
+}
+#endif	/* LEGACY_SUPPORT */
+
+/*
+ * Discover the MSI and MSI-X capabilities of the physical device and set
+ * up their emulated counterparts.
+ *
+ * The capability list of the physical device is walked; an MSI or MSI-X
+ * capability that is found is copied into the emulated config space and
+ * its offset is cached in 'sc'.  For MSI-X the table/PBA location is
+ * decoded into pi->pi_msix and an emulated table is allocated with every
+ * entry masked.  With LEGACY_SUPPORT, a device that has a capability list
+ * but no MSI capability gets a crafted one linked at the head of the list.
+ *
+ * Returns 0 if an MSI or MSI-X capability is available afterwards, and -1
+ * if neither is or if the emulated MSI-X table cannot be allocated.
+ */
+static int
+cfginitmsi(struct passthru_softc *sc)
+{
+	int i, ptr, capptr, cap, sts, caplen, table_size;
+	uint32_t u32;
+	struct pcisel sel;
+	struct pci_devinst *pi;
+	struct msixcap msixcap;
+	uint32_t *msixcap_ptr;
+
+	pi = sc->psc_pi;
+	sel = sc->psc_sel;
+
+	/*
+	 * Parse the capabilities and cache the location of the MSI
+	 * and MSI-X capabilities.
+	 */
+	sts = read_config(&sel, PCIR_STATUS, 2);
+	if (sts & PCIM_STATUS_CAPPRESENT) {
+		ptr = read_config(&sel, PCIR_CAP_PTR, 1);
+		while (ptr != 0 && ptr != 0xff) {
+			cap = read_config(&sel, ptr + PCICAP_ID, 1);
+			if (cap == PCIY_MSI) {
+				/*
+				 * Copy the MSI capability into the config
+				 * space of the emulated pci device
+				 */
+				sc->psc_msi.capoff = ptr;
+				sc->psc_msi.msgctrl = read_config(&sel,
+								  ptr + 2, 2);
+				sc->psc_msi.emulated = 0;
+				caplen = msi_caplen(sc->psc_msi.msgctrl);
+				capptr = ptr;
+				while (caplen > 0) {
+					u32 = read_config(&sel, capptr, 4);
+					pci_set_cfgdata32(pi, capptr, u32);
+					caplen -= 4;
+					capptr += 4;
+				}
+			} else if (cap == PCIY_MSIX) {
+				/*
+				 * Copy the MSI-X capability both into the
+				 * emulated config space and into the local
+				 * 'msixcap' that is decoded below.
+				 */
+				sc->psc_msix.capoff = ptr;
+				caplen = MSIX_CAPLEN;
+				msixcap_ptr = (uint32_t*) &msixcap;
+				capptr = ptr;
+				while (caplen > 0) {
+					u32 = read_config(&sel, capptr, 4);
+					*msixcap_ptr = u32;
+					pci_set_cfgdata32(pi, capptr, u32);
+					caplen -= 4;
+					capptr += 4;
+					msixcap_ptr++;
+				}
+			}
+			ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1);
+		}
+	}
+
+	/* 'msixcap' is only valid when an MSI-X capability was found. */
+	if (sc->psc_msix.capoff != 0) {
+		pi->pi_msix.pba_bar =
+		    msixcap.pba_info & PCIM_MSIX_BIR_MASK;
+		pi->pi_msix.pba_offset =
+		    msixcap.pba_info & ~PCIM_MSIX_BIR_MASK;
+		pi->pi_msix.table_bar =
+		    msixcap.table_info & PCIM_MSIX_BIR_MASK;
+		pi->pi_msix.table_offset =
+		    msixcap.table_info & ~PCIM_MSIX_BIR_MASK;
+		pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl);
+		pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count);
+
+		/* Allocate the emulated MSI-X table array */
+		table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
+		pi->pi_msix.table = calloc(1, table_size);
+		if (pi->pi_msix.table == NULL)
+			return (-1);
+
+		/* Mask all table entries */
+		for (i = 0; i < pi->pi_msix.table_count; i++) {
+			pi->pi_msix.table[i].vector_control |=
+						PCIM_MSIX_VCTRL_MASK;
+		}
+	}
+
+#ifdef LEGACY_SUPPORT
+	/*
+	 * If the passthrough device does not support MSI then craft a
+	 * MSI capability for it. We link the new MSI capability at the
+	 * head of the list of capabilities.
+	 */
+	if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) {
+		int origptr, msiptr;
+		origptr = read_config(&sel, PCIR_CAP_PTR, 1);
+		msiptr = passthru_add_msicap(pi, 1, origptr);
+		sc->psc_msi.capoff = msiptr;
+		sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2);
+		sc->psc_msi.emulated = 1;
+		pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr);
+	}
+#endif
+
+	/* Make sure one of the capabilities is present */
+	if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0)
+		return (-1);
+	else
+		return (0);
+}
+
+/*
+ * Emulate a guest read of 'size' bytes at 'offset' within the BAR that
+ * holds the MSI-X table.
+ *
+ * Offsets inside the PBA range are read from the mapped PBA page; offsets
+ * inside the table are read from the emulated copy in pi->pi_msix.table.
+ * Returns the value read, or (uint64_t)-1 when the size is not 1, 2, 4 or
+ * 8 or when the offset lies outside both regions.
+ */
+static uint64_t
+msix_table_read(struct passthru_softc *sc, uint64_t offset, int size)
+{
+	struct pci_devinst *pi;
+	struct msix_table_entry *entry;
+	uint8_t *src8;
+	uint16_t *src16;
+	uint32_t *src32;
+	uint64_t *src64;
+	uint64_t data;
+	size_t entry_offset;
+	int index;
+
+	pi = sc->psc_pi;
+	/*
+	 * PBA access: read through the page mapped by init_msix_table().
+	 * NOTE(review): pba_page is NULL when the PBA does not share a page
+	 * with the table; presumably such offsets never reach this handler
+	 * -- confirm.
+	 */
+	if (offset >= pi->pi_msix.pba_offset &&
+	    offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
+		switch(size) {
+		case 1:
+			src8 = (uint8_t *)(pi->pi_msix.pba_page + offset -
+			    pi->pi_msix.pba_page_offset);
+			data = *src8;
+			break;
+		case 2:
+			src16 = (uint16_t *)(pi->pi_msix.pba_page + offset -
+			    pi->pi_msix.pba_page_offset);
+			data = *src16;
+			break;
+		case 4:
+			src32 = (uint32_t *)(pi->pi_msix.pba_page + offset -
+			    pi->pi_msix.pba_page_offset);
+			data = *src32;
+			break;
+		case 8:
+			src64 = (uint64_t *)(pi->pi_msix.pba_page + offset -
+			    pi->pi_msix.pba_page_offset);
+			data = *src64;
+			break;
+		default:
+			return (-1);
+		}
+		return (data);
+	}
+
+	/* Anything below the table start is neither PBA nor table. */
+	if (offset < pi->pi_msix.table_offset)
+		return (-1);
+
+	/* Convert to an entry index plus a byte offset within that entry. */
+	offset -= pi->pi_msix.table_offset;
+	index = offset / MSIX_TABLE_ENTRY_SIZE;
+	if (index >= pi->pi_msix.table_count)
+		return (-1);
+
+	entry = &pi->pi_msix.table[index];
+	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
+
+	/* Table access: read from the emulated entry, not from hardware. */
+	switch(size) {
+	case 1:
+		src8 = (uint8_t *)((void *)entry + entry_offset);
+		data = *src8;
+		break;
+	case 2:
+		src16 = (uint16_t *)((void *)entry + entry_offset);
+		data = *src16;
+		break;
+	case 4:
+		src32 = (uint32_t *)((void *)entry + entry_offset);
+		data = *src32;
+		break;
+	case 8:
+		src64 = (uint64_t *)((void *)entry + entry_offset);
+		data = *src64;
+		break;
+	default:
+		return (-1);
+	}
+
+	return (data);
+}
+
+/*
+ * Emulate a guest write of 'size' bytes of 'data' at 'offset' within the
+ * BAR that holds the MSI-X table.
+ *
+ * Offsets inside the PBA range are written to the mapped PBA page.
+ * Offsets inside the table update the emulated entry; if MSI-X is enabled
+ * and the entry is unmasked either before or after the write, the vector
+ * is (re)programmed through vm_setup_pptdev_msix().  Writes outside both
+ * regions are silently dropped.  Table writes must be 4 bytes wide and
+ * 4-byte aligned; anything else trips an assertion.
+ */
+static void
+msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
+		 uint64_t offset, int size, uint64_t data)
+{
+	struct pci_devinst *pi;
+	struct msix_table_entry *entry;
+	uint8_t *dest8;
+	uint16_t *dest16;
+	uint32_t *dest32;
+	uint64_t *dest64;
+	size_t entry_offset;
+	uint32_t vector_control;
+	int index;
+
+	pi = sc->psc_pi;
+	/* PBA access: write through the page mapped by init_msix_table(). */
+	if (offset >= pi->pi_msix.pba_offset &&
+	    offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
+		switch(size) {
+		case 1:
+			dest8 = (uint8_t *)(pi->pi_msix.pba_page + offset -
+			    pi->pi_msix.pba_page_offset);
+			*dest8 = data;
+			break;
+		case 2:
+			dest16 = (uint16_t *)(pi->pi_msix.pba_page + offset -
+			    pi->pi_msix.pba_page_offset);
+			*dest16 = data;
+			break;
+		case 4:
+			dest32 = (uint32_t *)(pi->pi_msix.pba_page + offset -
+			    pi->pi_msix.pba_page_offset);
+			*dest32 = data;
+			break;
+		case 8:
+			dest64 = (uint64_t *)(pi->pi_msix.pba_page + offset -
+			    pi->pi_msix.pba_page_offset);
+			*dest64 = data;
+			break;
+		default:
+			break;
+		}
+		return;
+	}
+
+	/* Anything below the table start is neither PBA nor table. */
+	if (offset < pi->pi_msix.table_offset)
+		return;
+
+	/* Convert to an entry index plus a byte offset within that entry. */
+	offset -= pi->pi_msix.table_offset;
+	index = offset / MSIX_TABLE_ENTRY_SIZE;
+	if (index >= pi->pi_msix.table_count)
+		return;
+
+	entry = &pi->pi_msix.table[index];
+	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
+
+	/* Only 4 byte naturally-aligned writes are supported */
+	assert(size == 4);
+	assert(entry_offset % 4 == 0);
+
+	/* Remember the old mask state before the entry is modified. */
+	vector_control = entry->vector_control;
+	dest32 = (uint32_t *)((void *)entry + entry_offset);
+	*dest32 = data;
+	/* If MSI-X hasn't been enabled, do nothing */
+	if (pi->pi_msix.enabled) {
+		/*
+		 * If the entry is masked, don't set it up.  An entry that
+		 * was unmasked before this write is still reprogrammed so
+		 * that a newly set mask bit is passed on.
+		 */
+		if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
+		    (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
+			(void)vm_setup_pptdev_msix(ctx, vcpu,
+			    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
+			    sc->psc_sel.pc_func, index, entry->addr,
+			    entry->msg_data, entry->vector_control);
+		}
+	}
+}
+
+/*
+ * Set up the BAR that contains the MSI-X table of a passthru device.
+ *
+ * The 4KB-aligned region covering the MSI-X table is left unmapped so
+ * that guest accesses to it reach the emulation; the parts of the BAR
+ * before and after that region are mapped with vm_map_pptdev_mmio().
+ * When the PBA lives in the same BAR and overlaps the table's pages, the
+ * affected page is mmap'ed from memfd so PBA accesses can be forwarded.
+ *
+ * 'base' is the address of the physical BAR as passed in by cfginitbar().
+ * Returns 0 on success, -1 if the PBA page cannot be mapped, or the error
+ * from vm_map_pptdev_mmio().
+ */
+static int
+init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
+{
+	int b, s, f;
+	int error, idx;
+	size_t len, remaining;
+	uint32_t table_size, table_offset;
+	uint32_t pba_size, pba_offset;
+	vm_paddr_t start;
+	struct pci_devinst *pi = sc->psc_pi;
+
+	assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0);
+
+	b = sc->psc_sel.pc_bus;
+	s = sc->psc_sel.pc_dev;
+	f = sc->psc_sel.pc_func;
+
+	/* 
+	 * If the MSI-X table BAR maps memory intended for
+	 * other uses, it is at least assured that the table 
+	 * either resides in its own page within the region, 
+	 * or it resides in a page shared with only the PBA.
+	 */
+	table_offset = rounddown2(pi->pi_msix.table_offset, 4096);
+
+	/* Size of the page-aligned region that covers the whole table. */
+	table_size = pi->pi_msix.table_offset - table_offset;
+	table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
+	table_size = roundup2(table_size, 4096);
+
+	idx = pi->pi_msix.table_bar;
+	/* presumably the address assigned by pci_emul_alloc_pbar() -- confirm */
+	start = pi->pi_bar[idx].addr;
+	remaining = pi->pi_bar[idx].size;
+
+	if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar) {
+		pba_offset = pi->pi_msix.pba_offset;
+		pba_size = pi->pi_msix.pba_size;
+		if (pba_offset >= table_offset + table_size ||
+		    table_offset >= pba_offset + pba_size) {
+			/*
+			 * If the PBA does not share a page with the MSI-x
+			 * tables, no PBA emulation is required.
+			 */
+			pi->pi_msix.pba_page = NULL;
+			pi->pi_msix.pba_page_offset = 0;
+		} else {
+			/*
+			 * The PBA overlaps with either the first or last
+			 * page of the MSI-X table region.  Map the
+			 * appropriate page.
+			 */
+			if (pba_offset <= table_offset)
+				pi->pi_msix.pba_page_offset = table_offset;
+			else
+				pi->pi_msix.pba_page_offset = table_offset +
+				    table_size - 4096;
+			/*
+			 * NOTE(review): the mmap offset is derived from
+			 * 'start' rather than from 'base'; verify which of
+			 * the two is the physical address memfd expects.
+			 */
+			pi->pi_msix.pba_page = mmap(NULL, 4096, PROT_READ |
+			    PROT_WRITE, MAP_SHARED, memfd, start +
+			    pi->pi_msix.pba_page_offset);
+			if (pi->pi_msix.pba_page == MAP_FAILED) {
+				warn(
+			    "Failed to map PBA page for MSI-X on %d/%d/%d",
+				    b, s, f);
+				return (-1);
+			}
+		}
+	}
+
+	/* Map everything before the MSI-X table */
+	if (table_offset > 0) {
+		len = table_offset;
+		error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
+		if (error)
+			return (error);
+
+		base += len;
+		start += len;
+		remaining -= len;
+	}
+
+	/* Skip the MSI-X table */
+	base += table_size;
+	start += table_size;
+	remaining -= table_size;
+
+	/* Map everything beyond the end of the MSI-X table */
+	if (remaining > 0) {
+		len = remaining;
+		error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
+		if (error)
+			return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Discover the BARs of the physical device and set up the matching BARs
+ * of the emulated device.
+ *
+ * For every BAR reported by the PCIOCGETBAR ioctl the type, size and
+ * address of the "real" BAR are cached in sc->psc_bar[], a guest BAR is
+ * allocated with pci_emul_alloc_pbar(), and memory BARs are mapped into
+ * the guest.  The BAR that holds the MSI-X table is handed to
+ * init_msix_table() instead of being mapped whole.  BARs for which the
+ * ioctl fails are skipped.
+ *
+ * Returns 0 on success and -1 if a memory BAR is not page aligned or if
+ * allocating or mapping a BAR fails.
+ */
+static int
+cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
+{
+	int i, error;
+	struct pci_devinst *pi;
+	struct pci_bar_io bar;
+	enum pcibar_type bartype;
+	uint64_t base, size;
+
+	pi = sc->psc_pi;
+
+	/*
+	 * Initialize BAR registers
+	 */
+	for (i = 0; i <= PCI_BARMAX; i++) {
+		bzero(&bar, sizeof(bar));
+		bar.pbi_sel = sc->psc_sel;
+		bar.pbi_reg = PCIR_BAR(i);
+
+		if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0)
+			continue;
+
+		if (PCI_BAR_IO(bar.pbi_base)) {
+			bartype = PCIBAR_IO;
+			base = bar.pbi_base & PCIM_BAR_IO_BASE;
+		} else {
+			switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
+			case PCIM_BAR_MEM_64:
+				bartype = PCIBAR_MEM64;
+				break;
+			default:
+				bartype = PCIBAR_MEM32;
+				break;
+			}
+			base = bar.pbi_base & PCIM_BAR_MEM_BASE;
+		}
+		size = bar.pbi_length;
+
+		/* Memory BARs are mapped page-wise, so both must be aligned. */
+		if (bartype != PCIBAR_IO) {
+			if (((base | size) & PAGE_MASK) != 0) {
+				/* warnx() appends the newline itself. */
+				warnx("passthru device %d/%d/%d BAR %d: "
+				    "base %#lx or size %#lx not page aligned",
+				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
+				    sc->psc_sel.pc_func, i, base, size);
+				return (-1);
+			}
+		}
+
+		/* Cache information about the "real" BAR */
+		sc->psc_bar[i].type = bartype;
+		sc->psc_bar[i].size = size;
+		sc->psc_bar[i].addr = base;
+
+		/* Allocate the BAR in the guest I/O or MMIO space */
+		error = pci_emul_alloc_pbar(pi, i, base, bartype, size);
+		if (error)
+			return (-1);
+
+		/* The MSI-X table needs special handling */
+		if (i == pci_msix_table_bar(pi)) {
+			error = init_msix_table(ctx, sc, base);
+			if (error)
+				return (-1);
+		} else if (bartype != PCIBAR_IO) {
+			/* Map the physical BAR in the guest MMIO space */
+			error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
+				sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
+				pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
+			if (error)
+				return (-1);
+		}
+
+		/*
+		 * 64-bit BAR takes up two slots so skip the next one.
+		 */
+		if (bartype == PCIBAR_MEM64) {
+			i++;
+			assert(i <= PCI_BARMAX);
+			sc->psc_bar[i].type = PCIBAR_MEMHI64;
+		}
+	}
+	return (0);
+}
+
+/*
+ * Initialize the emulated config space of a passthru device from the
+ * physical device at bus/slot/func.
+ *
+ * Records the selector in the softc attached to 'pi', then sets up the
+ * MSI/MSI-X capabilities and the BARs.  Returns 0 on success and 1 if
+ * either step fails (a warning naming the device is printed).
+ */
+static int
+cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
+{
+	struct passthru_softc *sc = pi->pi_arg;
+
+	memset(&sc->psc_sel, 0, sizeof(sc->psc_sel));
+	sc->psc_sel.pc_bus = bus;
+	sc->psc_sel.pc_dev = slot;
+	sc->psc_sel.pc_func = func;
+
+	if (cfginitmsi(sc) != 0) {
+		warnx("failed to initialize MSI for PCI %d/%d/%d",
+		    bus, slot, func);
+		return (1);
+	}
+
+	if (cfginitbar(ctx, sc) != 0) {
+		warnx("failed to initialize BARs for PCI %d/%d/%d",
+		    bus, slot, func);
+		return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Initialize a passthru device instance.
+ *
+ * 'opts' must name the physical device as "bus/slot/func".  Guest memory
+ * must be wired.  The /dev/pci, /dev/io and /dev/mem descriptors are
+ * opened on first use and shared by all passthru instances.  The device
+ * is assigned to the VM, a softc is allocated and attached to 'pi', and
+ * the emulated config space is initialized.
+ *
+ * Returns 0 on success and non-zero on failure.  On failure the softc is
+ * released and the device is unassigned again, but only if this call had
+ * actually assigned it.
+ */
+static int
+passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	int bus, slot, func, error, memflags, assigned;
+	struct passthru_softc *sc;
+
+	sc = NULL;
+	error = 1;
+	assigned = 0;
+	bus = slot = func = 0;
+
+	memflags = vm_get_memflags(ctx);
+	if (!(memflags & VM_MEM_F_WIRED)) {
+		warnx("passthru requires guest memory to be wired");
+		goto done;
+	}
+
+	if (pcifd < 0) {
+		pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
+		if (pcifd < 0) {
+			warn("failed to open %s", _PATH_DEVPCI);
+			goto done;
+		}
+	}
+
+	if (iofd < 0) {
+		iofd = open(_PATH_DEVIO, O_RDWR, 0);
+		if (iofd < 0) {
+			warn("failed to open %s", _PATH_DEVIO);
+			goto done;
+		}
+	}
+
+	if (memfd < 0) {
+		memfd = open(_PATH_MEM, O_RDWR, 0);
+		if (memfd < 0) {
+			warn("failed to open %s", _PATH_MEM);
+			goto done;
+		}
+	}
+
+	if (opts == NULL ||
+	    sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3) {
+		warnx("invalid passthru options");
+		goto done;
+	}
+
+	if (vm_assign_pptdev(ctx, bus, slot, func) != 0) {
+		warnx("PCI device at %d/%d/%d is not using the ppt(4) driver",
+		    bus, slot, func);
+		goto done;
+	}
+	assigned = 1;
+
+	sc = calloc(1, sizeof(struct passthru_softc));
+	if (sc == NULL) {
+		warn("failed to allocate passthru softc");
+		goto done;
+	}
+
+	pi->pi_arg = sc;
+	sc->psc_pi = pi;
+
+	/* initialize config space */
+	if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
+		goto done;
+
+	error = 0;		/* success */
+done:
+	if (error) {
+		if (sc != NULL) {
+			/* Do not leave a dangling softc pointer behind. */
+			pi->pi_arg = NULL;
+			free(sc);
+		}
+		/*
+		 * bus/slot/func are only meaningful, and the device only
+		 * needs releasing, once the assignment has succeeded.
+		 */
+		if (assigned)
+			vm_unassign_pptdev(ctx, bus, slot, func);
+	}
+	return (error);
+}
+
+/*
+ * Return 1 if config offset 'coff' falls within the BAR registers
+ * (BAR 0 through BAR PCI_BARMAX) and 0 otherwise.
+ */
+static int
+bar_access(int coff)
+{
+
+	return (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1));
+}
+
+/*
+ * Return 1 if config offset 'coff' falls within the device's MSI
+ * capability and 0 otherwise (including when there is no MSI capability).
+ */
+static int
+msicap_access(struct passthru_softc *sc, int coff)
+{
+	int first, limit;
+
+	if (sc->psc_msi.capoff == 0)
+		return (0);
+
+	first = sc->psc_msi.capoff;
+	limit = first + msi_caplen(sc->psc_msi.msgctrl);
+
+	return (coff >= first && coff < limit);
+}
+
+/*
+ * Return 1 if config offset 'coff' falls within the device's MSI-X
+ * capability and 0 otherwise (including when there is no such capability).
+ */
+static int
+msixcap_access(struct passthru_softc *sc, int coff)
+{
+	int first = sc->psc_msix.capoff;
+
+	if (first != 0 && coff >= first && coff < first + MSIX_CAPLEN)
+		return (1);
+	return (0);
+}
+
+/*
+ * Config-space read hook for a passthru device.
+ *
+ * Returns -1 for registers that are emulated (the BARs, the MSI
+ * capability and, when the MSI capability was crafted by bhyve, the
+ * dword holding PCIR_CAP_PTR).  Every other register is read from the
+ * physical device into '*rv' and 0 is returned.
+ */
+static int
+passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+		 int coff, int bytes, uint32_t *rv)
+{
+	struct passthru_softc *sc = pi->pi_arg;
+
+	/* The PCI BARs are emulated. */
+	if (bar_access(coff))
+		return (-1);
+
+	/* So is the MSI capability. */
+	if (msicap_access(sc, coff))
+		return (-1);
+
+#ifdef LEGACY_SUPPORT
+	/*
+	 * PCIR_CAP_PTR is emulated as well if the device does not support
+	 * the MSI capability natively.
+	 */
+	if (sc->psc_msi.emulated &&
+	    coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
+		return (-1);
+#endif
+
+	/* Everything else comes straight from the device's config space. */
+	*rv = read_config(&sc->psc_sel, coff, bytes);
+
+	return (0);
+}
+
+/*
+ * Config-space write hook for a passthru device.
+ *
+ * Returns -1 for writes to the BARs, which are emulated.  Writes to the
+ * MSI or MSI-X capability update the emulated capability and then
+ * reprogram the interrupt setup of the physical device; a failure there
+ * terminates the process through err(3).  All remaining writes are
+ * forwarded to the physical device.  Returns 0 for every handled write.
+ */
+static int
+passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+		  int coff, int bytes, uint32_t val)
+{
+	int error, msix_table_entries, i;
+	struct passthru_softc *sc;
+
+	sc = pi->pi_arg;
+
+	/*
+	 * PCI BARs are emulated
+	 */
+	if (bar_access(coff))
+		return (-1);
+
+	/*
+	 * MSI capability is emulated
+	 */
+	if (msicap_access(sc, coff)) {
+		msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
+
+		/* Push the updated MSI address/data to the real device. */
+		error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus,
+			sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
+			pi->pi_msi.addr, pi->pi_msi.msg_data,
+			pi->pi_msi.maxmsgnum);
+		if (error != 0)
+			err(1, "vm_setup_pptdev_msi");
+		return (0);
+	}
+
+	if (msixcap_access(sc, coff)) {
+		msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val);
+		/* Once MSI-X is enabled, program every emulated table entry. */
+		if (pi->pi_msix.enabled) {
+			msix_table_entries = pi->pi_msix.table_count;
+			for (i = 0; i < msix_table_entries; i++) {
+				error = vm_setup_pptdev_msix(ctx, vcpu,
+				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, 
+				    sc->psc_sel.pc_func, i, 
+				    pi->pi_msix.table[i].addr,
+				    pi->pi_msix.table[i].msg_data,
+				    pi->pi_msix.table[i].vector_control);
+		
+				if (error)
+					err(1, "vm_setup_pptdev_msix");
+			}
+		}
+		return (0);
+	}
+
+#ifdef LEGACY_SUPPORT
+	/*
+	 * If this device does not support MSI natively then we cannot let
+	 * the guest disable legacy interrupts from the device. It is the
+	 * legacy interrupt that is triggering the virtual MSI to the guest.
+	 */
+	if (sc->psc_msi.emulated && pci_msi_enabled(pi)) {
+		if (coff == PCIR_COMMAND && bytes == 2)
+			val &= ~PCIM_CMD_INTxDIS;
+	}
+#endif
+
+	write_config(&sc->psc_sel, coff, bytes, val);
+
+	return (0);
+}
+
+/*
+ * BAR write hook for a passthru device.
+ *
+ * Writes to the BAR holding the MSI-X table go to msix_table_write().
+ * Any other BAR must be an I/O BAR (asserted); the write is forwarded to
+ * the physical port through the IODEV_PIO ioctl, whose result is ignored.
+ */
+static void
+passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
+	       uint64_t offset, int size, uint64_t value)
+{
+	struct passthru_softc *sc = pi->pi_arg;
+	struct iodev_pio_req req;
+
+	if (baridx == pci_msix_table_bar(pi)) {
+		msix_table_write(ctx, vcpu, sc, offset, size, value);
+		return;
+	}
+
+	assert(pi->pi_bar[baridx].type == PCIBAR_IO);
+
+	memset(&req, 0, sizeof(req));
+	req.access = IODEV_PIO_WRITE;
+	req.port = sc->psc_bar[baridx].addr + offset;
+	req.width = size;
+	req.val = value;
+
+	(void)ioctl(iofd, IODEV_PIO, &req);
+}
+
+/*
+ * BAR read hook for a passthru device.
+ *
+ * Reads from the BAR holding the MSI-X table are served by
+ * msix_table_read().  Any other BAR must be an I/O BAR (asserted); the
+ * value is read from the physical port through the IODEV_PIO ioctl,
+ * whose result is ignored, so a failed read yields 0.
+ */
+static uint64_t
+passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
+	      uint64_t offset, int size)
+{
+	struct passthru_softc *sc = pi->pi_arg;
+	struct iodev_pio_req req;
+
+	if (baridx == pci_msix_table_bar(pi))
+		return (msix_table_read(sc, offset, size));
+
+	assert(pi->pi_bar[baridx].type == PCIBAR_IO);
+
+	memset(&req, 0, sizeof(req));
+	req.access = IODEV_PIO_READ;
+	req.port = sc->psc_bar[baridx].addr + offset;
+	req.width = size;
+	req.val = 0;
+
+	(void)ioctl(iofd, IODEV_PIO, &req);
+
+	return (req.val);
+}
+
+/*
+ * Device emulation descriptor for PCI passthrough ("passthru"), registered
+ * with the PCI emulation framework through PCI_EMUL_SET.  Both config
+ * reads and config writes are intercepted.
+ */
+struct pci_devemu passthru = {
+	.pe_emu		= "passthru",
+	.pe_init	= passthru_init,
+	.pe_cfgwrite	= passthru_cfgwrite,
+	.pe_cfgread	= passthru_cfgread,
+	.pe_barwrite 	= passthru_write,
+	.pe_barread    	= passthru_read,
+};
+PCI_EMUL_SET(passthru);


Property changes on: trunk/usr.sbin/bhyve/pci_passthru.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_uart.c
===================================================================
--- trunk/usr.sbin/bhyve/pci_uart.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_uart.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,120 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/pci_uart.c 267393 2014-06-12 13:13:15Z jhb $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_uart.c 267393 2014-06-12 13:13:15Z jhb $");
+
+#include <sys/types.h>
+
+#include <stdio.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "uart_emul.h"
+
+/*
+ * Pick a PCI vid/did of a chip with a single uart at
+ * BAR0, that most versions of FreeBSD can understand:
+ * Siig CyberSerial 1-port.
+ */
+#define COM_VENDOR	0x131f
+#define COM_DEV		0x2000
+
+/*
+ * Interrupt-assert callback handed to uart_init(); arg is the owning
+ * PCI device instance.
+ */
+static void
+pci_uart_intr_assert(void *arg)
+{
+
+	pci_lintr_assert((struct pci_devinst *)arg);
+}
+
+/*
+ * Interrupt-deassert callback handed to uart_init(); arg is the owning
+ * PCI device instance.
+ */
+static void
+pci_uart_intr_deassert(void *arg)
+{
+
+	pci_lintr_deassert((struct pci_devinst *)arg);
+}
+
+/*
+ * BAR write handler: pass a one-byte write on BAR 0 to the UART
+ * emulation stored in pi->pi_arg.
+ */
+static void
+pci_uart_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+	       int baridx, uint64_t offset, int size, uint64_t value)
+{
+	struct uart_softc *uart;
+
+	assert(baridx == 0);
+	assert(size == 1);
+
+	uart = pi->pi_arg;
+	uart_write(uart, offset, value);
+}
+
+/*
+ * BAR read handler: fetch one byte from the UART emulation stored in
+ * pi->pi_arg.  Only single-byte accesses to BAR 0 are accepted.
+ *
+ * The function is reachable only through pci_de_com, so it gets internal
+ * linkage like the other handlers in this file.
+ */
+static uint64_t
+pci_uart_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+	      int baridx, uint64_t offset, int size)
+{
+	uint8_t val;
+
+	assert(baridx == 0);
+	assert(size == 1);
+
+	val = uart_read(pi->pi_arg, offset);
+	return (val);
+}
+
+/*
+ * Device-model init hook for the PCI UART.
+ *
+ * Allocates the I/O BAR, requests a legacy interrupt, fills in the
+ * vendor/device/class config registers, creates the UART emulation and
+ * attaches the backend described by opts.  Returns 0 on success and -1
+ * if the backend cannot be set up.
+ */
+static int
+pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	struct uart_softc *uart;
+
+	pci_emul_alloc_bar(pi, 0, PCIBAR_IO, UART_IO_BAR_SIZE);
+	pci_lintr_request(pi);
+
+	/* initialize config space */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, COM_DEV);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, COM_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SIMPLECOMM);
+
+	uart = uart_init(pci_uart_intr_assert, pci_uart_intr_deassert, pi);
+	pi->pi_arg = uart;
+
+	if (uart_set_backend(uart, opts) == 0)
+		return (0);
+
+	fprintf(stderr, "Unable to initialize backend '%s' for "
+	    "pci uart at %d:%d\n", opts, pi->pi_slot, pi->pi_func);
+	return (-1);
+}
+
+/*
+ * Device-model descriptor for the PCI UART: binds the name "uart" to the
+ * init and BAR handlers above and registers it through PCI_EMUL_SET.
+ */
+struct pci_devemu pci_de_com = {
+	.pe_emu =	"uart",
+	.pe_init =	pci_uart_init,
+	.pe_barwrite =	pci_uart_write,
+	.pe_barread =	pci_uart_read
+};
+PCI_EMUL_SET(pci_de_com);


Property changes on: trunk/usr.sbin/bhyve/pci_uart.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_virtio_block.c
===================================================================
--- trunk/usr.sbin/bhyve/pci_virtio_block.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_virtio_block.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,411 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/pci_virtio_block.c 284900 2015-06-28 03:22:26Z neel $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_virtio_block.c 284900 2015-06-28 03:22:26Z neel $");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/ioctl.h>
+#include <sys/disk.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <md5.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "virtio.h"
+#include "block_if.h"
+
+#define VTBLK_RINGSZ	64
+
+#define VTBLK_S_OK	0
+#define VTBLK_S_IOERR	1
+#define	VTBLK_S_UNSUPP	2
+
+#define	VTBLK_BLK_ID_BYTES	20
+
+/* Capability bits */
+#define	VTBLK_F_SEG_MAX		(1 << 2)	/* Maximum request segments */
+#define	VTBLK_F_BLK_SIZE	(1 << 6)	/* cfg block size valid */
+#define	VTBLK_F_FLUSH		(1 << 9)	/* Cache flush support */
+#define	VTBLK_F_TOPOLOGY	(1 << 10)	/* Optimal I/O alignment */
+
+/*
+ * Host capabilities
+ */
+#define VTBLK_S_HOSTCAPS      \
+  ( VTBLK_F_SEG_MAX  |						    \
+    VTBLK_F_BLK_SIZE |						    \
+    VTBLK_F_FLUSH    |						    \
+    VTBLK_F_TOPOLOGY |						    \
+    VIRTIO_RING_F_INDIRECT_DESC )	/* indirect descriptors */
+
+/*
+ * Config space "registers"
+ */
+struct vtblk_config {
+	uint64_t	vbc_capacity;
+	uint32_t	vbc_size_max;
+	uint32_t	vbc_seg_max;
+	struct {
+		uint16_t cylinders;
+		uint8_t heads;
+		uint8_t sectors;
+	} vbc_geometry;
+	uint32_t	vbc_blk_size;
+	struct {
+		uint8_t physical_block_exp;
+		uint8_t alignment_offset;
+		uint16_t min_io_size;
+		uint32_t opt_io_size;
+	} vbc_topology;
+	uint8_t		vbc_writeback;
+} __packed;
+
+/*
+ * Fixed-size block header
+ */
+struct virtio_blk_hdr {
+#define	VBH_OP_READ		0
+#define	VBH_OP_WRITE		1
+#define	VBH_OP_FLUSH		4
+#define	VBH_OP_FLUSH_OUT	5
+#define	VBH_OP_IDENT		8		
+#define	VBH_FLAG_BARRIER	0x80000000	/* OR'ed into vbh_type */
+	uint32_t       	vbh_type;
+	uint32_t	vbh_ioprio;
+	uint64_t	vbh_sector;
+} __packed;
+
+/*
+ * Debug printf
+ *
+ * DPRINTF takes a parenthesised printf argument list, e.g.
+ * DPRINTF(("x = %d\n", x)).  It is wrapped in do/while(0) so that it
+ * behaves as a single statement and cannot capture a following "else".
+ */
+static int pci_vtblk_debug;
+#define DPRINTF(params) do { if (pci_vtblk_debug) printf params; } while (0)
+#define WPRINTF(params) printf params
+
+/*
+ * Per-request state.  One entry exists for each ring slot (see vbsc_ios
+ * in the softc); pci_vtblk_init() fills in the callback, io_sc and io_idx
+ * once, and pci_vtblk_proc() fills in the rest for each request.
+ */
+struct pci_vtblk_ioreq {
+	struct blockif_req		io_req;		/* request handed to blockif */
+	struct pci_vtblk_softc	       *io_sc;		/* owning device */
+	uint8_t			       *io_status;	/* guest status byte */
+	uint16_t			io_idx;		/* index passed to vq_relchain() */
+};
+
+/*
+ * Per-device softc
+ */
+struct pci_vtblk_softc {
+	struct virtio_softc vbsc_vs;	/* generic virtio state */
+	pthread_mutex_t vsc_mtx;	/* held around vq_relchain/vq_endchains */
+	struct vqueue_info vbsc_vq;	/* the single request queue */
+	struct vtblk_config vbsc_cfg;	/* device config "registers" */
+	struct blockif_ctxt *bc;	/* backing store, from blockif_open() */
+	char vbsc_ident[VTBLK_BLK_ID_BYTES];	/* reply to VBH_OP_IDENT */
+	struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ];	/* one per ring slot */
+};
+
+static void pci_vtblk_reset(void *);
+static void pci_vtblk_notify(void *, struct vqueue_info *);
+static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
+static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
+
+static struct virtio_consts vtblk_vi_consts = {
+	"vtblk",		/* our name */
+	1,			/* we support 1 virtqueue */
+	sizeof(struct vtblk_config), /* config reg size */
+	pci_vtblk_reset,	/* reset */
+	pci_vtblk_notify,	/* device-wide qnotify */
+	pci_vtblk_cfgread,	/* read PCI config */
+	pci_vtblk_cfgwrite,	/* write PCI config */
+	NULL,			/* apply negotiated features */
+	VTBLK_S_HOSTCAPS,	/* our capabilities */
+};
+
+/*
+ * virtio reset hook: log the request (when debugging is enabled) and
+ * reset the generic virtio state of the device.
+ */
+static void
+pci_vtblk_reset(void *vsc)
+{
+	struct pci_vtblk_softc *sc;
+
+	sc = vsc;
+
+	DPRINTF(("vtblk: device reset requested !\n"));
+	vi_reset_dev(&sc->vbsc_vs);
+}
+
+/*
+ * Completion callback for a block request.
+ *
+ * Translates the errno-style result into a virtio block status byte,
+ * stores it in the guest's status descriptor and returns the chain to
+ * the guest while holding the device mutex.
+ */
+static void
+pci_vtblk_done(struct blockif_req *br, int err)
+{
+	struct pci_vtblk_ioreq *io = br->br_param;
+	struct pci_vtblk_softc *sc = io->io_sc;
+	uint8_t status;
+
+	/* convert errno into a virtio block error return */
+	switch (err) {
+	case 0:
+		status = VTBLK_S_OK;
+		break;
+	case EOPNOTSUPP:
+	case ENOSYS:
+		status = VTBLK_S_UNSUPP;
+		break;
+	default:
+		status = VTBLK_S_IOERR;
+		break;
+	}
+	*io->io_status = status;
+
+	/*
+	 * Return the descriptor back to the host.
+	 * We wrote 1 byte (our status) to host.
+	 */
+	pthread_mutex_lock(&sc->vsc_mtx);
+	vq_relchain(&sc->vbsc_vq, io->io_idx, 1);
+	vq_endchains(&sc->vbsc_vq, 0);
+	pthread_mutex_unlock(&sc->vsc_mtx);
+}
+
+/*
+ * Take one descriptor chain off the virtqueue and turn it into a block
+ * request.
+ *
+ * Chain layout: iov[0] is the read-only struct virtio_blk_hdr, the last
+ * element is the one-byte writable status, and everything in between is
+ * data.  READ, WRITE and FLUSH requests are handed to the blockif layer
+ * with pci_vtblk_done() as completion callback; IDENT and unknown request
+ * types are completed here before returning.
+ */
+static void
+pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
+{
+	struct virtio_blk_hdr *vbh;
+	struct pci_vtblk_ioreq *io;
+	int i, n;
+	int err;
+	ssize_t iolen;
+	int writeop, type;
+	struct iovec iov[BLOCKIF_IOV_MAX + 2];
+	uint16_t idx, flags[BLOCKIF_IOV_MAX + 2];
+
+	n = vq_getchain(vq, &idx, iov, BLOCKIF_IOV_MAX + 2, flags);
+
+	/*
+	 * The first descriptor will be the read-only fixed header,
+	 * and the last is for status (hence +2 above and below).
+	 * The remaining iov's are the actual data I/O vectors.
+	 *
+	 * XXX - note - this fails on crash dump, which does a
+	 * VIRTIO_BLK_T_FLUSH with a zero transfer length
+	 */
+	assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2);
+
+	io = &sc->vbsc_ios[idx];
+	assert((flags[0] & VRING_DESC_F_WRITE) == 0);
+	assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
+	vbh = iov[0].iov_base;
+	memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2));
+	io->io_req.br_iovcnt = n - 2;
+	io->io_req.br_offset = vbh->vbh_sector * DEV_BSIZE;
+	/* From here on n indexes the status descriptor. */
+	io->io_status = iov[--n].iov_base;
+	assert(iov[n].iov_len == 1);
+	assert(flags[n] & VRING_DESC_F_WRITE);
+
+	/*
+	 * XXX
+	 * The guest should not be setting the BARRIER flag because
+	 * we don't advertise the capability.
+	 */
+	type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
+	writeop = (type == VBH_OP_WRITE);
+
+	iolen = 0;
+	for (i = 1; i < n; i++) {
+		/*
+		 * - write op implies read-only descriptor,
+		 * - read/ident op implies write-only descriptor,
+		 * therefore test the inverse of the descriptor bit
+		 * to the op.
+		 */
+		assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
+		iolen += iov[i].iov_len;
+	}
+	io->io_req.br_resid = iolen;
+
+	/*
+	 * Report the offset actually stored in the request; the former
+	 * local "offset" was printed without ever being assigned.
+	 */
+	DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %jd\n\r",
+		 writeop ? "write" : "read/ident", iolen, i - 1,
+		 (intmax_t)io->io_req.br_offset));
+
+	switch (type) {
+	case VBH_OP_READ:
+		err = blockif_read(sc->bc, &io->io_req);
+		break;
+	case VBH_OP_WRITE:
+		err = blockif_write(sc->bc, &io->io_req);
+		break;
+	case VBH_OP_FLUSH:
+	case VBH_OP_FLUSH_OUT:
+		err = blockif_flush(sc->bc, &io->io_req);
+		break;
+	case VBH_OP_IDENT:
+		/* Assume a single buffer */
+		/* S/n equal to buffer is not zero-terminated. */
+		memset(iov[1].iov_base, 0, iov[1].iov_len);
+		strncpy(iov[1].iov_base, sc->vbsc_ident,
+		    MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
+		pci_vtblk_done(&io->io_req, 0);
+		return;
+	default:
+		pci_vtblk_done(&io->io_req, EOPNOTSUPP);
+		return;
+	}
+	assert(err == 0);
+}
+
+/*
+ * Queue-notify hook: process descriptor chains one at a time until the
+ * queue reports that none are left.
+ */
+static void
+pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
+{
+	struct pci_vtblk_softc *sc = vsc;
+
+	for (;;) {
+		if (!vq_has_descs(vq))
+			break;
+		pci_vtblk_proc(sc, vq);
+	}
+}
+
+/*
+ * Device-model init hook for virtio-blk.
+ *
+ * Opens the backing store named by opts, allocates and links up the
+ * softc, derives the identification string from the MD5 of opts, fills in
+ * the virtio config space and the PCI IDs, and sets up interrupts and the
+ * I/O BAR.  Returns 0 on success and 1 on failure.
+ */
+static int
+pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	char bident[sizeof("XX:X:X")];
+	char ident[VTBLK_BLK_ID_BYTES + 1];
+	struct blockif_ctxt *bctxt;
+	MD5_CTX mdctx;
+	u_char digest[16];
+	struct pci_vtblk_softc *sc;
+	off_t size;
+	int i, sectsz, sts, sto;
+
+	if (opts == NULL) {
+		printf("virtio-block: backing device required\n");
+		return (1);
+	}
+
+	/*
+	 * The supplied backing file has to exist
+	 */
+	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
+	bctxt = blockif_open(opts, bident);
+	if (bctxt == NULL) {
+		perror("Could not open backing file");
+		return (1);
+	}
+
+	size = blockif_size(bctxt);
+	sectsz = blockif_sectsz(bctxt);
+	blockif_psectsz(bctxt, &sts, &sto);
+
+	sc = calloc(1, sizeof(struct pci_vtblk_softc));
+	if (sc == NULL) {
+		/* Do not dereference a failed allocation or leak bctxt. */
+		perror("virtio-block: could not allocate softc");
+		blockif_close(bctxt);
+		return (1);
+	}
+	sc->bc = bctxt;
+	for (i = 0; i < VTBLK_RINGSZ; i++) {
+		struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
+		io->io_req.br_callback = pci_vtblk_done;
+		io->io_req.br_param = io;
+		io->io_sc = sc;
+		io->io_idx = i;
+	}
+
+	pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+	/* init virtio softc and virtqueues */
+	vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq);
+	sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;
+
+	sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
+	/* sc->vbsc_vq.vq_notify = we have no per-queue notify */
+
+	/*
+	 * Create an identifier for the backing file. Use parts of the
+	 * md5 sum of the filename.
+	 *
+	 * The formatted string is exactly VTBLK_BLK_ID_BYTES characters
+	 * long, so it is built in a buffer with room for the terminating
+	 * NUL and then copied without it; vbsc_ident is consumed as a
+	 * fixed-size, not necessarily NUL-terminated, field.  Formatting
+	 * straight into vbsc_ident would write one byte past its end.
+	 */
+	MD5Init(&mdctx);
+	MD5Update(&mdctx, opts, strlen(opts));
+	MD5Final(digest, &mdctx);
+	snprintf(ident, sizeof(ident), "BHYVE-%02X%02X-%02X%02X-%02X%02X",
+	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
+	memcpy(sc->vbsc_ident, ident, sizeof(sc->vbsc_ident));
+
+	/* setup virtio block config space */
+	sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */
+	sc->vbsc_cfg.vbc_size_max = 0;	/* not negotiated */
+	sc->vbsc_cfg.vbc_seg_max = BLOCKIF_IOV_MAX;
+	sc->vbsc_cfg.vbc_geometry.cylinders = 0;	/* no geometry */
+	sc->vbsc_cfg.vbc_geometry.heads = 0;
+	sc->vbsc_cfg.vbc_geometry.sectors = 0;
+	sc->vbsc_cfg.vbc_blk_size = sectsz;
+	sc->vbsc_cfg.vbc_topology.physical_block_exp =
+	    (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0;
+	sc->vbsc_cfg.vbc_topology.alignment_offset =
+	    (sto != 0) ? ((sts - sto) / sectsz) : 0;
+	sc->vbsc_cfg.vbc_topology.min_io_size = 0;
+	sc->vbsc_cfg.vbc_topology.opt_io_size = 0;
+	sc->vbsc_cfg.vbc_writeback = 0;
+
+	/*
+	 * Should we move some of this into virtio.c?  Could
+	 * have the device, class, and subdev_0 as fields in
+	 * the virtio constants structure.
+	 */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
+	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
+
+	if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
+		blockif_close(sc->bc);
+		free(sc);
+		return (1);
+	}
+	vi_set_io_bar(&sc->vbsc_vs, 0);
+	return (0);
+}
+
+/*
+ * Config-space write hook.  Nothing here is writable: the attempt is
+ * logged when debugging is enabled and 1 is returned.
+ */
+static int
+pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value)
+{
+	const int rejected = 1;
+
+	DPRINTF(("vtblk: write to readonly reg %d\n\r", offset));
+	return (rejected);
+}
+
+/*
+ * Config-space read hook: copy size bytes starting at offset within the
+ * device's vtblk_config into *retval.  Always returns 0.
+ */
+static int
+pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+	struct pci_vtblk_softc *sc = vsc;
+	uint8_t *cfg_bytes = (uint8_t *)&sc->vbsc_cfg;
+
+	/* our caller has already verified offset and size */
+	memcpy(retval, cfg_bytes + offset, size);
+	return (0);
+}
+
+/*
+ * Device-model descriptor for virtio-blk: binds the name "virtio-blk" to
+ * pci_vtblk_init() and the generic virtio BAR handlers, and registers it
+ * through PCI_EMUL_SET.
+ */
+struct pci_devemu pci_de_vblk = {
+	.pe_emu =	"virtio-blk",
+	.pe_init =	pci_vtblk_init,
+	.pe_barwrite =	vi_pci_write,
+	.pe_barread =	vi_pci_read
+};
+PCI_EMUL_SET(pci_de_vblk);


Property changes on: trunk/usr.sbin/bhyve/pci_virtio_block.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- trunk/usr.sbin/bhyve/pci_virtio_net.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_virtio_net.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,973 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/pci_virtio_net.c 307183 2016-10-13 06:32:21Z np $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_virtio_net.c 307183 2016-10-13 06:32:21Z np $");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/select.h>
+#include <sys/uio.h>
+#include <sys/ioctl.h>
+#include <machine/atomic.h>
+#include <net/ethernet.h>
+#ifndef NETMAP_WITH_LIBS
+#define NETMAP_WITH_LIBS
+#endif
+#include <net/netmap_user.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <md5.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "mevent.h"
+#include "virtio.h"
+
+#define VTNET_RINGSZ	1024
+
+#define VTNET_MAXSEGS	256
+
+/*
+ * Host capabilities.  Note that we only offer a few of these.
+ */
+#define	VIRTIO_NET_F_CSUM	(1 <<  0) /* host handles partial cksum */
+#define	VIRTIO_NET_F_GUEST_CSUM	(1 <<  1) /* guest handles partial cksum */
+#define	VIRTIO_NET_F_MAC	(1 <<  5) /* host supplies MAC */
+#define	VIRTIO_NET_F_GSO_DEPREC	(1 <<  6) /* deprecated: host handles GSO */
+#define	VIRTIO_NET_F_GUEST_TSO4	(1 <<  7) /* guest can rcv TSOv4 */
+#define	VIRTIO_NET_F_GUEST_TSO6	(1 <<  8) /* guest can rcv TSOv6 */
+#define	VIRTIO_NET_F_GUEST_ECN	(1 <<  9) /* guest can rcv TSO with ECN */
+#define	VIRTIO_NET_F_GUEST_UFO	(1 << 10) /* guest can rcv UFO */
+#define	VIRTIO_NET_F_HOST_TSO4	(1 << 11) /* host can rcv TSOv4 */
+#define	VIRTIO_NET_F_HOST_TSO6	(1 << 12) /* host can rcv TSOv6 */
+#define	VIRTIO_NET_F_HOST_ECN	(1 << 13) /* host can rcv TSO with ECN */
+#define	VIRTIO_NET_F_HOST_UFO	(1 << 14) /* host can rcv UFO */
+#define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15) /* host can merge RX buffers */
+#define	VIRTIO_NET_F_STATUS	(1 << 16) /* config status field available */
+#define	VIRTIO_NET_F_CTRL_VQ	(1 << 17) /* control channel available */
+#define	VIRTIO_NET_F_CTRL_RX	(1 << 18) /* control channel RX mode support */
+#define	VIRTIO_NET_F_CTRL_VLAN	(1 << 19) /* control channel VLAN filtering */
+#define	VIRTIO_NET_F_GUEST_ANNOUNCE \
+				(1 << 21) /* guest can send gratuitous pkts */
+
+#define VTNET_S_HOSTCAPS      \
+  ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
+    VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
+
+/*
+ * PCI config-space "registers"
+ */
+struct virtio_net_config {
+	uint8_t  mac[6];
+	uint16_t status;
+} __packed;
+
+/*
+ * Queue definitions.
+ */
+#define VTNET_RXQ	0
+#define VTNET_TXQ	1
+#define VTNET_CTLQ	2	/* NB: not yet supported */
+
+#define VTNET_MAXQ	3
+
+/*
+ * Fixed network header size
+ */
+struct virtio_net_rxhdr {
+	uint8_t		vrh_flags;
+	uint8_t		vrh_gso_type;
+	uint16_t	vrh_hdr_len;
+	uint16_t	vrh_gso_size;
+	uint16_t	vrh_csum_start;
+	uint16_t	vrh_csum_offset;
+	uint16_t	vrh_bufs;
+} __packed;
+
+/*
+ * Debug printf
+ *
+ * DPRINTF takes a parenthesised printf argument list, e.g.
+ * DPRINTF(("x = %d\n", x)).  It is wrapped in do/while(0) so that it
+ * behaves as a single statement and cannot capture a following "else".
+ */
+static int pci_vtnet_debug;
+#define DPRINTF(params) do { if (pci_vtnet_debug) printf params; } while (0)
+#define WPRINTF(params) printf params
+
+/*
+ * Per-device softc
+ */
+struct pci_vtnet_softc {
+	struct virtio_softc vsc_vs;	/* generic virtio state */
+	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];	/* VTNET_RXQ, VTNET_TXQ */
+	pthread_mutex_t vsc_mtx;
+	struct mevent	*vsc_mevp;
+
+	int		vsc_tapfd;	/* tap backend fd; -1 when not in use */
+	struct nm_desc	*vsc_nmd;	/* netmap backend; NULL when not in use */
+
+	int		vsc_rx_ready;	/* set on first rx queue notify */
+	volatile int	resetting;	/* set and checked outside lock */
+
+	uint64_t	vsc_features;	/* negotiated features */
+	
+	struct virtio_net_config vsc_config;
+
+	pthread_mutex_t	rx_mtx;		/* held while the rx handler runs */
+	int		rx_in_progress;	/* polled by pci_vtnet_rxwait() */
+	int		rx_vhdrlen;	/* bytes of rx header before packet data */
+	int		rx_merge;	/* merged rx bufs in use */
+
+	pthread_t 	tx_tid;
+	pthread_mutex_t	tx_mtx;
+	pthread_cond_t	tx_cond;	/* signalled by pci_vtnet_ping_txq() */
+	int		tx_in_progress;	/* polled by pci_vtnet_txwait() */
+
+	/* Backend-specific receive and transmit routines (tap or netmap). */
+	void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc);
+	void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov,
+			     int iovcnt, int len);
+};
+
+static void pci_vtnet_reset(void *);
+/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
+static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
+static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
+static void pci_vtnet_neg_features(void *, uint64_t);
+
+static struct virtio_consts vtnet_vi_consts = {
+	"vtnet",		/* our name */
+	VTNET_MAXQ - 1,		/* we currently support 2 virtqueues */
+	sizeof(struct virtio_net_config), /* config reg size */
+	pci_vtnet_reset,	/* reset */
+	NULL,			/* device-wide qnotify -- not used */
+	pci_vtnet_cfgread,	/* read PCI config */
+	pci_vtnet_cfgwrite,	/* write PCI config */
+	pci_vtnet_neg_features,	/* apply negotiated features */
+	VTNET_S_HOSTCAPS,	/* our capabilities */
+};
+
+/*
+ * Block the caller until the transmit side is idle: tx_in_progress is
+ * sampled under tx_mtx and, while it is set, re-checked every 10ms.
+ */
+static void
+pci_vtnet_txwait(struct pci_vtnet_softc *sc)
+{
+	int busy;
+
+	for (;;) {
+		pthread_mutex_lock(&sc->tx_mtx);
+		busy = sc->tx_in_progress;
+		pthread_mutex_unlock(&sc->tx_mtx);
+		if (!busy)
+			break;
+		usleep(10000);
+	}
+}
+
+/*
+ * Block the caller until the receive side is idle: rx_in_progress is
+ * sampled under rx_mtx and, while it is set, re-checked every 10ms.
+ */
+static void
+pci_vtnet_rxwait(struct pci_vtnet_softc *sc)
+{
+	int busy;
+
+	for (;;) {
+		pthread_mutex_lock(&sc->rx_mtx);
+		busy = sc->rx_in_progress;
+		pthread_mutex_unlock(&sc->rx_mtx);
+		if (!busy)
+			break;
+		usleep(10000);
+	}
+}
+
+/*
+ * virtio reset hook.
+ *
+ * Raises the resetting flag (which the rx handlers check before touching
+ * the ring), waits for in-flight transmit and receive work to drain,
+ * restores the rx defaults, resets the generic virtio state and finally
+ * clears the flag again.
+ */
+static void
+pci_vtnet_reset(void *vsc)
+{
+	struct pci_vtnet_softc *sc = vsc;
+
+	DPRINTF(("vtnet: device reset requested !\n"));
+
+	sc->resetting = 1;
+
+	/*
+	 * Wait for the transmit and receive threads to finish their
+	 * processing.
+	 */
+	pci_vtnet_txwait(sc);
+	pci_vtnet_rxwait(sc);
+
+	/* Back to defaults: rx not started, merged buffers, full rx header. */
+	sc->vsc_rx_ready = 0;
+	sc->rx_merge = 1;
+	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
+
+	/* now reset rings, MSI-X vectors, and negotiated capabilities */
+	vi_reset_dev(&sc->vsc_vs);
+
+	sc->resetting = 0;
+}
+
+/*
+ * Transmit one buffer chain on the tap device.
+ *
+ * Frames shorter than 60 bytes are padded with zeroes by appending one
+ * more iovec; the caller guarantees that iov has room for it.  Nothing
+ * is sent when no tap device is open.
+ */
+static void
+pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
+		 int len)
+{
+	static char pad[60]; /* all zero bytes */
+	struct iovec *tail;
+
+	if (sc->vsc_tapfd == -1)
+		return;
+
+	if (len < 60) {
+		tail = &iov[iovcnt];
+		tail->iov_base = pad;
+		tail->iov_len = 60 - len;
+		iovcnt++;
+	}
+	(void) writev(sc->vsc_tapfd, iov, iovcnt);
+}
+
+/*
+ *  Called when there is read activity on the tap file descriptor.
+ * Each buffer posted by the guest is assumed to be able to contain
+ * an entire ethernet frame + rx header.
+ *  MP note: the dummybuf is only used for discarding frames, so there
+ * is no need for it to be per-vtnet or locked.
+ */
+static uint8_t dummybuf[2048];
+
+/*
+ * Strip the first tlen bytes from an iovec array in place.
+ *
+ * The first segment must hold at least tlen bytes.  If it is consumed
+ * entirely, *niov is decremented and a pointer to the second segment is
+ * returned; otherwise the first segment is advanced and returned.
+ */
+static __inline struct iovec *
+rx_iov_trim(struct iovec *iov, int *niov, int tlen)
+{
+	struct iovec *first = &iov[0];
+
+	/* XXX short-cut: assume first segment is >= tlen */
+	assert(first->iov_len >= tlen);
+
+	first->iov_len -= tlen;
+	if (first->iov_len != 0) {
+		first->iov_base = (void *)((uintptr_t)first->iov_base + tlen);
+		return (first);
+	}
+
+	assert(*niov > 1);
+	*niov -= 1;
+	return (first + 1);
+}
+
+/*
+ * Receive handler for the tap backend.
+ *
+ * When the rx queue is not ready, the device is resetting, or the guest
+ * has posted no buffers, one frame is read into dummybuf and discarded.
+ * Otherwise frames are read straight into guest descriptor chains, one
+ * frame per chain, until the tap device reports EWOULDBLOCK or the ring
+ * runs out of descriptors.
+ */
+static void
+pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
+{
+	struct iovec iov[VTNET_MAXSEGS], *riov;
+	struct vqueue_info *vq;
+	void *vrx;
+	int len, n;
+	uint16_t idx;
+
+	/*
+	 * Should never be called without a valid tap fd
+	 */
+	assert(sc->vsc_tapfd != -1);
+
+	/*
+	 * But, will be called when the rx ring hasn't yet
+	 * been set up or the guest is resetting the device.
+	 */
+	if (!sc->vsc_rx_ready || sc->resetting) {
+		/*
+		 * Drop the packet and try later.
+		 */
+		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
+		return;
+	}
+
+	/*
+	 * Check for available rx buffers
+	 */
+	vq = &sc->vsc_queues[VTNET_RXQ];
+	if (!vq_has_descs(vq)) {
+		/*
+		 * Drop the packet and try later.  Interrupt on
+		 * empty, if that's negotiated.
+		 */
+		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
+		vq_endchains(vq, 1);
+		return;
+	}
+
+	do {
+		/*
+		 * Get descriptor chain.
+		 */
+		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
+		assert(n >= 1 && n <= VTNET_MAXSEGS);
+
+		/*
+		 * Get a pointer to the rx header, and use the
+		 * data immediately following it for the packet buffer.
+		 */
+		vrx = iov[0].iov_base;
+		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
+
+		len = readv(sc->vsc_tapfd, riov, n);
+
+		/*
+		 * NOTE(review): a readv() failure with any other errno falls
+		 * through and releases the chain with a length derived from
+		 * the negative return value - confirm whether that is
+		 * intended.
+		 */
+		if (len < 0 && errno == EWOULDBLOCK) {
+			/*
+			 * No more packets, but still some avail ring
+			 * entries.  Interrupt if needed/appropriate.
+			 */
+			vq_retchain(vq);
+			vq_endchains(vq, 0);
+			return;
+		}
+
+		/*
+		 * The only valid field in the rx packet header is the
+		 * number of buffers if merged rx bufs were negotiated.
+		 */
+		memset(vrx, 0, sc->rx_vhdrlen);
+
+		if (sc->rx_merge) {
+			struct virtio_net_rxhdr *vrxh;
+
+			vrxh = vrx;
+			vrxh->vrh_bufs = 1;
+		}
+
+		/*
+		 * Release this chain and handle more chains.
+		 */
+		vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+	} while (vq_has_descs(vq));
+
+	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
+	vq_endchains(vq, 1);
+}
+
+/*
+ * Copy an iovec array into the next free slot of a netmap TX ring and
+ * kick the transmit with NIOCTXSYNC.
+ *
+ * Rings are tried starting at nmd->cur_tx_ring and wrapping from
+ * last_tx_ring back to first_tx_ring; if every ring is full nothing is
+ * sent.  Segments that would not fit into the slot's buffer are not
+ * copied.  Returns the number of bytes placed in the slot (0 when no
+ * slot was available).
+ */
+static int
+pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+{
+	int r, i;
+	int len = 0;
+
+	for (r = nmd->cur_tx_ring; ; ) {
+		struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
+		uint32_t cur, idx;
+		char *buf;
+
+		if (nm_ring_empty(ring)) {
+			r++;
+			if (r > nmd->last_tx_ring)
+				r = nmd->first_tx_ring;
+			/*
+			 * Stop once we are back at the TX ring we started
+			 * from; comparing against the RX ring index could
+			 * end the scan early or never end it.
+			 */
+			if (r == nmd->cur_tx_ring)
+				break;
+			continue;
+		}
+		cur = ring->cur;
+		idx = ring->slot[cur].buf_idx;
+		buf = NETMAP_BUF(ring, idx);
+
+		for (i = 0; i < iovcnt; i++) {
+			/* Never write past the end of the netmap buffer. */
+			if (len + iov[i].iov_len > ring->nr_buf_size)
+				break;
+			memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
+			len += iov[i].iov_len;
+		}
+		ring->slot[cur].len = len;
+		ring->head = ring->cur = nm_ring_next(ring, cur);
+		nmd->cur_tx_ring = r;
+		ioctl(nmd->fd, NIOCTXSYNC, NULL);
+		break;
+	}
+
+	return (len);
+}
+
+/*
+ * Copy the next packet from a netmap RX ring into an iovec array.
+ *
+ * Rings are tried starting at nmd->cur_rx_ring and wrapping from
+ * last_rx_ring back to first_rx_ring.  The iov_len of every segment is
+ * rewritten to the number of bytes actually stored in it (0 for unused
+ * segments).  Returns the number of bytes copied, 0 if no ring had a
+ * packet.
+ */
+static inline int
+pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+{
+	struct netmap_ring *ring;
+	uint32_t cur, idx;
+	size_t left;
+	char *buf;
+	int copied = 0;
+	int seg = 0;
+	int r;
+
+	/* Look for the first ring that has something to deliver. */
+	r = nmd->cur_rx_ring;
+	ring = NETMAP_RXRING(nmd->nifp, r);
+	while (nm_ring_empty(ring)) {
+		r++;
+		if (r > nmd->last_rx_ring)
+			r = nmd->first_rx_ring;
+		if (r == nmd->cur_rx_ring) {
+			ring = NULL;
+			break;
+		}
+		ring = NETMAP_RXRING(nmd->nifp, r);
+	}
+
+	if (ring != NULL) {
+		cur = ring->cur;
+		idx = ring->slot[cur].buf_idx;
+		buf = NETMAP_BUF(ring, idx);
+		left = ring->slot[cur].len;
+
+		while (seg < iovcnt && left > 0) {
+			if (iov[seg].iov_len > left)
+				iov[seg].iov_len = left;
+			memcpy(iov[seg].iov_base, &buf[copied],
+			    iov[seg].iov_len);
+			copied += iov[seg].iov_len;
+			left -= iov[seg].iov_len;
+			seg++;
+		}
+		ring->head = ring->cur = nm_ring_next(ring, cur);
+		nmd->cur_rx_ring = r;
+		ioctl(nmd->fd, NIOCRXSYNC, NULL);
+	}
+
+	/* Segments that received nothing are marked empty. */
+	while (seg < iovcnt) {
+		iov[seg].iov_len = 0;
+		seg++;
+	}
+
+	return (copied);
+}
+
+/*
+ * Transmit one buffer chain on the netmap port.
+ *
+ * Frames shorter than 60 bytes are padded with zeroes by appending one
+ * more iovec; the caller guarantees that iov has room for it.  Nothing
+ * is sent when no netmap descriptor is open.
+ */
+static void
+pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
+		    int len)
+{
+	static char pad[60]; /* all zero bytes */
+	struct iovec *tail;
+
+	if (sc->vsc_nmd == NULL)
+		return;
+
+	if (len < 60) {
+		tail = &iov[iovcnt];
+		tail->iov_base = pad;
+		tail->iov_len = 60 - len;
+		iovcnt++;
+	}
+	(void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt);
+}
+
+/*
+ * Receive handler for the netmap backend.
+ *
+ * When the rx queue is not ready, the device is resetting, or the guest
+ * has posted no buffers, one packet is taken from netmap and discarded.
+ * Otherwise packets are copied into guest descriptor chains, one packet
+ * per chain, until netmap has nothing more to deliver or the ring runs
+ * out of descriptors.
+ */
+static void
+pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
+{
+	struct iovec iov[VTNET_MAXSEGS], *riov;
+	struct vqueue_info *vq;
+	void *vrx;
+	int len, n;
+	uint16_t idx;
+
+	/*
+	 * Should never be called without a valid netmap descriptor
+	 */
+	assert(sc->vsc_nmd != NULL);
+
+	/*
+	 * But, will be called when the rx ring hasn't yet
+	 * been set up or the guest is resetting the device.
+	 */
+	if (!sc->vsc_rx_ready || sc->resetting) {
+		/*
+		 * Drop the packet and try later.
+		 */
+		(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+		return;
+	}
+
+	/*
+	 * Check for available rx buffers
+	 */
+	vq = &sc->vsc_queues[VTNET_RXQ];
+	if (!vq_has_descs(vq)) {
+		/*
+		 * Drop the packet and try later.  Interrupt on
+		 * empty, if that's negotiated.
+		 */
+		(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+		vq_endchains(vq, 1);
+		return;
+	}
+
+	do {
+		/*
+		 * Get descriptor chain.
+		 */
+		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
+		assert(n >= 1 && n <= VTNET_MAXSEGS);
+
+		/*
+		 * Get a pointer to the rx header, and use the
+		 * data immediately following it for the packet buffer.
+		 */
+		vrx = iov[0].iov_base;
+		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
+
+		len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
+
+		if (len == 0) {
+			/*
+			 * No more packets, but still some avail ring
+			 * entries.  Hand the unused chain back (as the
+			 * tap path does) so it is not lost, then
+			 * interrupt if needed/appropriate.
+			 */
+			vq_retchain(vq);
+			vq_endchains(vq, 0);
+			return;
+		}
+
+		/*
+		 * The only valid field in the rx packet header is the
+		 * number of buffers if merged rx bufs were negotiated.
+		 */
+		memset(vrx, 0, sc->rx_vhdrlen);
+
+		if (sc->rx_merge) {
+			struct virtio_net_rxhdr *vrxh;
+
+			vrxh = vrx;
+			vrxh->vrh_bufs = 1;
+		}
+
+		/*
+		 * Release this chain and handle more chains.
+		 */
+		vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+	} while (vq_has_descs(vq));
+
+	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
+	vq_endchains(vq, 1);
+}
+
+/*
+ * Event callback for the receive side; param is the device softc.
+ *
+ * Runs the backend's receive routine with rx_mtx held and with
+ * rx_in_progress set for the duration, which is what
+ * pci_vtnet_rxwait() polls.
+ */
+static void
+pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
+{
+	struct pci_vtnet_softc *sc = param;
+
+	pthread_mutex_lock(&sc->rx_mtx);
+	sc->rx_in_progress = 1;
+	sc->pci_vtnet_rx(sc);
+	sc->rx_in_progress = 0;
+	pthread_mutex_unlock(&sc->rx_mtx);
+
+}
+
+/*
+ * Notify hook for the rx queue.  The first notification marks the
+ * receive path as ready and sets VRING_USED_F_NO_NOTIFY on the used
+ * ring; later notifications do nothing.
+ */
+static void
+pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
+{
+	struct pci_vtnet_softc *sc = vsc;
+
+	if (sc->vsc_rx_ready != 0)
+		return;
+
+	/*
+	 * A qnotify means that the rx process can now begin
+	 */
+	sc->vsc_rx_ready = 1;
+	vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
+}
+
+/*
+ * Transmit one descriptor chain taken from the tx queue.
+ *
+ * Segment 0 of the chain is the header descriptor and is not passed to
+ * the backend; segments 1..n-1 carry the packet.  The chain is released
+ * with the summed length of every segment, header included.
+ */
+static void
+pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
+{
+	struct iovec iov[VTNET_MAXSEGS + 1];
+	uint16_t idx;
+	int nsegs, seg;
+	int plen, tlen;
+
+	nsegs = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
+	assert(nsegs >= 1 && nsegs <= VTNET_MAXSEGS);
+
+	/* plen counts payload bytes only; tlen adds the header segment. */
+	plen = 0;
+	for (seg = 1; seg < nsegs; seg++)
+		plen += iov[seg].iov_len;
+	tlen = iov[0].iov_len + plen;
+
+	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, nsegs));
+	sc->pci_vtnet_tx(sc, &iov[1], nsegs - 1, plen);
+
+	/* The chain has been consumed; hand it back with the total length. */
+	vq_relchain(vq, idx, tlen);
+}
+
+/*
+ * Queue-notify handler for the transmit queue.  If the ring holds work,
+ * suppress further guest notifications and wake the tx thread unless it
+ * is already marked as processing.
+ */
+static void
+pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
+{
+	struct pci_vtnet_softc *softc = vsc;
+
+	if (vq_has_descs(vq)) {
+		/* Signal the tx thread for processing */
+		pthread_mutex_lock(&softc->tx_mtx);
+		vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
+		if (softc->tx_in_progress == 0)
+			pthread_cond_signal(&softc->tx_cond);
+		pthread_mutex_unlock(&softc->tx_mtx);
+	}
+}
+
+/*
+ * Thread which will handle processing of TX desc.
+ *
+ * param is the device softc.  The thread sleeps on tx_cond (signalled
+ * from pci_vtnet_ping_txq()) while there is nothing to send, and
+ * otherwise drains the tx queue one chain at a time through
+ * pci_vtnet_proctx().  tx_mtx is held while inspecting or changing
+ * tx_in_progress and the notify-suppression flag, and dropped while
+ * packets are being transmitted.  The loop never terminates, so the
+ * function does not return.
+ */
+static void *
+pci_vtnet_tx_thread(void *param)
+{
+	struct pci_vtnet_softc *sc = param;
+	struct vqueue_info *vq;
+	int error;
+
+	vq = &sc->vsc_queues[VTNET_TXQ];
+
+	/*
+	 * Let us wait till the tx queue pointers get initialised &
+	 * first tx signaled.  This first wait is unconditional.
+	 */
+	pthread_mutex_lock(&sc->tx_mtx);
+	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
+	assert(error == 0);
+
+	for (;;) {
+		/*
+		 * note - tx mutex is locked here
+		 *
+		 * Before sleeping, re-enable guest notifications, issue a
+		 * barrier, and look at the ring once more; if work showed
+		 * up in between, skip the sleep and go process it.
+		 */
+		while (sc->resetting || !vq_has_descs(vq)) {
+			vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY;
+			mb();
+			if (!sc->resetting && vq_has_descs(vq))
+				break;
+
+			sc->tx_in_progress = 0;
+			error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
+			assert(error == 0);
+		}
+		vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
+		sc->tx_in_progress = 1;
+		pthread_mutex_unlock(&sc->tx_mtx);
+
+		do {
+			/*
+			 * Run through entries, placing them into
+			 * iovecs and sending when an end-of-packet
+			 * is found.  Each call handles one chain.
+			 */
+			pci_vtnet_proctx(sc, vq);
+		} while (vq_has_descs(vq));
+
+		/*
+		 * Generate an interrupt if needed.
+		 */
+		vq_endchains(vq, 1);
+
+		pthread_mutex_lock(&sc->tx_mtx);
+	}
+}
+
+#ifdef notyet
+/* Queue-notify handler for the control queue; it only logs the event. */
+static void
+pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
+{
+	DPRINTF(("vtnet: control qnotify!\n\r"));
+}
+#endif
+
+/*
+ * Parse a "mac=<address>" option into mac_addr.
+ *
+ * mac_str is split in place at the first '='.  If there is no '=' or the
+ * key is not "mac", nothing is written and 0 is returned.  An address
+ * that ether_aton() rejects, that is multicast, or that is all zeroes
+ * is reported on stderr and yields EINVAL.  Otherwise the six octets
+ * are copied to mac_addr and 0 is returned.
+ */
+static int
+pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
+{
+	const char zero_addr[ETHER_ADDR_LEN] = { 0 };
+	struct ether_addr *ea;
+	char *key;
+
+	key = strsep(&mac_str, "=");
+
+	/* Anything that is not "mac=<value>" is left for others to handle. */
+	if (mac_str == NULL || strcmp(key, "mac") != 0)
+		return (0);
+
+	ea = ether_aton(mac_str);
+	if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
+	    memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
+		fprintf(stderr, "Invalid MAC %s\n", mac_str);
+		return (EINVAL);
+	}
+
+	memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
+	return (0);
+}
+
+/*
+ * Attach the device to the backend at /dev/<devname>.
+ *
+ * Installs the tap rx/tx routines, opens the device read/write, switches
+ * it to non-blocking mode and registers it for read events so that
+ * pci_vtnet_rx_callback() runs when data is available.
+ *
+ * On any failure a warning is printed and sc->vsc_tapfd is left at -1;
+ * the caller uses that to decide the link state.
+ */
+static void
+pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname)
+{
+	char tbuf[80];
+	int opt;
+
+	/* Over-long names are truncated to fit, as before. */
+	snprintf(tbuf, sizeof(tbuf), "/dev/%s", devname);
+
+	sc->pci_vtnet_rx = pci_vtnet_tap_rx;
+	sc->pci_vtnet_tx = pci_vtnet_tap_tx;
+
+	sc->vsc_tapfd = open(tbuf, O_RDWR);
+	if (sc->vsc_tapfd == -1) {
+		WPRINTF(("open of tap device %s failed\n", tbuf));
+		return;
+	}
+
+	/*
+	 * Set non-blocking and register for read
+	 * notifications with the event loop
+	 */
+	opt = 1;
+	if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
+		WPRINTF(("tap device O_NONBLOCK failed\n"));
+		close(sc->vsc_tapfd);
+		sc->vsc_tapfd = -1;
+		/*
+		 * Stop here: the descriptor is gone, so it must not be
+		 * handed to mevent_add() or closed a second time.
+		 */
+		return;
+	}
+
+	sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
+				  EVF_READ,
+				  pci_vtnet_rx_callback,
+				  sc);
+	if (sc->vsc_mevp == NULL) {
+		WPRINTF(("Could not register event\n"));
+		close(sc->vsc_tapfd);
+		sc->vsc_tapfd = -1;
+	}
+}
+
+/*
+ * Attach the device to the netmap port named ifname.
+ *
+ * Installs the netmap rx/tx routines, opens the port with nm_open() and
+ * registers its descriptor for read events.  On failure a warning is
+ * printed and sc->vsc_nmd is left NULL.
+ */
+static void
+pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname)
+{
+	sc->pci_vtnet_rx = pci_vtnet_netmap_rx;
+	sc->pci_vtnet_tx = pci_vtnet_netmap_tx;
+
+	sc->vsc_nmd = nm_open(ifname, NULL, 0, 0);
+	if (sc->vsc_nmd == NULL) {
+		WPRINTF(("open of netmap device %s failed\n", ifname));
+		return;
+	}
+
+	sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd, EVF_READ,
+	    pci_vtnet_rx_callback, sc);
+	if (sc->vsc_mevp != NULL)
+		return;
+
+	/* Could not hook into the event loop: undo the open. */
+	WPRINTF(("Could not register event\n"));
+	nm_close(sc->vsc_nmd);
+	sc->vsc_nmd = NULL;
+}
+
+/*
+ * Init hook for the "virtio-net" device model.
+ *
+ * Allocates the softc, wires up the rx and tx virtqueues, attaches the
+ * backend named in opts, chooses a MAC address, fills in PCI config
+ * space and starts the tx thread.
+ *
+ * opts, when not NULL, has the form "<dev>[,<option>]".  <dev> selects
+ * the backend by prefix: "vale" for netmap, "tap" or "vmnet" for a tap
+ * device.  <option> is passed to pci_vtnet_parsemac().
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	MD5_CTX mdctx;
+	unsigned char digest[16];
+	char nstr[80];
+	char tname[MAXCOMLEN + 1];
+	struct pci_vtnet_softc *sc;
+	char *devname;
+	char *vtopts;
+	int mac_provided;
+
+	sc = calloc(1, sizeof(struct pci_vtnet_softc));
+	if (sc == NULL) {
+		WPRINTF(("vtnet: softc allocation failed\n"));
+		return (1);
+	}
+
+	pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+	vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues);
+	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
+
+	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
+	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
+	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
+	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
+#ifdef notyet
+	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
+	sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
+#endif
+
+	/*
+	 * Attempt to open the backend device and read the MAC address
+	 * if specified
+	 */
+	mac_provided = 0;
+	sc->vsc_tapfd = -1;
+	sc->vsc_nmd = NULL;
+	if (opts != NULL) {
+		int err;
+
+		/* Work on a private copy: strsep() modifies the string. */
+		devname = vtopts = strdup(opts);
+		if (devname == NULL) {
+			WPRINTF(("vtnet: option string allocation failed\n"));
+			return (1);
+		}
+		(void) strsep(&vtopts, ",");
+
+		/*
+		 * NOTE(review): mac_provided is set whenever any option
+		 * follows the device name, even if pci_vtnet_parsemac()
+		 * did not recognise it as "mac=" - confirm this is
+		 * intended.
+		 */
+		if (vtopts != NULL) {
+			err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac);
+			if (err != 0) {
+				free(devname);
+				return (err);
+			}
+			mac_provided = 1;
+		}
+
+		if (strncmp(devname, "vale", 4) == 0)
+			pci_vtnet_netmap_setup(sc, devname);
+		if ((strncmp(devname, "tap", 3) == 0) ||
+		    (strncmp(devname, "vmnet", 5) == 0))
+			pci_vtnet_tap_setup(sc, devname);
+
+		free(devname);
+	}
+
+	/*
+	 * The default MAC address is the standard NetApp OUI of 00-a0-98,
+	 * followed by an MD5 of the PCI slot/func number and VM name
+	 */
+	if (!mac_provided) {
+		snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
+		    pi->pi_func, vmname);
+
+		MD5Init(&mdctx);
+		MD5Update(&mdctx, nstr, strlen(nstr));
+		MD5Final(digest, &mdctx);
+
+		sc->vsc_config.mac[0] = 0x00;
+		sc->vsc_config.mac[1] = 0xa0;
+		sc->vsc_config.mac[2] = 0x98;
+		sc->vsc_config.mac[3] = digest[0];
+		sc->vsc_config.mac[4] = digest[1];
+		sc->vsc_config.mac[5] = digest[2];
+	}
+
+	/* initialize config space */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
+	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
+
+	/*
+	 * Link is up if no backend was requested, or if we managed to
+	 * open either the tap device or the netmap port.
+	 */
+	sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0 ||
+	    sc->vsc_nmd != NULL);
+
+	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
+	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
+		return (1);
+
+	/* use BAR 0 to map config regs in IO space */
+	vi_set_io_bar(&sc->vsc_vs, 0);
+
+	sc->resetting = 0;
+
+	/*
+	 * Start out assuming merged rx buffers; pci_vtnet_neg_features()
+	 * adjusts both fields if the guest does not negotiate them.
+	 */
+	sc->rx_merge = 1;
+	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
+	sc->rx_in_progress = 0;
+	pthread_mutex_init(&sc->rx_mtx, NULL);
+
+	/*
+	 * Initialize tx synchronization & spawn TX processing thread.
+	 * As of now, only one thread for TX desc processing is
+	 * spawned.
+	 */
+	sc->tx_in_progress = 0;
+	pthread_mutex_init(&sc->tx_mtx, NULL);
+	pthread_cond_init(&sc->tx_cond, NULL);
+	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
+	snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
+	    pi->pi_func);
+	pthread_set_name_np(sc->tx_tid, tname);
+
+	return (0);
+}
+
+/*
+ * Device-config write handler.  Only the first six bytes, the MAC
+ * address, are writable; writes at any other offset are dropped with a
+ * debug message.  Always returns 0.
+ */
+static int
+pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
+{
+	struct pci_vtnet_softc *softc = vsc;
+
+	if (offset >= 6) {
+		/* silently ignore other writes */
+		DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
+		return (0);
+	}
+
+	/* The driver is allowed to change the MAC address. */
+	assert(offset + size <= 6);
+	memcpy(&softc->vsc_config.mac[offset], &value, size);
+	return (0);
+}
+
+/*
+ * Device-config read handler: copies size bytes starting at offset
+ * within sc->vsc_config into *retval.  Always returns 0.
+ */
+static int
+pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+	struct pci_vtnet_softc *softc = vsc;
+	uint8_t *cfg;
+
+	cfg = (uint8_t *)&softc->vsc_config;
+	memcpy(retval, cfg + offset, size);
+	return (0);
+}
+
+/*
+ * Record the negotiated feature bits.  Without VIRTIO_NET_F_MRG_RXBUF
+ * the rx path stops using merged buffers and the rx header shrinks by
+ * two bytes.
+ */
+static void
+pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
+{
+	struct pci_vtnet_softc *softc = vsc;
+
+	softc->vsc_features = negotiated_features;
+
+	if ((softc->vsc_features & VIRTIO_NET_F_MRG_RXBUF) != 0)
+		return;
+
+	softc->rx_merge = 0;
+	/* non-merge rx header is 2 bytes shorter */
+	softc->rx_vhdrlen -= 2;
+}
+
+/*
+ * Descriptor for the "virtio-net" device model.  BAR reads and writes
+ * are routed to vi_pci_read()/vi_pci_write().
+ */
+struct pci_devemu pci_de_vnet = {
+	.pe_emu		= "virtio-net",
+	.pe_init	= pci_vtnet_init,
+	.pe_barread	= vi_pci_read,
+	.pe_barwrite	= vi_pci_write,
+};
+PCI_EMUL_SET(pci_de_vnet);


Property changes on: trunk/usr.sbin/bhyve/pci_virtio_net.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pci_virtio_rnd.c
===================================================================
--- trunk/usr.sbin/bhyve/pci_virtio_rnd.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pci_virtio_rnd.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,190 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Nahanni Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer
+ *    in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * virtio entropy device emulation.
+ * Randomness is sourced from /dev/random which does not block
+ * once it has been seeded at bootup.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_virtio_rnd.c 284900 2015-06-28 03:22:26Z neel $");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/uio.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "virtio.h"
+
+#define VTRND_RINGSZ	64
+
+
+static int pci_vtrnd_debug;
+#define DPRINTF(params) if (pci_vtrnd_debug) printf params
+#define WPRINTF(params) printf params
+
+/*
+ * Per-device softc: the state kept for one emulated virtio entropy
+ * device.  Allocated and filled in by pci_vtrnd_init().
+ */
+struct pci_vtrnd_softc {
+	struct virtio_softc vrsc_vs;	/* virtio state, set up by vi_softc_linkup() */
+	struct vqueue_info  vrsc_vq;	/* the device's single virtqueue */
+	pthread_mutex_t     vrsc_mtx;	/* installed as vrsc_vs.vs_mtx */
+	uint64_t            vrsc_cfg;	/* not referenced by the code in this file */
+	int                 vrsc_fd;	/* open descriptor for /dev/random */
+};
+
+static void pci_vtrnd_reset(void *);
+static void pci_vtrnd_notify(void *, struct vqueue_info *);
+
+static struct virtio_consts vtrnd_vi_consts = {
+	"vtrnd",		/* our name */
+	1,			/* we support 1 virtqueue */
+	0,			/* config reg size */
+	pci_vtrnd_reset,	/* reset */
+	pci_vtrnd_notify,	/* device-wide qnotify */
+	NULL,			/* read virtio config */
+	NULL,			/* write virtio config */
+	NULL,			/* apply negotiated features */
+	0,			/* our capabilities */
+};
+
+
+/* Reset hook: log the request and reset the virtio device state. */
+static void
+pci_vtrnd_reset(void *vsc)
+{
+	struct pci_vtrnd_softc *softc = vsc;
+
+	DPRINTF(("vtrnd: device reset requested !\n"));
+	vi_reset_dev(&softc->vrsc_vs);
+}
+
+
+/*
+ * Queue-notify handler: fill each available guest buffer with bytes
+ * read from the random device and return it to the guest.
+ *
+ * Each chain is fetched as a single iovec.  A read that returns no data
+ * trips an assertion.
+ */
+static void
+pci_vtrnd_notify(void *vsc, struct vqueue_info *vq)
+{
+	struct pci_vtrnd_softc *softc = vsc;
+	struct iovec buf;
+	uint16_t idx;
+	int nread;
+
+	/* No entropy source: just finish the notification. */
+	if (softc->vrsc_fd < 0) {
+		vq_endchains(vq, 0);
+		return;
+	}
+
+	while (vq_has_descs(vq)) {
+		vq_getchain(vq, &idx, &buf, 1, NULL);
+
+		nread = read(softc->vrsc_fd, buf.iov_base, buf.iov_len);
+
+		DPRINTF(("vtrnd: vtrnd_notify(): %d\r\n", nread));
+
+		/* Catastrophe if unable to read from /dev/random */
+		assert(nread > 0);
+
+		/* Hand the filled buffer back and look for more. */
+		vq_relchain(vq, idx, nread);
+	}
+
+	/* Generate interrupt if appropriate. */
+	vq_endchains(vq, 1);
+}
+
+
+/*
+ * Init hook for the "virtio-rnd" device model.
+ *
+ * Opens /dev/random non-blocking, checks with a one-byte read that it
+ * can deliver data, then allocates the softc, links it to the virtio
+ * layer and fills in PCI config space.  The descriptor stays open for
+ * the lifetime of the device.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int
+pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	struct pci_vtrnd_softc *sc;
+	int fd;
+	int len;
+	uint8_t v;
+
+	/*
+	 * Should always be able to open /dev/random, but fail the device
+	 * cleanly rather than relying on an assertion if we cannot.
+	 */
+	fd = open("/dev/random", O_RDONLY | O_NONBLOCK);
+	if (fd < 0) {
+		WPRINTF(("vtrnd: open of /dev/random failed\n"));
+		return (1);
+	}
+
+	/*
+	 * Check that device is seeded and non-blocking.
+	 */
+	len = read(fd, &v, sizeof(v));
+	if (len <= 0) {
+		WPRINTF(("vtrnd: /dev/random not ready, read(): %d\n", len));
+		close(fd);
+		return (1);
+	}
+
+	sc = calloc(1, sizeof(struct pci_vtrnd_softc));
+	if (sc == NULL) {
+		WPRINTF(("vtrnd: softc allocation failed\n"));
+		close(fd);
+		return (1);
+	}
+
+	vi_softc_linkup(&sc->vrsc_vs, &vtrnd_vi_consts, sc, pi, &sc->vrsc_vq);
+	sc->vrsc_vs.vs_mtx = &sc->vrsc_mtx;
+
+	sc->vrsc_vq.vq_qsize = VTRND_RINGSZ;
+
+	/* keep /dev/random opened while emulating */
+	sc->vrsc_fd = fd;
+
+	/* initialize config space */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_RANDOM);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_CRYPTO);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_ENTROPY);
+	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
+
+	/* BAR 1 is passed for interrupt setup; BAR 0 maps the I/O registers. */
+	if (vi_intr_init(&sc->vrsc_vs, 1, fbsdrun_virtio_msix()))
+		return (1);
+	vi_set_io_bar(&sc->vrsc_vs, 0);
+
+	return (0);
+}
+
+
+/*
+ * Descriptor for the "virtio-rnd" device model.  BAR reads and writes
+ * are routed to vi_pci_read()/vi_pci_write().
+ */
+struct pci_devemu pci_de_vrnd = {
+	.pe_emu		= "virtio-rnd",
+	.pe_init	= pci_vtrnd_init,
+	.pe_barread	= vi_pci_read,
+	.pe_barwrite	= vi_pci_write,
+};
+PCI_EMUL_SET(pci_de_vrnd);


Property changes on: trunk/usr.sbin/bhyve/pci_virtio_rnd.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/pm.c
===================================================================
--- trunk/usr.sbin/bhyve/pm.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/pm.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,313 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013 Hudson River Trading LLC
+ * Written by: John H. Baldwin <jhb at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pm.c 283927 2015-06-02 19:20:39Z jhb $");
+
+#include <sys/types.h>
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "inout.h"
+#include "mevent.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+
+static pthread_mutex_t pm_lock = PTHREAD_MUTEX_INITIALIZER;
+static struct mevent *power_button;
+static sig_t old_power_handler;
+
+/*
+ * Reset Control register at I/O port 0xcf9.
+ *
+ * Only single-byte accesses are accepted.  Reads return the last value
+ * written.  A write stores the value and, when bit 2 is set in it,
+ * requests a VM reset; bit 1 (soft vs. hard reset) is not examined, so
+ * both kinds are treated the same.
+ */
+static int
+reset_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+	static uint8_t reset_control;
+	int error;
+
+	if (bytes != 1)
+		return (-1);
+
+	if (in) {
+		*eax = reset_control;
+		return (0);
+	}
+
+	reset_control = *eax;
+	if ((reset_control & 0x4) != 0) {
+		error = vm_suspend(ctx, VM_SUSPEND_RESET);
+		/* A suspend that is already under way is not an error. */
+		assert(error == 0 || errno == EALREADY);
+	}
+	return (0);
+}
+INOUT_PORT(reset_reg, 0xCF9, IOPORT_F_INOUT, reset_handler);
+
+/*
+ * ACPI's SCI is a level-triggered interrupt.
+ */
+static int sci_active;
+
+/* Raise the SCI line unless sci_active says it is already raised. */
+static void
+sci_assert(struct vmctx *ctx)
+{
+
+	if (!sci_active) {
+		vm_isa_assert_irq(ctx, SCI_INT, SCI_INT);
+		sci_active = 1;
+	}
+}
+
+/* Lower the SCI line unless sci_active says it is already lowered. */
+static void
+sci_deassert(struct vmctx *ctx)
+{
+
+	if (sci_active) {
+		vm_isa_deassert_irq(ctx, SCI_INT, SCI_INT);
+		sci_active = 0;
+	}
+}
+
+/*
+ * Power Management 1 Event Registers
+ *
+ * The only power management event supported is a power button upon
+ * receiving SIGTERM.
+ */
+static uint16_t pm1_enable, pm1_status;
+
+#define	PM1_TMR_STS		0x0001
+#define	PM1_BM_STS		0x0010
+#define	PM1_GBL_STS		0x0020
+#define	PM1_PWRBTN_STS		0x0100
+#define	PM1_SLPBTN_STS		0x0200
+#define	PM1_RTC_STS		0x0400
+#define	PM1_WAK_STS		0x8000
+
+#define	PM1_TMR_EN		0x0001
+#define	PM1_GBL_EN		0x0020
+#define	PM1_PWRBTN_EN		0x0100
+#define	PM1_SLPBTN_EN		0x0200
+#define	PM1_RTC_EN		0x0400
+
+/*
+ * Recompute the SCI level from pm1_enable and pm1_status: the line is
+ * asserted when at least one event is both enabled and pending, and
+ * deasserted otherwise.
+ */
+static void
+sci_update(struct vmctx *ctx)
+{
+	static const struct {
+		uint16_t enable;
+		uint16_t status;
+	} events[] = {
+		{ PM1_TMR_EN,		PM1_TMR_STS },
+		{ PM1_GBL_EN,		PM1_GBL_STS },
+		{ PM1_PWRBTN_EN,	PM1_PWRBTN_STS },
+		{ PM1_SLPBTN_EN,	PM1_SLPBTN_STS },
+		{ PM1_RTC_EN,		PM1_RTC_STS },
+	};
+	unsigned int i;
+	int need_sci;
+
+	/* See if the SCI should be active or not. */
+	need_sci = 0;
+	for (i = 0; i < sizeof(events) / sizeof(events[0]); i++) {
+		if ((pm1_enable & events[i].enable) != 0 &&
+		    (pm1_status & events[i].status) != 0) {
+			need_sci = 1;
+			break;
+		}
+	}
+
+	if (need_sci)
+		sci_assert(ctx);
+	else
+		sci_deassert(ctx);
+}
+
+/*
+ * PM1 status register handler.  Only 16-bit accesses are accepted.
+ * Reads return pm1_status.  Writes clear the WAK, RTC, SLPBTN, PWRBTN
+ * and BM status bits for which a 1 was written, then re-evaluate the
+ * SCI.  Both paths run under pm_lock.
+ */
+static int
+pm1_status_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+	/* Bits the guest may clear by writing 1 to them. */
+	const uint32_t clearable = PM1_WAK_STS | PM1_RTC_STS |
+	    PM1_SLPBTN_STS | PM1_PWRBTN_STS | PM1_BM_STS;
+
+	if (bytes != 2)
+		return (-1);
+
+	pthread_mutex_lock(&pm_lock);
+	if (!in) {
+		pm1_status &= ~(*eax & clearable);
+		sci_update(ctx);
+	} else {
+		*eax = pm1_status;
+	}
+	pthread_mutex_unlock(&pm_lock);
+	return (0);
+}
+
+/*
+ * PM1 enable register handler.  Only 16-bit accesses are accepted.
+ * Reads return pm1_enable.  Writes keep just the PWRBTN_EN and GBL_EN
+ * bits of the written value, then re-evaluate the SCI.  Both paths run
+ * under pm_lock.
+ */
+static int
+pm1_enable_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+
+	if (bytes != 2)
+		return (-1);
+
+	pthread_mutex_lock(&pm_lock);
+	if (!in) {
+		/*
+		 * Only permit certain bits to be set.  We never use
+		 * the global lock, but ACPI-CA whines profusely if it
+		 * can't set GBL_EN.
+		 */
+		pm1_enable = *eax & (PM1_PWRBTN_EN | PM1_GBL_EN);
+		sci_update(ctx);
+	} else {
+		*eax = pm1_enable;
+	}
+	pthread_mutex_unlock(&pm_lock);
+	return (0);
+}
+INOUT_PORT(pm1_status, PM1A_EVT_ADDR, IOPORT_F_INOUT, pm1_status_handler);
+INOUT_PORT(pm1_enable, PM1A_EVT_ADDR + 2, IOPORT_F_INOUT, pm1_enable_handler);
+
+/*
+ * Event callback registered for SIGTERM by smi_cmd_handler(): latch the
+ * power-button status bit, if not already set, and re-evaluate the SCI.
+ * arg is the VM context.
+ */
+static void
+power_button_handler(int signal, enum ev_type type, void *arg)
+{
+	struct vmctx *ctx = arg;
+
+	pthread_mutex_lock(&pm_lock);
+	if ((pm1_status & PM1_PWRBTN_STS) == 0) {
+		pm1_status |= PM1_PWRBTN_STS;
+		sci_update(ctx);
+	}
+	pthread_mutex_unlock(&pm_lock);
+}
+
+/*
+ * Power Management 1 Control Register
+ *
+ * This is mostly unimplemented except that we wish to handle writes that
+ * set SLP_EN to handle S5 (soft power off).
+ */
+static uint16_t pm1_control;
+
+#define	PM1_SCI_EN	0x0001
+#define	PM1_SLP_TYP	0x1c00
+#define	PM1_SLP_EN	0x2000
+#define	PM1_ALWAYS_ZERO	0xc003
+
+/*
+ * PM1 control register handler.  Only 16-bit accesses are accepted.
+ * Reads return pm1_control.  Writes update the stored value, keeping
+ * the current SCI_EN bit and dropping SLP_EN and the always-zero bits,
+ * and request a VM power-off when SLP_EN is written with SLP_TYP == 5.
+ */
+static int
+pm1_control_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+	uint32_t val;
+	int error;
+
+	if (bytes != 2)
+		return (-1);
+
+	if (in) {
+		*eax = pm1_control;
+		return (0);
+	}
+
+	val = *eax;
+
+	/*
+	 * Various bits are write-only or reserved, so force them
+	 * to zero in pm1_control.  Always preserve SCI_EN as OSPM
+	 * can never change it.
+	 */
+	pm1_control = (pm1_control & PM1_SCI_EN) |
+	    (val & ~(PM1_SLP_EN | PM1_ALWAYS_ZERO));
+
+	/*
+	 * If SLP_EN is set, check for S5.  Bhyve's _S5_ method
+	 * says that '5' should be stored in SLP_TYP for S5.
+	 */
+	if ((val & PM1_SLP_EN) != 0 &&
+	    ((pm1_control & PM1_SLP_TYP) >> 10) == 5) {
+		error = vm_suspend(ctx, VM_SUSPEND_POWEROFF);
+		assert(error == 0 || errno == EALREADY);
+	}
+	return (0);
+}
+INOUT_PORT(pm1_control, PM1A_CNT_ADDR, IOPORT_F_INOUT, pm1_control_handler);
+SYSRES_IO(PM1A_EVT_ADDR, 8);
+
+/*
+ * ACPI SMI Command Register
+ *
+ * This write-only register is used to enable and disable ACPI.
+ */
+/*
+ * SMI command port handler (writes only, one byte).
+ *
+ * BHYVE_ACPI_ENABLE sets SCI_EN and, if not already done, registers
+ * power_button_handler() for SIGTERM while ignoring the signal's default
+ * delivery.  BHYVE_ACPI_DISABLE clears SCI_EN and undoes that
+ * registration, restoring the saved SIGTERM disposition.  Other values
+ * are ignored.
+ */
+static int
+smi_cmd_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+
+	assert(!in);
+	if (bytes != 1)
+		return (-1);
+
+	pthread_mutex_lock(&pm_lock);
+	if (*eax == BHYVE_ACPI_ENABLE) {
+		pm1_control |= PM1_SCI_EN;
+		if (power_button == NULL) {
+			power_button = mevent_add(SIGTERM, EVF_SIGNAL,
+			    power_button_handler, ctx);
+			old_power_handler = signal(SIGTERM, SIG_IGN);
+		}
+	} else if (*eax == BHYVE_ACPI_DISABLE) {
+		pm1_control &= ~PM1_SCI_EN;
+		if (power_button != NULL) {
+			mevent_delete(power_button);
+			power_button = NULL;
+			signal(SIGTERM, old_power_handler);
+		}
+	}
+	pthread_mutex_unlock(&pm_lock);
+	return (0);
+}
+INOUT_PORT(smi_cmd, SMI_CMD, IOPORT_F_OUT, smi_cmd_handler);
+SYSRES_IO(SMI_CMD, 1);
+
+/*
+ * Set up the interrupt used for ACPI's SCI: bump its use count in the
+ * PIRQ router and mark it level-triggered.
+ */
+void
+sci_init(struct vmctx *ctx)
+{
+
+	pci_irq_use(SCI_INT);
+	vm_isa_set_irq_trigger(ctx, SCI_INT, LEVEL_TRIGGER);
+}


Property changes on: trunk/usr.sbin/bhyve/pm.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/post.c
===================================================================
--- trunk/usr.sbin/bhyve/post.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/post.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,54 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/post.c 261265 2014-01-29 13:35:12Z jhb $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/post.c 261265 2014-01-29 13:35:12Z jhb $");
+
+#include <sys/types.h>
+
+#include <assert.h>
+
+#include "inout.h"
+#include "pci_lpc.h"
+
+/*
+ * Read handler for the POST data port.  Only single-byte reads are
+ * accepted; they return the placeholder value 0xff.
+ */
+static int
+post_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		  uint32_t *eax, void *arg)
+{
+	assert(in == 1);
+
+	if (bytes == 1) {
+		*eax = 0xff;		/* return some garbage */
+		return (0);
+	}
+	return (-1);
+}
+
+INOUT_PORT(post, 0x84, IOPORT_F_IN, post_data_handler);
+SYSRES_IO(0x84, 1);


Property changes on: trunk/usr.sbin/bhyve/post.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/rtc.c
===================================================================
--- trunk/usr.sbin/bhyve/rtc.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/rtc.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,130 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/rtc.c 284894 2015-06-27 22:48:22Z neel $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/rtc.c 284894 2015-06-27 22:48:22Z neel $");
+
+#include <sys/types.h>
+
+#include <time.h>
+#include <assert.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "pci_lpc.h"
+#include "rtc.h"
+
+#define	IO_RTC		0x70
+
+#define	RTC_LMEM_LSB	0x34
+#define	RTC_LMEM_MSB	0x35
+#define	RTC_HMEM_LSB	0x5b
+#define	RTC_HMEM_SB	0x5c
+#define	RTC_HMEM_MSB	0x5d
+
+#define m_64KB		(64*1024)
+#define	m_16MB		(16*1024*1024)
+#define	m_4GB		(4ULL*1024*1024*1024)
+
+/*
+ * Returns the time to load into the RTC, in seconds since 00:00:00
+ * Jan 1, 1970.
+ *
+ * With use_localtime clear this is simply the current time.  Otherwise
+ * the current time is broken down in the host's local timezone and
+ * those fields are converted back with timegm().
+ */
+static time_t
+rtc_time(struct vmctx *ctx, int use_localtime)
+{
+	struct tm local;
+	time_t now;
+
+	now = time(NULL);
+	if (!use_localtime)
+		return (now);
+
+	localtime_r(&now, &local);
+	return (timegm(&local));
+}
+
+/*
+ * Initialise the guest RTC: store the guest memory size in the nvram
+ * cells and set the clock from rtc_time().
+ *
+ * ctx is the VM context; use_localtime selects local time rather than
+ * UTC for the clock value.  Any failing vm_rtc_* call trips an
+ * assertion.
+ */
+void
+rtc_init(struct vmctx *ctx, int use_localtime)
+{
+	size_t himem;
+	size_t lomem;
+	size_t lowmem_size;
+	int err;
+
+	/* XXX init diag/reset code/equipment/checksum ? */
+
+	/*
+	 * Report guest memory size in nvram cells as required by UEFI.
+	 * Little-endian encoding.
+	 * 0x34/0x35 - 64KB chunks above 16MB, below 4GB
+	 * 0x5b/0x5c/0x5d - 64KB chunks above 4GB
+	 *
+	 * A guest with 16MB of low memory or less has no chunks above
+	 * 16MB; report 0 instead of letting the unsigned subtraction
+	 * wrap around.
+	 */
+	lowmem_size = vm_get_lowmem_size(ctx);
+	if (lowmem_size > m_16MB)
+		lomem = (lowmem_size - m_16MB) / m_64KB;
+	else
+		lomem = 0;
+	err = vm_rtc_write(ctx, RTC_LMEM_LSB, lomem);
+	assert(err == 0);
+	err = vm_rtc_write(ctx, RTC_LMEM_MSB, lomem >> 8);
+	assert(err == 0);
+
+	himem = vm_get_highmem_size(ctx) / m_64KB;
+	err = vm_rtc_write(ctx, RTC_HMEM_LSB, himem);
+	assert(err == 0);
+	err = vm_rtc_write(ctx, RTC_HMEM_SB, himem >> 8);
+	assert(err == 0);
+	err = vm_rtc_write(ctx, RTC_HMEM_MSB, himem >> 16);
+	assert(err == 0);
+
+	err = vm_rtc_settime(ctx, rtc_time(ctx, use_localtime));
+	assert(err == 0);
+}
+
+/*
+ * Emit the DSDT description of the RTC device: PNP0B00, two I/O ports
+ * starting at IO_RTC, and IRQ 8.
+ */
+static void
+rtc_dsdt(void)
+{
+	dsdt_line("");
+	dsdt_line("Device (RTC)");
+	dsdt_line("{");
+	dsdt_line("  Name (_HID, EisaId (\"PNP0B00\"))");
+	dsdt_line("  Name (_CRS, ResourceTemplate ()");
+	dsdt_line("  {");
+
+	/* Resource entries are emitted two indent levels deeper. */
+	dsdt_indent(2);
+	dsdt_fixed_ioport(IO_RTC, 2);
+	dsdt_fixed_irq(8);
+	dsdt_unindent(2);
+
+	dsdt_line("  })");
+	dsdt_line("}");
+}
+LPC_DSDT(rtc_dsdt);
+
+/*
+ * Reserve the extended RTC I/O ports although they are not emulated at this
+ * time.
+ */
+SYSRES_IO(0x72, 6);


Property changes on: trunk/usr.sbin/bhyve/rtc.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/rtc.h
===================================================================
--- trunk/usr.sbin/bhyve/rtc.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/rtc.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,35 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2013  Peter Grehan <grehan at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/rtc.h 284894 2015-06-27 22:48:22Z neel $
+ */
+
+#ifndef _RTC_H_
+#define _RTC_H_
+
+void	rtc_init(struct vmctx *ctx, int use_localtime);
+
+#endif /* _RTC_H_ */


Property changes on: trunk/usr.sbin/bhyve/rtc.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/smbiostbl.c
===================================================================
--- trunk/usr.sbin/bhyve/smbiostbl.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/smbiostbl.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,828 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale at pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/smbiostbl.c 272147 2014-09-25 23:09:35Z grehan $");
+
+#include <sys/param.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <md5.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <uuid.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "smbiostbl.h"
+
+#define	MB			(1024*1024)
+#define	GB			(1024ULL*1024*1024)
+
+#define SMBIOS_BASE		0xF1000
+
+/* (BHYVE_ACPI_BASE - SMBIOS_BASE) */
+#define	SMBIOS_MAX_LENGTH	(0xF2400 - 0xF1000)
+
+#define	SMBIOS_TYPE_BIOS	0
+#define	SMBIOS_TYPE_SYSTEM	1
+#define	SMBIOS_TYPE_CHASSIS	3
+#define	SMBIOS_TYPE_PROCESSOR	4
+#define	SMBIOS_TYPE_MEMARRAY	16
+#define	SMBIOS_TYPE_MEMDEVICE	17
+#define	SMBIOS_TYPE_MEMARRAYMAP	19
+#define	SMBIOS_TYPE_BOOT	32
+#define	SMBIOS_TYPE_EOT		127
+
+struct smbios_structure {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	handle;
+} __packed;
+
+typedef int (*initializer_func_t)(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+struct smbios_template_entry {
+	struct smbios_structure	*entry;
+	const char		**strings;
+	initializer_func_t	initializer;
+};
+
+/*
+ * SMBIOS Structure Table Entry Point
+ */
+#define	SMBIOS_ENTRY_EANCHOR	"_SM_"
+#define	SMBIOS_ENTRY_EANCHORLEN	4
+#define	SMBIOS_ENTRY_IANCHOR	"_DMI_"
+#define	SMBIOS_ENTRY_IANCHORLEN	5
+
+struct smbios_entry_point {
+	char		eanchor[4];	/* anchor tag */
+	uint8_t		echecksum;	/* checksum of entry point structure */
+	uint8_t		eplen;		/* length in bytes of entry point */
+	uint8_t		major;		/* major version of the SMBIOS spec */
+	uint8_t		minor;		/* minor version of the SMBIOS spec */
+	uint16_t	maxssize;	/* maximum size in bytes of a struct */
+	uint8_t		revision;	/* entry point structure revision */
+	uint8_t		format[5];	/* entry point rev-specific data */
+	char		ianchor[5];	/* intermediate anchor tag */
+	uint8_t		ichecksum;	/* intermediate checksum */
+	uint16_t	stlen;		/* len in bytes of structure table */
+	uint32_t	staddr;		/* physical addr of structure table */
+	uint16_t	stnum;		/* number of structure table entries */
+	uint8_t		bcdrev;		/* BCD value representing DMI ver */
+} __packed;
+
+/*
+ * BIOS Information
+ */
+#define	SMBIOS_FL_ISA		0x00000010	/* ISA is supported */
+#define	SMBIOS_FL_PCI		0x00000080	/* PCI is supported */
+#define	SMBIOS_FL_SHADOW	0x00001000	/* BIOS shadowing is allowed */
+#define	SMBIOS_FL_CDBOOT	0x00008000	/* Boot from CD is supported */
+#define	SMBIOS_FL_SELBOOT	0x00010000	/* Selectable Boot supported */
+#define	SMBIOS_FL_EDD		0x00080000	/* EDD Spec is supported */
+
+#define	SMBIOS_XB1_FL_ACPI	0x00000001	/* ACPI is supported */
+
+#define	SMBIOS_XB2_FL_BBS	0x00000001	/* BIOS Boot Specification */
+#define	SMBIOS_XB2_FL_VM	0x00000010	/* Virtual Machine */
+
+struct smbios_table_type0 {
+	struct smbios_structure	header;
+	uint8_t			vendor;		/* vendor string */
+	uint8_t			version;	/* version string */
+	uint16_t		segment;	/* address segment location */
+	uint8_t			rel_date;	/* release date */
+	uint8_t			size;		/* rom size */
+	uint64_t		cflags;		/* characteristics */
+	uint8_t			xc_bytes[2];	/* characteristics ext bytes */
+	uint8_t			sb_major_rel;	/* system bios version */
+	uint8_t			sb_minor_rele;
+	uint8_t			ecfw_major_rel;	/* embedded ctrl fw version */
+	uint8_t			ecfw_minor_rel;
+} __packed;
+
+/*
+ * System Information
+ */
+#define	SMBIOS_WAKEUP_SWITCH	0x06	/* power switch */
+
+struct smbios_table_type1 {
+	struct smbios_structure	header;
+	uint8_t			manufacturer;	/* manufacturer string */
+	uint8_t			product;	/* product name string */
+	uint8_t			version;	/* version string */
+	uint8_t			serial;		/* serial number string */
+	uint8_t			uuid[16];	/* uuid byte array */
+	uint8_t			wakeup;		/* wake-up event */
+	uint8_t			sku;		/* sku number string */
+	uint8_t			family;		/* family name string */
+} __packed;
+
+/*
+ * System Enclosure or Chassis
+ */
+#define	SMBIOS_CHT_UNKNOWN	0x02	/* unknown */
+
+#define	SMBIOS_CHST_SAFE	0x03	/* safe */
+
+#define	SMBIOS_CHSC_NONE	0x03	/* none */
+
+struct smbios_table_type3 {
+	struct smbios_structure	header;
+	uint8_t			manufacturer;	/* manufacturer string */
+	uint8_t			type;		/* type */
+	uint8_t			version;	/* version string */
+	uint8_t			serial;		/* serial number string */
+	uint8_t			asset;		/* asset tag string */
+	uint8_t			bustate;	/* boot-up state */
+	uint8_t			psstate;	/* power supply state */
+	uint8_t			tstate;		/* thermal state */
+	uint8_t			security;	/* security status */
+	uint8_t			uheight;	/* height in 'u's */
+	uint8_t			cords;		/* number of power cords */
+	uint8_t			elems;		/* number of element records */
+	uint8_t			elemlen;	/* length of records */
+	uint8_t			sku;		/* sku number string */
+} __packed;
+
+/*
+ * Processor Information
+ */
+#define	SMBIOS_PRT_CENTRAL	0x03	/* central processor */
+
+#define	SMBIOS_PRF_OTHER	0x01	/* other */
+
+#define	SMBIOS_PRS_PRESENT	0x40	/* socket is populated */
+#define	SMBIOS_PRS_ENABLED	0x1	/* enabled */
+
+#define	SMBIOS_PRU_NONE		0x06	/* none */
+
+#define	SMBIOS_PFL_64B	0x04	/* 64-bit capable */
+
+struct smbios_table_type4 {
+	struct smbios_structure	header;
+	uint8_t			socket;		/* socket designation string */
+	uint8_t			type;		/* processor type */
+	uint8_t			family;		/* processor family */
+	uint8_t			manufacturer;	/* manufacturer string */
+	uint64_t		cpuid;		/* processor cpuid */
+	uint8_t			version;	/* version string */
+	uint8_t			voltage;	/* voltage */
+	uint16_t		clkspeed;	/* ext clock speed in mhz */
+	uint16_t		maxspeed;	/* maximum speed in mhz */
+	uint16_t		curspeed;	/* current speed in mhz */
+	uint8_t			status;		/* status */
+	uint8_t			upgrade;	/* upgrade */
+	uint16_t		l1handle;	/* l1 cache handle */
+	uint16_t		l2handle;	/* l2 cache handle */
+	uint16_t		l3handle;	/* l3 cache handle */
+	uint8_t			serial;		/* serial number string */
+	uint8_t			asset;		/* asset tag string */
+	uint8_t			part;		/* part number string */
+	uint8_t			cores;		/* cores per socket */
+	uint8_t			ecores;		/* enabled cores */
+	uint8_t			threads;	/* threads per socket */
+	uint16_t		cflags;		/* processor characteristics */
+	uint16_t		family2;	/* processor family 2 */
+} __packed;
+
+/*
+ * Physical Memory Array
+ */
+#define	SMBIOS_MAL_SYSMB	0x03	/* system board or motherboard */
+
+#define	SMBIOS_MAU_SYSTEM	0x03	/* system memory */
+
+#define	SMBIOS_MAE_NONE		0x03	/* none */
+
+struct smbios_table_type16 {
+	struct smbios_structure	header;
+	uint8_t			location;	/* physical device location */
+	uint8_t			use;		/* device functional purpose */
+	uint8_t			ecc;		/* err detect/correct method */
+	uint32_t		size;		/* max mem capacity in kb */
+	uint16_t		errhand;	/* handle of error (if any) */
+	uint16_t		ndevs;		/* num of slots or sockets */
+	uint64_t		xsize;		/* max mem capacity in bytes */
+} __packed;
+
+/*
+ * Memory Device
+ */
+#define	SMBIOS_MDFF_UNKNOWN	0x02	/* unknown */
+
+#define	SMBIOS_MDT_UNKNOWN	0x02	/* unknown */
+
+#define	SMBIOS_MDF_UNKNOWN	0x0004	/* unknown */
+
+struct smbios_table_type17 {
+	struct smbios_structure	header;
+	uint16_t		arrayhand;	/* handle of physl mem array */
+	uint16_t		errhand;	/* handle of mem error data */
+	uint16_t		twidth;		/* total width in bits */
+	uint16_t		dwidth;		/* data width in bits */
+	uint16_t		size;		/* size in bytes */
+	uint8_t			form;		/* form factor */
+	uint8_t			set;		/* set */
+	uint8_t			dloc;		/* device locator string */
+	uint8_t			bloc;		/* phys bank locator string */
+	uint8_t			type;		/* memory type */
+	uint16_t		flags;		/* memory characteristics */
+	uint16_t		maxspeed;	/* maximum speed in mhz */
+	uint8_t			manufacturer;	/* manufacturer string */
+	uint8_t			serial;		/* serial number string */
+	uint8_t			asset;		/* asset tag string */
+	uint8_t			part;		/* part number string */
+	uint8_t			attributes;	/* attributes */
+	uint32_t		xsize;		/* extended size in mbs */
+	uint16_t		curspeed;	/* current speed in mhz */
+	uint16_t		minvoltage;	/* minimum voltage */
+	uint16_t		maxvoltage;	/* maximum voltage */
+	uint16_t		curvoltage;	/* configured voltage */
+} __packed;
+
+/*
+ * Memory Array Mapped Address
+ */
+struct smbios_table_type19 {
+	struct smbios_structure	header;
+	uint32_t		saddr;		/* start phys addr in kb */
+	uint32_t		eaddr;		/* end phys addr in kb */
+	uint16_t		arrayhand;	/* physical mem array handle */
+	uint8_t			width;		/* num of dev in row */
+	uint64_t		xsaddr;		/* start phys addr in bytes */
+	uint64_t		xeaddr;		/* end phys addr in bytes */
+} __packed;
+
+/*
+ * System Boot Information
+ */
+#define	SMBIOS_BOOT_NORMAL	0	/* no errors detected */
+
+struct smbios_table_type32 {
+	struct smbios_structure	header;
+	uint8_t			reserved[6];
+	uint8_t			status;		/* boot status */
+} __packed;
+
+/*
+ * End-of-Table
+ */
+struct smbios_table_type127 {
+	struct smbios_structure	header;
+} __packed;
+
+struct smbios_table_type0 smbios_type0_template = {
+	{ SMBIOS_TYPE_BIOS, sizeof (struct smbios_table_type0), 0 },
+	1,	/* bios vendor string */
+	2,	/* bios version string */
+	0xF000,	/* bios address segment location */
+	3,	/* bios release date */
+	0x0,	/* bios size (64k * (n + 1) is the size in bytes) */
+	SMBIOS_FL_ISA | SMBIOS_FL_PCI | SMBIOS_FL_SHADOW |
+	    SMBIOS_FL_CDBOOT | SMBIOS_FL_EDD,
+	{ SMBIOS_XB1_FL_ACPI, SMBIOS_XB2_FL_BBS | SMBIOS_XB2_FL_VM },
+	0x0,	/* bios major release */
+	0x0,	/* bios minor release */
+	0xff,	/* embedded controller firmware major release */
+	0xff	/* embedded controller firmware minor release */
+};
+
+const char *smbios_type0_strings[] = {
+	"BHYVE",	/* vendor string */
+	"1.00",		/* bios version string */
+	"03/14/2014",	/* bios release date string */
+	NULL
+};
+
+struct smbios_table_type1 smbios_type1_template = {
+	{ SMBIOS_TYPE_SYSTEM, sizeof (struct smbios_table_type1), 0 },
+	1,		/* manufacturer string */
+	2,		/* product string */
+	3,		/* version string */
+	4,		/* serial number string */
+	{ 0 },
+	SMBIOS_WAKEUP_SWITCH,
+	5,		/* sku string */
+	6		/* family string */
+};
+
+static int smbios_type1_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+const char *smbios_type1_strings[] = {
+	" ",		/* manufacturer string */
+	"BHYVE",	/* product name string */
+	"1.0",		/* version string */
+	"None",		/* serial number string */
+	"None",		/* sku string */
+	" ",		/* family name string */
+	NULL
+};
+
+struct smbios_table_type3 smbios_type3_template = {
+	{ SMBIOS_TYPE_CHASSIS, sizeof (struct smbios_table_type3), 0 },
+	1,		/* manufacturer string */
+	SMBIOS_CHT_UNKNOWN,
+	2,		/* version string */
+	3,		/* serial number string */
+	4,		/* asset tag string */
+	SMBIOS_CHST_SAFE,
+	SMBIOS_CHST_SAFE,
+	SMBIOS_CHST_SAFE,
+	SMBIOS_CHSC_NONE,
+	0,		/* height in 'u's (0=enclosure height unspecified) */
+	0,		/* number of power cords (0=number unspecified) */
+	0,		/* number of contained element records */
+	0,		/* length of records */
+	5		/* sku number string */
+};
+
+const char *smbios_type3_strings[] = {
+	" ",		/* manufacturer string */
+	"1.0",		/* version string */
+	"None",		/* serial number string */
+	"None",		/* asset tag string */
+	"None",		/* sku number string */
+	NULL
+};
+
+struct smbios_table_type4 smbios_type4_template = {
+	{ SMBIOS_TYPE_PROCESSOR, sizeof (struct smbios_table_type4), 0 },
+	1,		/* socket designation string */
+	SMBIOS_PRT_CENTRAL,
+	SMBIOS_PRF_OTHER,
+	2,		/* manufacturer string */
+	0,		/* cpuid */
+	3,		/* version string */
+	0,		/* voltage */
+	0,		/* external clock frequency in mhz (0=unknown) */
+	0,		/* maximum frequency in mhz (0=unknown) */
+	0,		/* current frequency in mhz (0=unknown) */
+	SMBIOS_PRS_PRESENT | SMBIOS_PRS_ENABLED,
+	SMBIOS_PRU_NONE,
+	-1,		/* l1 cache handle */
+	-1,		/* l2 cache handle */
+	-1,		/* l3 cache handle */
+	4,		/* serial number string */
+	5,		/* asset tag string */
+	6,		/* part number string */
+	0,		/* cores per socket (0=unknown) */
+	0,		/* enabled cores per socket (0=unknown) */
+	0,		/* threads per socket (0=unknown) */
+	SMBIOS_PFL_64B,
+	SMBIOS_PRF_OTHER
+};
+
+const char *smbios_type4_strings[] = {
+	" ",		/* socket designation string */
+	" ",		/* manufacturer string */
+	" ",		/* version string */
+	"None",		/* serial number string */
+	"None",		/* asset tag string */
+	"None",		/* part number string */
+	NULL
+};
+
+static int smbios_type4_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+struct smbios_table_type16 smbios_type16_template = {
+	{ SMBIOS_TYPE_MEMARRAY, sizeof (struct smbios_table_type16),  0 },
+	SMBIOS_MAL_SYSMB,
+	SMBIOS_MAU_SYSTEM,
+	SMBIOS_MAE_NONE,
+	0x80000000,	/* max mem capacity in kb (0x80000000=use extended) */
+	-1,		/* handle of error (if any) */
+	0,		/* number of slots or sockets (TBD) */
+	0		/* extended maximum memory capacity in bytes (TBD) */
+};
+
+static int smbios_type16_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+struct smbios_table_type17 smbios_type17_template = {
+	{ SMBIOS_TYPE_MEMDEVICE, sizeof (struct smbios_table_type17),  0 },
+	-1,		/* handle of physical memory array */
+	-1,		/* handle of memory error data */
+	64,		/* total width in bits including ecc */
+	64,		/* data width in bits */
+	0x7fff,		/* size in bytes (0x7fff=use extended)*/
+	SMBIOS_MDFF_UNKNOWN,
+	0,		/* set (0x00=none, 0xff=unknown) */
+	1,		/* device locator string */
+	2,		/* physical bank locator string */
+	SMBIOS_MDT_UNKNOWN,
+	SMBIOS_MDF_UNKNOWN,
+	0,		/* maximum memory speed in mhz (0=unknown) */
+	3,		/* manufacturer string */
+	4,		/* serial number string */
+	5,		/* asset tag string */
+	6,		/* part number string */
+	0,		/* attributes (0=unknown rank information) */
+	0,		/* extended size in mb (TBD) */
+	0,		/* current speed in mhz (0=unknown) */
+	0,		/* minimum voltage in mv (0=unknown) */
+	0,		/* maximum voltage in mv (0=unknown) */
+	0		/* configured voltage in mv (0=unknown) */
+};
+
+const char *smbios_type17_strings[] = {
+	" ",		/* device locator string */
+	" ",		/* physical bank locator string */
+	" ",		/* manufacturer string */
+	"None",		/* serial number string */
+	"None",		/* asset tag string */
+	"None",		/* part number string */
+	NULL
+};
+
+static int smbios_type17_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+/*
+ * Template for the Memory Array Mapped Address (type 19) structure.
+ * The 32-bit address fields are set to 0xffffffff so that the extended
+ * 64-bit fields are used; those, and the array handle, are filled in by
+ * smbios_type19_initializer().
+ */
+struct smbios_table_type19 smbios_type19_template = {
+	{ SMBIOS_TYPE_MEMARRAYMAP, sizeof (struct smbios_table_type19),  0 },
+	0xffffffff,	/* starting phys addr in kb (0xffffffff=use ext) */
+	0xffffffff,	/* ending phys addr in kb (0xffffffff=use ext) */
+	-1,		/* physical memory array handle */
+	1,		/* number of devices that form a row */
+	0,		/* extended starting phys addr in bytes (TBD) */
+	0		/* extended ending phys addr in bytes (TBD) */
+};
+
+static int smbios_type19_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+struct smbios_table_type32 smbios_type32_template = {
+	{ SMBIOS_TYPE_BOOT, sizeof (struct smbios_table_type32),  0 },
+	{ 0, 0, 0, 0, 0, 0 },
+	SMBIOS_BOOT_NORMAL
+};
+
+struct smbios_table_type127 smbios_type127_template = {
+	{ SMBIOS_TYPE_EOT, sizeof (struct smbios_table_type127),  0 }
+};
+
+static int smbios_generic_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+static struct smbios_template_entry smbios_template[] = {
+	{ (struct smbios_structure *)&smbios_type0_template,
+	  smbios_type0_strings,
+	  smbios_generic_initializer },
+	{ (struct smbios_structure *)&smbios_type1_template,
+	  smbios_type1_strings,
+	  smbios_type1_initializer },
+	{ (struct smbios_structure *)&smbios_type3_template,
+	  smbios_type3_strings,
+	  smbios_generic_initializer },
+	{ (struct smbios_structure *)&smbios_type4_template,
+	  smbios_type4_strings,
+	  smbios_type4_initializer },
+	{ (struct smbios_structure *)&smbios_type16_template,
+	  NULL,
+	  smbios_type16_initializer },
+	{ (struct smbios_structure *)&smbios_type17_template,
+	  smbios_type17_strings,
+	  smbios_type17_initializer },
+	{ (struct smbios_structure *)&smbios_type19_template,
+	  NULL,
+	  smbios_type19_initializer },
+	{ (struct smbios_structure *)&smbios_type32_template,
+	  NULL,
+	  smbios_generic_initializer },
+	{ (struct smbios_structure *)&smbios_type127_template,
+	  NULL,
+	  smbios_generic_initializer },
+	{ NULL,NULL, NULL }
+};
+
+static uint64_t guest_lomem, guest_himem;
+static uint16_t type16_handle;
+
+/*
+ * Copy one template structure, followed by its string-set, to 'curaddr'.
+ *
+ * template_entry   - formatted area to copy; its 'length' field gives the
+ *                    number of bytes copied.
+ * template_strings - NULL-terminated array of strings, or NULL for none.
+ * curaddr          - destination of the structure.
+ * endaddr          - out: first byte past the emitted structure.
+ * n                - in/out: number of structures emitted so far; the new
+ *                    structure gets handle (*n + 1) and *n is incremented.
+ * size             - out: total bytes emitted (formatted area + strings).
+ *
+ * Always returns 0.
+ */
+static int
+smbios_generic_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size)
+{
+	struct smbios_structure *entry;
+	char *start;
+	int nstrings;
+
+	start = curaddr;
+	memcpy(curaddr, template_entry, template_entry->length);
+	entry = (struct smbios_structure *)curaddr;
+	entry->handle = *n + 1;
+	curaddr += entry->length;
+
+	nstrings = 0;
+	if (template_strings != NULL) {
+		for (; template_strings[nstrings] != NULL; nstrings++) {
+			const char *string;
+			size_t len;
+
+			/* Each string is copied with its terminating nul. */
+			string = template_strings[nstrings];
+			len = strlen(string) + 1;
+			memcpy(curaddr, string, len);
+			curaddr += len;
+		}
+	}
+	if (nstrings == 0) {
+		/* Minimum string section is double nul */
+		*curaddr = '\0';
+		curaddr++;
+	}
+	/* Terminator of the string-set. */
+	*curaddr = '\0';
+	curaddr++;
+
+	(*n)++;
+	*endaddr = curaddr;
+	/* Report the emitted size so the caller can track the largest one. */
+	*size = curaddr - start;
+
+	return (0);
+}
+
+/*
+ * Emit the System Information (type 1) structure and fill in its UUID.
+ *
+ * When guest_uuid_str is set, it is parsed and stored little-endian
+ * encoded.  Otherwise a UUID is derived from the MD5 digest of the VM name
+ * and the host name, with the version/variant bits patched in.
+ *
+ * Parameters are those of smbios_generic_initializer(), which emits the
+ * structure itself.
+ *
+ * Returns 0 on success, -1 when the UUID string does not parse or the
+ * host name cannot be obtained.
+ */
+static int
+smbios_type1_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size)
+{
+	struct smbios_table_type1 *type1;
+
+	smbios_generic_initializer(template_entry, template_strings,
+	    curaddr, endaddr, n, size);
+	type1 = (struct smbios_table_type1 *)curaddr;
+
+	if (guest_uuid_str != NULL) {
+		uuid_t		uuid;
+		uint32_t	status;
+
+		uuid_from_string(guest_uuid_str, &uuid, &status);
+		if (status != uuid_s_ok)
+			return (-1);
+
+		uuid_enc_le(&type1->uuid, &uuid);
+	} else {
+		MD5_CTX		mdctx;
+		u_char		digest[16];
+		char		hostname[MAXHOSTNAMELEN];
+
+		/*
+		 * Universally unique and yet reproducible are an
+		 * oxymoron, however reproducible is desirable in
+		 * this case.
+		 *
+		 * The whole buffer is hashed below, so clear it first:
+		 * otherwise the bytes following the host name would be
+		 * indeterminate and the digest would not be reproducible.
+		 */
+		memset(hostname, 0, sizeof(hostname));
+		if (gethostname(hostname, sizeof(hostname)))
+			return (-1);
+
+		MD5Init(&mdctx);
+		MD5Update(&mdctx, vmname, strlen(vmname));
+		MD5Update(&mdctx, hostname, sizeof(hostname));
+		MD5Final(digest, &mdctx);
+
+		/*
+		 * Set the variant and version number.
+		 */
+		digest[6] &= 0x0F;
+		digest[6] |= 0x30;	/* version 3 */
+		digest[8] &= 0x3F;
+		digest[8] |= 0x80;
+
+		memcpy(&type1->uuid, digest, sizeof (digest));
+	}
+
+	return (0);
+}
+
+/*
+ * Emit one Processor Information (type 4) structure per guest CPU.
+ *
+ * Each structure is the generic copy of the template with an extra
+ * "CPU #<index>" string appended to its string-set; the 'socket' field is
+ * pointed at that appended string.
+ *
+ * Parameters are those of smbios_generic_initializer().  On return
+ * *endaddr is the first byte past the last structure and *size is the
+ * size of the largest structure emitted, appended string included.
+ *
+ * Always returns 0.
+ */
+static int
+smbios_type4_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size)
+{
+	uint16_t largest;
+	int i;
+
+	/* Leave sane outputs behind even if no CPU structure is emitted. */
+	*endaddr = curaddr;
+	largest = 0;
+
+	for (i = 0; i < guest_ncpus; i++) {
+		struct smbios_table_type4 *type4;
+		char *p;
+		int nstrings, len;
+		uint16_t cursize;
+
+		smbios_generic_initializer(template_entry, template_strings,
+		    curaddr, endaddr, n, size);
+		type4 = (struct smbios_table_type4 *)curaddr;
+
+		/* Count the template strings by counting their nuls. */
+		p = curaddr + sizeof (struct smbios_table_type4);
+		nstrings = 0;
+		while (p < *endaddr - 1) {
+			if (*p++ == '\0')
+				nstrings++;
+		}
+
+		/*
+		 * Overwrite the string-set terminator with the new string,
+		 * then write a fresh terminator after it.
+		 */
+		len = sprintf(*endaddr - 1, "CPU #%d", i) + 1;
+		*endaddr += len - 1;
+		*(*endaddr) = '\0';
+		(*endaddr)++;
+		type4->socket = nstrings + 1;
+
+		/* The generic size does not include the appended string. */
+		cursize = *endaddr - curaddr;
+		if (cursize > largest)
+			largest = cursize;
+
+		curaddr = *endaddr;
+	}
+	*size = largest;
+
+	return (0);
+}
+
+/*
+ * Emit the Physical Memory Array (type 16) structure.
+ *
+ * Fills in the extended capacity (low + high guest memory, in bytes) and
+ * the number of memory devices (2 when high memory exists, otherwise 1),
+ * and records the structure's handle in type16_handle for the type 17 and
+ * type 19 structures that refer back to it.
+ *
+ * Parameters are those of smbios_generic_initializer().
+ * Always returns 0.
+ */
+static int
+smbios_type16_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size)
+{
+	struct smbios_table_type16 *type16;
+
+	smbios_generic_initializer(template_entry, template_strings,
+	    curaddr, endaddr, n, size);
+	type16 = (struct smbios_table_type16 *)curaddr;
+	type16->xsize = guest_lomem + guest_himem;
+	type16->ndevs = guest_himem > 0 ? 2 : 1;
+
+	/*
+	 * Take the handle from the emitted structure itself.  The generic
+	 * initializer assigns (*n + 1) before incrementing *n, so sampling
+	 * *n ahead of the call would name the preceding structure instead.
+	 */
+	type16_handle = type16->header.handle;
+
+	return (0);
+}
+
+/*
+ * Emit the Memory Device (type 17) structures: one for low memory and,
+ * when the guest has high memory, a second one for it.
+ *
+ * Each structure is linked to the type 16 array through type16_handle and
+ * carries its size in the extended size field, which is expressed in
+ * megabytes.
+ *
+ * Parameters are those of smbios_generic_initializer().
+ * Always returns 0.
+ */
+static int
+smbios_type17_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size)
+{
+	struct smbios_table_type17 *type17;
+
+	smbios_generic_initializer(template_entry, template_strings,
+	    curaddr, endaddr, n, size);
+	type17 = (struct smbios_table_type17 *)curaddr;
+	type17->arrayhand = type16_handle;
+	/* guest_lomem is a byte count; the field holds megabytes. */
+	type17->xsize = guest_lomem / MB;
+
+	if (guest_himem > 0) {
+		curaddr = *endaddr;
+		smbios_generic_initializer(template_entry, template_strings,
+		    curaddr, endaddr, n, size);
+		type17 = (struct smbios_table_type17 *)curaddr;
+		type17->arrayhand = type16_handle;
+		/* Same unit conversion for the high memory device. */
+		type17->xsize = guest_himem / MB;
+	}
+
+	return (0);
+}
+
+/*
+ * Emit the Memory Array Mapped Address (type 19) structures: one for the
+ * low memory range starting at 0 and, when the guest has high memory, a
+ * second one for the range starting at 4GB.
+ *
+ * Each structure is linked to the type 16 array through type16_handle and
+ * uses the extended (byte granular) address fields.
+ *
+ * Parameters are those of smbios_generic_initializer().
+ * Always returns 0.
+ *
+ * NOTE(review): the end addresses are written as start + length; confirm
+ * against the SMBIOS specification whether the field is meant to hold the
+ * address of the last byte (start + length - 1) instead.
+ */
+static int
+smbios_type19_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size)
+{
+	struct smbios_table_type19 *type19;
+
+	smbios_generic_initializer(template_entry, template_strings,
+	    curaddr, endaddr, n, size);
+	type19 = (struct smbios_table_type19 *)curaddr;
+	type19->arrayhand = type16_handle;
+	type19->xsaddr = 0;
+	type19->xeaddr = guest_lomem;
+
+	if (guest_himem > 0) {
+		curaddr = *endaddr;
+		smbios_generic_initializer(template_entry, template_strings,
+		    curaddr, endaddr, n, size);
+		type19 = (struct smbios_table_type19 *)curaddr;
+		type19->arrayhand = type16_handle;
+		type19->xsaddr = 4*GB;
+		/*
+		 * guest_himem is the size of the range, not its end: the
+		 * end address is relative to the 4GB start.
+		 */
+		type19->xeaddr = type19->xsaddr + guest_himem;
+	}
+
+	return (0);
+}
+
+/*
+ * Fill in the fixed part of the SMBIOS entry point structure.
+ *
+ * smbios_ep - entry point to initialize; it is zeroed first, so the
+ *             checksum and table length/count fields are left at 0 for
+ *             smbios_ep_finalizer() to complete.
+ * staddr    - physical address of the structure table.
+ */
+static void
+smbios_ep_initializer(struct smbios_entry_point *smbios_ep, uint32_t staddr)
+{
+	memset(smbios_ep, 0, sizeof(*smbios_ep));
+	memcpy(smbios_ep->eanchor, SMBIOS_ENTRY_EANCHOR,
+	    SMBIOS_ENTRY_EANCHORLEN);
+	/* The advertised length must match the packed structure layout. */
+	smbios_ep->eplen = 0x1F;
+	assert(sizeof (struct smbios_entry_point) == smbios_ep->eplen);
+	smbios_ep->major = 2;
+	smbios_ep->minor = 6;
+	smbios_ep->revision = 0;
+	memcpy(smbios_ep->ianchor, SMBIOS_ENTRY_IANCHOR,
+	    SMBIOS_ENTRY_IANCHORLEN);
+	smbios_ep->staddr = staddr;
+	/*
+	 * BCD form of the version above (major in the high nibble, minor in
+	 * the low one), derived so that the two cannot drift apart.
+	 */
+	smbios_ep->bcdrev = ((smbios_ep->major & 0xf) << 4) |
+	    (smbios_ep->minor & 0xf);
+}
+
+/*
+ * Complete the entry point once the structure table has been written:
+ * store the table length, the structure count and the largest structure
+ * size, then compute both checksums.
+ *
+ * Each checksum is the two's complement of the byte sum over its range,
+ * taken while the checksum field being computed still holds the value it
+ * had on entry.  The intermediate checksum covers bytes 0x10..0x1e, the
+ * entry point checksum covers bytes 0x00..0x1e.
+ */
+static void
+smbios_ep_finalizer(struct smbios_entry_point *smbios_ep, uint16_t len,
+    uint16_t num, uint16_t maxssize)
+{
+	const uint8_t	*raw;
+	uint8_t		sum;
+	int		off;
+
+	smbios_ep->stnum = num;
+	smbios_ep->stlen = len;
+	smbios_ep->maxssize = maxssize;
+
+	raw = (const uint8_t *)smbios_ep;
+
+	/* Intermediate area first; it feeds into the full checksum. */
+	sum = 0;
+	for (off = 0x10; off < 0x1f; off++)
+		sum += raw[off];
+	smbios_ep->ichecksum = (uint8_t)-sum;
+
+	sum = 0;
+	for (off = 0; off < 0x1f; off++)
+		sum += raw[off];
+	smbios_ep->echecksum = (uint8_t)-sum;
+}
+
+/*
+ * Build the SMBIOS entry point and structure table in guest memory at
+ * SMBIOS_BASE.
+ *
+ * The entry point is written first, immediately followed by one or more
+ * structures per smbios_template[] entry, each produced by that entry's
+ * initializer.  The entry point is finalized (lengths, counts, checksums)
+ * after the last structure.
+ *
+ * Returns 0 on success, ENOMEM when paddr_guest2host() yields no mapping
+ * for the SMBIOS region, or the non-zero value returned by a failing
+ * initializer.
+ */
+int
+smbios_build(struct vmctx *ctx)
+{
+	struct smbios_entry_point	*smbios_ep;
+	uint16_t			n;
+	uint16_t			maxssize;
+	char				*curaddr, *startaddr, *ststartaddr;
+	int				i;
+	int				err;
+
+	guest_lomem = vm_get_lowmem_size(ctx);
+	guest_himem = vm_get_highmem_size(ctx);
+
+	startaddr = paddr_guest2host(ctx, SMBIOS_BASE, SMBIOS_MAX_LENGTH);
+	if (startaddr == NULL) {
+		fprintf(stderr, "smbios table requires mapped mem\n");
+		return (ENOMEM);
+	}
+
+	curaddr = startaddr;
+
+	smbios_ep = (struct smbios_entry_point *)curaddr;
+	smbios_ep_initializer(smbios_ep, SMBIOS_BASE +
+	    sizeof(struct smbios_entry_point));
+	curaddr += sizeof(struct smbios_entry_point);
+	ststartaddr = curaddr;
+
+	n = 0;
+	maxssize = 0;
+	for (i = 0; smbios_template[i].entry != NULL; i++) {
+		struct smbios_structure	*entry;
+		const char		**strings;
+		initializer_func_t      initializer;
+		char			*endaddr;
+		uint16_t		size;
+
+		entry = smbios_template[i].entry;
+		strings = smbios_template[i].strings;
+		initializer = smbios_template[i].initializer;
+
+		/*
+		 * Give the outputs defined values so that nothing
+		 * indeterminate is read if an initializer leaves one unset.
+		 */
+		endaddr = curaddr;
+		size = 0;
+
+		err = (*initializer)(entry, strings, curaddr, &endaddr,
+		    &n, &size);
+		if (err != 0)
+			return (err);
+
+		/*
+		 * An initializer that did not report a size is accounted
+		 * for by the number of bytes it wrote.
+		 */
+		if (size == 0)
+			size = endaddr - curaddr;
+
+		if (size > maxssize)
+			maxssize = size;
+
+		curaddr = endaddr;
+	}
+
+	assert(curaddr - startaddr < SMBIOS_MAX_LENGTH);
+	smbios_ep_finalizer(smbios_ep, curaddr - ststartaddr, n, maxssize);
+
+	return (0);
+}


Property changes on: trunk/usr.sbin/bhyve/smbiostbl.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/smbiostbl.h
===================================================================
--- trunk/usr.sbin/bhyve/smbiostbl.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/smbiostbl.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,37 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale at pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/smbiostbl.h 267450 2014-06-13 21:30:40Z jhb $
+ */
+
+#ifndef _SMBIOSTBL_H_
+#define _SMBIOSTBL_H_
+
+struct vmctx;
+
+int	smbios_build(struct vmctx *ctx);
+
+#endif /* _SMBIOSTBL_H_ */


Property changes on: trunk/usr.sbin/bhyve/smbiostbl.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/spinup_ap.c
===================================================================
--- trunk/usr.sbin/bhyve/spinup_ap.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/spinup_ap.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,105 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/spinup_ap.c 268894 2014-07-19 22:24:29Z jhb $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/spinup_ap.c 268894 2014-07-19 22:24:29Z jhb $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "bhyverun.h"
+#include "spinup_ap.h"
+/*
+ * Point 'newcpu' at the real-mode startup code selected by '*rip'.
+ *
+ * The page number of '*rip' (*rip >> PAGE_SHIFT) is used as the startup
+ * 'vector'.  '*rip' is then zeroed, so the caller's copy is modified, and
+ * the vcpu's %rip is set to that zero.  The %cs descriptor base becomes
+ * 'vector << PAGE_SHIFT' (limit and access are read back and written
+ * unchanged) and the %cs selector becomes that base shifted right by 4.
+ *
+ * Every vmmapi call is asserted to have succeeded.
+ */
+static void
+spinup_ap_realmode(struct vmctx *ctx, int newcpu, uint64_t *rip)
+{
+	int vector, error;
+	uint16_t cs;
+	uint64_t desc_base;
+	uint32_t desc_limit, desc_access;
+
+	vector = *rip >> PAGE_SHIFT;
+	*rip = 0;
+
+	/*
+	 * Update the %cs and %rip of the guest so that it starts
+	 * executing real mode code at 'vector << 12'.
+	 */
+	error = vm_set_register(ctx, newcpu, VM_REG_GUEST_RIP, *rip);
+	assert(error == 0);
+
+	error = vm_get_desc(ctx, newcpu, VM_REG_GUEST_CS, &desc_base,
+			    &desc_limit, &desc_access);
+	assert(error == 0);
+
+	desc_base = vector << PAGE_SHIFT;
+	error = vm_set_desc(ctx, newcpu, VM_REG_GUEST_CS,
+			    desc_base, desc_limit, desc_access);
+	assert(error == 0);
+
+	cs = (vector << PAGE_SHIFT) >> 4;
+	error = vm_set_register(ctx, newcpu, VM_REG_GUEST_CS, cs);
+	assert(error == 0);
+}
+/*
+ * Reset 'newcpu' and start it in real mode at the page named by 'rip'.
+ *
+ * 'newcpu' must be non-zero and below 'guest_ncpus' (both asserted).  The
+ * vcpu is reset, passed to fbsdrun_set_capabilities(), switched to
+ * 'unrestricted guest' mode and has its %cs/%rip set up by
+ * spinup_ap_realmode(), which also rewrites the local 'rip' before it is
+ * handed to fbsdrun_addcpu() together with the requesting 'vcpu'.
+ *
+ * Returns 'newcpu'.  Failures of the reset and capability calls are not
+ * reported to the caller; they are asserted against.
+ */
+int
+spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip)
+{
+	int error;
+
+	assert(newcpu != 0);
+	assert(newcpu < guest_ncpus);
+
+	error = vcpu_reset(ctx, newcpu);
+	assert(error == 0);
+
+	fbsdrun_set_capabilities(ctx, newcpu);
+
+	/*
+	 * Enable the 'unrestricted guest' mode for 'newcpu'.
+	 *
+	 * Set up the processor state in power-on 16-bit mode, with the CS:IP
+	 * init'd to the specified low-mem 4K page.
+	 */
+	error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
+	assert(error == 0);
+
+	spinup_ap_realmode(ctx, newcpu, &rip);
+
+	fbsdrun_addcpu(ctx, vcpu, newcpu, rip);
+
+	return (newcpu);
+}


Property changes on: trunk/usr.sbin/bhyve/spinup_ap.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/spinup_ap.h
===================================================================
--- trunk/usr.sbin/bhyve/spinup_ap.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/spinup_ap.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,35 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/spinup_ap.h 240912 2012-09-25 02:33:25Z neel $
+ */
+
+#ifndef	_SPINUP_AP_H_
+#define	_SPINUP_AP_H_
+
+int spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip);
+
+#endif


Property changes on: trunk/usr.sbin/bhyve/spinup_ap.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/task_switch.c
===================================================================
--- trunk/usr.sbin/bhyve/task_switch.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/task_switch.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,940 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Neel Natu <neel at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/task_switch.c 302705 2016-07-13 06:09:34Z ngie $");
+
+#include <sys/param.h>
+#include <sys/_iovec.h>
+#include <sys/mman.h>
+
+#include <x86/psl.h>
+#include <x86/segments.h>
+#include <x86/specialreg.h>
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+
+/*
+ * In-memory image of a 32-bit task state segment as it is copied to and
+ * from guest memory by the task switch emulation below.
+ *
+ * Using 'struct i386tss' is tempting but causes myriad sign extension
+ * issues because all of its fields are defined as signed integers.
+ *
+ * The 'rsvdN' members pad each 16-bit member out to a 32-bit slot.  The
+ * static_assert that follows the structure pins its size to 104 bytes,
+ * the same value the emulation uses as the minimum length of a 32-bit TSS.
+ */
+struct tss32 {
+	uint16_t	tss_link;
+	uint16_t	rsvd1;
+	uint32_t	tss_esp0;
+	uint16_t	tss_ss0;
+	uint16_t	rsvd2;
+	uint32_t	tss_esp1;
+	uint16_t	tss_ss1;
+	uint16_t	rsvd3;
+	uint32_t	tss_esp2;
+	uint16_t	tss_ss2;
+	uint16_t	rsvd4;
+	uint32_t	tss_cr3;
+	uint32_t	tss_eip;
+	uint32_t	tss_eflags;
+	uint32_t	tss_eax;
+	uint32_t	tss_ecx;
+	uint32_t	tss_edx;
+	uint32_t	tss_ebx;
+	uint32_t	tss_esp;
+	uint32_t	tss_ebp;
+	uint32_t	tss_esi;
+	uint32_t	tss_edi;
+	uint16_t	tss_es;
+	uint16_t	rsvd5;
+	uint16_t	tss_cs;
+	uint16_t	rsvd6;
+	uint16_t	tss_ss;
+	uint16_t	rsvd7;
+	uint16_t	tss_ds;
+	uint16_t	rsvd8;
+	uint16_t	tss_fs;
+	uint16_t	rsvd9;
+	uint16_t	tss_gs;
+	uint16_t	rsvd10;
+	uint16_t	tss_ldt;
+	uint16_t	rsvd11;
+	uint16_t	tss_trap;
+	uint16_t	tss_iomap;
+};
+static_assert(sizeof(struct tss32) == 104, "compile-time assertion failed");
+/*
+ * Selector helpers.
+ *
+ * SEL_START(sel) clears the low three bits of a selector; the result is
+ * added to the descriptor table base to locate the descriptor.
+ * SEL_LIMIT(sel) sets the low three bits; the result is compared against
+ * the descriptor table limit.
+ * TSS_BUSY(type) is true when bit 1 of a TSS descriptor type is set.
+ */
+#define	SEL_START(sel)	(((sel) & ~0x7))
+#define	SEL_LIMIT(sel)	(((sel) | 0x7))
+#define	TSS_BUSY(type)	(((type) & 0x2) != 0)
+/*
+ * Return the value of guest register 'reg' on 'vcpu' as reported by
+ * vm_get_register().  The call is asserted to succeed, so callers do not
+ * get an error indication.
+ */
+static uint64_t
+GETREG(struct vmctx *ctx, int vcpu, int reg)
+{
+	uint64_t val;
+	int error;
+
+	error = vm_get_register(ctx, vcpu, reg, &val);
+	assert(error == 0);
+	return (val);
+}
+/*
+ * Store 'val' into guest register 'reg' on 'vcpu' with vm_set_register().
+ * The call is asserted to succeed, so callers do not get an error
+ * indication.
+ */
+static void
+SETREG(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
+{
+	int error;
+
+	error = vm_set_register(ctx, vcpu, reg, val);
+	assert(error == 0);
+}
+/*
+ * Convert the descriptor table entry 'usd' into a 'struct seg_desc',
+ * which is returned by value.
+ *
+ * base:   USD_GETBASE(usd).
+ * limit:  USD_GETLIMIT(usd); when 'sd_gran' is set the value is shifted
+ *         left by 12 and the low 12 bits are filled with ones.
+ * access: 'sd_type' in the low bits, 'sd_dpl' starting at bit 5, 'sd_p'
+ *         at bit 7, 'sd_xx' starting at bit 12, 'sd_def32' at bit 14 and
+ *         'sd_gran' at bit 15.
+ */
+static struct seg_desc
+usd_to_seg_desc(struct user_segment_descriptor *usd)
+{
+	struct seg_desc seg_desc;
+
+	seg_desc.base = (u_int)USD_GETBASE(usd);
+	if (usd->sd_gran)
+		seg_desc.limit = (u_int)(USD_GETLIMIT(usd) << 12) | 0xfff;
+	else
+		seg_desc.limit = (u_int)USD_GETLIMIT(usd);
+	seg_desc.access = usd->sd_type | usd->sd_dpl << 5 | usd->sd_p << 7;
+	seg_desc.access |= usd->sd_xx << 12;
+	seg_desc.access |= usd->sd_def32 << 14;
+	seg_desc.access |= usd->sd_gran << 15;
+
+	return (seg_desc);
+}
+
+/*
+ * Inject an exception with an error code that is a segment selector.
+ * The format of the error code is described in section 6.13, "Error Code",
+ * Intel SDM volume 3.
+ *
+ * Bit 0 (EXT) denotes whether the exception occurred during delivery
+ * of an external event like an interrupt.
+ *
+ * Bit 1 (IDT) indicates whether the selector points to a gate descriptor
+ * in the IDT.
+ *
+ * Bit 2(GDT/LDT) has the usual interpretation of Table Indicator (TI).
+ *
+ * 'vector' is the exception to inject, 'sel' the selector the error code
+ * is derived from and 'ext' selects the value of bit 0.  The exception is
+ * delivered with vm_inject_fault().
+ */
+static void
+sel_exception(struct vmctx *ctx, int vcpu, int vector, uint16_t sel, int ext)
+{
+	/*
+	 * Bit 2 from the selector is retained as-is in the error code.
+	 *
+	 * Bit 1 can be safely cleared because none of the selectors
+	 * encountered during task switch emulation refer to a task
+	 * gate in the IDT.
+	 *
+	 * Bit 0 is set depending on the value of 'ext'.
+	 *
+	 * The mask below clears bits 1 and 0 together; bit 0 is then set
+	 * again when 'ext' is non-zero.
+	 */
+	sel &= ~0x3;
+	if (ext)
+		sel |= 0x1;
+	vm_inject_fault(ctx, vcpu, vector, 1, sel);
+}
+
+/*
+ * Return 0 if the selector 'sel' is within the limits of the GDT/LDT
+ * and non-zero otherwise.
+ *
+ * ISLDT(sel) selects the table: LDTR for an LDT selector, GDTR otherwise.
+ * An LDT selector also fails the check when the LDTR descriptor is
+ * unusable or not present.  The table limit is compared against
+ * SEL_LIMIT(sel).  No exception is injected here; that is left to the
+ * caller.
+ */
+static int
+desc_table_limit_check(struct vmctx *ctx, int vcpu, uint16_t sel)
+{
+	uint64_t base;
+	uint32_t limit, access;
+	int error, reg;
+
+	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
+	error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
+	assert(error == 0);
+
+	if (reg == VM_REG_GUEST_LDTR) {
+		if (SEG_DESC_UNUSABLE(access) || !SEG_DESC_PRESENT(access))
+			return (-1);
+	}
+
+	if (limit < SEL_LIMIT(sel))
+		return (-1);
+	else
+		return (0);
+}
+
+/*
+ * Read/write the segment descriptor 'desc' into the GDT/LDT slot referenced
+ * by the selector 'sel'.
+ *
+ * 'doread' selects the direction: true copies the slot from guest memory
+ * into 'desc', false copies 'desc' out to the slot.  The selector must
+ * already be within the table limit; this is asserted, not checked.
+ *
+ * Returns 0 after the copy has been done.
+ * If vm_copy_setup() returns an error or sets '*faultptr', nothing is
+ * copied and the vm_copy_setup() return value is passed back unchanged;
+ * callers therefore have to test '*faultptr' as well as the return value.
+ * NOTE(review): CHKERR in this file expects that value to be 0 or EFAULT.
+ */
+static int
+desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    uint16_t sel, struct user_segment_descriptor *desc, bool doread,
+    int *faultptr)
+{
+	struct iovec iov[2];
+	uint64_t base;
+	uint32_t limit, access;
+	int error, reg;
+
+	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
+	error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
+	assert(error == 0);
+	assert(limit >= SEL_LIMIT(sel));
+
+	error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel),
+	    sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov),
+	    faultptr);
+	if (error || *faultptr)
+		return (error);
+
+	if (doread)
+		vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
+	else
+		vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
+	return (0);
+}
+/*
+ * Copy the descriptor referenced by 'sel' from the guest's GDT/LDT into
+ * 'desc'.  Thin wrapper around desc_table_rw() with 'doread' set to true;
+ * the return value and '*faultptr' are those of desc_table_rw().
+ */
+static int
+desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
+{
+	return (desc_table_rw(ctx, vcpu, paging, sel, desc, true, faultptr));
+}
+/*
+ * Copy 'desc' into the guest's GDT/LDT slot referenced by 'sel'.  Thin
+ * wrapper around desc_table_rw() with 'doread' set to false; the return
+ * value and '*faultptr' are those of desc_table_rw().
+ */
+static int
+desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
+{
+	return (desc_table_rw(ctx, vcpu, paging, sel, desc, false, faultptr));
+}
+
+/*
+ * Read the TSS descriptor referenced by 'sel' into 'desc'.
+ *
+ * 'sel' must reference the GDT and must not be a NULL selector; both
+ * conditions are asserted.  The descriptor is read with an implicit
+ * supervisor-mode (cpl 0) copy of the task switch paging state.
+ *
+ * Returns 0 on success with '*faultptr' left at 0.
+ * Returns 0 with '*faultptr' set to 1 if the selector lies outside the
+ * descriptor table limit; #TS (task switch due to IRET) or #GP (any other
+ * reason) has then been injected into the guest.
+ * Otherwise returns the result of desc_table_read(), which may also report
+ * a guest fault through '*faultptr'.
+ */
+static int
+read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
+    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
+{
+	struct vm_guest_paging sup_paging;
+	int error;
+
+	assert(!ISLDT(sel));
+	assert(IDXSEL(sel) != 0);
+
+	*faultptr = 0;
+
+	/* Descriptor table limit check */
+	if (desc_table_limit_check(ctx, vcpu, sel)) {
+		if (ts->reason == TSR_IRET)
+			sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
+		else
+			sel_exception(ctx, vcpu, IDT_GP, sel, ts->ext);
+		/*
+		 * Report the injected exception through '*faultptr' the way
+		 * push_errcode() does.  Returning a non-zero value other
+		 * than EFAULT would trip the assertion in CHKERR.
+		 */
+		*faultptr = 1;
+		return (0);
+	}
+
+	sup_paging = ts->paging;
+	sup_paging.cpl = 0;		/* implicit supervisor mode */
+	error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr);
+	return (error);
+}
+
+static bool
+code_desc(int sd_type)
+{
+	/*
+	 * code descriptor: true when bits 4 and 3 (mask 0x18) of the
+	 * descriptor type are both set.
+	 */
+	return ((sd_type & 0x18) == 0x18);
+}
+
+static bool
+stack_desc(int sd_type)
+{
+	/*
+	 * writable data descriptor: of the bits in mask 0x1A, bits 4 and 1
+	 * must be set and bit 3 must be clear.
+	 */
+	return ((sd_type & 0x1A) == 0x12);
+}
+
+static bool
+data_desc(int sd_type)
+{
+	/*
+	 * data descriptor or a readable code descriptor: either bit 4 is
+	 * set with bit 3 clear, or bits 4, 3 and 1 are all set.
+	 */
+	return ((sd_type & 0x18) == 0x10 || (sd_type & 0x1A) == 0x1A);
+}
+
+static bool
+ldt_desc(int sd_type)
+{
+	/* LDT descriptor: the type must be exactly SDT_SYSLDT */
+	return (sd_type == SDT_SYSLDT);
+}
+
+/*
+ * Validate the descriptor 'seg_desc' associated with 'segment'.
+ *
+ * 'segment' is one of VM_REG_GUEST_LDTR, _CS, _SS, _DS, _ES, _FS or _GS;
+ * anything else trips an assert.  The selector currently loaded in that
+ * register is checked against the descriptor table limit, the descriptor
+ * is read with implicit supervisor privilege, and its type, present bit
+ * and privilege levels are checked.
+ *
+ * Returns 0 with '*faultptr' set to 0 when the descriptor is acceptable;
+ * '*seg_desc' then holds the converted descriptor, or an unusable
+ * descriptor (access 0x10000) for a NULL LDTR/data selector.
+ * Returns 0 with '*faultptr' set to 1 when a check failed and #TS, #SS or
+ * #NP has been injected into the guest; '*seg_desc' is not written.
+ * Otherwise returns the result of desc_table_read(), which may also report
+ * a guest fault through '*faultptr'.
+ *
+ * Injected exceptions are reported through '*faultptr' rather than through
+ * a return value of 1 because CHKERR asserts that helper return values are
+ * either 0 or EFAULT.
+ */
+static int
+validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
+    int segment, struct seg_desc *seg_desc, int *faultptr)
+{
+	struct vm_guest_paging sup_paging;
+	struct user_segment_descriptor usd;
+	int error, idtvec;
+	int cpl, dpl, rpl;
+	uint16_t sel, cs;
+	bool ldtseg, codeseg, stackseg, dataseg, conforming;
+
+	/* No fault so far; also covers the NULL selector return below */
+	*faultptr = 0;
+
+	ldtseg = codeseg = stackseg = dataseg = false;
+	switch (segment) {
+	case VM_REG_GUEST_LDTR:
+		ldtseg = true;
+		break;
+	case VM_REG_GUEST_CS:
+		codeseg = true;
+		break;
+	case VM_REG_GUEST_SS:
+		stackseg = true;
+		break;
+	case VM_REG_GUEST_DS:
+	case VM_REG_GUEST_ES:
+	case VM_REG_GUEST_FS:
+	case VM_REG_GUEST_GS:
+		dataseg = true;
+		break;
+	default:
+		assert(0);
+	}
+
+	/* Get the segment selector */
+	sel = GETREG(ctx, vcpu, segment);
+
+	/* LDT selector must point into the GDT */
+	if (ldtseg && ISLDT(sel)) {
+		sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
+		*faultptr = 1;
+		return (0);
+	}
+
+	/* Descriptor table limit check */
+	if (desc_table_limit_check(ctx, vcpu, sel)) {
+		sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
+		*faultptr = 1;
+		return (0);
+	}
+
+	/* NULL selector */
+	if (IDXSEL(sel) == 0) {
+		/* Code and stack segment selectors cannot be NULL */
+		if (codeseg || stackseg) {
+			sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
+			*faultptr = 1;
+			return (0);
+		}
+		seg_desc->base = 0;
+		seg_desc->limit = 0;
+		seg_desc->access = 0x10000;	/* unusable */
+		return (0);
+	}
+
+	/* Read the descriptor from the GDT/LDT */
+	sup_paging = ts->paging;
+	sup_paging.cpl = 0;	/* implicit supervisor mode */
+	error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr);
+	if (error || *faultptr)
+		return (error);
+
+	/* Verify that the descriptor type is compatible with the segment */
+	if ((ldtseg && !ldt_desc(usd.sd_type)) ||
+	    (codeseg && !code_desc(usd.sd_type)) ||
+	    (dataseg && !data_desc(usd.sd_type)) ||
+	    (stackseg && !stack_desc(usd.sd_type))) {
+		sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
+		*faultptr = 1;
+		return (0);
+	}
+
+	/* Segment must be marked present */
+	if (!usd.sd_p) {
+		if (ldtseg)
+			idtvec = IDT_TS;
+		else if (stackseg)
+			idtvec = IDT_SS;
+		else
+			idtvec = IDT_NP;
+		sel_exception(ctx, vcpu, idtvec, sel, ts->ext);
+		*faultptr = 1;
+		return (0);
+	}
+
+	/* The privilege checks use the RPL of the %cs selector as the CPL */
+	cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS);
+	cpl = cs & SEL_RPL_MASK;
+	rpl = sel & SEL_RPL_MASK;
+	dpl = usd.sd_dpl;
+
+	if (stackseg && (rpl != cpl || dpl != cpl)) {
+		sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
+		*faultptr = 1;
+		return (0);
+	}
+
+	if (codeseg) {
+		conforming = (usd.sd_type & 0x4) ? true : false;
+		if ((conforming && (cpl < dpl)) ||
+		    (!conforming && (cpl != dpl))) {
+			sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
+			*faultptr = 1;
+			return (0);
+		}
+	}
+
+	if (dataseg) {
+		/*
+		 * A data segment is always non-conforming except when its
+		 * descriptor is a readable, conforming code segment.
+		 */
+		if (code_desc(usd.sd_type) && (usd.sd_type & 0x4) != 0)
+			conforming = true;
+		else
+			conforming = false;
+
+		if (!conforming && (rpl > dpl || cpl > dpl)) {
+			sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
+			*faultptr = 1;
+			return (0);
+		}
+	}
+	*seg_desc = usd_to_seg_desc(&usd);
+	return (0);
+}
+/*
+ * Store the current register state of 'vcpu' into 'tss' and copy the
+ * whole structure out to guest memory through 'iov'.
+ *
+ * The general purpose registers, segment selectors and %rflags are read
+ * from the vcpu and narrowed to the width of the TSS fields.  PSL_NT is
+ * cleared in the saved eflags when the task switch reason is TSR_IRET.
+ * 'eip' is supplied by the caller and stored as-is.  Fields of 'tss' that
+ * are not assigned here are written back with whatever value the caller
+ * left in them.
+ */
+static void
+tss32_save(struct vmctx *ctx, int vcpu, struct vm_task_switch *task_switch,
+    uint32_t eip, struct tss32 *tss, struct iovec *iov)
+{
+
+	/* General purpose registers */
+	tss->tss_eax = GETREG(ctx, vcpu, VM_REG_GUEST_RAX);
+	tss->tss_ecx = GETREG(ctx, vcpu, VM_REG_GUEST_RCX);
+	tss->tss_edx = GETREG(ctx, vcpu, VM_REG_GUEST_RDX);
+	tss->tss_ebx = GETREG(ctx, vcpu, VM_REG_GUEST_RBX);
+	tss->tss_esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
+	tss->tss_ebp = GETREG(ctx, vcpu, VM_REG_GUEST_RBP);
+	tss->tss_esi = GETREG(ctx, vcpu, VM_REG_GUEST_RSI);
+	tss->tss_edi = GETREG(ctx, vcpu, VM_REG_GUEST_RDI);
+
+	/* Segment selectors */
+	tss->tss_es = GETREG(ctx, vcpu, VM_REG_GUEST_ES);
+	tss->tss_cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS);
+	tss->tss_ss = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
+	tss->tss_ds = GETREG(ctx, vcpu, VM_REG_GUEST_DS);
+	tss->tss_fs = GETREG(ctx, vcpu, VM_REG_GUEST_FS);
+	tss->tss_gs = GETREG(ctx, vcpu, VM_REG_GUEST_GS);
+
+	/* eflags and eip */
+	tss->tss_eflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
+	if (task_switch->reason == TSR_IRET)
+		tss->tss_eflags &= ~PSL_NT;
+	tss->tss_eip = eip;
+
+	/* Copy updated old TSS into guest memory */
+	vm_copyout(ctx, vcpu, tss, iov, sizeof(struct tss32));
+}
+/*
+ * Load the base, limit and access rights held in 'sd' into the descriptor
+ * state of segment register 'reg' with vm_set_desc().  The call is
+ * asserted to succeed.
+ */
+static void
+update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd)
+{
+	int error;
+
+	error = vm_set_desc(ctx, vcpu, reg, sd->base, sd->limit, sd->access);
+	assert(error == 0);
+}
+
+/*
+ * Update the vcpu registers to reflect the state of the new task.
+ *
+ * 'tss' holds the new task's TSS image and 'iov' describes where that
+ * image lives in guest memory.  For every task switch reason other than
+ * TSR_IRET and TSR_JMP the task is treated as nested: 'ot_sel' is stored
+ * in the link field, PSL_NT is set in the loaded eflags and the TSS is
+ * written back to guest memory.
+ *
+ * LDTR, CR3 (unless paging is flat), eflags, eip, the general purpose
+ * registers and the segment selectors are loaded first.  The descriptors
+ * for LDTR, CS, SS, DS, ES, FS and GS are then validated and loaded in
+ * that order; 'ts->paging' is updated with the new cr3 and cpl on the way.
+ *
+ * Returns 0 on success with '*faultptr' at 0.
+ * Returns 0 with '*faultptr' set to 1 if #GP was injected because of a
+ * reserved bit in a PAE PDPTE.
+ * Otherwise returns the result of the failing validate_seg_desc() call,
+ * which may also report a guest fault through '*faultptr'.
+ */
+static int
+tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
+    uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr)
+{
+	struct seg_desc seg_desc, seg_desc2;
+	uint64_t *pdpte, maxphyaddr, reserved;
+	uint32_t eflags;
+	int error, i;
+	bool nested;
+
+	*faultptr = 0;
+
+	nested = false;
+	if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) {
+		tss->tss_link = ot_sel;
+		nested = true;
+	}
+
+	eflags = tss->tss_eflags;
+	if (nested)
+		eflags |= PSL_NT;
+
+	/* LDTR */
+	SETREG(ctx, vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt);
+
+	/* PDBR */
+	if (ts->paging.paging_mode != PAGING_MODE_FLAT) {
+		if (ts->paging.paging_mode == PAGING_MODE_PAE) {
+			/*
+			 * XXX Assuming 36-bit MAXPHYADDR.
+			 */
+			maxphyaddr = (1UL << 36) - 1;
+			/*
+			 * NOTE(review): the pointer returned by
+			 * paddr_guest2host() is used without a NULL check;
+			 * confirm whether it can fail for a guest-chosen cr3.
+			 */
+			pdpte = paddr_guest2host(ctx, tss->tss_cr3 & ~0x1f, 32);
+			for (i = 0; i < 4; i++) {
+				/* Check reserved bits if the PDPTE is valid */
+				if (!(pdpte[i] & 0x1))
+					continue;
+				/*
+				 * Bits 2:1, 8:5 and bits above the processor's
+				 * maximum physical address are reserved.
+				 */
+				reserved = ~maxphyaddr | 0x1E6;
+				if (pdpte[i] & reserved) {
+					vm_inject_gp(ctx, vcpu);
+					/*
+					 * Report the injected #GP through
+					 * '*faultptr'; CHKERR asserts that
+					 * the return value is 0 or EFAULT.
+					 */
+					*faultptr = 1;
+					return (0);
+				}
+			}
+			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]);
+			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]);
+			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]);
+			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]);
+		}
+		SETREG(ctx, vcpu, VM_REG_GUEST_CR3, tss->tss_cr3);
+		ts->paging.cr3 = tss->tss_cr3;
+	}
+
+	/* eflags and eip */
+	SETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS, eflags);
+	SETREG(ctx, vcpu, VM_REG_GUEST_RIP, tss->tss_eip);
+
+	/* General purpose registers */
+	SETREG(ctx, vcpu, VM_REG_GUEST_RAX, tss->tss_eax);
+	SETREG(ctx, vcpu, VM_REG_GUEST_RCX, tss->tss_ecx);
+	SETREG(ctx, vcpu, VM_REG_GUEST_RDX, tss->tss_edx);
+	SETREG(ctx, vcpu, VM_REG_GUEST_RBX, tss->tss_ebx);
+	SETREG(ctx, vcpu, VM_REG_GUEST_RSP, tss->tss_esp);
+	SETREG(ctx, vcpu, VM_REG_GUEST_RBP, tss->tss_ebp);
+	SETREG(ctx, vcpu, VM_REG_GUEST_RSI, tss->tss_esi);
+	SETREG(ctx, vcpu, VM_REG_GUEST_RDI, tss->tss_edi);
+
+	/* Segment selectors */
+	SETREG(ctx, vcpu, VM_REG_GUEST_ES, tss->tss_es);
+	SETREG(ctx, vcpu, VM_REG_GUEST_CS, tss->tss_cs);
+	SETREG(ctx, vcpu, VM_REG_GUEST_SS, tss->tss_ss);
+	SETREG(ctx, vcpu, VM_REG_GUEST_DS, tss->tss_ds);
+	SETREG(ctx, vcpu, VM_REG_GUEST_FS, tss->tss_fs);
+	SETREG(ctx, vcpu, VM_REG_GUEST_GS, tss->tss_gs);
+
+	/*
+	 * If this is a nested task then write out the new TSS to update
+	 * the previous link field.
+	 */
+	if (nested)
+		vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss));
+
+	/* Validate segment descriptors */
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc,
+	    faultptr);
+	if (error || *faultptr)
+		return (error);
+	update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc);
+
+	/*
+	 * Section "Checks on Guest Segment Registers", Intel SDM, Vol 3.
+	 *
+	 * The SS and CS attribute checks on VM-entry are inter-dependent so
+	 * we need to make sure that both segments are valid before updating
+	 * either of them. This ensures that the VMCS state can pass the
+	 * VM-entry checks so the guest can handle any exception injected
+	 * during task switch emulation.
+	 */
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc,
+	    faultptr);
+	if (error || *faultptr)
+		return (error);
+
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2,
+	    faultptr);
+	if (error || *faultptr)
+		return (error);
+	update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc);
+	update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2);
+	ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;
+
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc,
+	    faultptr);
+	if (error || *faultptr)
+		return (error);
+	update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc);
+
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc,
+	    faultptr);
+	if (error || *faultptr)
+		return (error);
+	update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc);
+
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc,
+	    faultptr);
+	if (error || *faultptr)
+		return (error);
+	update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc);
+
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc,
+	    faultptr);
+	if (error || *faultptr)
+		return (error);
+	update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc);
+
+	return (0);
+}
+
+/*
+ * Push an error code on the stack of the new task. This is needed if the
+ * task switch was triggered by a hardware exception that causes an error
+ * code to be saved (e.g. #PF).
+ *
+ * 'task_type' is the descriptor type of the new TSS and selects whether 4
+ * or 2 bytes of 'errcode' are pushed.  The stack pointer is read from
+ * %rsp into a 32-bit local, lowered by that many bytes and written back
+ * only after the copy to guest memory has been done.
+ *
+ * Returns 0 with '*faultptr' at 0 on success.
+ * Returns 0 with '*faultptr' set to 1 if #SS or #AC was injected here.
+ * Otherwise returns the result of vm_copy_setup(), which may also report
+ * a guest fault through '*faultptr'.
+ */
+static int
+push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    int task_type, uint32_t errcode, int *faultptr)
+{
+	struct iovec iov[2];
+	struct seg_desc seg_desc;
+	int stacksize, bytes, error;
+	uint64_t gla, cr0, rflags;
+	uint32_t esp;
+	uint16_t stacksel;
+
+	*faultptr = 0;
+
+	cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
+	rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
+	stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
+
+	error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc.base,
+	    &seg_desc.limit, &seg_desc.access);
+	assert(error == 0);
+
+	/*
+	 * Section "Error Code" in the Intel SDM vol 3: the error code is
+	 * pushed on the stack as a doubleword or word (depending on the
+	 * default interrupt, trap or task gate size).
+	 */
+	if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS)
+		bytes = 4;
+	else
+		bytes = 2;
+
+	/*
+	 * PUSH instruction from Intel SDM vol 2: the 'B' flag in the
+	 * stack-segment descriptor determines the size of the stack
+	 * pointer outside of 64-bit mode.
+	 */
+	if (SEG_DESC_DEF32(seg_desc.access))
+		stacksize = 4;
+	else
+		stacksize = 2;
+
+	esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
+	esp -= bytes;
+
+	/* A non-zero result from the address calculation is reported as #SS */
+	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
+	    &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
+		sel_exception(ctx, vcpu, IDT_SS, stacksel, 1);
+		*faultptr = 1;
+		return (0);
+	}
+
+	if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
+		vm_inject_ac(ctx, vcpu, 1);
+		*faultptr = 1;
+		return (0);
+	}
+
+	error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE,
+	    iov, nitems(iov), faultptr);
+	if (error || *faultptr)
+		return (error);
+
+	/* Only the first 'bytes' bytes of 'errcode' are copied out */
+	vm_copyout(ctx, vcpu, &errcode, iov, bytes);
+	SETREG(ctx, vcpu, VM_REG_GUEST_RSP, esp);
+	return (0);
+}
+
+/*
+ * Evaluate return value from helper functions and potentially return to
+ * the VM run loop.
+ *
+ * The macro returns from the function that uses it: VMEXIT_ABORT when
+ * 'error' is non-zero, VMEXIT_CONTINUE when 'error' is zero and 'fault'
+ * is non-zero.  Execution continues past the macro only when both are
+ * zero.  'error' is asserted to be either 0 or EFAULT, so helpers must
+ * report an injected guest exception through their fault pointer and not
+ * through any other return value.  The arguments are expanded without
+ * extra parentheses; pass plain variables.
+ */
+#define	CHKERR(error,fault)						\
+	do {								\
+		assert((error == 0) || (error == EFAULT));		\
+		if (error)						\
+			return (VMEXIT_ABORT);				\
+		else if (fault)						\
+			return (VMEXIT_CONTINUE);			\
+	} while (0)
+/*
+ * Emulate the task switch described by 'vmexit->u.task_switch' on the
+ * vcpu '*pvcpu'.  The guest must be in protected mode (asserted).
+ *
+ * Outline:
+ *  - read and check the new TSS descriptor (type, present bit, minimum
+ *    limit, busy state appropriate for the task switch reason);
+ *  - map and read the new TSS, then locate the old TSS through the task
+ *    register and read its descriptor and contents;
+ *  - clear the busy bit of the old descriptor for IRET/JMP, save the
+ *    current register state into the old TSS and set the busy bit of the
+ *    new descriptor unless the reason is IRET;
+ *  - load the task register, set CR0.TS and load the register state of
+ *    the new task;
+ *  - push the error code on the new stack if one is pending and clear the
+ *    vcpu's exitintinfo when the switch came through an IDT gate.
+ *
+ * Returns VMEXIT_CONTINUE when the switch completed or an exception was
+ * injected into the guest, and VMEXIT_ABORT when a helper reported an
+ * error or the new TSS is a 16-bit TSS, which is not supported.
+ *
+ * NOTE(review): the old TSS is mapped and read with the length derived
+ * from the type of the new TSS ('minlimit + 1'); confirm that this is
+ * intended when the old TSS is of a different type.
+ */
+int
+vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	struct seg_desc nt;
+	struct tss32 oldtss, newtss;
+	struct vm_task_switch *task_switch;
+	struct vm_guest_paging *paging, sup_paging;
+	struct user_segment_descriptor nt_desc, ot_desc;
+	struct iovec nt_iov[2], ot_iov[2];
+	uint64_t cr0, ot_base;
+	uint32_t eip, ot_lim, access;
+	int error, ext, fault, minlimit, nt_type, ot_type, vcpu;
+	enum task_switch_reason reason;
+	uint16_t nt_sel, ot_sel;
+
+	task_switch = &vmexit->u.task_switch;
+	nt_sel = task_switch->tsssel;
+	ext = vmexit->u.task_switch.ext;
+	reason = vmexit->u.task_switch.reason;
+	paging = &vmexit->u.task_switch.paging;
+	vcpu = *pvcpu;
+
+	assert(paging->cpu_mode == CPU_MODE_PROTECTED);
+
+	/*
+	 * Calculate the instruction pointer to store in the old TSS.
+	 */
+	eip = vmexit->rip + vmexit->inst_length;
+
+	/*
+	 * Section 4.6, "Access Rights" in Intel SDM Vol 3.
+	 * The following page table accesses are implicitly supervisor mode:
+	 * - accesses to GDT or LDT to load segment descriptors
+	 * - accesses to the task state segment during task switch
+	 */
+	sup_paging = *paging;
+	sup_paging.cpl = 0;	/* implicit supervisor mode */
+
+	/* Fetch the new TSS descriptor */
+	error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc,
+	    &fault);
+	CHKERR(error, fault);
+
+	nt = usd_to_seg_desc(&nt_desc);
+
+	/* Verify the type of the new TSS */
+	nt_type = SEG_DESC_TYPE(nt.access);
+	if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS &&
+	    nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) {
+		sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
+		goto done;
+	}
+
+	/* TSS descriptor must have present bit set */
+	if (!SEG_DESC_PRESENT(nt.access)) {
+		sel_exception(ctx, vcpu, IDT_NP, nt_sel, ext);
+		goto done;
+	}
+
+	/*
+	 * TSS must have a minimum length of 104 bytes for a 32-bit TSS and
+	 * 44 bytes for a 16-bit TSS.
+	 */
+	if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS)
+		minlimit = 104 - 1;
+	else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS)
+		minlimit = 44 - 1;
+	else
+		minlimit = 0;
+
+	assert(minlimit > 0);
+	if (nt.limit < minlimit) {
+		sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
+		goto done;
+	}
+
+	/* TSS must be busy if task switch is due to IRET */
+	if (reason == TSR_IRET && !TSS_BUSY(nt_type)) {
+		sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
+		goto done;
+	}
+
+	/*
+	 * TSS must be available (not busy) if task switch reason is
+	 * CALL, JMP, exception or interrupt.
+	 */
+	if (reason != TSR_IRET && TSS_BUSY(nt_type)) {
+		sel_exception(ctx, vcpu, IDT_GP, nt_sel, ext);
+		goto done;
+	}
+
+	/* Fetch the new TSS */
+	error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1,
+	    PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault);
+	CHKERR(error, fault);
+	vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1);
+
+	/* Get the old TSS selector from the guest's task register */
+	ot_sel = GETREG(ctx, vcpu, VM_REG_GUEST_TR);
+	if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) {
+		/*
+		 * This might happen if a task switch was attempted without
+		 * ever loading the task register with LTR. In this case the
+		 * TR would contain the values from power-on:
+		 * (sel = 0, base = 0, limit = 0xffff).
+		 */
+		sel_exception(ctx, vcpu, IDT_TS, ot_sel, task_switch->ext);
+		goto done;
+	}
+
+	/* Get the old TSS base and limit from the guest's task register */
+	error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim,
+	    &access);
+	assert(error == 0);
+	assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access));
+	ot_type = SEG_DESC_TYPE(access);
+	assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);
+
+	/* Fetch the old TSS descriptor */
+	error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc,
+	    &fault);
+	CHKERR(error, fault);
+
+	/* Get the old TSS */
+	error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1,
+	    PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault);
+	CHKERR(error, fault);
+	vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1);
+
+	/*
+	 * Clear the busy bit in the old TSS descriptor if the task switch
+	 * is due to an IRET or JMP instruction.
+	 */
+	if (reason == TSR_IRET || reason == TSR_JMP) {
+		ot_desc.sd_type &= ~0x2;
+		error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel,
+		    &ot_desc, &fault);
+		CHKERR(error, fault);
+	}
+
+	if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
+		fprintf(stderr, "Task switch to 16-bit TSS not supported\n");
+		return (VMEXIT_ABORT);
+	}
+
+	/* Save processor state in old TSS */
+	tss32_save(ctx, vcpu, task_switch, eip, &oldtss, ot_iov);
+
+	/*
+	 * If the task switch was triggered for any reason other than IRET
+	 * then set the busy bit in the new TSS descriptor.
+	 */
+	if (reason != TSR_IRET) {
+		nt_desc.sd_type |= 0x2;
+		error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel,
+		    &nt_desc, &fault);
+		CHKERR(error, fault);
+	}
+
+	/* Update task register to point at the new TSS */
+	SETREG(ctx, vcpu, VM_REG_GUEST_TR, nt_sel);
+
+	/* Update the hidden descriptor state of the task register */
+	nt = usd_to_seg_desc(&nt_desc);
+	update_seg_desc(ctx, vcpu, VM_REG_GUEST_TR, &nt);
+
+	/* Set CR0.TS */
+	cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
+	SETREG(ctx, vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS);
+
+	/*
+	 * We are now committed to the task switch. Any exceptions encountered
+	 * after this point will be handled in the context of the new task and
+	 * the saved instruction pointer will belong to the new task.
+	 */
+	error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, newtss.tss_eip);
+	assert(error == 0);
+
+	/* Load processor state from new TSS */
+	error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov,
+	    &fault);
+	CHKERR(error, fault);
+
+	/*
+	 * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
+	 * caused an error code to be generated, this error code is copied
+	 * to the stack of the new task.
+	 */
+	if (task_switch->errcode_valid) {
+		assert(task_switch->ext);
+		assert(task_switch->reason == TSR_IDT_GATE);
+		error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type,
+		    task_switch->errcode, &fault);
+		CHKERR(error, fault);
+	}
+
+	/*
+	 * Treatment of virtual-NMI blocking if NMI is delivered through
+	 * a task gate.
+	 *
+	 * Section "Architectural State Before A VM Exit", Intel SDM, Vol3:
+	 * If the virtual NMIs VM-execution control is 1, VM entry injects
+	 * an NMI, and delivery of the NMI causes a task switch that causes
+	 * a VM exit, virtual-NMI blocking is in effect before the VM exit
+	 * commences.
+	 *
+	 * Thus, virtual-NMI blocking is in effect at the time of the task
+	 * switch VM exit.
+	 */
+
+	/*
+	 * Treatment of virtual-NMI unblocking on IRET from NMI handler task.
+	 *
+	 * Section "Changes to Instruction Behavior in VMX Non-Root Operation"
+	 * If "virtual NMIs" control is 1 IRET removes any virtual-NMI blocking.
+	 * This unblocking of virtual-NMI occurs even if IRET causes a fault.
+	 *
+	 * Thus, virtual-NMI blocking is cleared at the time of the task switch
+	 * VM exit.
+	 */
+
+	/*
+	 * If the task switch was triggered by an event delivered through
+	 * the IDT then extinguish the pending event from the vcpu's
+	 * exitintinfo.
+	 */
+	if (task_switch->reason == TSR_IDT_GATE) {
+		error = vm_set_intinfo(ctx, vcpu, 0);
+		assert(error == 0);
+	}
+
+	/*
+	 * XXX should inject debug exception if 'T' bit is 1
+	 */
+done:
+	return (VMEXIT_CONTINUE);
+}


Property changes on: trunk/usr.sbin/bhyve/task_switch.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/uart_emul.c
===================================================================
--- trunk/usr.sbin/bhyve/uart_emul.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/uart_emul.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,675 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * Copyright (c) 2013 Neel Natu <neel at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/uart_emul.c 295124 2016-02-01 14:56:11Z grehan $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/uart_emul.c 295124 2016-02-01 14:56:11Z grehan $");
+
+#include <sys/types.h>
+#include <dev/ic/ns16550.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <termios.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "mevent.h"
+#include "uart_emul.h"
+
+/* I/O port base address and IRQ of the two legacy COM ports (see uart_lres[]). */
+#define	COM1_BASE      	0x3F8
+#define COM1_IRQ	4
+#define	COM2_BASE      	0x2F8
+#define COM2_IRQ	3
+
+/* Reference clock and baud rate used by uart_reset() to seed the divisor latch. */
+#define	DEFAULT_RCLK	1843200
+#define	DEFAULT_BAUD	9600
+
+/* Bits 6-7 of the FCR; uart_write() keeps them when the guest programs the FCR. */
+#define	FCR_RX_MASK	0xC0
+
+/* MCR output bits that modem_status() reflects into MSR (RI, DCD) in loopback mode. */
+#define	MCR_OUT1	0x04
+#define	MCR_OUT2	0x08
+
+/* Low four bits of the MSR: the "delta" bits, cleared when the guest reads MSR. */
+#define	MSR_DELTA_MASK	0x0f
+
+/* Fall back to com_scr when REG_SCR has not been defined already. */
+#ifndef REG_SCR
+#define REG_SCR		com_scr
+#endif
+
+/* Capacity of the receive FIFO once the guest enables FIFO mode. */
+#define	FIFOSZ	16
+
+static bool uart_stdio;		/* stdio in use for i/o */
+static struct termios tio_stdio_orig;	/* stdin settings saved by ttyopen(), restored by ttyclose() */
+
+/*
+ * Resources of the legacy COM ports handed out by uart_legacy_alloc().
+ * 'inuse' is set once an entry has been claimed.
+ */
+static struct {
+	int	baseaddr;
+	int	irq;
+	bool	inuse;
+} uart_lres[] = {
+	{ COM1_BASE, COM1_IRQ, false},
+	{ COM2_BASE, COM2_IRQ, false},
+};
+
+/* Number of entries in uart_lres[]. */
+#define	UART_NLDEVS	(sizeof(uart_lres) / sizeof(uart_lres[0]))
+
+/*
+ * Receive FIFO, used as a ring buffer.  'size' is the capacity currently
+ * in effect: rxfifo_reset() sets it to 1 when the FIFO is disabled and to
+ * FIFOSZ when it is enabled.
+ */
+struct fifo {
+	uint8_t	buf[FIFOSZ];
+	int	rindex;		/* index to read from */
+	int	windex;		/* index to write to */
+	int	num;		/* number of characters in the fifo */
+	int	size;		/* size of the fifo */
+};
+
+/*
+ * Host tty backend of a UART.
+ */
+struct ttyfd {
+	bool	opened;		/* a backend has been attached */
+	int	fd;		/* tty device file descriptor */
+	struct termios tio_orig, tio_new;    /* settings found at open / raw settings applied */
+};
+
+/*
+ * State of one emulated UART.
+ */
+struct uart_softc {
+	pthread_mutex_t mtx;	/* protects all softc elements */
+	uint8_t	data;		/* Data register (R/W) */
+	uint8_t ier;		/* Interrupt enable register (R/W) */
+	uint8_t lcr;		/* Line control register (R/W) */
+	uint8_t mcr;		/* Modem control register (R/W) */
+	uint8_t lsr;		/* Line status register (guest writes are ignored) */
+	uint8_t msr;		/* Modem status register (guest writes are ignored) */
+	uint8_t fcr;		/* FIFO control register (W) */
+	uint8_t scr;		/* Scratch register (R/W) */
+
+	uint8_t dll;		/* Baudrate divisor latch LSB */
+	uint8_t dlh;		/* Baudrate divisor latch MSB */
+
+	struct fifo rxfifo;	/* receive FIFO */
+	struct mevent *mev;	/* read event on the tty fd, set by uart_opentty() */
+
+	struct ttyfd tty;	/* host tty backend */
+	bool	thre_int_pending;	/* THRE interrupt pending */
+
+	void	*arg;		/* passed unchanged to the two callbacks below */
+	uart_intr_func_t intr_assert;	/* called when an interrupt is pending */
+	uart_intr_func_t intr_deassert;	/* called when no interrupt is pending */
+};
+
+static void uart_drain(int fd, enum ev_type ev, void *arg);
+
+/*
+ * atexit(3) handler registered by ttyopen(): put stdin back into the
+ * terminal mode that was saved in tio_stdio_orig.
+ */
+static void
+ttyclose(void)
+{
+
+	tcsetattr(STDIN_FILENO, TCSANOW, &tio_stdio_orig);
+}
+
+/*
+ * Switch the backend tty into raw mode with CLOCAL set, remembering the
+ * previous settings in tf->tio_orig.  When the backend is stdin, the
+ * original settings are also saved globally and restored at process exit.
+ *
+ * NOTE(review): the results of tcgetattr()/tcsetattr() are not checked.
+ */
+static void
+ttyopen(struct ttyfd *tf)
+{
+
+	tcgetattr(tf->fd, &tf->tio_orig);
+
+	tf->tio_new = tf->tio_orig;
+	cfmakeraw(&tf->tio_new);
+	tf->tio_new.c_cflag |= CLOCAL;
+	tcsetattr(tf->fd, TCSANOW, &tf->tio_new);
+
+	if (tf->fd == STDIN_FILENO) {
+		/* Keep a global copy so the atexit handler can undo raw mode. */
+		tio_stdio_orig = tf->tio_orig;
+		atexit(ttyclose);
+	}
+}
+
+/*
+ * Fetch a single byte from the tty backend.
+ *
+ * Returns the byte as a non-negative value, or -1 when read(2) did not
+ * deliver exactly one byte.
+ */
+static int
+ttyread(struct ttyfd *tf)
+{
+	unsigned char byte;
+	ssize_t got;
+
+	got = read(tf->fd, &byte, 1);
+	if (got != 1)
+		return (-1);
+
+	return (byte);
+}
+
+/*
+ * Write one byte to the tty backend.  The result of write(2) is
+ * deliberately ignored, so a byte that cannot be written is dropped.
+ */
+static void
+ttywrite(struct ttyfd *tf, unsigned char wb)
+{
+
+	(void)write(tf->fd, &wb, 1);
+}
+
+/*
+ * Empty the receive FIFO and set its capacity to 'size'.
+ *
+ * If a tty backend is attached, input already queued on the tty is
+ * discarded as well and the read event is (re-)enabled.
+ */
+static void
+rxfifo_reset(struct uart_softc *sc, int size)
+{
+	char flushbuf[32];
+	struct fifo *fifo;
+	ssize_t nread;
+	int error;
+
+	fifo = &sc->rxfifo;
+	bzero(fifo, sizeof(struct fifo));
+	fifo->size = size;
+
+	if (sc->tty.opened) {
+		/*
+		 * Flush any unread input from the tty buffer.
+		 * The loop ends on the first read that does not fill
+		 * flushbuf completely (short read, EOF or error).
+		 */
+		while (1) {
+			nread = read(sc->tty.fd, flushbuf, sizeof(flushbuf));
+			if (nread != sizeof(flushbuf))
+				break;
+		}
+
+		/*
+		 * Enable mevent to trigger when new characters are available
+		 * on the tty fd.
+		 */
+		error = mevent_enable(sc->mev);
+		assert(error == 0);
+	}
+}
+
+/*
+ * Report whether the receive FIFO can take at least one more character.
+ * Returns non-zero if there is room, 0 if the FIFO is full.
+ */
+static int
+rxfifo_available(struct uart_softc *sc)
+{
+
+	return (sc->rxfifo.num < sc->rxfifo.size);
+}
+
+/*
+ * Append 'ch' to the receive FIFO.
+ *
+ * Returns 0 on success and -1 if the FIFO was already full.  When the
+ * insertion fills the FIFO and a tty backend is attached, the read event
+ * is disabled so no further input is pulled from the tty.
+ */
+static int
+rxfifo_putchar(struct uart_softc *sc, uint8_t ch)
+{
+	struct fifo *rx = &sc->rxfifo;
+	int rc;
+
+	if (rx->num >= rx->size)
+		return (-1);
+
+	rx->buf[rx->windex] = ch;
+	rx->windex = (rx->windex + 1) % rx->size;
+	rx->num++;
+
+	/* Stop listening for tty input once the FIFO is full. */
+	if (!rxfifo_available(sc) && sc->tty.opened) {
+		rc = mevent_disable(sc->mev);
+		assert(rc == 0);
+	}
+
+	return (0);
+}
+
+/*
+ * Remove and return the oldest character in the receive FIFO.
+ *
+ * Returns -1 if the FIFO is empty.  If the FIFO was full before the
+ * removal and a tty backend is attached, the read event is enabled again.
+ */
+static int
+rxfifo_getchar(struct uart_softc *sc)
+{
+	struct fifo *rx = &sc->rxfifo;
+	bool was_full;
+	int ch, rc;
+
+	if (rx->num <= 0)
+		return (-1);
+
+	was_full = !rxfifo_available(sc);
+
+	ch = rx->buf[rx->rindex];
+	rx->rindex = (rx->rindex + 1) % rx->size;
+	rx->num--;
+
+	/* There is room again: resume listening for tty input. */
+	if (was_full && sc->tty.opened) {
+		rc = mevent_enable(sc->mev);
+		assert(rc == 0);
+	}
+
+	return (ch);
+}
+
+/*
+ * Return the number of characters currently held in the receive FIFO.
+ */
+static int
+rxfifo_numchars(struct uart_softc *sc)
+{
+
+	return (sc->rxfifo.num);
+}
+
+/*
+ * Put the backend tty into raw mode and register uart_drain() as the
+ * read-event handler for its file descriptor.
+ */
+static void
+uart_opentty(struct uart_softc *sc)
+{
+
+	ttyopen(&sc->tty);
+	sc->mev = mevent_add(sc->tty.fd, EVF_READ, uart_drain, sc);
+	assert(sc->mev != NULL);
+}
+
+/*
+ * Derive the modem status lines (upper MSR bits) from the given MCR value.
+ * The result never contains any of the MSR delta bits.
+ */
+static uint8_t
+modem_status(uint8_t mcr)
+{
+	uint8_t msr;
+
+	if ((mcr & MCR_LOOPBACK) == 0) {
+		/*
+		 * Outside loopback mode DCD and DSR are always asserted so
+		 * that a tty open does not block even with CLOCAL off.
+		 */
+		msr = MSR_DCD | MSR_DSR;
+	} else {
+		/*
+		 * Loopback mode mirrors the MCR outputs into the MSR:
+		 * RTS -> CTS, DTR -> DSR, OUT1 -> RI, OUT2 -> DCD.
+		 */
+		msr = 0;
+		msr |= (mcr & MCR_RTS) ? MSR_CTS : 0;
+		msr |= (mcr & MCR_DTR) ? MSR_DSR : 0;
+		msr |= (mcr & MCR_OUT1) ? MSR_RI : 0;
+		msr |= (mcr & MCR_OUT2) ? MSR_DCD : 0;
+	}
+	assert((msr & MSR_DELTA_MASK) == 0);
+
+	return (msr);
+}
+
+/*
+ * Determine the highest-priority interrupt condition that is both pending
+ * and enabled in the IER.  Conditions are tested in this order:
+ * - receiver line status (overrun error)
+ * - receive data available
+ * - transmit holding register empty
+ * - modem status change
+ *
+ * Returns the matching IIR_* reason, or IIR_NOPEND if there is none.
+ */
+static int
+uart_intr_reason(struct uart_softc *sc)
+{
+
+	if ((sc->ier & IER_ERLS) != 0 && (sc->lsr & LSR_OE) != 0)
+		return (IIR_RLS);
+
+	if ((sc->ier & IER_ERXRDY) != 0 && rxfifo_numchars(sc) > 0)
+		return (IIR_RXTOUT);
+
+	if ((sc->ier & IER_ETXRDY) != 0 && sc->thre_int_pending)
+		return (IIR_TXRDY);
+
+	if ((sc->ier & IER_EMSC) != 0 && (sc->msr & MSR_DELTA_MASK) != 0)
+		return (IIR_MLSC);
+
+	return (IIR_NOPEND);
+}
+
+/*
+ * Bring the UART into its initial state: load the divisor latch for
+ * DEFAULT_BAUD, recompute the modem status from the current MCR and fall
+ * back to a one-character receive buffer.
+ */
+static void
+uart_reset(struct uart_softc *sc)
+{
+	uint16_t divisor;
+
+	divisor = DEFAULT_RCLK / DEFAULT_BAUD / 16;
+	sc->dll = divisor;
+	/*
+	 * The latch MSB is bits 8-15 of the 16-bit divisor.  Shifting a
+	 * 16-bit value by 16 would always yield 0.
+	 */
+	sc->dlh = divisor >> 8;
+	sc->msr = modem_status(sc->mcr);
+
+	rxfifo_reset(sc, 1);	/* no fifo until enabled by software */
+}
+
+/*
+ * Drive the COM port's interrupt pin: call the assert callback when an
+ * interrupt condition is pending, the deassert callback otherwise.
+ */
+static void
+uart_toggle_intr(struct uart_softc *sc)
+{
+	uart_intr_func_t handler;
+	uint8_t pending;
+
+	pending = uart_intr_reason(sc);
+	handler = (pending == IIR_NOPEND) ? sc->intr_deassert : sc->intr_assert;
+
+	(*handler)(sc->arg);
+}
+
+/*
+ * Read-event callback for the tty backend: move pending input from the
+ * tty into the receive FIFO and update the interrupt pin.
+ *
+ * In loopback mode a single character is read and thrown away instead.
+ */
+static void
+uart_drain(int fd, enum ev_type ev, void *arg)
+{
+	struct uart_softc *sc = arg;
+	int ch;
+
+	assert(fd == sc->tty.fd);
+	assert(ev == EVF_READ);
+
+	/*
+	 * This routine runs in the context of the mevent thread, so take
+	 * the softc lock to protect against concurrent access from a
+	 * vCPU i/o exit.
+	 */
+	pthread_mutex_lock(&sc->mtx);
+
+	if ((sc->mcr & MCR_LOOPBACK) == 0) {
+		/* Pull input until the FIFO is full or the tty runs dry. */
+		while (rxfifo_available(sc)) {
+			ch = ttyread(&sc->tty);
+			if (ch == -1)
+				break;
+			rxfifo_putchar(sc, ch);
+		}
+		uart_toggle_intr(sc);
+	} else {
+		(void) ttyread(&sc->tty);
+	}
+
+	pthread_mutex_unlock(&sc->mtx);
+}
+
+/*
+ * Emulate a guest write of 'value' to the UART register at 'offset'.
+ *
+ * The softc lock is held for the duration of the call and the interrupt
+ * pin is re-evaluated before returning.
+ */
+void
+uart_write(struct uart_softc *sc, int offset, uint8_t value)
+{
+	int fifosz;
+	uint8_t msr;
+
+	pthread_mutex_lock(&sc->mtx);
+
+	/*
+	 * Take care of the special case DLAB accesses first
+	 */
+	if ((sc->lcr & LCR_DLAB) != 0) {
+		if (offset == REG_DLL) {
+			sc->dll = value;
+			goto done;
+		}
+		
+		if (offset == REG_DLH) {
+			sc->dlh = value;
+			goto done;
+		}
+	}
+
+        switch (offset) {
+	case REG_DATA:
+		/*
+		 * In loopback mode the byte goes straight into the receive
+		 * FIFO; a full FIFO is reported as an overrun error.
+		 */
+		if (sc->mcr & MCR_LOOPBACK) {
+			if (rxfifo_putchar(sc, value) != 0)
+				sc->lsr |= LSR_OE;
+		} else if (sc->tty.opened) {
+			ttywrite(&sc->tty, value);
+		} /* else drop on floor */
+		sc->thre_int_pending = true;
+		break;
+	case REG_IER:
+		/*
+		 * Apply mask so that bits 4-7 are 0
+		 * Also enables bits 0-3 only if they're 1
+		 */
+		sc->ier = value & 0x0F;
+		break;
+		case REG_FCR:
+			/*
+			 * When moving from FIFO and 16450 mode and vice versa,
+			 * the FIFO contents are reset.
+			 */
+			if ((sc->fcr & FCR_ENABLE) ^ (value & FCR_ENABLE)) {
+				fifosz = (value & FCR_ENABLE) ? FIFOSZ : 1;
+				rxfifo_reset(sc, fifosz);
+			}
+
+			/*
+			 * The FCR_ENABLE bit must be '1' for the programming
+			 * of other FCR bits to be effective.
+			 */
+			if ((value & FCR_ENABLE) == 0) {
+				sc->fcr = 0;
+			} else {
+				if ((value & FCR_RCV_RST) != 0)
+					rxfifo_reset(sc, FIFOSZ);
+
+				sc->fcr = value &
+					 (FCR_ENABLE | FCR_DMA | FCR_RX_MASK);
+			}
+			break;
+		case REG_LCR:
+			sc->lcr = value;
+			break;
+		case REG_MCR:
+			/* Apply mask so that bits 5-7 are 0 */
+			sc->mcr = value & 0x1F;
+			msr = modem_status(sc->mcr);
+
+			/*
+			 * Detect if there has been any change between the
+			 * previous and the new value of MSR. If there is
+			 * then assert the appropriate MSR delta bit.
+			 */
+			if ((msr & MSR_CTS) ^ (sc->msr & MSR_CTS))
+				sc->msr |= MSR_DCTS;
+			if ((msr & MSR_DSR) ^ (sc->msr & MSR_DSR))
+				sc->msr |= MSR_DDSR;
+			if ((msr & MSR_DCD) ^ (sc->msr & MSR_DCD))
+				sc->msr |= MSR_DDCD;
+			/* RI only sets its delta bit on a 1 -> 0 transition. */
+			if ((sc->msr & MSR_RI) != 0 && (msr & MSR_RI) == 0)
+				sc->msr |= MSR_TERI;
+
+			/*
+			 * Update the value of MSR while retaining the delta
+			 * bits.
+			 */
+			sc->msr &= MSR_DELTA_MASK;
+			sc->msr |= msr;
+			break;
+		case REG_LSR:
+			/*
+			 * Line status register is not meant to be written to
+			 * during normal operation.
+			 */
+			break;
+		case REG_MSR:
+			/*
+			 * As far as I can tell MSR is a read-only register.
+			 */
+			break;
+		case REG_SCR:
+			sc->scr = value;
+			break;
+		default:
+			break;
+	}
+
+done:
+	/* Any register write may change the pending interrupt condition. */
+	uart_toggle_intr(sc);
+	pthread_mutex_unlock(&sc->mtx);
+}
+
+/*
+ * Emulate a guest read of the UART register at 'offset' and return its
+ * value.  Unknown offsets read as 0xFF.
+ *
+ * Some reads have side effects (noted below).  The softc lock is held for
+ * the duration of the call and the interrupt pin is re-evaluated before
+ * returning.
+ */
+uint8_t
+uart_read(struct uart_softc *sc, int offset)
+{
+	uint8_t iir, intr_reason, reg;
+
+	pthread_mutex_lock(&sc->mtx);
+
+	/*
+	 * Take care of the special case DLAB accesses first
+	 */
+	if ((sc->lcr & LCR_DLAB) != 0) {
+		if (offset == REG_DLL) {
+			reg = sc->dll;
+			goto done;
+		}
+		
+		if (offset == REG_DLH) {
+			reg = sc->dlh;
+			goto done;
+		}
+	}
+
+	switch (offset) {
+	case REG_DATA:
+		/*
+		 * Consumes one character from the receive FIFO.  On an empty
+		 * FIFO rxfifo_getchar() returns -1, which reads as 0xFF here.
+		 */
+		reg = rxfifo_getchar(sc);
+		break;
+	case REG_IER:
+		reg = sc->ier;
+		break;
+	case REG_IIR:
+		iir = (sc->fcr & FCR_ENABLE) ? IIR_FIFO_MASK : 0;
+
+		intr_reason = uart_intr_reason(sc);
+			
+		/*
+		 * Deal with side effects of reading the IIR register
+		 */
+		if (intr_reason == IIR_TXRDY)
+			sc->thre_int_pending = false;
+
+		iir |= intr_reason;
+
+		reg = iir;
+		break;
+	case REG_LCR:
+		reg = sc->lcr;
+		break;
+	case REG_MCR:
+		reg = sc->mcr;
+		break;
+	case REG_LSR:
+		/* Transmitter is always ready for more data */
+		sc->lsr |= LSR_TEMT | LSR_THRE;
+
+		/* Check for new receive data */
+		if (rxfifo_numchars(sc) > 0)
+			sc->lsr |= LSR_RXRDY;
+		else
+			sc->lsr &= ~LSR_RXRDY;
+
+		reg = sc->lsr;
+
+		/* The LSR_OE bit is cleared on LSR read */
+		sc->lsr &= ~LSR_OE;
+		break;
+	case REG_MSR:
+		/*
+		 * MSR delta bits are cleared on read
+		 */
+		reg = sc->msr;
+		sc->msr &= ~MSR_DELTA_MASK;
+		break;
+	case REG_SCR:
+		reg = sc->scr;
+		break;
+	default:
+		reg = 0xFF;
+		break;
+	}
+
+done:
+	/* The read may have cleared the condition that held the pin asserted. */
+	uart_toggle_intr(sc);
+	pthread_mutex_unlock(&sc->mtx);
+
+	return (reg);
+}
+
+/*
+ * Claim legacy COM port number 'which' (an index into uart_lres[]).
+ *
+ * On success the port's I/O base address and IRQ are stored through
+ * 'baseaddr' and 'irq' and 0 is returned.  Returns -1 if 'which' is out
+ * of range or the port has already been claimed.
+ */
+int
+uart_legacy_alloc(int which, int *baseaddr, int *irq)
+{
+
+	if (which < 0 || which >= UART_NLDEVS)
+		return (-1);
+	if (uart_lres[which].inuse)
+		return (-1);
+
+	*baseaddr = uart_lres[which].baseaddr;
+	*irq = uart_lres[which].irq;
+	uart_lres[which].inuse = true;
+
+	return (0);
+}
+
+/*
+ * Allocate and initialize the state of one emulated UART.
+ *
+ * 'intr_assert' and 'intr_deassert' are called with 'arg' whenever the
+ * interrupt pin has to be raised or lowered.  Returns the new softc; no
+ * backend is attached until uart_set_backend() is called.
+ */
+struct uart_softc *
+uart_init(uart_intr_func_t intr_assert, uart_intr_func_t intr_deassert,
+    void *arg)
+{
+	struct uart_softc *sc;
+
+	sc = calloc(1, sizeof(*sc));
+	/* Stop here on allocation failure instead of dereferencing NULL. */
+	assert(sc != NULL);
+
+	sc->arg = arg;
+	sc->intr_assert = intr_assert;
+	sc->intr_deassert = intr_deassert;
+
+	pthread_mutex_init(&sc->mtx, NULL);
+
+	uart_reset(sc);
+
+	return (sc);
+}
+
+/*
+ * Open the tty device named by 'opts' and attach it as the backend.
+ *
+ * Returns 0 on success.  Returns -1 if the path cannot be opened or does
+ * not refer to a tty; in the latter case the descriptor is closed again.
+ */
+static int
+uart_tty_backend(struct uart_softc *sc, const char *opts)
+{
+	int fd;
+
+	fd = open(opts, O_RDWR | O_NONBLOCK);
+	/* Only a negative value signals failure; 0 is a valid descriptor. */
+	if (fd < 0)
+		return (-1);
+
+	if (!isatty(fd)) {
+		/* Not a terminal: do not leak the descriptor. */
+		close(fd);
+		return (-1);
+	}
+
+	sc->tty.fd = fd;
+	sc->tty.opened = true;
+
+	return (0);
+}
+
+/*
+ * Attach a host backend to the UART.
+ *
+ * 'opts' is NULL (no backend, returns 0), the string "stdio", or the path
+ * of a tty device.  Only one UART may use stdio; a second request fails.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+uart_set_backend(struct uart_softc *sc, const char *opts)
+{
+	int retval;
+
+	retval = -1;
+
+	if (opts == NULL)
+		return (0);
+
+	if (strcmp("stdio", opts) == 0) {
+		if (!uart_stdio) {
+			sc->tty.fd = STDIN_FILENO;
+			sc->tty.opened = true;
+			uart_stdio = true;
+			retval = 0;
+		}
+	} else if (uart_tty_backend(sc, opts) == 0) {
+		retval = 0;
+	}
+
+	/* Make the backend file descriptor non-blocking */
+	if (retval == 0)
+		retval = fcntl(sc->tty.fd, F_SETFL, O_NONBLOCK);
+
+	/* Raw mode and the read event are only set up once all of the above succeeded. */
+	if (retval == 0)
+		uart_opentty(sc);
+
+	return (retval);
+}


Property changes on: trunk/usr.sbin/bhyve/uart_emul.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/uart_emul.h
===================================================================
--- trunk/usr.sbin/bhyve/uart_emul.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/uart_emul.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,46 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013 Neel Natu <neel at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/uart_emul.h 257396 2013-10-30 20:42:09Z neel $
+ */
+
+#ifndef _UART_EMUL_H_
+#define	_UART_EMUL_H_
+
+
+/* NOTE(review): presumably the size of the UART's I/O BAR in bytes - confirm against the callers. */
+#define	UART_IO_BAR_SIZE	8
+
+/* Opaque UART state; the definition lives in uart_emul.c. */
+struct uart_softc;
+
+/* Interrupt callback; 'arg' is the pointer that was handed to uart_init(). */
+typedef void (*uart_intr_func_t)(void *arg);
+
+/* Create a UART whose interrupt pin is driven through the two callbacks. */
+struct uart_softc *uart_init(uart_intr_func_t intr_assert,
+		uart_intr_func_t intr_deassert, void *arg);
+
+/* Claim legacy COM port 'unit'; returns 0 and fills in ioaddr/irq, or -1. */
+int	uart_legacy_alloc(int unit, int *ioaddr, int *irq);
+/* Emulate a guest read of / write to the register at 'offset'. */
+uint8_t	uart_read(struct uart_softc *sc, int offset);
+void	uart_write(struct uart_softc *sc, int offset, uint8_t value);
+/* Attach a backend: NULL for none, "stdio", or a tty device path. */
+int	uart_set_backend(struct uart_softc *sc, const char *opt);
+#endif


Property changes on: trunk/usr.sbin/bhyve/uart_emul.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/virtio.c
===================================================================
--- trunk/usr.sbin/bhyve/virtio.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/virtio.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,778 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013  Chris Torek <torek @ torek net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/virtio.c 284900 2015-06-28 03:22:26Z neel $");
+
+#include <sys/param.h>
+#include <sys/uio.h>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "virtio.h"
+
+/*
+ * Functions for dealing with generalized "virtual devices" as
+ * defined by <https://www.google.com/#output=search&q=virtio+spec>
+ */
+
+/*
+ * In case we decide to relax the "virtio softc comes at the
+ * front of virtio-based device softc" constraint, let's use
+ * this to convert.
+ */
+#define DEV_SOFTC(vs) ((void *)(vs))
+
+/*
+ * Tie a virtio_softc to its constants, to the PCI device instance and to
+ * its array of queues.  Every queue gets a back pointer to the softc and
+ * its own queue number.
+ *
+ * The virtio softc must sit at the very start of the device softc, so
+ * 'vs' and 'dev_softc' have to be the same address.
+ */
+void
+vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
+		void *dev_softc, struct pci_devinst *pi,
+		struct vqueue_info *queues)
+{
+	struct vqueue_info *vq;
+	int qnum;
+
+	/* vs and dev_softc addresses must match */
+	assert((void *)vs == dev_softc);
+
+	vs->vs_vc = vc;
+	vs->vs_pi = pi;
+	pi->pi_arg = vs;
+	vs->vs_queues = queues;
+
+	for (qnum = 0; qnum < vc->vc_nvq; qnum++) {
+		vq = &queues[qnum];
+		vq->vq_vs = vs;
+		vq->vq_num = qnum;
+	}
+}
+
+/*
+ * Device-wide reset.  Every queue is invalidated: its flags (including
+ * VQ_ALLOC), ring positions and page frame number are cleared, while the
+ * internal ring pointers are left alone.
+ *
+ * Negotiated features go back to "none", the selected queue to 0, and
+ * all MSI-X vectors (per queue and config) to NO_VECTOR.  A pending
+ * legacy interrupt is deasserted.
+ *
+ * If the softc has a mutex, the caller must hold it.
+ */
+void
+vi_reset_dev(struct virtio_softc *vs)
+{
+	struct vqueue_info *vq;
+	int qi, nqueues;
+
+	if (vs->vs_mtx)
+		assert(pthread_mutex_isowned_np(vs->vs_mtx));
+
+	nqueues = vs->vs_vc->vc_nvq;
+	for (qi = 0; qi < nqueues; qi++) {
+		vq = &vs->vs_queues[qi];
+		vq->vq_flags = 0;
+		vq->vq_last_avail = 0;
+		vq->vq_save_used = 0;
+		vq->vq_pfn = 0;
+		vq->vq_msix_idx = VIRTIO_MSI_NO_VECTOR;
+	}
+
+	vs->vs_negotiated_caps = 0;
+	vs->vs_curq = 0;
+	/* vs->vs_status = 0; -- redundant */
+
+	/* Drop the legacy interrupt line before forgetting the ISR state. */
+	if (vs->vs_isr)
+		pci_lintr_deassert(vs->vs_pi);
+	vs->vs_isr = 0;
+	vs->vs_msix_cfg_idx = VIRTIO_MSI_NO_VECTOR;
+}
+
+/*
+ * Set I/O BAR (usually 0) to map PCI config registers.
+ *
+ * The BAR is sized to hold the virtio header up to VTCFG_R_CFG1 plus the
+ * device-specific configuration area (vc_cfgsize).
+ */
+void
+vi_set_io_bar(struct virtio_softc *vs, int barnum)
+{
+	size_t size;
+
+	/*
+	 * ??? should we use CFG0 if MSI-X is disabled?
+	 * Existing code did not...
+	 */
+	size = VTCFG_R_CFG1 + vs->vs_vc->vc_cfgsize;
+	pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_IO, size);
+}
+
+/*
+ * Set up interrupt delivery for a virtio device.
+ *
+ * If 'use_msix' is set, the device is reset (so that all vectors start
+ * out as NO_VECTOR) and an MSI-X capability is added with one vector per
+ * queue plus one for the config vector.  In every case a single-vector
+ * MSI capability is added and a legacy interrupt is requested.
+ *
+ * Returns 0 on success, 1 if the MSI-X capability could not be added.
+ */
+int
+vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix)
+{
+	int nvec;
+
+	if (use_msix) {
+		vs->vs_flags |= VIRTIO_USE_MSIX;
+		VS_LOCK(vs);
+		vi_reset_dev(vs); /* set all vectors to NO_VECTOR */
+		VS_UNLOCK(vs);
+		nvec = vs->vs_vc->vc_nvq + 1;
+		if (pci_emul_add_msixcap(vs->vs_pi, nvec, barnum))
+			return (1);
+	} else
+		vs->vs_flags &= ~VIRTIO_USE_MSIX;
+
+	/* Only 1 MSI vector for bhyve */
+	pci_emul_add_msicap(vs->vs_pi, 1);
+
+	/* Legacy interrupts are mandatory for virtio devices */
+	pci_lintr_request(vs->vs_pi);
+
+	return (0);
+}
+
+/*
+ * Initialize the currently-selected virtio queue (vs->vs_curq).
+ * The guest just gave us a page frame number, from which we can
+ * calculate the addresses of the queue.
+ *
+ * The descriptor table, the "avail" ring and the "used" ring are located
+ * inside one guest region of vring_size(vq_qsize) bytes starting at
+ * pfn << VRING_PFN.
+ *
+ * NOTE(review): the pointer returned by paddr_guest2host() is used
+ * without a check - confirm it cannot fail for a guest-supplied pfn.
+ */
+void
+vi_vq_init(struct virtio_softc *vs, uint32_t pfn)
+{
+	struct vqueue_info *vq;
+	uint64_t phys;
+	size_t size;
+	char *base;
+
+	vq = &vs->vs_queues[vs->vs_curq];
+	vq->vq_pfn = pfn;
+	phys = (uint64_t)pfn << VRING_PFN;
+	size = vring_size(vq->vq_qsize);
+	base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size);
+
+	/* First page(s) are descriptors... */
+	vq->vq_desc = (struct virtio_desc *)base;
+	base += vq->vq_qsize * sizeof(struct virtio_desc);
+
+	/*
+	 * ... immediately followed by "avail" ring (entirely uint16_t's):
+	 * two leading words, vq_qsize ring entries and one trailing word.
+	 */
+	vq->vq_avail = (struct vring_avail *)base;
+	base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t);
+
+	/* Then it's rounded up to the next page... */
+	base = (char *)roundup2((uintptr_t)base, VRING_ALIGN);
+
+	/* ... and the last page(s) are the used ring. */
+	vq->vq_used = (struct vring_used *)base;
+
+	/* Mark queue as allocated, and start at 0 when we use it. */
+	vq->vq_flags = VQ_ALLOC;
+	vq->vq_last_avail = 0;
+	vq->vq_save_used = 0;
+}
+
+/*
+ * Helper inline for vq_getchain(): record the i'th "real"
+ * descriptor.
+ *
+ * The descriptor's guest address is translated to a host pointer and
+ * stored, together with its length, in iov[i]; if 'flags' is non-NULL the
+ * descriptor flags are stored in flags[i].  Nothing is recorded once 'i'
+ * has reached n_iov, so the caller can keep counting past the end of the
+ * array.
+ */
+static inline void
+_vq_record(int i, volatile struct virtio_desc *vd, struct vmctx *ctx,
+	   struct iovec *iov, int n_iov, uint16_t *flags) {
+
+	if (i >= n_iov)
+		return;
+	iov[i].iov_base = paddr_guest2host(ctx, vd->vd_addr, vd->vd_len);
+	iov[i].iov_len = vd->vd_len;
+	if (flags != NULL)
+		flags[i] = vd->vd_flags;
+}
+#define	VQ_MAX_DESCRIPTORS	512	/* see below */
+
+/*
+ * Examine the chain of descriptors starting at the "next one" to
+ * make sure that they describe a sensible request.  If so, return
+ * the number of "real" descriptors that would be needed/used in
+ * acting on this request.  This may be smaller than the number of
+ * available descriptors, e.g., if there are two available but
+ * they are two separate requests, this just returns 1.  Or, it
+ * may be larger: if there are indirect descriptors involved,
+ * there may only be one descriptor available but it may be an
+ * indirect pointing to eight more.  We return 8 in this case,
+ * i.e., we do not count the indirect descriptors, only the "real"
+ * ones.
+ *
+ * Basically, this vets the vd_flags and vd_next field of each
+ * descriptor and tells you how many are involved.  Since some may
+ * be indirect, this also needs the vmctx (in the pci_devinst
+ * at vs->vs_pi) so that it can find indirect descriptors.
+ *
+ * As we process each descriptor, we copy and adjust it (guest to
+ * host address wise, also using the vmtctx) into the given iov[]
+ * array (of the given size).  If the array overflows, we stop
+ * placing values into the array but keep processing descriptors,
+ * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1.
+ * So you, the caller, must not assume that iov[] is as big as the
+ * return value (you can process the same thing twice to allocate
+ * a larger iov array if needed, or supply a zero length to find
+ * out how much space is needed).
+ *
+ * If you want to verify the WRITE flag on each descriptor, pass a
+ * non-NULL "flags" pointer to an array of "uint16_t" of the same size
+ * as n_iov and we'll copy each vd_flags field after unwinding any
+ * indirects.
+ *
+ * If some descriptor(s) are invalid, this prints a diagnostic message
+ * and returns -1.  If no descriptors are ready now it simply returns 0.
+ *
+ * You are assumed to have done a vq_ring_ready() if needed (note
+ * that vq_has_descs() does one).
+ */
+int
+vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
+	    struct iovec *iov, int n_iov, uint16_t *flags)
+{
+	int i;
+	u_int ndesc, n_indir;
+	u_int idx, next;
+	volatile struct virtio_desc *vdir, *vindir, *vp;
+	struct vmctx *ctx;
+	struct virtio_softc *vs;
+	const char *name;
+
+	vs = vq->vq_vs;
+	name = vs->vs_vc->vc_name;
+
+	/*
+	 * Note: it's the responsibility of the guest not to
+	 * update vq->vq_avail->va_idx until all of the descriptors
+	 * the guest has written are valid (including all their
+	 * vd_next fields and vd_flags).
+	 *
+	 * Compute (va_idx - last_avail) in integers mod 2**16.  This is
+	 * the number of descriptor chains the guest driver has made
+	 * available since the last time we updated vq->vq_last_avail.
+	 *
+	 * We just need to do the subtraction as an unsigned int,
+	 * then trim off excess bits.
+	 */
+	idx = vq->vq_last_avail;
+	ndesc = (uint16_t)((u_int)vq->vq_avail->va_idx - idx);
+	if (ndesc == 0)
+		return (0);
+	if (ndesc > vq->vq_qsize) {
+		/* XXX need better way to diagnose issues */
+		fprintf(stderr,
+		    "%s: ndesc (%u) out of range, driver confused?\r\n",
+		    name, (u_int)ndesc);
+		return (-1);
+	}
+
+	/*
+	 * Now count/parse "involved" descriptors starting from
+	 * the head of the chain.
+	 *
+	 * To prevent loops, we could be more complicated and
+	 * check whether we're re-visiting a previously visited
+	 * index, but we just abort if the count gets excessive.
+	 *
+	 * The head index is handed back through *pidx, and the chain is
+	 * consumed (vq_last_avail advanced) before it is validated; the
+	 * error returns below do not undo that.
+	 */
+	ctx = vs->vs_pi->pi_vmctx;
+	*pidx = next = vq->vq_avail->va_ring[idx & (vq->vq_qsize - 1)];
+	vq->vq_last_avail++;
+	for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->vd_next) {
+		if (next >= vq->vq_qsize) {
+			fprintf(stderr,
+			    "%s: descriptor index %u out of range, "
+			    "driver confused?\r\n",
+			    name, next);
+			return (-1);
+		}
+		vdir = &vq->vq_desc[next];
+		if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) {
+			/* Ordinary descriptor: record it directly. */
+			_vq_record(i, vdir, ctx, iov, n_iov, flags);
+			i++;
+		} else if ((vs->vs_vc->vc_hv_caps &
+		    VIRTIO_RING_F_INDIRECT_DESC) == 0) {
+			fprintf(stderr,
+			    "%s: descriptor has forbidden INDIRECT flag, "
+			    "driver confused?\r\n",
+			    name);
+			return (-1);
+		} else {
+			/*
+			 * Indirect descriptor: vd_len is the size of a table
+			 * of 16-byte descriptors, so it must be a non-zero
+			 * multiple of 16.
+			 */
+			n_indir = vdir->vd_len / 16;
+			if ((vdir->vd_len & 0xf) || n_indir == 0) {
+				fprintf(stderr,
+				    "%s: invalid indir len 0x%x, "
+				    "driver confused?\r\n",
+				    name, (u_int)vdir->vd_len);
+				return (-1);
+			}
+			/*
+			 * NOTE(review): the translated pointer is used
+			 * without a check - confirm paddr_guest2host()
+			 * cannot fail for a guest-supplied address.
+			 */
+			vindir = paddr_guest2host(ctx,
+			    vdir->vd_addr, vdir->vd_len);
+			/*
+			 * Indirects start at the 0th, then follow
+			 * their own embedded "next"s until those run
+			 * out.  Each one's indirect flag must be off
+			 * (we don't really have to check, could just
+			 * ignore errors...).
+			 */
+			next = 0;
+			for (;;) {
+				vp = &vindir[next];
+				if (vp->vd_flags & VRING_DESC_F_INDIRECT) {
+					fprintf(stderr,
+					    "%s: indirect desc has INDIR flag,"
+					    " driver confused?\r\n",
+					    name);
+					return (-1);
+				}
+				_vq_record(i, vp, ctx, iov, n_iov, flags);
+				if (++i > VQ_MAX_DESCRIPTORS)
+					goto loopy;
+				if ((vp->vd_flags & VRING_DESC_F_NEXT) == 0)
+					break;
+				next = vp->vd_next;
+				if (next >= n_indir) {
+					fprintf(stderr,
+					    "%s: invalid next %u > %u, "
+					    "driver confused?\r\n",
+					    name, (u_int)next, n_indir);
+					return (-1);
+				}
+			}
+		}
+		/* End of chain: report how many real descriptors were seen. */
+		if ((vdir->vd_flags & VRING_DESC_F_NEXT) == 0)
+			return (i);
+	}
+loopy:
+	fprintf(stderr,
+	    "%s: descriptor loop? count > %d - driver confused?\r\n",
+	    name, i);
+	return (-1);
+}
+
+/*
+ * Return the currently-first request chain back to the available queue.
+ *
+ * (This chain is the one you handled when you called vq_getchain()
+ * and used its positive return value.)
+ *
+ * This simply steps vq_last_avail back by one, undoing the increment
+ * that vq_getchain() performed.
+ */
+void
+vq_retchain(struct vqueue_info *vq)
+{
+
+	vq->vq_last_avail--;
+}
+
+/*
+ * Return specified request chain to the guest, setting its I/O length
+ * to the provided value.
+ *
+ * (This chain is the one you handled when you called vq_getchain()
+ * and used its positive return value.)
+ *
+ * 'idx' is stored as the chain's id in the used-ring entry; presumably
+ * it is the value vq_getchain() returned through *pidx.
+ */
+void
+vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
+{
+	uint16_t uidx, mask;
+	volatile struct vring_used *vuh;
+	volatile struct virtio_used *vue;
+
+	/*
+	 * Notes:
+	 *  - mask is N-1 where N is a power of 2 so computes x % N
+	 *  - vuh points to the "used" data shared with guest
+	 *  - vue points to the "used" ring entry we want to update
+	 *
+	 * (I apologize for the two fields named vu_idx; the
+	 * virtio spec calls the one that vue points to, "id"...)
+	 */
+	mask = vq->vq_qsize - 1;
+	vuh = vq->vq_used;
+
+	/* Fill in the next free entry, then publish it by bumping vu_idx. */
+	uidx = vuh->vu_idx;
+	vue = &vuh->vu_ring[uidx++ & mask];
+	vue->vu_idx = idx;
+	vue->vu_tlen = iolen;
+	vuh->vu_idx = uidx;
+}
+
+/*
+ * Driver has finished processing "available" chains and calling
+ * vq_relchain on each one.  If driver used all the available
+ * chains, used_all_avail should be set.
+ *
+ * If the "used" index moved we may need to inform the guest, i.e.,
+ * deliver an interrupt.  Even if the used index did NOT move we
+ * may need to deliver an interrupt, if the avail ring is empty and
+ * we are supposed to interrupt on empty.
+ *
+ * Note that used_all_avail is provided by the caller because it's
+ * a snapshot of the ring state when the caller decided to finish
+ * processing -- it's possible that descriptors became available after
+ * that point.  (It's also typically a constant 1/True as well.)
+ */
+void
+vq_endchains(struct vqueue_info *vq, int used_all_avail)
+{
+	struct virtio_softc *vs;
+	uint16_t event_idx, new_idx, old_idx;
+	int intr;
+
+	/*
+	 * Interrupt generation: if we're using EVENT_IDX,
+	 * interrupt if we've crossed the event threshold.
+	 * Otherwise interrupt is generated if we added "used" entries,
+	 * but suppressed by VRING_AVAIL_F_NO_INTERRUPT.
+	 *
+	 * In any case, though, if NOTIFY_ON_EMPTY is set and the
+	 * entire avail was processed, we need to interrupt always.
+	 */
+	vs = vq->vq_vs;
+	/* Remember the used index for the next call and compare with the last one. */
+	old_idx = vq->vq_save_used;
+	vq->vq_save_used = new_idx = vq->vq_used->vu_idx;
+	if (used_all_avail &&
+	    (vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY))
+		intr = 1;
+	else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) {
+		event_idx = VQ_USED_EVENT_IDX(vq);
+		/*
+		 * This calculation is per docs and the kernel
+		 * (see src/sys/dev/virtio/virtio_ring.h).
+		 */
+		intr = (uint16_t)(new_idx - event_idx - 1) <
+			(uint16_t)(new_idx - old_idx);
+	} else {
+		intr = new_idx != old_idx &&
+		    !(vq->vq_avail->va_flags & VRING_AVAIL_F_NO_INTERRUPT);
+	}
+	if (intr)
+		vq_interrupt(vs, vq);
+}
+
+/*
+ * The generic virtio registers, one entry per register.
+ * Note: entries are in ascending cr_offset order, which is what lets
+ * vi_find_cr() locate one with a fast (binary) search.
+ */
+static struct config_reg {
+	uint16_t	cr_offset;	/* register offset */
+	uint8_t		cr_size;	/* size (bytes) */
+	uint8_t		cr_ro;		/* true => reg is read only */
+	const char	*cr_name;	/* name of reg */
+} config_regs[] = {
+	{ .cr_offset = VTCFG_R_HOSTCAP,
+	  .cr_size = 4, .cr_ro = 1, .cr_name = "HOSTCAP" },
+	{ .cr_offset = VTCFG_R_GUESTCAP,
+	  .cr_size = 4, .cr_ro = 0, .cr_name = "GUESTCAP" },
+	{ .cr_offset = VTCFG_R_PFN,
+	  .cr_size = 4, .cr_ro = 0, .cr_name = "PFN" },
+	{ .cr_offset = VTCFG_R_QNUM,
+	  .cr_size = 2, .cr_ro = 1, .cr_name = "QNUM" },
+	{ .cr_offset = VTCFG_R_QSEL,
+	  .cr_size = 2, .cr_ro = 0, .cr_name = "QSEL" },
+	{ .cr_offset = VTCFG_R_QNOTIFY,
+	  .cr_size = 2, .cr_ro = 0, .cr_name = "QNOTIFY" },
+	{ .cr_offset = VTCFG_R_STATUS,
+	  .cr_size = 1, .cr_ro = 0, .cr_name = "STATUS" },
+	{ .cr_offset = VTCFG_R_ISR,
+	  .cr_size = 1, .cr_ro = 0, .cr_name = "ISR" },
+	{ .cr_offset = VTCFG_R_CFGVEC,
+	  .cr_size = 2, .cr_ro = 0, .cr_name = "CFGVEC" },
+	{ .cr_offset = VTCFG_R_QVEC,
+	  .cr_size = 2, .cr_ro = 0, .cr_name = "QVEC" },
+};
+
+/*
+ * Find the generic virtio register that starts at "offset".
+ *
+ * Binary search over config_regs[], which must stay sorted by
+ * cr_offset.  Returns the matching entry, or NULL if no register
+ * starts at exactly that offset.
+ *
+ * The search window is half-open, [lo, hi), so the unsigned bounds
+ * can never wrap.  A closed window needs "hi = mid - 1", which wraps
+ * to UINT_MAX when mid is 0 (an offset that sorts below the first
+ * entry, e.g. a negative one) and then indexes outside the table.
+ */
+static inline struct config_reg *
+vi_find_cr(int offset) {
+	u_int hi, lo, mid;
+	struct config_reg *cr;
+
+	lo = 0;
+	hi = sizeof(config_regs) / sizeof(*config_regs);
+	while (lo < hi) {
+		mid = lo + ((hi - lo) >> 1);
+		cr = &config_regs[mid];
+		if (cr->cr_offset == offset)
+			return (cr);
+		if (cr->cr_offset < offset)
+			lo = mid + 1;	/* continue in (mid, hi) */
+		else
+			hi = mid;	/* continue in [lo, mid) */
+	}
+	return (NULL);
+}
+
+/*
+ * Handle pci config space reads.
+ * If it's to the MSI-X info, do that.
+ * If it's part of the virtio standard stuff, do that.
+ * Otherwise dispatch to the actual driver.
+ *
+ * The access is identified by a BAR index and an offset within that
+ * BAR.  Only BAR 0 is accepted, plus the MSI-X table/PBA BARs when
+ * VIRTIO_USE_MSIX is set.  ctx and vcpu are not used here.
+ *
+ * Returns the value read.  For a bad offset or size a diagnostic is
+ * printed to stderr and the preset all-ones pattern is what normally
+ * gets returned.
+ */
+uint64_t
+vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+	    int baridx, uint64_t offset, int size)
+{
+	struct virtio_softc *vs = pi->pi_arg;
+	struct virtio_consts *vc;
+	struct config_reg *cr;
+	uint64_t virtio_config_size, max;
+	const char *name;
+	uint32_t newoff;
+	uint32_t value;
+	int error;
+
+	/* MSI-X table/PBA accesses are handed off before any locking. */
+	if (vs->vs_flags & VIRTIO_USE_MSIX) {
+		if (baridx == pci_msix_table_bar(pi) ||
+		    baridx == pci_msix_pba_bar(pi)) {
+			return (pci_emul_msix_tread(pi, offset, size));
+		}
+	}
+
+	/* XXX probably should do something better than just assert() */
+	assert(baridx == 0);
+
+	/* The softc mutex is optional; hold it for the whole access. */
+	if (vs->vs_mtx)
+		pthread_mutex_lock(vs->vs_mtx);
+
+	vc = vs->vs_vc;
+	name = vc->vc_name;
+	/* Default result: all-ones (0xffffffff for any size but 1 or 2). */
+	value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff;
+
+	if (size != 1 && size != 2 && size != 4)
+		goto bad;
+
+	/* The generic register block ends at CFG1 with MSI-X, else CFG0. */
+	if (pci_msix_enabled(pi))
+		virtio_config_size = VTCFG_R_CFG1;
+	else
+		virtio_config_size = VTCFG_R_CFG0;
+
+	if (offset >= virtio_config_size) {
+		/*
+		 * Subtract off the standard size (including MSI-X
+		 * registers if enabled) and dispatch to underlying driver.
+		 * If that fails, fall into general code.
+		 */
+		newoff = offset - virtio_config_size;
+		/* A vc_cfgsize of 0 means only the 4GB cap applies here. */
+		max = vc->vc_cfgsize ? vc->vc_cfgsize : 0x100000000;
+		if (newoff + size > max)
+			goto bad;
+		error = (*vc->vc_cfgread)(DEV_SOFTC(vs), newoff, size, &value);
+		if (!error)
+			goto done;
+	}
+
+	/*
+	 * Reached by goto for bad sizes/offsets, and by falling through
+	 * for generic-register offsets and for failed driver reads.
+	 */
+bad:
+	cr = vi_find_cr(offset);
+	if (cr == NULL || cr->cr_size != size) {
+		if (cr != NULL) {
+			/* offset must be OK, so size must be bad */
+			fprintf(stderr,
+			    "%s: read from %s: bad size %d\r\n",
+			    name, cr->cr_name, size);
+		} else {
+			fprintf(stderr,
+			    "%s: read from bad offset/size %jd/%d\r\n",
+			    name, (uintmax_t)offset, size);
+		}
+		goto done;
+	}
+
+	/* offset and size now match a generic register exactly. */
+	switch (offset) {
+	case VTCFG_R_HOSTCAP:
+		value = vc->vc_hv_caps;
+		break;
+	case VTCFG_R_GUESTCAP:
+		value = vs->vs_negotiated_caps;
+		break;
+	case VTCFG_R_PFN:
+		/* With an out-of-range queue selected, value stays all-ones. */
+		if (vs->vs_curq < vc->vc_nvq)
+			value = vs->vs_queues[vs->vs_curq].vq_pfn;
+		break;
+	case VTCFG_R_QNUM:
+		/* A QNUM of 0 tells the guest the selected queue is absent. */
+		value = vs->vs_curq < vc->vc_nvq ?
+		    vs->vs_queues[vs->vs_curq].vq_qsize : 0;
+		break;
+	case VTCFG_R_QSEL:
+		value = vs->vs_curq;
+		break;
+	case VTCFG_R_QNOTIFY:
+		value = 0;	/* XXX */
+		break;
+	case VTCFG_R_STATUS:
+		value = vs->vs_status;
+		break;
+	case VTCFG_R_ISR:
+		value = vs->vs_isr;
+		vs->vs_isr = 0;		/* a read clears this flag */
+		if (value)
+			pci_lintr_deassert(pi);
+		break;
+	case VTCFG_R_CFGVEC:
+		value = vs->vs_msix_cfg_idx;
+		break;
+	case VTCFG_R_QVEC:
+		value = vs->vs_curq < vc->vc_nvq ?
+		    vs->vs_queues[vs->vs_curq].vq_msix_idx :
+		    VIRTIO_MSI_NO_VECTOR;
+		break;
+	}
+done:
+	if (vs->vs_mtx)
+		pthread_mutex_unlock(vs->vs_mtx);
+	return (value);
+}
+
+/*
+ * Handle pci config space writes.
+ * If it's to the MSI-X info, do that.
+ * If it's part of the virtio standard stuff, do that.
+ * Otherwise dispatch to the actual driver.
+ *
+ * The access is identified by a BAR index and an offset within that
+ * BAR.  Only BAR 0 is accepted, plus the MSI-X table/PBA BARs when
+ * VIRTIO_USE_MSIX is set.  ctx and vcpu are not used here.
+ *
+ * Bad offsets or sizes, writes to read-only registers, out-of-range
+ * QNOTIFY values, and PFN/QVEC writes while an out-of-range queue is
+ * selected are reported on stderr and otherwise ignored.
+ */
+void
+vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+	     int baridx, uint64_t offset, int size, uint64_t value)
+{
+	struct virtio_softc *vs = pi->pi_arg;
+	struct vqueue_info *vq;
+	struct virtio_consts *vc;
+	struct config_reg *cr;
+	uint64_t virtio_config_size, max;
+	const char *name;
+	uint32_t newoff;
+	int error;
+
+	/* MSI-X table/PBA accesses are handed off before any locking. */
+	if (vs->vs_flags & VIRTIO_USE_MSIX) {
+		if (baridx == pci_msix_table_bar(pi) ||
+		    baridx == pci_msix_pba_bar(pi)) {
+			pci_emul_msix_twrite(pi, offset, size, value);
+			return;
+		}
+	}
+
+	/* XXX probably should do something better than just assert() */
+	assert(baridx == 0);
+
+	/* The softc mutex is optional; hold it for the whole access. */
+	if (vs->vs_mtx)
+		pthread_mutex_lock(vs->vs_mtx);
+
+	vc = vs->vs_vc;
+	name = vc->vc_name;
+
+	if (size != 1 && size != 2 && size != 4)
+		goto bad;
+
+	/* The generic register block ends at CFG1 with MSI-X, else CFG0. */
+	if (pci_msix_enabled(pi))
+		virtio_config_size = VTCFG_R_CFG1;
+	else
+		virtio_config_size = VTCFG_R_CFG0;
+
+	if (offset >= virtio_config_size) {
+		/*
+		 * Subtract off the standard size (including MSI-X
+		 * registers if enabled) and dispatch to underlying driver.
+		 */
+		newoff = offset - virtio_config_size;
+		/* A vc_cfgsize of 0 means only the 4GB cap applies here. */
+		max = vc->vc_cfgsize ? vc->vc_cfgsize : 0x100000000;
+		if (newoff + size > max)
+			goto bad;
+		error = (*vc->vc_cfgwrite)(DEV_SOFTC(vs), newoff, size, value);
+		if (!error)
+			goto done;
+	}
+
+	/*
+	 * Reached by goto for bad sizes/offsets, and by falling through
+	 * for generic-register offsets and for failed driver writes.
+	 */
+bad:
+	cr = vi_find_cr(offset);
+	if (cr == NULL || cr->cr_size != size || cr->cr_ro) {
+		if (cr != NULL) {
+			/* offset must be OK, wrong size and/or reg is R/O */
+			if (cr->cr_size != size)
+				fprintf(stderr,
+				    "%s: write to %s: bad size %d\r\n",
+				    name, cr->cr_name, size);
+			if (cr->cr_ro)
+				fprintf(stderr,
+				    "%s: write to read-only reg %s\r\n",
+				    name, cr->cr_name);
+		} else {
+			fprintf(stderr,
+			    "%s: write to bad offset/size %jd/%d\r\n",
+			    name, (uintmax_t)offset, size);
+		}
+		goto done;
+	}
+
+	/* offset and size now match a writable generic register. */
+	switch (offset) {
+	case VTCFG_R_GUESTCAP:
+		/* The guest can only enable features the host offers. */
+		vs->vs_negotiated_caps = value & vc->vc_hv_caps;
+		if (vc->vc_apply_features)
+			(*vc->vc_apply_features)(DEV_SOFTC(vs),
+			    vs->vs_negotiated_caps);
+		break;
+	case VTCFG_R_PFN:
+		if (vs->vs_curq >= vc->vc_nvq)
+			goto bad_qindex;
+		vi_vq_init(vs, value);
+		break;
+	case VTCFG_R_QSEL:
+		/*
+		 * Note that the guest is allowed to select an
+		 * invalid queue; we just need to return a QNUM
+		 * of 0 while the bad queue is selected.
+		 */
+		vs->vs_curq = value;
+		break;
+	case VTCFG_R_QNOTIFY:
+		/* The value written is the number of the queue to scan. */
+		if (value >= vc->vc_nvq) {
+			fprintf(stderr, "%s: queue %d notify out of range\r\n",
+				name, (int)value);
+			goto done;
+		}
+		vq = &vs->vs_queues[value];
+		/* A per-queue handler takes precedence over the device's. */
+		if (vq->vq_notify)
+			(*vq->vq_notify)(DEV_SOFTC(vs), vq);
+		else if (vc->vc_qnotify)
+			(*vc->vc_qnotify)(DEV_SOFTC(vs), vq);
+		else
+			fprintf(stderr,
+			    "%s: qnotify queue %d: missing vq/vc notify\r\n",
+				name, (int)value);
+		break;
+	case VTCFG_R_STATUS:
+		vs->vs_status = value;
+		/* Writing 0 to STATUS is the guest's reset request. */
+		if (value == 0)
+			(*vc->vc_reset)(DEV_SOFTC(vs));
+		break;
+	case VTCFG_R_CFGVEC:
+		vs->vs_msix_cfg_idx = value;
+		break;
+	case VTCFG_R_QVEC:
+		if (vs->vs_curq >= vc->vc_nvq)
+			goto bad_qindex;
+		vq = &vs->vs_queues[vs->vs_curq];
+		vq->vq_msix_idx = value;
+		break;
+	}
+	goto done;
+
+	/* Only reached from the switch above, so cr is non-NULL here. */
+bad_qindex:
+	fprintf(stderr,
+	    "%s: write config reg %s: curq %d >= max %d\r\n",
+	    name, cr->cr_name, vs->vs_curq, vc->vc_nvq);
+done:
+	if (vs->vs_mtx)
+		pthread_mutex_unlock(vs->vs_mtx);
+}


Property changes on: trunk/usr.sbin/bhyve/virtio.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/virtio.h
===================================================================
--- trunk/usr.sbin/bhyve/virtio.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/virtio.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,465 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013  Chris Torek <torek @ torek net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/virtio.h 313812 2017-02-16 17:08:43Z grehan $
+ */
+
+#ifndef	_VIRTIO_H_
+#define	_VIRTIO_H_
+
+/*
+ * These are derived from several virtio specifications.
+ *
+ * Some useful links:
+ *    https://github.com/rustyrussell/virtio-spec
+ *    http://people.redhat.com/pbonzini/virtio-spec.pdf
+ */
+
+/*
+ * A virtual device has zero or more "virtual queues" (virtqueue).
+ * Each virtqueue uses at least two 4096-byte pages, laid out thus:
+ *
+ *      +-----------------------------------------------+
+ *      |    "desc":  <N> descriptors, 16 bytes each    |
+ *      |   -----------------------------------------   |
+ *      |   "avail":   2 uint16; <N> uint16; 1 uint16   |
+ *      |   -----------------------------------------   |
+ *      |              pad to 4k boundary               |
+ *      +-----------------------------------------------+
+ *      |   "used": 2 x uint16; <N> elems; 1 uint16     |
+ *      |   -----------------------------------------   |
+ *      |              pad to 4k boundary               |
+ *      +-----------------------------------------------+
+ *
+ * The number <N> that appears here is always a power of two and is
+ * limited to no more than 32768 (as it must fit in a 16-bit field).
+ * If <N> is sufficiently large, the above will occupy more than
+ * two pages.  In any case, all pages must be physically contiguous
+ * within the guest's physical address space.
+ *
+ * The <N> 16-byte "desc" descriptors consist of a 64-bit guest
+ * physical address <addr>, a 32-bit length <len>, a 16-bit
+ * <flags>, and a 16-bit <next> field (all in guest byte order).
+ *
+ * There are three flags that may be set :
+ *	NEXT    descriptor is chained, so use its "next" field
+ *	WRITE   descriptor is for host to write into guest RAM
+ *		(else host is to read from guest RAM)
+ *	INDIRECT   descriptor address field is (guest physical)
+ *		address of a linear array of descriptors
+ *
+ * Unless INDIRECT is set, <len> is the number of bytes that may
+ * be read/written from guest physical address <addr>.  If
+ * INDIRECT is set, WRITE is ignored and <len> provides the length
+ * of the indirect descriptors (and <len> must be a multiple of
+ * 16).  Note that NEXT may still be set in the main descriptor
+ * pointing to the indirect, and should be set in each indirect
+ * descriptor that uses the next descriptor (these should generally
+ * be numbered sequentially).  However, INDIRECT must not be set
+ * in the indirect descriptors.  Upon reaching an indirect descriptor
+ * without a NEXT bit, control returns to the direct descriptors.
+ *
+ * Except inside an indirect, each <next> value must be in the
+ * range [0 .. N) (i.e., the half-open interval).  (Inside an
+ * indirect, each <next> must be in the range [0 .. <len>/16).)
+ *
+ * The "avail" data structures reside in the same pages as the
+ * "desc" structures since both together are used by the device to
+ * pass information to the hypervisor's virtual driver.  These
+ * begin with a 16-bit <flags> field and 16-bit index <idx>, then
+ * have <N> 16-bit <ring> values, followed by one final 16-bit
+ * field <used_event>.  The <N> <ring> entries are simply
+ * indices into the descriptor ring (and thus must meet the same
+ * constraints as each <next> value).  However, <idx> is counted
+ * up from 0 (initially) and simply wraps around after 65535; it
+ * is taken mod <N> to find the next available entry.
+ *
+ * The "used" ring occupies a separate page or pages, and contains
+ * values written from the virtual driver back to the guest OS.
+ * This begins with a 16-bit <flags> and 16-bit <idx>, then there
+ * are <N> "vring_used_elem" elements, followed by a 16-bit <avail_event>.
+ * The <N> "vring_used_elem" elements consist of a 32-bit <id> and a
+ * 32-bit <len> (vu_tlen below).  The <id> is simply the index of
+ * the head of a descriptor chain the guest made available
+ * earlier, and the <len> is the number of bytes actually written,
+ * e.g., in the case of a network driver that provided a large
+ * receive buffer but received only a small amount of data.
+ *
+ * The two event fields, <used_event> and <avail_event>, in the
+ * avail and used rings (respectively -- note the reversal!), are
+ * always provided, but are used only if the virtual device
+ * negotiates the VIRTIO_RING_F_EVENT_IDX feature during feature
+ * negotiation.  Similarly, both rings provide a flag --
+ * VRING_AVAIL_F_NO_INTERRUPT and VRING_USED_F_NO_NOTIFY -- in
+ * their <flags> field, indicating that the guest does not need an
+ * interrupt, or that the hypervisor driver does not need a
+ * notify, when descriptors are added to the corresponding ring.
+ * (These are provided only for interrupt optimization and need
+ * not be implemented.)
+ */
+#define VRING_ALIGN	4096
+
+#define VRING_DESC_F_NEXT	(1 << 0)
+#define VRING_DESC_F_WRITE	(1 << 1)
+#define VRING_DESC_F_INDIRECT	(1 << 2)
+
+/* One 16-byte entry of the "desc" table. */
+struct virtio_desc {			/* AKA vring_desc */
+	uint64_t	vd_addr;	/* guest physical address */
+	uint32_t	vd_len;		/* length of scatter/gather seg */
+	uint16_t	vd_flags;	/* VRING_DESC_F_* */
+	uint16_t	vd_next;	/* next desc if F_NEXT */
+} __packed;
+
+/*
+ * One entry of the "used" ring.  Note that vu_idx here is the field
+ * the virtio spec calls "id"; it is not the ring index of the same
+ * name in struct vring_used.
+ */
+struct virtio_used {			/* AKA vring_used_elem */
+	uint32_t	vu_idx;		/* head of used descriptor chain */
+	uint32_t	vu_tlen;	/* length written-to */
+} __packed;
+
+#define VRING_AVAIL_F_NO_INTERRUPT   1
+
+/*
+ * Header of the "avail" ring.  The trailing used_event field cannot
+ * be declared after the flexible array member, so it is reached as
+ * va_ring[N] via VQ_USED_EVENT_IDX().
+ */
+struct vring_avail {
+	uint16_t	va_flags;	/* VRING_AVAIL_F_* */
+	uint16_t	va_idx;		/* counts to 65535, then cycles */
+	uint16_t	va_ring[];	/* size N, reported in QNUM value */
+/*	uint16_t	va_used_event;	-- after N ring entries */
+} __packed;
+
+#define	VRING_USED_F_NO_NOTIFY		1
+/*
+ * Header of the "used" ring.  The trailing avail_event field cannot
+ * be declared after the flexible array member, so it is reached at
+ * the address of vu_ring[N] via VQ_AVAIL_EVENT_IDX().
+ */
+struct vring_used {
+	uint16_t	vu_flags;	/* VRING_USED_F_* */
+	uint16_t	vu_idx;		/* counts to 65535, then cycles */
+	struct virtio_used vu_ring[];	/* size N */
+/*	uint16_t	vu_avail_event;	-- after N ring entries */
+} __packed;
+
+/*
+ * The address of any given virtual queue is determined by a single
+ * Page Frame Number register.  The guest writes the PFN into the
+ * PCI config space.  However, a device that has two or more
+ * virtqueues can have a different PFN, and size, for each queue.
+ * The number of queues is determinable via the PCI config space
+ * VTCFG_R_QSEL register.  Writes to QSEL select the queue: 0 means
+ * queue #0, 1 means queue#1, etc.  Once a queue is selected, the
+ * remaining PFN and QNUM registers refer to that queue.
+ *
+ * QNUM is a read-only register containing a nonzero power of two
+ * that indicates the (hypervisor's) queue size.  Or, if reading it
+ * produces zero, the hypervisor does not have a corresponding
+ * queue.  (The number of possible queues depends on the virtual
+ * device.  The block device has just one; the network device
+ * provides either two -- 0 = receive, 1 = transmit -- or three,
+ * with 2 = control.)
+ *
+ * PFN is a read/write register giving the physical page address of
+ * the virtqueue in guest memory (the guest must allocate enough space
+ * based on the hypervisor's provided QNUM).
+ *
+ * QNOTIFY is effectively write-only: when the guest writes a queue
+ * number to the register, the hypervisor should scan the specified
+ * virtqueue. (Reading QNOTIFY currently always gets 0).
+ */
+
+/*
+ * PFN register shift amount
+ */
+#define VRING_PFN               12
+
+/*
+ * Virtio device types
+ *
+ * XXX Should really be merged with <dev/virtio/virtio.h> defines
+ */
+#define	VIRTIO_TYPE_NET		1
+#define	VIRTIO_TYPE_BLOCK	2
+#define	VIRTIO_TYPE_CONSOLE	3
+#define	VIRTIO_TYPE_ENTROPY	4
+#define	VIRTIO_TYPE_BALLOON	5
+#define	VIRTIO_TYPE_IOMEMORY	6
+#define	VIRTIO_TYPE_RPMSG	7
+#define	VIRTIO_TYPE_SCSI	8
+#define	VIRTIO_TYPE_9P		9
+
+/* experimental IDs start at 65535 and work down */
+
+/*
+ * PCI vendor/device IDs
+ */
+#define	VIRTIO_VENDOR		0x1AF4
+#define	VIRTIO_DEV_NET		0x1000
+#define	VIRTIO_DEV_BLOCK	0x1001
+#define	VIRTIO_DEV_RANDOM	0x1005
+
+/*
+ * PCI config space constants.
+ *
+ * If MSI-X is enabled, the ISR register is generally not used,
+ * and the configuration vector and queue vector appear at offsets
+ * 20 and 22 with the remaining configuration registers at 24.
+ * If MSI-X is not enabled, those two registers disappear and
+ * the remaining configuration registers start at offset 20.
+ */
+#define VTCFG_R_HOSTCAP		0
+#define VTCFG_R_GUESTCAP	4
+#define VTCFG_R_PFN		8
+#define VTCFG_R_QNUM		12
+#define VTCFG_R_QSEL		14
+#define VTCFG_R_QNOTIFY		16
+#define VTCFG_R_STATUS		18
+#define VTCFG_R_ISR		19
+#define VTCFG_R_CFGVEC		20
+#define VTCFG_R_QVEC		22
+#define VTCFG_R_CFG0		20	/* No MSI-X */
+#define VTCFG_R_CFG1		24	/* With MSI-X */
+#define VTCFG_R_MSIX		20
+
+/*
+ * Bits in VTCFG_R_STATUS.  Guests need not actually set any of these,
+ * but a guest writing 0 to this register means "please reset".
+ */
+#define	VTCFG_STATUS_ACK	0x01	/* guest OS has acknowledged dev */
+#define	VTCFG_STATUS_DRIVER	0x02	/* guest OS driver is loaded */
+#define	VTCFG_STATUS_DRIVER_OK	0x04	/* guest OS driver ready */
+#define	VTCFG_STATUS_FAILED	0x80	/* guest has given up on this dev */
+
+/*
+ * Bits in VTCFG_R_ISR.  These apply only if not using MSI-X.
+ *
+ * (We don't [yet?] ever use CONF_CHANGED.)
+ */
+#define	VTCFG_ISR_QUEUES	0x01	/* re-scan queues */
+#define	VTCFG_ISR_CONF_CHANGED	0x80	/* configuration changed */
+
+#define VIRTIO_MSI_NO_VECTOR	0xFFFF
+
+/*
+ * Feature flags.
+ * Note: bits 0 through 23 are reserved to each device type.
+ */
+#define	VIRTIO_F_NOTIFY_ON_EMPTY	(1 << 24)
+#define	VIRTIO_RING_F_INDIRECT_DESC	(1 << 28)
+#define	VIRTIO_RING_F_EVENT_IDX		(1 << 29)
+
+/*
+ * Number of bytes occupied by a virtqueue with qsz entries: the
+ * descriptor table plus avail ring, padded to VRING_ALIGN, followed
+ * by the used ring, padded to VRING_ALIGN again.
+ * From section 2.3, "Virtqueue Configuration", of the virtio
+ * specification.
+ */
+static inline size_t
+vring_size(u_int qsz)
+{
+	size_t first_part, used_part, total;
+
+	/* descriptors, then va_flags, va_idx, qsz entries, va_used_event */
+	first_part = sizeof(struct virtio_desc) * qsz +
+	    sizeof(uint16_t) * (3 + qsz);
+
+	/* vu_flags, vu_idx, vu_avail_event, then qsz used elements */
+	used_part = sizeof(uint16_t) * 3 + sizeof(struct virtio_used) * qsz;
+
+	total = roundup2(first_part, VRING_ALIGN);
+	total += used_part;
+	total = roundup2(total, VRING_ALIGN);
+
+	return (total);
+}
+
+struct vmctx;
+struct pci_devinst;
+struct vqueue_info;
+
+/*
+ * A virtual device, with some number (possibly 0) of virtual
+ * queues and some size (possibly 0) of configuration-space
+ * registers private to the device.  The virtio_softc should come
+ * at the front of each "derived class", so that a pointer to the
+ * virtio_softc is also a pointer to the more specific, derived-
+ * from-virtio driver's softc.
+ *
+ * Note: inside each hypervisor virtio driver, changes to these
+ * data structures must be locked against other threads, if any.
+ * Except for PCI config space register read/write, we assume each
+ * driver does the required locking, but we need a pointer to the
+ * lock (if there is one) for PCI config space read/write ops.
+ *
+ * When the guest reads or writes the device's config space, the
+ * generic layer checks for operations on the special registers
+ * described above.  If the offset of the register(s) being read
+ * or written is past the CFG area (CFG0 or CFG1), the request is
+ * passed on to the virtual device, after subtracting off the
+ * generic-layer size.  (So, drivers can just use the offset as
+ * an offset into "struct config", for instance.)
+ *
+ * (The virtio layer also makes sure that the read or write is to/
+ * from a "good" config offset, hence vc_cfgsize, and on BAR #0.
+ * However, the driver must verify the read or write size and offset
+ * and that no one is writing a readonly register.)
+ *
+ * The BROKED flag ("this thing done gone and broked") is for future
+ * use.
+ */
+#define	VIRTIO_USE_MSIX		0x01
+#define	VIRTIO_EVENT_IDX	0x02	/* use the event-index values */
+#define	VIRTIO_BROKED		0x08	/* ??? */
+
+/* Generic per-device state shared by all virtio device emulations. */
+struct virtio_softc {
+	struct virtio_consts *vs_vc;	/* constants (see below) */
+	int	vs_flags;		/* VIRTIO_* flags from above */
+	pthread_mutex_t *vs_mtx;	/* POSIX mutex, if any */
+	struct pci_devinst *vs_pi;	/* PCI device instance */
+	uint32_t vs_negotiated_caps;	/* negotiated capabilities */
+	struct vqueue_info *vs_queues;	/* one per vc_nvq */
+	int	vs_curq;		/* current queue (as written to QSEL) */
+	uint8_t	vs_status;		/* value from last status write */
+	uint8_t	vs_isr;			/* ISR flags, if not MSI-X */
+	uint16_t vs_msix_cfg_idx;	/* MSI-X vector for config event */
+};
+
+/*
+ * Take/release the softc's mutex; both do nothing when vs_mtx is
+ * NULL.  The argument is parenthesized so that any pointer expression
+ * (e.g. "&sc" or "p + 1") expands correctly.  It is evaluated twice,
+ * so it must not have side effects.
+ */
+#define	VS_LOCK(vs)							\
+do {									\
+	if ((vs)->vs_mtx)						\
+		pthread_mutex_lock((vs)->vs_mtx);			\
+} while (0)
+
+#define	VS_UNLOCK(vs)							\
+do {									\
+	if ((vs)->vs_mtx)						\
+		pthread_mutex_unlock((vs)->vs_mtx);			\
+} while (0)
+
+/*
+ * Per-device-type constants and callbacks.  The first argument of
+ * each callback is the driver's own softc.
+ */
+struct virtio_consts {
+	const char *vc_name;		/* name of driver (for diagnostics) */
+	int	vc_nvq;			/* number of virtual queues */
+	size_t	vc_cfgsize;		/* size of dev-specific config regs */
+	void	(*vc_reset)(void *);	/* called on virtual device reset */
+	void	(*vc_qnotify)(void *, struct vqueue_info *);
+					/* called on QNOTIFY if no VQ notify */
+	int	(*vc_cfgread)(void *, int, int, uint32_t *);
+					/* called to read config regs */
+					/* (offset, size, result); 0 => OK */
+	int	(*vc_cfgwrite)(void *, int, int, uint32_t);
+					/* called to write config regs */
+					/* (offset, size, value); 0 => OK */
+	void    (*vc_apply_features)(void *, uint64_t);
+				/* called to apply negotiated features */
+				/* (optional: may be NULL) */
+	uint64_t vc_hv_caps;		/* hypervisor-provided capabilities */
+};
+
+/*
+ * Data structure allocated (statically) per virtual queue.
+ *
+ * Drivers may change vq_qsize after a reset.  When the guest OS
+ * requests a device reset, the hypervisor first calls
+ * vs->vs_vc->vc_reset(); then the data structure below is
+ * reinitialized (for each virtqueue: vs->vs_vc->vc_nvq).
+ *
+ * The remaining fields should only be fussed-with by the generic
+ * code.
+ *
+ * Note: the addresses of vq_desc, vq_avail, and vq_used are all
+ * computable from each other, but it's a lot simpler if we just
+ * keep a pointer to each one.  The event indices are similarly
+ * (but more easily) computable, and this time we'll compute them:
+ * they're just XX_ring[N].
+ */
+#define	VQ_ALLOC	0x01	/* set once we have a pfn */
+#define	VQ_BROKED	0x02	/* ??? */
+struct vqueue_info {
+	uint16_t vq_qsize;	/* size of this queue (a power of 2) */
+	void	(*vq_notify)(void *, struct vqueue_info *);
+				/* called instead of vc_qnotify, if not NULL */
+
+	struct virtio_softc *vq_vs;	/* backpointer to softc */
+	uint16_t vq_num;	/* we're the num'th queue in the softc */
+
+	uint16_t vq_flags;	/* flags (see above) */
+	uint16_t vq_last_avail;	/* a recent value of vq_avail->va_idx */
+	uint16_t vq_save_used;	/* saved vq_used->vu_idx; see vq_endchains */
+	uint16_t vq_msix_idx;	/* MSI-X index, or VIRTIO_MSI_NO_VECTOR */
+
+	uint32_t vq_pfn;	/* PFN of virt queue (not shifted!) */
+
+	/* The three pointers below refer to guest-shared ring memory. */
+	volatile struct virtio_desc *vq_desc;	/* descriptor array */
+	volatile struct vring_avail *vq_avail;	/* the "avail" ring */
+	volatile struct vring_used *vq_used;	/* the "used" ring */
+
+};
+/*
+ * Accessors for the two event-index fields, each of which sits just
+ * past the N entries of a ring (N = vq_qsize).
+ * As noted above, these are sort of backwards, name-wise: the
+ * avail_event field lives after the "used" ring and the used_event
+ * field lives after the "avail" ring.
+ */
+#define VQ_AVAIL_EVENT_IDX(vq) \
+	(*(volatile uint16_t *)&(vq)->vq_used->vu_ring[(vq)->vq_qsize])
+#define VQ_USED_EVENT_IDX(vq) \
+	((vq)->vq_avail->va_ring[(vq)->vq_qsize])
+
+/*
+ * Report whether this ring can be used for I/O yet, i.e. whether
+ * VQ_ALLOC is set in its flags.  Returns nonzero if so, 0 if not.
+ */
+static inline int
+vq_ring_ready(struct vqueue_info *vq)
+{
+	const uint16_t flags = vq->vq_flags;
+
+	return (flags & VQ_ALLOC);
+}
+
+/*
+ * Report whether any "available" descriptors are pending.  Returns 1
+ * if there are some (without counting them), 0 if there are none or
+ * the ring is not ready yet.
+ */
+static inline int
+vq_has_descs(struct vqueue_info *vq)
+{
+
+	/* The avail ring must not be touched before the ring is set up. */
+	if (!vq_ring_ready(vq))
+		return (0);
+
+	return (vq->vq_avail->va_idx != vq->vq_last_avail);
+}
+
+/*
+ * Interrupt the guest on behalf of the given virtual queue.  With
+ * MSI-X enabled the queue's own vector is used; otherwise the queue
+ * bit is set in the ISR and a generic MSI plus the legacy interrupt
+ * line are raised, all under the softc lock.
+ */
+static inline void
+vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq)
+{
+	struct pci_devinst *pi = vs->vs_pi;
+
+	if (pci_msix_enabled(pi)) {
+		pci_generate_msix(pi, vq->vq_msix_idx);
+		return;
+	}
+
+	VS_LOCK(vs);
+	vs->vs_isr |= VTCFG_ISR_QUEUES;
+	pci_generate_msi(pi, 0);
+	pci_lintr_assert(pi);
+	VS_UNLOCK(vs);
+}
+
+struct iovec;
+void	vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
+			void *dev_softc, struct pci_devinst *pi,
+			struct vqueue_info *queues);
+int	vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix);
+void	vi_reset_dev(struct virtio_softc *);
+void	vi_set_io_bar(struct virtio_softc *, int);
+
+int	vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
+		    struct iovec *iov, int n_iov, uint16_t *flags);
+void	vq_retchain(struct vqueue_info *vq);
+void	vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen);
+void	vq_endchains(struct vqueue_info *vq, int used_all_avail);
+
+uint64_t vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+		     int baridx, uint64_t offset, int size);
+void	vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+		     int baridx, uint64_t offset, int size, uint64_t value);
+#endif	/* _VIRTIO_H_ */


Property changes on: trunk/usr.sbin/bhyve/virtio.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/xmsr.c
===================================================================
--- trunk/usr.sbin/bhyve/xmsr.c	                        (rev 0)
+++ trunk/usr.sbin/bhyve/xmsr.c	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,231 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/xmsr.c 284894 2015-06-27 22:48:22Z neel $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/xmsr.c 284894 2015-06-27 22:48:22Z neel $");
+
+#include <sys/types.h>
+
+#include <machine/cpufunc.h>
+#include <machine/vmm.h>
+#include <machine/specialreg.h>
+
+#include <vmmapi.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "xmsr.h"
+
+static int cpu_vendor_intel, cpu_vendor_amd;
+
+/*
+ * Emulate a guest WRMSR.  Writes to a fixed, vendor-specific set of
+ * MSRs are silently discarded.  Returns 0 if the write was accepted
+ * (and ignored), or -1 if the MSR is not handled here.
+ */
+int
+emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t val)
+{
+	int ignore = 0;
+
+	if (cpu_vendor_intel) {
+		switch (num) {
+		case 0xd04:		/* Sandy Bridge uncore PMCs */
+		case 0xc24:
+		case MSR_BIOS_UPDT_TRIG:
+		case MSR_BIOS_SIGN:
+			ignore = 1;
+			break;
+		default:
+			break;
+		}
+	} else if (cpu_vendor_amd) {
+		switch (num) {
+		case MSR_HWCR:		/* hardware configuration MSR */
+		case MSR_NB_CFG1:
+		case MSR_IC_CFG:
+		case MSR_PERFEVSEL0:	/* the PerfEvtSel MSRs */
+		case MSR_PERFEVSEL1:
+		case MSR_PERFEVSEL2:
+		case MSR_PERFEVSEL3:
+		case MSR_K7_PERFCTR0:	/* the PerfCtr MSRs */
+		case MSR_K7_PERFCTR1:
+		case MSR_K7_PERFCTR2:
+		case MSR_K7_PERFCTR3:
+		case MSR_P_STATE_CONTROL:	/* P-state change request */
+			ignore = 1;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return (ignore ? 0 : -1);
+}
+
+/*
+ * Emulate a guest RDMSR.  For a fixed, vendor-specific set of MSRs a
+ * canned value is stored in *val and 0 is returned.  For any other
+ * MSR, or on a CPU of unknown vendor, -1 is returned and *val is left
+ * untouched.
+ */
+int
+emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t *val)
+{
+
+	if (cpu_vendor_intel) {
+		switch (num) {
+		case MSR_BIOS_SIGN:
+		case MSR_IA32_PLATFORM_ID:
+		case MSR_PKG_ENERGY_STATUS:
+		case MSR_PP0_ENERGY_STATUS:
+		case MSR_PP1_ENERGY_STATUS:
+		case MSR_DRAM_ENERGY_STATUS:
+			*val = 0;
+			return (0);
+		case MSR_RAPL_POWER_UNIT:
+			/*
+			 * Use the default value documented in section
+			 * "RAPL Interfaces" in Intel SDM vol3.
+			 */
+			*val = 0x000a1003;
+			return (0);
+		default:
+			return (-1);
+		}
+	}
+
+	if (!cpu_vendor_amd)
+		return (-1);
+
+	switch (num) {
+	case MSR_HWCR:
+		/*
+		 * Bios and Kernel Developer's Guides for AMD Families
+		 * 12H, 14H, 15H and 16H: the reset value, with the
+		 * MONITOR/MWAIT disable bit (bit 9) set as well.
+		 */
+		*val = 0x01000010 | (1 << 9);
+		return (0);
+
+	/*
+	 * OpenBSD guests test bit 0 of this MSR to detect if the
+	 * workaround for erratum 721 is already applied.
+	 * http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf
+	 */
+	case 0xC0011029:
+		*val = 1;
+		return (0);
+
+	/*
+	 * Everything below reads as zero:
+	 *  - NB_CFG1/IC_CFG reset values are processor family dependent;
+	 *  - PerfEvtSel and PerfCtr MSRs are not properly virtualized;
+	 *  - SMM_ADDR/SMM_MASK: the reset value defined in the AMD Bios
+	 *    and Kernel Developer's Guide;
+	 *  - the P-state MSRs, including the P0 configuration.
+	 */
+	case MSR_BIOS_SIGN:
+	case MSR_NB_CFG1:
+	case MSR_IC_CFG:
+	case MSR_PERFEVSEL0:
+	case MSR_PERFEVSEL1:
+	case MSR_PERFEVSEL2:
+	case MSR_PERFEVSEL3:
+	case MSR_K7_PERFCTR0:
+	case MSR_K7_PERFCTR1:
+	case MSR_K7_PERFCTR2:
+	case MSR_K7_PERFCTR3:
+	case MSR_SMM_ADDR:
+	case MSR_SMM_MASK:
+	case MSR_P_STATE_LIMIT:
+	case MSR_P_STATE_CONTROL:
+	case MSR_P_STATE_STATUS:
+	case MSR_P_STATE_CONFIG(0):	/* P0 configuration */
+		*val = 0;
+		return (0);
+
+	default:
+		return (-1);
+	}
+}
+
+/*
+ * Identify the host CPU vendor from CPUID leaf 0 and set
+ * cpu_vendor_amd or cpu_vendor_intel accordingly.
+ *
+ * Returns 0 on success, or -1 (after printing a diagnostic) if the
+ * vendor string is neither "AuthenticAMD" nor "GenuineIntel".
+ */
+int
+init_msr(void)
+{
+	int error;
+	u_int regs[4];
+	char cpu_vendor[13];
+
+	do_cpuid(0, regs);
+
+	/*
+	 * Assemble the 12-byte vendor string from regs[1], regs[3] and
+	 * regs[2], in that order (presumably EBX, EDX, ECX -- confirm
+	 * against do_cpuid()).  memcpy() is used rather than storing
+	 * through a cast "u_int *", which would violate the aliasing
+	 * rules and assume the char array is suitably aligned.
+	 */
+	memcpy(cpu_vendor + 0 * sizeof(regs[0]), &regs[1], sizeof(regs[0]));
+	memcpy(cpu_vendor + 1 * sizeof(regs[0]), &regs[3], sizeof(regs[0]));
+	memcpy(cpu_vendor + 2 * sizeof(regs[0]), &regs[2], sizeof(regs[0]));
+	cpu_vendor[12] = '\0';
+
+	error = 0;
+	if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+		cpu_vendor_amd = 1;
+	} else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
+		cpu_vendor_intel = 1;
+	} else {
+		fprintf(stderr, "Unknown cpu vendor \"%s\"\n", cpu_vendor);
+		error = -1;
+	}
+	return (error);
+}


Property changes on: trunk/usr.sbin/bhyve/xmsr.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/usr.sbin/bhyve/xmsr.h
===================================================================
--- trunk/usr.sbin/bhyve/xmsr.h	                        (rev 0)
+++ trunk/usr.sbin/bhyve/xmsr.h	2018-06-09 21:54:00 UTC (rev 10719)
@@ -0,0 +1,37 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/usr.sbin/bhyve/xmsr.h 276349 2014-12-28 21:27:13Z neel $
+ */
+
+#ifndef	_XMSR_H_
+#define	_XMSR_H_
+/* Includes nothing: declare struct vmctx and uint32_t/uint64_t first. */
+int init_msr(void);	/* returns 0, or -1 if the CPU vendor is unknown */
+int emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val);
+int emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t *val);
+/* NOTE(review): rdmsr seems to fill *val and return -1 if MSR unhandled. */
+#endif	/* !_XMSR_H_ */


Property changes on: trunk/usr.sbin/bhyve/xmsr.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property


More information about the Midnightbsd-cvs mailing list