[Midnightbsd-cvs] src [10104] trunk/sys/dev: sync with freebsd

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Sun May 27 19:46:33 EDT 2018


Revision: 10104
          http://svnweb.midnightbsd.org/src/?rev=10104
Author:   laffer1
Date:     2018-05-27 19:46:32 -0400 (Sun, 27 May 2018)
Log Message:
-----------
sync with freebsd

Modified Paths:
--------------
    trunk/sys/dev/ic/cd1400.h
    trunk/sys/dev/ic/cd180.h
    trunk/sys/dev/ic/esp.h
    trunk/sys/dev/ic/hd64570.h
    trunk/sys/dev/ic/i8237.h
    trunk/sys/dev/ic/i8251.h
    trunk/sys/dev/ic/i8253reg.h
    trunk/sys/dev/ic/i8255.h
    trunk/sys/dev/ic/i82586.h
    trunk/sys/dev/ic/i8259.h
    trunk/sys/dev/ic/nec765.h
    trunk/sys/dev/ic/ns16550.h
    trunk/sys/dev/ic/quicc.h
    trunk/sys/dev/ic/rsa.h
    trunk/sys/dev/ic/sab82532.h
    trunk/sys/dev/ic/via6522reg.h
    trunk/sys/dev/ic/wd33c93reg.h
    trunk/sys/dev/ic/z8530.h
    trunk/sys/dev/ichsmb/ichsmb.c
    trunk/sys/dev/ichsmb/ichsmb_pci.c
    trunk/sys/dev/ichsmb/ichsmb_reg.h
    trunk/sys/dev/ichsmb/ichsmb_var.h
    trunk/sys/dev/iicbus/ad7417.c
    trunk/sys/dev/iicbus/ad7418.c
    trunk/sys/dev/iicbus/ds133x.c
    trunk/sys/dev/iicbus/ds1672.c
    trunk/sys/dev/iicbus/ds1775.c
    trunk/sys/dev/iicbus/icee.c
    trunk/sys/dev/iicbus/if_ic.c
    trunk/sys/dev/iicbus/iic.c
    trunk/sys/dev/iicbus/iic.h
    trunk/sys/dev/iicbus/iicbb.c
    trunk/sys/dev/iicbus/iicbb_if.m
    trunk/sys/dev/iicbus/iicbus.c
    trunk/sys/dev/iicbus/iicbus.h
    trunk/sys/dev/iicbus/iicbus_if.m
    trunk/sys/dev/iicbus/iiconf.c
    trunk/sys/dev/iicbus/iiconf.h
    trunk/sys/dev/iicbus/iicsmb.c
    trunk/sys/dev/iicbus/max6690.c
    trunk/sys/dev/iicbus/pcf8563.c
    trunk/sys/dev/iicbus/pcf8563reg.h

Added Paths:
-----------
    trunk/sys/dev/hyperv/
    trunk/sys/dev/hyperv/include/
    trunk/sys/dev/hyperv/include/hyperv.h
    trunk/sys/dev/hyperv/include/hyperv_busdma.h
    trunk/sys/dev/hyperv/include/vmbus.h
    trunk/sys/dev/hyperv/include/vmbus_xact.h
    trunk/sys/dev/hyperv/input/
    trunk/sys/dev/hyperv/input/hv_kbd.c
    trunk/sys/dev/hyperv/input/hv_kbdc.c
    trunk/sys/dev/hyperv/input/hv_kbdc.h
    trunk/sys/dev/hyperv/netvsc/
    trunk/sys/dev/hyperv/netvsc/hn_nvs.c
    trunk/sys/dev/hyperv/netvsc/hn_nvs.h
    trunk/sys/dev/hyperv/netvsc/hn_rndis.c
    trunk/sys/dev/hyperv/netvsc/hn_rndis.h
    trunk/sys/dev/hyperv/netvsc/if_hn.c
    trunk/sys/dev/hyperv/netvsc/if_hnreg.h
    trunk/sys/dev/hyperv/netvsc/if_hnvar.h
    trunk/sys/dev/hyperv/netvsc/ndis.h
    trunk/sys/dev/hyperv/pcib/
    trunk/sys/dev/hyperv/pcib/vmbus_pcib.c
    trunk/sys/dev/hyperv/storvsc/
    trunk/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
    trunk/sys/dev/hyperv/storvsc/hv_vstorage.h
    trunk/sys/dev/hyperv/utilities/
    trunk/sys/dev/hyperv/utilities/hv_kvp.c
    trunk/sys/dev/hyperv/utilities/hv_kvp.h
    trunk/sys/dev/hyperv/utilities/hv_snapshot.c
    trunk/sys/dev/hyperv/utilities/hv_snapshot.h
    trunk/sys/dev/hyperv/utilities/hv_utilreg.h
    trunk/sys/dev/hyperv/utilities/unicode.h
    trunk/sys/dev/hyperv/utilities/vmbus_heartbeat.c
    trunk/sys/dev/hyperv/utilities/vmbus_ic.c
    trunk/sys/dev/hyperv/utilities/vmbus_icreg.h
    trunk/sys/dev/hyperv/utilities/vmbus_icvar.h
    trunk/sys/dev/hyperv/utilities/vmbus_shutdown.c
    trunk/sys/dev/hyperv/utilities/vmbus_timesync.c
    trunk/sys/dev/hyperv/vmbus/
    trunk/sys/dev/hyperv/vmbus/amd64/
    trunk/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c
    trunk/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
    trunk/sys/dev/hyperv/vmbus/hyperv.c
    trunk/sys/dev/hyperv/vmbus/hyperv_busdma.c
    trunk/sys/dev/hyperv/vmbus/hyperv_machdep.h
    trunk/sys/dev/hyperv/vmbus/hyperv_reg.h
    trunk/sys/dev/hyperv/vmbus/hyperv_var.h
    trunk/sys/dev/hyperv/vmbus/i386/
    trunk/sys/dev/hyperv/vmbus/i386/hyperv_machdep.c
    trunk/sys/dev/hyperv/vmbus/i386/vmbus_vector.S
    trunk/sys/dev/hyperv/vmbus/vmbus.c
    trunk/sys/dev/hyperv/vmbus/vmbus_br.c
    trunk/sys/dev/hyperv/vmbus/vmbus_brvar.h
    trunk/sys/dev/hyperv/vmbus/vmbus_chan.c
    trunk/sys/dev/hyperv/vmbus/vmbus_chanvar.h
    trunk/sys/dev/hyperv/vmbus/vmbus_et.c
    trunk/sys/dev/hyperv/vmbus/vmbus_if.m
    trunk/sys/dev/hyperv/vmbus/vmbus_reg.h
    trunk/sys/dev/hyperv/vmbus/vmbus_res.c
    trunk/sys/dev/hyperv/vmbus/vmbus_var.h
    trunk/sys/dev/hyperv/vmbus/vmbus_xact.c
    trunk/sys/dev/iicbus/adt746x.c
    trunk/sys/dev/iicbus/ds1374.c
    trunk/sys/dev/iicbus/ds1631.c
    trunk/sys/dev/iicbus/iicoc.c
    trunk/sys/dev/iicbus/iicoc.h
    trunk/sys/dev/iicbus/s35390a.c
    trunk/sys/dev/imcsmb/
    trunk/sys/dev/imcsmb/imcsmb.c
    trunk/sys/dev/imcsmb/imcsmb_pci.c
    trunk/sys/dev/imcsmb/imcsmb_reg.h
    trunk/sys/dev/imcsmb/imcsmb_var.h
    trunk/sys/dev/ioat/
    trunk/sys/dev/ioat/ioat.c
    trunk/sys/dev/ioat/ioat.h
    trunk/sys/dev/ioat/ioat_hw.h
    trunk/sys/dev/ioat/ioat_internal.h
    trunk/sys/dev/ioat/ioat_test.c
    trunk/sys/dev/ioat/ioat_test.h

Property Changed:
----------------
    trunk/sys/dev/iicbus/iicbb_if.m
    trunk/sys/dev/iicbus/iicbus_if.m

Added: trunk/sys/dev/hyperv/include/hyperv.h
===================================================================
--- trunk/sys/dev/hyperv/include/hyperv.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/include/hyperv.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,98 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/include/hyperv.h 324573 2017-10-13 02:16:35Z sephe $
+ */
+
+#ifndef _HYPERV_H_
+#define _HYPERV_H_
+
+#ifdef _KERNEL
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#define MSR_HV_TIME_REF_COUNT		0x40000020
+
+#define CPUID_HV_MSR_TIME_REFCNT	0x0002	/* MSR_HV_TIME_REF_COUNT */
+#define CPUID_HV_MSR_SYNIC		0x0004	/* MSRs for SynIC */
+#define CPUID_HV_MSR_SYNTIMER		0x0008	/* MSRs for SynTimer */
+#define CPUID_HV_MSR_APIC		0x0010	/* MSR_HV_{EOI,ICR,TPR} */
+#define CPUID_HV_MSR_HYPERCALL		0x0020	/* MSR_HV_GUEST_OS_ID
+						 * MSR_HV_HYPERCALL */
+#define CPUID_HV_MSR_VP_INDEX		0x0040	/* MSR_HV_VP_INDEX */
+#define CPUID_HV_MSR_REFERENCE_TSC	0x0200	/* MSR_HV_REFERENCE_TSC */
+#define CPUID_HV_MSR_GUEST_IDLE		0x0400	/* MSR_HV_GUEST_IDLE */
+
+#ifndef NANOSEC
+#define NANOSEC				1000000000ULL
+#endif
+#define HYPERV_TIMER_NS_FACTOR		100ULL
+#define HYPERV_TIMER_FREQ		(NANOSEC / HYPERV_TIMER_NS_FACTOR)
+
+#endif	/* _KERNEL */
+
+#define HYPERV_REFTSC_DEVNAME		"hv_tsc"
+
+/*
+ * Hyper-V Reference TSC
+ */
+struct hyperv_reftsc {
+	volatile uint32_t		tsc_seq;
+	volatile uint32_t		tsc_rsvd1;
+	volatile uint64_t		tsc_scale;
+	volatile int64_t		tsc_ofs;
+} __packed __aligned(PAGE_SIZE);
+#ifdef CTASSERT
+CTASSERT(sizeof(struct hyperv_reftsc) == PAGE_SIZE);
+#endif
+
+#ifdef _KERNEL
+
+struct hyperv_guid {
+	uint8_t				hv_guid[16];
+} __packed;
+
+#define HYPERV_GUID_STRLEN		40
+
+typedef uint64_t			(*hyperv_tc64_t)(void);
+
+int			hyperv_guid2str(const struct hyperv_guid *, char *,
+			    size_t);
+
+/*
+ * hyperv_tc64 may be NULL if there is no suitable Hyper-V
+ * specific timecounter.
+ */
+extern hyperv_tc64_t	hyperv_tc64;
+extern u_int		hyperv_features;	/* CPUID_HV_MSR_ */
+extern u_int		hyperv_ver_major;
+
+#endif	/* _KERNEL */
+
+#endif  /* _HYPERV_H_ */


Property changes on: trunk/sys/dev/hyperv/include/hyperv.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
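
The reference-TSC page declared in hyperv.h above is read with a seqlock-style
protocol: the hypervisor republishes tsc_scale/tsc_ofs under tsc_seq, and the
guest retries until it observes a stable sequence number.  A minimal sketch of
a reader, assuming an rdtsc() helper and 128-bit multiply support (hypothetical,
not part of this commit; memory barriers and the seq==0 MSR fallback are
omitted for brevity):

	/* Hypothetical reader of struct hyperv_reftsc. */
	static uint64_t
	reftsc_read(const struct hyperv_reftsc *r)
	{
		uint32_t seq;
		uint64_t tsc, scale, ret;
		int64_t ofs;

		do {
			seq = r->tsc_seq;	/* snapshot sequence number */
			scale = r->tsc_scale;
			ofs = r->tsc_ofs;
			tsc = rdtsc();		/* raw TSC (assumed helper) */
			/* High 64 bits of the 64x64 product, plus offset. */
			ret = (uint64_t)(((unsigned __int128)tsc * scale) >> 64) + ofs;
		} while (r->tsc_seq != seq);	/* retry if updated mid-read */

		return (ret);	/* ticks at HYPERV_TIMER_FREQ (100 ns units) */
	}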
Added: trunk/sys/dev/hyperv/include/hyperv_busdma.h
===================================================================
--- trunk/sys/dev/hyperv/include/hyperv_busdma.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/include/hyperv_busdma.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,50 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/include/hyperv_busdma.h 307133 2016-10-12 08:58:03Z sephe $
+ */
+
+#ifndef _HYPERV_BUSDMA_H_
+#define _HYPERV_BUSDMA_H_
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <machine/bus.h>
+
+struct hyperv_dma {
+	bus_addr_t	hv_paddr;
+	bus_dma_tag_t	hv_dtag;
+	bus_dmamap_t	hv_dmap;
+};
+
+void		hyperv_dma_map_paddr(void *arg, bus_dma_segment_t *segs,
+		    int nseg, int error);
+void		*hyperv_dmamem_alloc(bus_dma_tag_t parent_dtag,
+		    bus_size_t alignment, bus_addr_t boundary, bus_size_t size,
+		    struct hyperv_dma *dma, int flags);
+void		hyperv_dmamem_free(struct hyperv_dma *dma, void *ptr);
+
+#endif	/* !_HYPERV_BUSDMA_H_ */


Property changes on: trunk/sys/dev/hyperv/include/hyperv_busdma.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
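
A hedged usage sketch of the helpers declared above (the device context and
flag choice are assumptions, not part of this commit): hyperv_dmamem_alloc()
returns a kernel virtual address and fills in the hyperv_dma descriptor, whose
hv_paddr is the bus address handed to the hypervisor.

	struct hyperv_dma dma;
	void *buf;

	/* Allocate one zeroed, page-aligned page of DMA-able memory. */
	buf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), PAGE_SIZE, 0,
	    PAGE_SIZE, &dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
	if (buf != NULL) {
		/* ... pass dma.hv_paddr to the hypervisor, use buf ... */
		hyperv_dmamem_free(&dma, buf);
	}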
Added: trunk/sys/dev/hyperv/include/vmbus.h
===================================================================
--- trunk/sys/dev/hyperv/include/vmbus.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/include/vmbus.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,232 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/include/vmbus.h 310802 2016-12-30 02:18:34Z sephe $
+ */
+
+#ifndef _VMBUS_H_
+#define _VMBUS_H_
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <machine/bus.h>
+
+/*
+ * The VMBus version is a 32-bit value: the upper 16 bits hold the
+ * major number and the lower 16 bits the minor number.
+ *
+ * 0.13  --  Windows Server 2008
+ * 1.1   --  Windows 7
+ * 2.4   --  Windows 8
+ * 3.0   --  Windows 8.1
+ */
+#define VMBUS_VERSION_WS2008		((0 << 16) | (13))
+#define VMBUS_VERSION_WIN7		((1 << 16) | (1))
+#define VMBUS_VERSION_WIN8		((2 << 16) | (4))
+#define VMBUS_VERSION_WIN8_1		((3 << 16) | (0))
+
+#define VMBUS_VERSION_MAJOR(ver)	(((uint32_t)(ver)) >> 16)
+#define VMBUS_VERSION_MINOR(ver)	(((uint32_t)(ver)) & 0xffff)
+
+#define VMBUS_CHAN_POLLHZ_MIN		100	/* 10ms interval */
+#define VMBUS_CHAN_POLLHZ_MAX		1000000	/* 1us interval */
+
+/*
+ * GPA (guest physical address) structures.
+ */
+struct vmbus_gpa_range {
+	uint32_t	gpa_len;
+	uint32_t	gpa_ofs;
+	uint64_t	gpa_page[0];
+} __packed;
+
+/* Equivalent to a vmbus_gpa_range with a single gpa_page element. */
+struct vmbus_gpa {
+	uint32_t	gpa_len;
+	uint32_t	gpa_ofs;
+	uint64_t	gpa_page;
+} __packed;
+
+#define VMBUS_CHANPKT_SIZE_SHIFT	3
+
+#define VMBUS_CHANPKT_GETLEN(pktlen)	\
+	(((int)(pktlen)) << VMBUS_CHANPKT_SIZE_SHIFT)
+
+struct vmbus_chanpkt_hdr {
+	uint16_t	cph_type;	/* VMBUS_CHANPKT_TYPE_ */
+	uint16_t	cph_hlen;	/* header len, in 8-byte units */
+	uint16_t	cph_tlen;	/* total len, in 8-byte units */
+	uint16_t	cph_flags;	/* VMBUS_CHANPKT_FLAG_ */
+	uint64_t	cph_xactid;
+} __packed;
+
+#define VMBUS_CHANPKT_TYPE_INBAND	0x0006
+#define VMBUS_CHANPKT_TYPE_RXBUF	0x0007
+#define VMBUS_CHANPKT_TYPE_GPA		0x0009
+#define VMBUS_CHANPKT_TYPE_COMP		0x000b
+
+#define VMBUS_CHANPKT_FLAG_NONE		0
+#define VMBUS_CHANPKT_FLAG_RC		0x0001	/* report completion */
+
+#define VMBUS_CHANPKT_CONST_DATA(pkt)		\
+	(const void *)((const uint8_t *)(pkt) +	\
+	VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen))
+
+/* Include padding */
+#define VMBUS_CHANPKT_DATALEN(pkt)		\
+	(VMBUS_CHANPKT_GETLEN((pkt)->cph_tlen) -\
+	 VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen))
+
+struct vmbus_rxbuf_desc {
+	uint32_t	rb_len;
+	uint32_t	rb_ofs;
+} __packed;
+
+struct vmbus_chanpkt_rxbuf {
+	struct vmbus_chanpkt_hdr cp_hdr;
+	uint16_t	cp_rxbuf_id;
+	uint16_t	cp_rsvd;
+	uint32_t	cp_rxbuf_cnt;
+	struct vmbus_rxbuf_desc cp_rxbuf[];
+} __packed;
+
+struct vmbus_chan_br {
+	void		*cbr;
+	bus_addr_t	cbr_paddr;
+	int		cbr_txsz;
+	int		cbr_rxsz;
+};
+
+struct vmbus_channel;
+struct vmbus_xact;
+struct vmbus_xact_ctx;
+struct hyperv_guid;
+struct task;
+struct taskqueue;
+
+typedef void	(*vmbus_chan_callback_t)(struct vmbus_channel *, void *);
+
+static __inline struct vmbus_channel *
+vmbus_get_channel(device_t dev)
+{
+	return device_get_ivars(dev);
+}
+
+/*
+ * vmbus_chan_open_br()
+ *
+ * Return values:
+ * 0			Succeeded.
+ * EISCONN		Failed, and the memory passed through 'br' is still
+ *			connected.  Callers must _not_ free the memory
+ *			passed through 'br' if this error happens.
+ * other values		Failed.  The memory passed through 'br' is no longer
+ *			connected.  Callers are free to do anything with the
+ *			memory passed through 'br'.
+ *
+ *
+ *
+ * vmbus_chan_close_direct()
+ *
+ * NOTE:
+ * Callers of this function _must_ make sure to close all sub-channels before
+ * closing the primary channel.
+ *
+ * Return values:
+ * 0			Succeeded.
+ * EISCONN		Failed, and the memory associated with the bufring
+ *			is still connected.  Callers must _not_ free the
+ *			memory associated with the bufring if this error
+ *			happens.
+ * other values		Failed.  The memory associated with the bufring is
+ *			no longer connected.  Callers are free to do anything
+ *			with the memory associated with the bufring.
+ */
+int		vmbus_chan_open(struct vmbus_channel *chan,
+		    int txbr_size, int rxbr_size, const void *udata, int udlen,
+		    vmbus_chan_callback_t cb, void *cbarg);
+int		vmbus_chan_open_br(struct vmbus_channel *chan,
+		    const struct vmbus_chan_br *cbr, const void *udata,
+		    int udlen, vmbus_chan_callback_t cb, void *cbarg);
+void		vmbus_chan_close(struct vmbus_channel *chan);
+int		vmbus_chan_close_direct(struct vmbus_channel *chan);
+void		vmbus_chan_intr_drain(struct vmbus_channel *chan);
+void		vmbus_chan_run_task(struct vmbus_channel *chan,
+		    struct task *task);
+void		vmbus_chan_set_orphan(struct vmbus_channel *chan,
+		    struct vmbus_xact_ctx *);
+void		vmbus_chan_unset_orphan(struct vmbus_channel *chan);
+const void	*vmbus_chan_xact_wait(const struct vmbus_channel *chan,
+		    struct vmbus_xact *xact, size_t *resp_len, bool can_sleep);
+
+int		vmbus_chan_gpadl_connect(struct vmbus_channel *chan,
+		    bus_addr_t paddr, int size, uint32_t *gpadl);
+int		vmbus_chan_gpadl_disconnect(struct vmbus_channel *chan,
+		    uint32_t gpadl);
+
+void		vmbus_chan_cpu_set(struct vmbus_channel *chan, int cpu);
+void		vmbus_chan_cpu_rr(struct vmbus_channel *chan);
+void		vmbus_chan_set_readbatch(struct vmbus_channel *chan, bool on);
+
+struct vmbus_channel **
+		vmbus_subchan_get(struct vmbus_channel *pri_chan,
+		    int subchan_cnt);
+void		vmbus_subchan_rel(struct vmbus_channel **subchan,
+		    int subchan_cnt);
+void		vmbus_subchan_drain(struct vmbus_channel *pri_chan);
+
+int		vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen,
+		    uint64_t *xactid);
+int		vmbus_chan_recv_pkt(struct vmbus_channel *chan,
+		    struct vmbus_chanpkt_hdr *pkt, int *pktlen);
+
+int		vmbus_chan_send(struct vmbus_channel *chan, uint16_t type,
+		    uint16_t flags, void *data, int dlen, uint64_t xactid);
+int		vmbus_chan_send_sglist(struct vmbus_channel *chan,
+		    struct vmbus_gpa sg[], int sglen, void *data, int dlen,
+		    uint64_t xactid);
+int		vmbus_chan_send_prplist(struct vmbus_channel *chan,
+		    struct vmbus_gpa_range *prp, int prp_cnt, void *data,
+		    int dlen, uint64_t xactid);
+
+uint32_t	vmbus_chan_id(const struct vmbus_channel *chan);
+uint32_t	vmbus_chan_subidx(const struct vmbus_channel *chan);
+bool		vmbus_chan_is_primary(const struct vmbus_channel *chan);
+bool		vmbus_chan_is_revoked(const struct vmbus_channel *chan);
+const struct hyperv_guid *
+		vmbus_chan_guid_inst(const struct vmbus_channel *chan);
+int		vmbus_chan_prplist_nelem(int br_size, int prpcnt_max,
+		    int dlen_max);
+bool		vmbus_chan_rx_empty(const struct vmbus_channel *chan);
+bool		vmbus_chan_tx_empty(const struct vmbus_channel *chan);
+struct taskqueue *
+		vmbus_chan_mgmt_tq(const struct vmbus_channel *chan);
+
+void		vmbus_chan_poll_enable(struct vmbus_channel *chan,
+		    u_int pollhz);
+void		vmbus_chan_poll_disable(struct vmbus_channel *chan);
+
+#endif	/* !_VMBUS_H_ */


Property changes on: trunk/sys/dev/hyperv/include/vmbus.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
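
The EISCONN contract documented above is the subtle part of this API: on that
error the ring buffer memory is still mapped by the hypervisor and must be
leaked rather than freed.  A minimal caller-side sketch (my_chan_cb, sc and
br_dma are placeholders, not part of this commit):

	error = vmbus_chan_open_br(chan, &cbr, NULL, 0, my_chan_cb, sc);
	if (error == EISCONN) {
		/*
		 * The bufring is still connected to the hypervisor;
		 * the memory must NOT be freed or reused.
		 */
	} else if (error != 0) {
		/* Fully disconnected; reclaiming the memory is safe. */
		hyperv_dmamem_free(&br_dma, cbr.cbr);
	}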
Added: trunk/sys/dev/hyperv/include/vmbus_xact.h
===================================================================
--- trunk/sys/dev/hyperv/include/vmbus_xact.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/include/vmbus_xact.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,67 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/include/vmbus_xact.h 310761 2016-12-29 07:27:13Z sephe $
+ */
+
+#ifndef _VMBUS_XACT_H_
+#define _VMBUS_XACT_H_
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <machine/bus.h>
+
+struct vmbus_xact;
+struct vmbus_xact_ctx;
+
+struct vmbus_xact_ctx	*vmbus_xact_ctx_create(bus_dma_tag_t dtag,
+			    size_t req_size, size_t resp_size,
+			    size_t priv_size);
+void			vmbus_xact_ctx_destroy(struct vmbus_xact_ctx *ctx);
+bool			vmbus_xact_ctx_orphan(struct vmbus_xact_ctx *ctx);
+
+struct vmbus_xact	*vmbus_xact_get(struct vmbus_xact_ctx *ctx,
+			    size_t req_len);
+void			vmbus_xact_put(struct vmbus_xact *xact);
+
+void			*vmbus_xact_req_data(const struct vmbus_xact *xact);
+bus_addr_t		vmbus_xact_req_paddr(const struct vmbus_xact *xact);
+void			*vmbus_xact_priv(const struct vmbus_xact *xact,
+			    size_t priv_len);
+void			vmbus_xact_activate(struct vmbus_xact *xact);
+void			vmbus_xact_deactivate(struct vmbus_xact *xact);
+const void		*vmbus_xact_wait(struct vmbus_xact *xact,
+			    size_t *resp_len);
+const void		*vmbus_xact_busywait(struct vmbus_xact *xact,
+			    size_t *resp_len);
+const void		*vmbus_xact_poll(struct vmbus_xact *xact,
+			    size_t *resp_len);
+void			vmbus_xact_wakeup(struct vmbus_xact *xact,
+			    const void *data, size_t dlen);
+void			vmbus_xact_ctx_wakeup(struct vmbus_xact_ctx *ctx,
+			    const void *data, size_t dlen);
+
+#endif	/* !_VMBUS_XACT_H_ */


Property changes on: trunk/sys/dev/hyperv/include/vmbus_xact.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
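
The intended request/response flow for this API is get, activate, send with
VMBUS_CHANPKT_FLAG_RC, wait, put.  A minimal sketch follows (my_req, my_resp,
ctx and chan are placeholders; hv_kbd_connect_vsp() later in this commit is a
concrete instance):

	struct vmbus_xact *xact;
	struct my_req *req;
	const struct my_resp *resp;
	size_t resplen;

	xact = vmbus_xact_get(ctx, sizeof(*req));
	req = vmbus_xact_req_data(xact);
	/* ... fill in *req ... */
	vmbus_xact_activate(xact);
	if (vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND,
	    VMBUS_CHANPKT_FLAG_RC, req, sizeof(*req),
	    (uint64_t)(uintptr_t)xact) != 0) {
		vmbus_xact_deactivate(xact);	/* send failed; abort xact */
	} else {
		resp = vmbus_chan_xact_wait(chan, xact, &resplen, true);
		/* ... validate resplen, consume *resp ... */
	}
	vmbus_xact_put(xact);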
Added: trunk/sys/dev/hyperv/input/hv_kbd.c
===================================================================
--- trunk/sys/dev/hyperv/input/hv_kbd.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/input/hv_kbd.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,565 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2017 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/input/hv_kbd.c 317823 2017-05-05 06:00:32Z sephe $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/lock.h>
+#include <sys/taskqueue.h>
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/kthread.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysproto.h>
+#include <sys/sema.h>
+#include <sys/signal.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+#include <sys/mutex.h>
+#include <sys/callout.h>
+
+#include <sys/kbio.h>
+#include <dev/kbd/kbdreg.h>
+#include <dev/kbd/kbdtables.h>
+
+#include "dev/hyperv/input/hv_kbdc.h"
+
+#define HVKBD_MTX_LOCK(_m) do {		\
+	mtx_lock(_m);			\
+} while (0)
+
+#define HVKBD_MTX_UNLOCK(_m) do {	\
+	mtx_unlock(_m);			\
+} while (0)
+
+#define HVKBD_MTX_ASSERT(_m, _t) do {	\
+	mtx_assert(_m, _t);		\
+} while (0)
+
+#define	HVKBD_LOCK()		HVKBD_MTX_LOCK(&Giant)
+#define	HVKBD_UNLOCK()		HVKBD_MTX_UNLOCK(&Giant)
+#define	HVKBD_LOCK_ASSERT()	HVKBD_MTX_ASSERT(&Giant, MA_OWNED)
+
+#define HVKBD_FLAG_POLLING	0x00000002
+
+/* early keyboard probe, not supported */
+static int
+hvkbd_configure(int flags)
+{
+	return (0);
+}
+
+/* detect a keyboard, not used */
+static int
+hvkbd_probe(int unit, void *arg, int flags)
+{
+	return (ENXIO);
+}
+
+/* reset and initialize the device, not used */
+static int
+hvkbd_init(int unit, keyboard_t **kbdp, void *arg, int flags)
+{
+	DEBUG_HVKBD(*kbdp, "%s\n", __func__);
+	return (ENXIO);
+}
+
+/* test the interface to the device, not used */
+static int
+hvkbd_test_if(keyboard_t *kbd)
+{
+	DEBUG_HVKBD(kbd, "%s\n", __func__);
+	return (0);
+}
+
+/* finish using this keyboard, not used */
+static int
+hvkbd_term(keyboard_t *kbd)
+{
+	DEBUG_HVKBD(kbd, "%s\n", __func__);
+	return (ENXIO);
+}
+
+/* keyboard interrupt routine, not used */
+static int
+hvkbd_intr(keyboard_t *kbd, void *arg)
+{
+	DEBUG_HVKBD(kbd, "%s\n", __func__);
+	return (0);
+}
+
+/* lock the access to the keyboard, not used */
+static int
+hvkbd_lock(keyboard_t *kbd, int lock)
+{
+	DEBUG_HVKBD(kbd, "%s\n", __func__);
+	return (1);
+}
+
+/* save the internal state, not used */
+static int
+hvkbd_get_state(keyboard_t *kbd, void *buf, size_t len)
+{
+	DEBUG_HVKBD(kbd, "%s\n", __func__);
+	return (len == 0) ? 1 : -1;
+}
+
+/* set the internal state, not used */
+static int
+hvkbd_set_state(keyboard_t *kbd, void *buf, size_t len)
+{
+	DEBUG_HVKBD(kbd, "%s\n", __func__);
+	return (EINVAL);
+}
+
+static int
+hvkbd_poll(keyboard_t *kbd, int on)
+{
+	hv_kbd_sc *sc = kbd->kb_data;
+
+	HVKBD_LOCK();
+	/*
+	 * Keep a reference count on polling to allow recursive
+	 * cngrab() during a panic for example.
+	 */
+	if (on)
+		sc->sc_polling++;
+	else if (sc->sc_polling > 0)
+		sc->sc_polling--;
+
+	if (sc->sc_polling != 0) {
+		sc->sc_flags |= HVKBD_FLAG_POLLING;
+	} else {
+		sc->sc_flags &= ~HVKBD_FLAG_POLLING;
+	}
+	HVKBD_UNLOCK();
+	return (0);
+}
+
+/*
+ * Enable the access to the device; until this function is called,
+ * the client cannot read from the keyboard.
+ */
+static int
+hvkbd_enable(keyboard_t *kbd)
+{
+	HVKBD_LOCK();
+	KBD_ACTIVATE(kbd);
+	HVKBD_UNLOCK();
+	return (0);
+}
+
+/* disallow the access to the device */
+static int
+hvkbd_disable(keyboard_t *kbd)
+{
+	DEBUG_HVKBD(kbd, "%s\n", __func__);
+	HVKBD_LOCK();
+	KBD_DEACTIVATE(kbd);
+	HVKBD_UNLOCK();
+	return (0);
+}
+
+static void
+hvkbd_do_poll(hv_kbd_sc *sc, uint8_t wait)
+{
+	while (!hv_kbd_prod_is_ready(sc)) {
+		hv_kbd_read_channel(sc->hs_chan, sc);
+		if (!wait)
+			break;
+	}
+}
+
+/* check if data is waiting */
+/* Currently unused. */
+static int
+hvkbd_check(keyboard_t *kbd)
+{
+	DEBUG_HVKBD(kbd, "%s\n", __func__);
+	return (0);
+}
+
+/* check if char is waiting */
+static int
+hvkbd_check_char_locked(keyboard_t *kbd)
+{
+	HVKBD_LOCK_ASSERT();
+	if (!KBD_IS_ACTIVE(kbd))
+		return (FALSE);
+
+	hv_kbd_sc *sc = kbd->kb_data;
+	if (sc->sc_flags & HVKBD_FLAG_POLLING)
+		hvkbd_do_poll(sc, 0);
+	if (hv_kbd_prod_is_ready(sc)) {
+		return (TRUE);
+	}
+	return (FALSE);
+}
+
+static int
+hvkbd_check_char(keyboard_t *kbd)
+{
+	int result;
+
+	HVKBD_LOCK();
+	result = hvkbd_check_char_locked(kbd);
+	HVKBD_UNLOCK();
+
+	return (result);
+}
+
+/* read char from the keyboard */
+static uint32_t
+hvkbd_read_char_locked(keyboard_t *kbd, int wait)
+{
+	uint32_t scancode = NOKEY;
+	keystroke ks;
+	hv_kbd_sc *sc = kbd->kb_data;
+	HVKBD_LOCK_ASSERT();
+
+	if (!KBD_IS_ACTIVE(kbd) || !hv_kbd_prod_is_ready(sc))
+		return (NOKEY);
+	if (sc->sc_mode == K_RAW) {
+		if (hv_kbd_fetch_top(sc, &ks)) {
+			return (NOKEY);
+		}
+		if ((ks.info & IS_E0) || (ks.info & IS_E1)) {
+			/**
+			 * Emulate the generation of the E0 or E1 scancode;
+			 * the real scancode will be consumed on the next read.
+			 */
+			if (ks.info & IS_E0) {
+				scancode = XTKBD_EMUL0;
+				ks.info &= ~IS_E0;
+			} else if (ks.info & IS_E1) {
+				scancode = XTKBD_EMUL1;
+				ks.info &= ~IS_E1;
+			}
+			/**
+			 * Change the top item to avoid encountering
+			 * E0 or E1 twice.
+			 */
+			hv_kbd_modify_top(sc, &ks);
+		} else if (ks.info & IS_UNICODE) {
+			/**
+			 * XXX: Hyperv host send unicode to VM through
+			 * 'Type clipboard text', the mapping from
+			 * unicode to scancode depends on the keymap.
+			 * It is so complicated that we do not plan to
+			 * support it yet.
+			 */
+			if (bootverbose)
+				device_printf(sc->dev, "Unsupported unicode\n");
+			hv_kbd_remove_top(sc);
+			return (NOKEY);
+		} else {
+			scancode = ks.makecode;
+			if (ks.info & IS_BREAK) {
+				scancode |= XTKBD_RELEASE;
+			}
+			hv_kbd_remove_top(sc);
+		}
+	} else {
+		if (bootverbose)
+			device_printf(sc->dev, "Unsupported mode: %d\n", sc->sc_mode);
+	}
+	++kbd->kb_count;
+	DEBUG_HVKBD(kbd, "read scan: 0x%x\n", scancode);
+	return scancode;
+}
+
+/* Currently wait is always false. */
+static uint32_t
+hvkbd_read_char(keyboard_t *kbd, int wait)
+{
+	uint32_t keycode;
+
+	HVKBD_LOCK();
+	keycode = hvkbd_read_char_locked(kbd, wait);
+	HVKBD_UNLOCK();
+
+	return (keycode);
+}
+
+/* clear the internal state of the keyboard */
+static void
+hvkbd_clear_state(keyboard_t *kbd)
+{
+	hv_kbd_sc *sc = kbd->kb_data;
+	sc->sc_state &= LOCK_MASK;	/* preserve locking key state */
+	sc->sc_flags &= ~HVKBD_FLAG_POLLING;
+}
+
+static int
+hvkbd_ioctl_locked(keyboard_t *kbd, u_long cmd, caddr_t arg)
+{
+	int i;
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+    defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
+        int ival;
+#endif
+	hv_kbd_sc *sc = kbd->kb_data;
+	switch (cmd) {
+	case KDGKBMODE:
+		*(int *)arg = sc->sc_mode;
+		break;
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+    defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
+	case _IO('K', 7):
+		ival = IOCPARM_IVAL(arg);
+		arg = (caddr_t)&ival;
+		/* FALLTHROUGH */
+#endif
+	case KDSKBMODE:		/* set keyboard mode */
+		DEBUG_HVKBD(kbd, "expected mode: %x\n", *(int *)arg);
+		switch (*(int *)arg) {
+		case K_XLATE:
+			if (sc->sc_mode != K_XLATE) {
+				/* make lock key state and LED state match */
+				sc->sc_state &= ~LOCK_MASK;
+				sc->sc_state |= KBD_LED_VAL(kbd);
+			}
+			/* FALLTHROUGH */
+		case K_RAW:
+		case K_CODE:
+			if (sc->sc_mode != *(int *)arg) {
+				DEBUG_HVKBD(kbd, "mod changed to %x\n", *(int *)arg);
+				if ((sc->sc_flags & HVKBD_FLAG_POLLING) == 0)
+					hvkbd_clear_state(kbd);
+				sc->sc_mode = *(int *)arg;
+			}
+			break;
+		default:
+			return (EINVAL);
+		}
+		break;
+	case KDGKBSTATE:	/* get lock key state */
+		*(int *)arg = sc->sc_state & LOCK_MASK;
+		break;
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+    defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
+	case _IO('K', 20):
+		ival = IOCPARM_IVAL(arg);
+		arg = (caddr_t)&ival;
+		/* FALLTHROUGH */
+#endif
+	case KDSKBSTATE:		/* set lock key state */
+		if (*(int *)arg & ~LOCK_MASK) {
+			return (EINVAL);
+		}
+		sc->sc_state &= ~LOCK_MASK;
+		sc->sc_state |= *(int *)arg;
+		return hvkbd_ioctl_locked(kbd, KDSETLED, arg);
+	case KDGETLED:			/* get keyboard LED */
+		*(int *)arg = KBD_LED_VAL(kbd);
+		break;
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+    defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
+	case _IO('K', 66):
+		ival = IOCPARM_IVAL(arg);
+		arg = (caddr_t)&ival;
+		/* FALLTHROUGH */
+#endif
+	case KDSETLED:			/* set keyboard LED */
+		/* NOTE: lock key state in "sc_state" won't be changed */
+		if (*(int *)arg & ~LOCK_MASK)
+			return (EINVAL);
+
+		i = *(int *)arg;
+
+		/* replace CAPS LED with ALTGR LED for ALTGR keyboards */
+		if (sc->sc_mode == K_XLATE &&
+		    kbd->kb_keymap->n_keys > ALTGR_OFFSET) {
+			if (i & ALKED)
+				i |= CLKED;
+			else
+				i &= ~CLKED;
+		}
+		if (KBD_HAS_DEVICE(kbd)) {
+			DEBUG_HVSC(sc, "setled 0x%x\n", *(int *)arg);
+		}
+
+		KBD_LED_VAL(kbd) = *(int *)arg;
+		break;
+	default:
+		return (genkbd_commonioctl(kbd, cmd, arg));
+	}
+	return (0);
+}
+
+/* some useful control functions */
+static int
+hvkbd_ioctl(keyboard_t *kbd, u_long cmd, caddr_t arg)
+{
+	DEBUG_HVKBD(kbd, "%s: %lx start\n", __func__, cmd);
+	HVKBD_LOCK();
+	int ret = hvkbd_ioctl_locked(kbd, cmd, arg);
+	HVKBD_UNLOCK();
+	DEBUG_HVKBD(kbd, "%s: %lx end %d\n", __func__, cmd, ret);
+	return (ret);
+}
+
+/* read one byte from the keyboard if it's allowed */
+/* Currently unused. */
+static int
+hvkbd_read(keyboard_t *kbd, int wait)
+{
+	DEBUG_HVKBD(kbd, "%s\n", __func__);
+	HVKBD_LOCK_ASSERT();
+	if (!KBD_IS_ACTIVE(kbd))
+		return (-1);
+	return hvkbd_read_char_locked(kbd, wait);
+}
+
+static keyboard_switch_t hvkbdsw = {
+	hvkbd_probe,		/* not used */
+	hvkbd_init,
+	hvkbd_term,		/* not used */
+	hvkbd_intr,		/* not used */
+	hvkbd_test_if,		/* not used */
+	hvkbd_enable,
+	hvkbd_disable,
+	hvkbd_read,
+	hvkbd_check,
+	hvkbd_read_char,
+	hvkbd_check_char,
+	hvkbd_ioctl,
+	hvkbd_lock,		/* not used */
+	hvkbd_clear_state,
+	hvkbd_get_state,	/* not used */
+	hvkbd_set_state,	/* not used */
+	genkbd_get_fkeystr,
+	hvkbd_poll,
+	genkbd_diag,
+};
+
+KEYBOARD_DRIVER(hvkbd, hvkbdsw, hvkbd_configure);
+
+void
+hv_kbd_intr(hv_kbd_sc *sc)
+{
+	uint32_t c;
+	if ((sc->sc_flags & HVKBD_FLAG_POLLING) != 0)
+		return;
+
+	if (KBD_IS_ACTIVE(&sc->sc_kbd) &&
+	    KBD_IS_BUSY(&sc->sc_kbd)) {
+		/* let the callback function process the input */
+		(sc->sc_kbd.kb_callback.kc_func) (&sc->sc_kbd, KBDIO_KEYINPUT,
+		    sc->sc_kbd.kb_callback.kc_arg);
+	} else {
+		/* read and discard the input, no one is waiting for it */
+		do {
+			c = hvkbd_read_char(&sc->sc_kbd, 0);
+		} while (c != NOKEY);
+	}
+}
+
+int
+hvkbd_driver_load(module_t mod, int what, void *arg)
+{
+	switch (what) {
+	case MOD_LOAD:
+		kbd_add_driver(&hvkbd_kbd_driver);
+		break;
+	case MOD_UNLOAD:
+		kbd_delete_driver(&hvkbd_kbd_driver);
+		break;
+	}
+	return (0);
+}
+
+int
+hv_kbd_drv_attach(device_t dev)
+{
+	hv_kbd_sc *sc = device_get_softc(dev);
+	int unit = device_get_unit(dev);
+	keyboard_t *kbd = &sc->sc_kbd;
+	keyboard_switch_t *sw;
+	sw = kbd_get_switch(HVKBD_DRIVER_NAME);
+	if (sw == NULL) {
+		return (ENXIO);
+	}
+
+	kbd_init_struct(kbd, HVKBD_DRIVER_NAME, KB_OTHER, unit, 0, 0, 0);
+	kbd->kb_data = (void *)sc;
+	kbd_set_maps(kbd, &key_map, &accent_map, fkey_tab, nitems(fkey_tab));
+	KBD_FOUND_DEVICE(kbd);
+	hvkbd_clear_state(kbd);
+	KBD_PROBE_DONE(kbd);
+	KBD_INIT_DONE(kbd);
+	sc->sc_mode = K_RAW;
+	(*sw->enable)(kbd);
+
+	if (kbd_register(kbd) < 0) {
+		goto detach;
+	}
+	KBD_CONFIG_DONE(kbd);
+#ifdef KBD_INSTALL_CDEV
+        if (kbd_attach(kbd)) {
+		goto detach;
+	}
+#endif
+	if (bootverbose) {
+		genkbd_diag(kbd, bootverbose);
+	}
+	return (0);
+detach:
+	hv_kbd_drv_detach(dev);
+	return (ENXIO);
+}
+
+int
+hv_kbd_drv_detach(device_t dev)
+{
+	int error = 0;
+	hv_kbd_sc *sc = device_get_softc(dev);
+	hvkbd_disable(&sc->sc_kbd);
+	if (KBD_IS_CONFIGURED(&sc->sc_kbd)) {
+		error = kbd_unregister(&sc->sc_kbd);
+		if (error) {
+			device_printf(dev, "WARNING: kbd_unregister() "
+			    "returned non-zero! (ignored)\n");
+		}
+	}
+#ifdef KBD_INSTALL_CDEV
+	error = kbd_detach(&sc->sc_kbd);
+#endif
+	return (error);
+}
+


Property changes on: trunk/sys/dev/hyperv/input/hv_kbd.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
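
Worked example of the E0/E1 emulation in hvkbd_read_char_locked() above
(scancodes illustrative): an extended key such as Right-Ctrl arrives as a
single keystroke with IS_E0 set and makecode 0x1d.  The first read returns
XTKBD_EMUL0 (0xe0) and clears IS_E0 via hv_kbd_modify_top(); the second read
returns 0x1d and pops the entry.  On release, IS_BREAK additionally ORs in
XTKBD_RELEASE, so the sequence becomes 0xe0 0x9d.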
Added: trunk/sys/dev/hyperv/input/hv_kbdc.c
===================================================================
--- trunk/sys/dev/hyperv/input/hv_kbdc.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/input/hv_kbdc.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,531 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2017 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/input/hv_kbdc.c 320766 2017-07-07 09:38:40Z sephe $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/lock.h>
+#include <sys/taskqueue.h>
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/mutex.h>
+
+#include <sys/kbio.h>
+#include <dev/kbd/kbdreg.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/utilities/hv_utilreg.h>
+#include <dev/hyperv/utilities/vmbus_icreg.h>
+#include <dev/hyperv/utilities/vmbus_icvar.h>
+#include <dev/hyperv/include/vmbus_xact.h>
+
+#include "dev/hyperv/input/hv_kbdc.h"
+#include "vmbus_if.h"
+
+#define HV_KBD_VER_MAJOR	(1)
+#define HV_KBD_VER_MINOR	(0)
+
+#define HV_KBD_VER		(HV_KBD_VER_MINOR | (HV_KBD_VER_MAJOR) << 16)
+
+#define HV_KBD_PROTO_ACCEPTED	(1)
+
+#define HV_BUFF_SIZE		(4*PAGE_SIZE)
+#define HV_KBD_RINGBUFF_SEND_SZ	(10*PAGE_SIZE)
+#define HV_KBD_RINGBUFF_RECV_SZ (10*PAGE_SIZE)
+
+enum hv_kbd_msg_type_t {
+	HV_KBD_PROTO_REQUEST        = 1,
+	HV_KBD_PROTO_RESPONSE       = 2,
+	HV_KBD_PROTO_EVENT          = 3,
+	HV_KBD_PROTO_LED_INDICATORS = 4,
+};
+
+typedef struct hv_kbd_msg_hdr_t {
+	uint32_t type;
+} hv_kbd_msg_hdr;
+
+typedef struct hv_kbd_msg_t {
+	hv_kbd_msg_hdr hdr;
+	char data[];
+} hv_kbd_msg;
+
+typedef struct hv_kbd_proto_req_t {
+	hv_kbd_msg_hdr	hdr;
+	uint32_t	ver;
+} hv_kbd_proto_req;
+
+typedef struct hv_kbd_proto_resp_t {
+	hv_kbd_msg_hdr  hdr;
+	uint32_t	status;
+} hv_kbd_proto_resp;
+
+#define HV_KBD_PROTO_REQ_SZ	(sizeof(hv_kbd_proto_req))
+#define HV_KBD_PROTO_RESP_SZ	(sizeof(hv_kbd_proto_resp))
+
+/**
+ * The corresponding structure on the Windows host:
+ * typedef struct _HK_MESSAGE_KEYSTROKE
+ * {
+ *     HK_MESSAGE_HEADER Header;
+ *     UINT16 MakeCode;
+ *     UINT32 IsUnicode:1;
+ *     UINT32 IsBreak:1;
+ *     UINT32 IsE0:1;
+ *     UINT32 IsE1:1;
+ *     UINT32 Reserved:28;
+ * } HK_MESSAGE_KEYSTROKE
+ */
+typedef struct hv_kbd_keystroke_t {
+	hv_kbd_msg_hdr  hdr;
+	keystroke	ks;
+} hv_kbd_keystroke;
+
+static const struct vmbus_ic_desc vmbus_kbd_descs[] = {
+	{
+		.ic_guid = { .hv_guid = {
+		    0x6d, 0xad, 0x12, 0xf9, 0x17, 0x2b, 0xea, 0x48,
+		    0xbd, 0x65, 0xf9, 0x27, 0xa6, 0x1c, 0x76,  0x84} },
+		.ic_desc = "Hyper-V KBD"
+	},
+	VMBUS_IC_DESC_END
+};
+
+static int hv_kbd_attach(device_t dev);
+static int hv_kbd_detach(device_t dev);
+
+/**
+ * return 1 if the producer queue is non-empty (a keystroke is ready)
+ */
+int
+hv_kbd_prod_is_ready(hv_kbd_sc *sc)
+{
+	int ret;
+	mtx_lock(&sc->ks_mtx);
+	ret = !STAILQ_EMPTY(&sc->ks_queue);
+	mtx_unlock(&sc->ks_mtx);
+	return (ret);
+}
+
+int
+hv_kbd_produce_ks(hv_kbd_sc *sc, const keystroke *ks)
+{
+	int ret = 0;
+	keystroke_info *ksi;
+	mtx_lock(&sc->ks_mtx);
+	if (LIST_EMPTY(&sc->ks_free_list)) {
+		DEBUG_HVSC(sc, "NO buffer!\n");
+		ret = 1;
+	} else {
+		ksi = LIST_FIRST(&sc->ks_free_list);
+		LIST_REMOVE(ksi, link);
+		ksi->ks = *ks;
+		STAILQ_INSERT_TAIL(&sc->ks_queue, ksi, slink);
+	}
+	mtx_unlock(&sc->ks_mtx);
+	return (ret);
+}
+
+/**
+ * return 0 on success: copy the first item of the queue without removing it
+ */
+int
+hv_kbd_fetch_top(hv_kbd_sc *sc, keystroke *result)
+{
+	int ret = 0;
+	keystroke_info *ksi = NULL;
+	mtx_lock(&sc->ks_mtx);
+	if (STAILQ_EMPTY(&sc->ks_queue)) {
+		DEBUG_HVSC(sc, "Empty queue!\n");
+		ret = 1;
+	} else {
+		ksi = STAILQ_FIRST(&sc->ks_queue);
+		*result = ksi->ks;
+	}
+	mtx_unlock(&sc->ks_mtx);
+	return (ret);
+}
+
+/**
+ * return 0 if the top item was successfully removed
+ */
+int
+hv_kbd_remove_top(hv_kbd_sc *sc)
+{
+	int ret = 0;
+	keystroke_info *ksi = NULL;
+	mtx_lock(&sc->ks_mtx);
+	if (STAILQ_EMPTY(&sc->ks_queue)) {
+		DEBUG_HVSC(sc, "Empty queue!\n");
+		ret = 1;
+	} else {
+		ksi = STAILQ_FIRST(&sc->ks_queue);
+		STAILQ_REMOVE_HEAD(&sc->ks_queue, slink);
+		LIST_INSERT_HEAD(&sc->ks_free_list, ksi, link);
+	}
+	mtx_unlock(&sc->ks_mtx);
+	return (ret);
+}
+
+/**
+ * return 0 if the first item of the queue was successfully modified
+ */
+int
+hv_kbd_modify_top(hv_kbd_sc *sc, keystroke *top)
+{
+	int ret = 0;
+	keystroke_info *ksi = NULL;
+	mtx_lock(&sc->ks_mtx);
+	if (STAILQ_EMPTY(&sc->ks_queue)) {
+		DEBUG_HVSC(sc, "Empty queue!\n");
+		ret = 1;
+	} else {
+		ksi = STAILQ_FIRST(&sc->ks_queue);
+		ksi->ks = *top;
+	}
+	mtx_unlock(&sc->ks_mtx);
+	return (ret);
+}
+
+static int
+hv_kbd_probe(device_t dev)
+{
+	device_t bus = device_get_parent(dev);
+	const struct vmbus_ic_desc *d;
+
+	if (resource_disabled(device_get_name(dev), 0))
+		return (ENXIO);
+
+	for (d = vmbus_kbd_descs; d->ic_desc != NULL; ++d) {
+		if (VMBUS_PROBE_GUID(bus, dev, &d->ic_guid) == 0) {
+			device_set_desc(dev, d->ic_desc);
+			return (BUS_PROBE_DEFAULT);
+		}
+	}
+	return (ENXIO);
+}
+
+static void
+hv_kbd_on_response(hv_kbd_sc *sc, struct vmbus_chanpkt_hdr *pkt)
+{
+	struct vmbus_xact_ctx *xact = sc->hs_xact_ctx;
+	if (xact != NULL) {
+		DEBUG_HVSC(sc, "hvkbd is ready\n");
+		vmbus_xact_ctx_wakeup(xact, VMBUS_CHANPKT_CONST_DATA(pkt),
+		    VMBUS_CHANPKT_DATALEN(pkt));
+	}
+}
+
+static void
+hv_kbd_on_received(hv_kbd_sc *sc, struct vmbus_chanpkt_hdr *pkt)
+{
+
+	const hv_kbd_msg *msg = VMBUS_CHANPKT_CONST_DATA(pkt);
+	const hv_kbd_proto_resp *resp =
+	    VMBUS_CHANPKT_CONST_DATA(pkt);
+	const hv_kbd_keystroke *keystroke =
+	    VMBUS_CHANPKT_CONST_DATA(pkt);
+	uint32_t msg_len = VMBUS_CHANPKT_DATALEN(pkt);
+	enum hv_kbd_msg_type_t msg_type;
+	uint32_t info;
+	uint16_t scan_code;
+
+	if (msg_len <= sizeof(hv_kbd_msg)) {
+		device_printf(sc->dev, "Illegal packet\n");
+		return;
+	}
+	msg_type = msg->hdr.type;
+	switch (msg_type) {
+		case HV_KBD_PROTO_RESPONSE:
+			hv_kbd_on_response(sc, pkt);
+			DEBUG_HVSC(sc, "keyboard resp: 0x%x\n",
+			    resp->status);
+			break;
+		case HV_KBD_PROTO_EVENT:
+			info = keystroke->ks.info;
+			scan_code = keystroke->ks.makecode;
+			DEBUG_HVSC(sc, "keystroke info: 0x%x, scan: 0x%x\n",
+			    info, scan_code);
+			hv_kbd_produce_ks(sc, &keystroke->ks);
+			hv_kbd_intr(sc);
+			break;
+		default:
+			break;
+	}
+}
+
+void
+hv_kbd_read_channel(struct vmbus_channel *channel, void *context)
+{
+	uint8_t *buf;
+	uint32_t buflen = 0;
+	int ret = 0;
+
+	hv_kbd_sc *sc = (hv_kbd_sc*)context;
+	buf = sc->buf;
+	buflen = sc->buflen;
+	for (;;) {
+		struct vmbus_chanpkt_hdr *pkt = (struct vmbus_chanpkt_hdr *)buf;
+		int rxed = buflen;
+
+		ret = vmbus_chan_recv_pkt(channel, pkt, &rxed);
+		if (__predict_false(ret == ENOBUFS)) {
+			buflen = sc->buflen * 2;
+			while (buflen < rxed)
+				buflen *= 2;
+			buf = malloc(buflen, M_DEVBUF, M_WAITOK | M_ZERO);
+			device_printf(sc->dev, "expand recvbuf %d -> %d\n",
+			    sc->buflen, buflen);
+			free(sc->buf, M_DEVBUF);
+			sc->buf = buf;
+			sc->buflen = buflen;
+			continue;
+		} else if (__predict_false(ret == EAGAIN)) {
+			/* No more channel packets; done! */
+			break;
+		}
+		KASSERT(!ret, ("vmbus_chan_recv_pkt failed: %d", ret));
+
+		DEBUG_HVSC(sc, "event: 0x%x\n", pkt->cph_type);
+		switch (pkt->cph_type) {
+		case VMBUS_CHANPKT_TYPE_COMP:
+		case VMBUS_CHANPKT_TYPE_RXBUF:
+			device_printf(sc->dev, "unhandled event: %d\n",
+			    pkt->cph_type);
+			break;
+		case VMBUS_CHANPKT_TYPE_INBAND:
+			hv_kbd_on_received(sc, pkt);
+			break;
+		default:
+			device_printf(sc->dev, "unknown event: %d\n",
+			    pkt->cph_type);
+			break;
+		}
+	}
+}
+
+static int
+hv_kbd_connect_vsp(hv_kbd_sc *sc)
+{
+	int ret;
+	size_t resplen;
+	struct vmbus_xact *xact;
+	hv_kbd_proto_req *req;
+	const hv_kbd_proto_resp *resp;
+
+	xact = vmbus_xact_get(sc->hs_xact_ctx, sizeof(*req));
+	if (xact == NULL) {
+		device_printf(sc->dev, "no xact for kbd init");
+		return (ENODEV);
+	}
+	req = vmbus_xact_req_data(xact);
+	req->hdr.type = HV_KBD_PROTO_REQUEST;
+	req->ver = HV_KBD_VER;
+
+	vmbus_xact_activate(xact);
+	ret = vmbus_chan_send(sc->hs_chan,
+		VMBUS_CHANPKT_TYPE_INBAND,
+		VMBUS_CHANPKT_FLAG_RC,
+		req, sizeof(hv_kbd_proto_req),
+		(uint64_t)(uintptr_t)xact);
+	if (ret) {
+		device_printf(sc->dev, "fail to send\n");
+		vmbus_xact_deactivate(xact);
+		return (ret);
+	}
+	resp = vmbus_chan_xact_wait(sc->hs_chan, xact, &resplen, true);
+	if (resplen < HV_KBD_PROTO_RESP_SZ) {
+		device_printf(sc->dev, "hv_kbd init communicate failed\n");
+		ret = ENODEV;
+		goto clean;
+	}
+
+	if (!(resp->status & HV_KBD_PROTO_ACCEPTED)) {
+		device_printf(sc->dev, "hv_kbd protocol request failed\n");
+		ret = ENODEV;
+	}
+clean:
+	vmbus_xact_put(xact);
+	DEBUG_HVSC(sc, "finish connect vsp\n");
+	return (ret);
+}
+
+static int
+hv_kbd_attach1(device_t dev, vmbus_chan_callback_t cb)
+{
+	int ret;
+	hv_kbd_sc *sc;
+
+	sc = device_get_softc(dev);
+	sc->buflen = HV_BUFF_SIZE;
+	sc->buf = malloc(sc->buflen, M_DEVBUF, M_WAITOK | M_ZERO);
+	vmbus_chan_set_readbatch(sc->hs_chan, false);
+	ret = vmbus_chan_open(
+		sc->hs_chan,
+		HV_KBD_RINGBUFF_SEND_SZ,
+		HV_KBD_RINGBUFF_RECV_SZ,
+		NULL, 0,
+		cb,
+		sc);
+	if (ret != 0) {
+		free(sc->buf, M_DEVBUF);
+	}
+	return (ret);
+}
+
+static int
+hv_kbd_detach1(device_t dev)
+{
+	hv_kbd_sc *sc = device_get_softc(dev);
+	vmbus_chan_close(vmbus_get_channel(dev));
+	free(sc->buf, M_DEVBUF);
+	return (0);
+}
+
+static void
+hv_kbd_init(hv_kbd_sc *sc)
+{
+	const int max_list = 16;
+	int i;
+	keystroke_info *ksi;
+
+	mtx_init(&sc->ks_mtx, "hv_kbdc mutex", NULL, MTX_DEF);
+	LIST_INIT(&sc->ks_free_list);
+	STAILQ_INIT(&sc->ks_queue);
+	for (i = 0; i < max_list; i++) {
+		ksi = malloc(sizeof(keystroke_info),
+		    M_DEVBUF, M_WAITOK|M_ZERO);
+		LIST_INSERT_HEAD(&sc->ks_free_list, ksi, link);
+	}
+}
+
+static void
+hv_kbd_fini(hv_kbd_sc *sc)
+{
+	keystroke_info *ksi;
+	while (!LIST_EMPTY(&sc->ks_free_list)) {
+		ksi = LIST_FIRST(&sc->ks_free_list);
+		LIST_REMOVE(ksi, link);
+		free(ksi, M_DEVBUF);
+	}
+	while (!STAILQ_EMPTY(&sc->ks_queue)) {
+		ksi = STAILQ_FIRST(&sc->ks_queue);
+		STAILQ_REMOVE_HEAD(&sc->ks_queue, slink);
+		free(ksi, M_DEVBUF);
+	}
+	mtx_destroy(&sc->ks_mtx);
+}
+
+static void
+hv_kbd_sysctl(device_t dev)
+{
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
+	hv_kbd_sc *sc;
+
+	sc = device_get_softc(dev);
+	ctx = device_get_sysctl_ctx(dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "debug", CTLFLAG_RW,
+	    &sc->debug, 0, "debug hyperv keyboard");
+}
+
+static int
+hv_kbd_attach(device_t dev)
+{
+	int error = 0;
+	hv_kbd_sc *sc;
+
+	sc = device_get_softc(dev);
+	sc->hs_chan = vmbus_get_channel(dev);
+	sc->dev = dev;
+	hv_kbd_init(sc);
+	sc->hs_xact_ctx = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
+	    HV_KBD_PROTO_REQ_SZ, HV_KBD_PROTO_RESP_SZ, 0);
+	if (sc->hs_xact_ctx == NULL) {
+		error = ENOMEM;
+		goto failed;
+	}
+
+	error = hv_kbd_attach1(dev, hv_kbd_read_channel);
+	if (error)
+		goto failed;
+	error = hv_kbd_connect_vsp(sc);
+	if (error)
+		goto failed;
+
+	error = hv_kbd_drv_attach(dev);
+	if (error)
+		goto failed;
+	hv_kbd_sysctl(dev);
+	return (0);
+failed:
+	hv_kbd_detach(dev);
+	return (error);
+}
+
+static int
+hv_kbd_detach(device_t dev)
+{
+	int ret;
+	hv_kbd_sc *sc = device_get_softc(dev);
+	hv_kbd_fini(sc);
+	if (sc->hs_xact_ctx != NULL)
+		vmbus_xact_ctx_destroy(sc->hs_xact_ctx);
+	ret = hv_kbd_detach1(dev);
+	if (ret)
+		device_printf(dev, "Failed to detach\n");
+	return hv_kbd_drv_detach(dev);
+}
+
+static device_method_t kbd_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe, hv_kbd_probe),
+	DEVMETHOD(device_attach, hv_kbd_attach),
+	DEVMETHOD(device_detach, hv_kbd_detach),
+	{ 0, 0 }
+};
+
+static driver_t kbd_driver = {HVKBD_DRIVER_NAME, kbd_methods, sizeof(hv_kbd_sc)};
+
+static devclass_t kbd_devclass;
+
+DRIVER_MODULE(hv_kbd, vmbus, kbd_driver, kbd_devclass, hvkbd_driver_load, NULL);
+MODULE_VERSION(hv_kbd, 1);
+MODULE_DEPEND(hv_kbd, vmbus, 1, 1, 1);


Property changes on: trunk/sys/dev/hyperv/input/hv_kbdc.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
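
The fetch/modify/remove trio above gives the consumer a peek-and-rewrite view
of the queue head, which is what lets hv_kbd.c split one E0-flagged keystroke
across two reads.  A minimal consumer sketch, mirroring hvkbd_read_char_locked():

	keystroke ks;

	if (hv_kbd_prod_is_ready(sc) && hv_kbd_fetch_top(sc, &ks) == 0) {
		if (ks.info & IS_E0) {
			/* Strip the prefix but keep the entry queued. */
			ks.info &= ~IS_E0;
			hv_kbd_modify_top(sc, &ks);
		} else {
			/* Entry fully consumed; recycle to the free list. */
			hv_kbd_remove_top(sc);
		}
	}

The DEBUG_HVSC() output in these paths is gated on the per-device "debug"
sysctl created in hv_kbd_sysctl(); the resulting OID is presumably
dev.hvkbd.<unit>.debug, following the driver name above.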
Added: trunk/sys/dev/hyperv/input/hv_kbdc.h
===================================================================
--- trunk/sys/dev/hyperv/input/hv_kbdc.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/input/hv_kbdc.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,105 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2017 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/input/hv_kbdc.h 316515 2017-04-05 05:01:23Z sephe $
+ */
+
+#ifndef _HV_KBD_H
+#define _HV_KBD_H
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+
+#include <dev/kbd/kbdreg.h>
+
+#define HVKBD_DRIVER_NAME	"hvkbd"
+#define IS_UNICODE		(1)
+#define IS_BREAK		(2)
+#define IS_E0			(4)
+#define IS_E1			(8)
+
+#define XTKBD_EMUL0		(0xe0)
+#define XTKBD_EMUL1		(0xe1)
+#define XTKBD_RELEASE		(0x80)
+
+#define DEBUG_HVSC(sc, ...) do {			\
+	if (sc->debug > 0) {				\
+		device_printf(sc->dev, __VA_ARGS__);	\
+	}						\
+} while (0)
+#define DEBUG_HVKBD(kbd, ...) do {			\
+	hv_kbd_sc *sc = (kbd)->kb_data;			\
+	DEBUG_HVSC(sc, __VA_ARGS__);				\
+} while (0)
+
+struct vmbus_channel;
+struct vmbus_xact_ctx;
+
+typedef struct keystroke_t {
+	uint16_t			makecode;
+	uint32_t			info;
+} keystroke;
+
+typedef struct keystroke_info {
+	LIST_ENTRY(keystroke_info)	link;
+	STAILQ_ENTRY(keystroke_info)	slink;
+	keystroke			ks;
+} keystroke_info;
+
+typedef struct hv_kbd_sc_t {
+	struct vmbus_channel		*hs_chan;
+	device_t			dev;
+	struct vmbus_xact_ctx		*hs_xact_ctx;
+	int32_t				buflen;
+	uint8_t				*buf;
+
+	struct mtx			ks_mtx;
+	LIST_HEAD(, keystroke_info)	ks_free_list;
+	STAILQ_HEAD(, keystroke_info)	ks_queue;	/* keystroke info queue */
+
+	keyboard_t			sc_kbd;
+	int				sc_mode;
+	int				sc_state;
+	int				sc_polling;	/* polling recursion count */
+	uint32_t			sc_flags;
+	int				debug;
+} hv_kbd_sc;
+
+int	hv_kbd_produce_ks(hv_kbd_sc *sc, const keystroke *ks);
+int	hv_kbd_fetch_top(hv_kbd_sc *sc, keystroke *top);
+int	hv_kbd_modify_top(hv_kbd_sc *sc, keystroke *top);
+int	hv_kbd_remove_top(hv_kbd_sc *sc);
+int	hv_kbd_prod_is_ready(hv_kbd_sc *sc);
+void	hv_kbd_read_channel(struct vmbus_channel *, void *);
+
+int	hv_kbd_drv_attach(device_t dev);
+int	hv_kbd_drv_detach(device_t dev);
+
+int	hvkbd_driver_load(module_t, int, void *);
+void	hv_kbd_intr(hv_kbd_sc *sc);
+#endif


Property changes on: trunk/sys/dev/hyperv/input/hv_kbdc.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/netvsc/hn_nvs.c
===================================================================
--- trunk/sys/dev/hyperv/netvsc/hn_nvs.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/netvsc/hn_nvs.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,747 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Network Virtualization Service.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/netvsc/hn_nvs.c 324574 2017-10-13 02:26:39Z sephe $");
+
+#include "opt_inet6.h"
+#include "opt_inet.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <net/if_media.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp_lro.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/hyperv_busdma.h>
+#include <dev/hyperv/include/vmbus.h>
+#include <dev/hyperv/include/vmbus_xact.h>
+
+#include <dev/hyperv/netvsc/ndis.h>
+#include <dev/hyperv/netvsc/if_hnreg.h>
+#include <dev/hyperv/netvsc/if_hnvar.h>
+#include <dev/hyperv/netvsc/hn_nvs.h>
+
+static int			hn_nvs_conn_chim(struct hn_softc *);
+static int			hn_nvs_conn_rxbuf(struct hn_softc *);
+static void			hn_nvs_disconn_chim(struct hn_softc *);
+static void			hn_nvs_disconn_rxbuf(struct hn_softc *);
+static int			hn_nvs_conf_ndis(struct hn_softc *, int);
+static int			hn_nvs_init_ndis(struct hn_softc *);
+static int			hn_nvs_doinit(struct hn_softc *, uint32_t);
+static int			hn_nvs_init(struct hn_softc *);
+static const void		*hn_nvs_xact_execute(struct hn_softc *,
+				    struct vmbus_xact *, void *, int,
+				    size_t *, uint32_t);
+static void			hn_nvs_sent_none(struct hn_nvs_sendctx *,
+				    struct hn_softc *, struct vmbus_channel *,
+				    const void *, int);
+
+struct hn_nvs_sendctx		hn_nvs_sendctx_none =
+    HN_NVS_SENDCTX_INITIALIZER(hn_nvs_sent_none, NULL);
+
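+/*
+ * NVS protocol versions to probe, newest first; hn_nvs_init() walks
+ * this table until the host accepts one.
+ */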
+static const uint32_t		hn_nvs_version[] = {
+	HN_NVS_VERSION_5,
+	HN_NVS_VERSION_4,
+	HN_NVS_VERSION_2,
+	HN_NVS_VERSION_1
+};
+
+static const void *
+hn_nvs_xact_execute(struct hn_softc *sc, struct vmbus_xact *xact,
+    void *req, int reqlen, size_t *resplen0, uint32_t type)
+{
+	struct hn_nvs_sendctx sndc;
+	size_t resplen, min_resplen = *resplen0;
+	const struct hn_nvs_hdr *hdr;
+	int error;
+
+	KASSERT(min_resplen >= sizeof(*hdr),
+	    ("invalid minimum response len %zu", min_resplen));
+
+	/*
+	 * Execute the xact setup by the caller.
+	 */
+	hn_nvs_sendctx_init(&sndc, hn_nvs_sent_xact, xact);
+
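+	/*
+	 * Activate the xact before sending, so a response delivered
+	 * through hn_nvs_sent_xact() cannot be lost to a race with
+	 * the wait below.
+	 */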
+	vmbus_xact_activate(xact);
+	error = hn_nvs_send(sc->hn_prichan, VMBUS_CHANPKT_FLAG_RC,
+	    req, reqlen, &sndc);
+	if (error) {
+		vmbus_xact_deactivate(xact);
+		return (NULL);
+	}
+	hdr = vmbus_chan_xact_wait(sc->hn_prichan, xact, &resplen,
+	    HN_CAN_SLEEP(sc));
+
+	/*
+	 * Check this NVS response message.
+	 */
+	if (resplen < min_resplen) {
+		if_printf(sc->hn_ifp, "invalid NVS resp len %zu\n", resplen);
+		return (NULL);
+	}
+	if (hdr->nvs_type != type) {
+		if_printf(sc->hn_ifp, "unexpected NVS resp 0x%08x, "
+		    "expect 0x%08x\n", hdr->nvs_type, type);
+		return (NULL);
+	}
+	/* All pass! */
+	*resplen0 = resplen;
+	return (hdr);
+}
+
+static __inline int
+hn_nvs_req_send(struct hn_softc *sc, void *req, int reqlen)
+{
+
+	return (hn_nvs_send(sc->hn_prichan, VMBUS_CHANPKT_FLAG_NONE,
+	    req, reqlen, &hn_nvs_sendctx_none));
+}
+
+static int
+hn_nvs_conn_rxbuf(struct hn_softc *sc)
+{
+	struct vmbus_xact *xact = NULL;
+	struct hn_nvs_rxbuf_conn *conn;
+	const struct hn_nvs_rxbuf_connresp *resp;
+	size_t resp_len;
+	uint32_t status;
+	int error, rxbuf_size;
+
+	/*
+	 * Limit RXBUF size for old NVS.
+	 */
+	if (sc->hn_nvs_ver <= HN_NVS_VERSION_2)
+		rxbuf_size = HN_RXBUF_SIZE_COMPAT;
+	else
+		rxbuf_size = HN_RXBUF_SIZE;
+
+	/*
+	 * Connect the RXBUF GPADL to the primary channel.
+	 *
+	 * NOTE:
+	 * Only primary channel has RXBUF connected to it.  Sub-channels
+	 * just share this RXBUF.
+	 */
+	error = vmbus_chan_gpadl_connect(sc->hn_prichan,
+	    sc->hn_rxbuf_dma.hv_paddr, rxbuf_size, &sc->hn_rxbuf_gpadl);
+	if (error) {
+		if_printf(sc->hn_ifp, "rxbuf gpadl conn failed: %d\n",
+		    error);
+		goto cleanup;
+	}
+
+	/*
+	 * Connect RXBUF to NVS.
+	 */
+
+	xact = vmbus_xact_get(sc->hn_xact, sizeof(*conn));
+	if (xact == NULL) {
+		if_printf(sc->hn_ifp, "no xact for nvs rxbuf conn\n");
+		error = ENXIO;
+		goto cleanup;
+	}
+	conn = vmbus_xact_req_data(xact);
+	conn->nvs_type = HN_NVS_TYPE_RXBUF_CONN;
+	conn->nvs_gpadl = sc->hn_rxbuf_gpadl;
+	conn->nvs_sig = HN_NVS_RXBUF_SIG;
+
+	resp_len = sizeof(*resp);
+	resp = hn_nvs_xact_execute(sc, xact, conn, sizeof(*conn), &resp_len,
+	    HN_NVS_TYPE_RXBUF_CONNRESP);
+	if (resp == NULL) {
+		if_printf(sc->hn_ifp, "exec nvs rxbuf conn failed\n");
+		error = EIO;
+		goto cleanup;
+	}
+
+	status = resp->nvs_status;
+	vmbus_xact_put(xact);
+	xact = NULL;
+
+	if (status != HN_NVS_STATUS_OK) {
+		if_printf(sc->hn_ifp, "nvs rxbuf conn failed: %x\n", status);
+		error = EIO;
+		goto cleanup;
+	}
+	sc->hn_flags |= HN_FLAG_RXBUF_CONNECTED;
+
+	return (0);
+
+cleanup:
+	if (xact != NULL)
+		vmbus_xact_put(xact);
+	hn_nvs_disconn_rxbuf(sc);
+	return (error);
+}
+
+static int
+hn_nvs_conn_chim(struct hn_softc *sc)
+{
+	struct vmbus_xact *xact = NULL;
+	struct hn_nvs_chim_conn *chim;
+	const struct hn_nvs_chim_connresp *resp;
+	size_t resp_len;
+	uint32_t status, sectsz;
+	int error;
+
+	/*
+	 * Connect chimney sending buffer GPADL to the primary channel.
+	 *
+	 * NOTE:
+	 * Only primary channel has chimney sending buffer connected to it.
+	 * Sub-channels just share this chimney sending buffer.
+	 */
+	error = vmbus_chan_gpadl_connect(sc->hn_prichan,
+	    sc->hn_chim_dma.hv_paddr, HN_CHIM_SIZE, &sc->hn_chim_gpadl);
+	if (error) {
+		if_printf(sc->hn_ifp, "chim gpadl conn failed: %d\n", error);
+		goto cleanup;
+	}
+
+	/*
+	 * Connect chimney sending buffer to NVS
+	 */
+
+	xact = vmbus_xact_get(sc->hn_xact, sizeof(*chim));
+	if (xact == NULL) {
+		if_printf(sc->hn_ifp, "no xact for nvs chim conn\n");
+		error = ENXIO;
+		goto cleanup;
+	}
+	chim = vmbus_xact_req_data(xact);
+	chim->nvs_type = HN_NVS_TYPE_CHIM_CONN;
+	chim->nvs_gpadl = sc->hn_chim_gpadl;
+	chim->nvs_sig = HN_NVS_CHIM_SIG;
+
+	resp_len = sizeof(*resp);
+	resp = hn_nvs_xact_execute(sc, xact, chim, sizeof(*chim), &resp_len,
+	    HN_NVS_TYPE_CHIM_CONNRESP);
+	if (resp == NULL) {
+		if_printf(sc->hn_ifp, "exec nvs chim conn failed\n");
+		error = EIO;
+		goto cleanup;
+	}
+
+	status = resp->nvs_status;
+	sectsz = resp->nvs_sectsz;
+	vmbus_xact_put(xact);
+	xact = NULL;
+
+	if (status != HN_NVS_STATUS_OK) {
+		if_printf(sc->hn_ifp, "nvs chim conn failed: %x\n", status);
+		error = EIO;
+		goto cleanup;
+	}
+	if (sectsz == 0 || sectsz % sizeof(uint32_t) != 0) {
+		/*
+		 * Can't use chimney sending buffer; done!
+		 */
+		if (sectsz == 0) {
+			if_printf(sc->hn_ifp, "zero chimney sending buffer "
+			    "section size\n");
+		} else {
+			if_printf(sc->hn_ifp, "misaligned chimney sending "
+			    "buffers, section size: %u\n", sectsz);
+		}
+		sc->hn_chim_szmax = 0;
+		sc->hn_chim_cnt = 0;
+		sc->hn_flags |= HN_FLAG_CHIM_CONNECTED;
+		return (0);
+	}
+
+	sc->hn_chim_szmax = sectsz;
+	sc->hn_chim_cnt = HN_CHIM_SIZE / sc->hn_chim_szmax;
+	if (HN_CHIM_SIZE % sc->hn_chim_szmax != 0) {
+		if_printf(sc->hn_ifp, "chimney sending sections are "
+		    "not properly aligned\n");
+	}
+	if (sc->hn_chim_cnt % LONG_BIT != 0) {
+		if_printf(sc->hn_ifp, "discard %d chimney sending sections\n",
+		    sc->hn_chim_cnt % LONG_BIT);
+	}
+
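+	/* One bit per chimney section; only whole u_long words are kept. */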
+	sc->hn_chim_bmap_cnt = sc->hn_chim_cnt / LONG_BIT;
+	sc->hn_chim_bmap = malloc(sc->hn_chim_bmap_cnt * sizeof(u_long),
+	    M_DEVBUF, M_WAITOK | M_ZERO);
+
+	/* Done! */
+	sc->hn_flags |= HN_FLAG_CHIM_CONNECTED;
+	if (bootverbose) {
+		if_printf(sc->hn_ifp, "chimney sending buffer %d/%d\n",
+		    sc->hn_chim_szmax, sc->hn_chim_cnt);
+	}
+	return (0);
+
+cleanup:
+	if (xact != NULL)
+		vmbus_xact_put(xact);
+	hn_nvs_disconn_chim(sc);
+	return (error);
+}
+
+static void
+hn_nvs_disconn_rxbuf(struct hn_softc *sc)
+{
+	int error;
+
+	if (sc->hn_flags & HN_FLAG_RXBUF_CONNECTED) {
+		struct hn_nvs_rxbuf_disconn disconn;
+
+		/*
+		 * Disconnect RXBUF from NVS.
+		 */
+		memset(&disconn, 0, sizeof(disconn));
+		disconn.nvs_type = HN_NVS_TYPE_RXBUF_DISCONN;
+		disconn.nvs_sig = HN_NVS_RXBUF_SIG;
+
+		/* NOTE: No response. */
+		error = hn_nvs_req_send(sc, &disconn, sizeof(disconn));
+		if (error) {
+			if_printf(sc->hn_ifp,
+			    "send nvs rxbuf disconn failed: %d\n", error);
+			/*
+			 * Fine for a revoked channel, since the hypervisor
+			 * does not drain TX bufring for a revoked channel.
+			 */
+			if (!vmbus_chan_is_revoked(sc->hn_prichan))
+				sc->hn_flags |= HN_FLAG_RXBUF_REF;
+		}
+		sc->hn_flags &= ~HN_FLAG_RXBUF_CONNECTED;
+
+		/*
+		 * Wait for the hypervisor to receive this NVS request.
+		 *
+		 * NOTE:
+		 * The TX bufring will not be drained by the hypervisor
+		 * if the primary channel is revoked.
+		 */
+		while (!vmbus_chan_tx_empty(sc->hn_prichan) &&
+		    !vmbus_chan_is_revoked(sc->hn_prichan))
+			pause("waittx", 1);
+		/*
+		 * Linger long enough for NVS to disconnect RXBUF.
+		 */
+		pause("lingtx", (200 * hz) / 1000);
+	}
+
+	if (sc->hn_rxbuf_gpadl != 0) {
+		/*
+		 * Disconnect RXBUF from primary channel.
+		 */
+		error = vmbus_chan_gpadl_disconnect(sc->hn_prichan,
+		    sc->hn_rxbuf_gpadl);
+		if (error) {
+			if_printf(sc->hn_ifp,
+			    "rxbuf gpadl disconn failed: %d\n", error);
+			sc->hn_flags |= HN_FLAG_RXBUF_REF;
+		}
+		sc->hn_rxbuf_gpadl = 0;
+	}
+}
+
+static void
+hn_nvs_disconn_chim(struct hn_softc *sc)
+{
+	int error;
+
+	if (sc->hn_flags & HN_FLAG_CHIM_CONNECTED) {
+		struct hn_nvs_chim_disconn disconn;
+
+		/*
+		 * Disconnect chimney sending buffer from NVS.
+		 */
+		memset(&disconn, 0, sizeof(disconn));
+		disconn.nvs_type = HN_NVS_TYPE_CHIM_DISCONN;
+		disconn.nvs_sig = HN_NVS_CHIM_SIG;
+
+		/* NOTE: No response. */
+		error = hn_nvs_req_send(sc, &disconn, sizeof(disconn));
+		if (error) {
+			if_printf(sc->hn_ifp,
+			    "send nvs chim disconn failed: %d\n", error);
+			/*
+			 * Fine for a revoked channel, since the hypervisor
+			 * does not drain TX bufring for a revoked channel.
+			 */
+			if (!vmbus_chan_is_revoked(sc->hn_prichan))
+				sc->hn_flags |= HN_FLAG_CHIM_REF;
+		}
+		sc->hn_flags &= ~HN_FLAG_CHIM_CONNECTED;
+
+		/*
+		 * Wait for the hypervisor to receive this NVS request.
+		 *
+		 * NOTE:
+		 * The TX bufring will not be drained by the hypervisor
+		 * if the primary channel is revoked.
+		 */
+		while (!vmbus_chan_tx_empty(sc->hn_prichan) &&
+		    !vmbus_chan_is_revoked(sc->hn_prichan))
+			pause("waittx", 1);
+		/*
+		 * Linger long enough for NVS to disconnect chimney
+		 * sending buffer.
+		 */
+		pause("lingtx", (200 * hz) / 1000);
+	}
+
+	if (sc->hn_chim_gpadl != 0) {
+		/*
+		 * Disconnect chimney sending buffer from primary channel.
+		 */
+		error = vmbus_chan_gpadl_disconnect(sc->hn_prichan,
+		    sc->hn_chim_gpadl);
+		if (error) {
+			if_printf(sc->hn_ifp,
+			    "chim gpadl disconn failed: %d\n", error);
+			sc->hn_flags |= HN_FLAG_CHIM_REF;
+		}
+		sc->hn_chim_gpadl = 0;
+	}
+
+	if (sc->hn_chim_bmap != NULL) {
+		free(sc->hn_chim_bmap, M_DEVBUF);
+		sc->hn_chim_bmap = NULL;
+		sc->hn_chim_bmap_cnt = 0;
+	}
+}
+
+static int
+hn_nvs_doinit(struct hn_softc *sc, uint32_t nvs_ver)
+{
+	struct vmbus_xact *xact;
+	struct hn_nvs_init *init;
+	const struct hn_nvs_init_resp *resp;
+	size_t resp_len;
+	uint32_t status;
+
+	xact = vmbus_xact_get(sc->hn_xact, sizeof(*init));
+	if (xact == NULL) {
+		if_printf(sc->hn_ifp, "no xact for nvs init\n");
+		return (ENXIO);
+	}
+	init = vmbus_xact_req_data(xact);
+	init->nvs_type = HN_NVS_TYPE_INIT;
+	init->nvs_ver_min = nvs_ver;
+	init->nvs_ver_max = nvs_ver;
+
+	resp_len = sizeof(*resp);
+	resp = hn_nvs_xact_execute(sc, xact, init, sizeof(*init), &resp_len,
+	    HN_NVS_TYPE_INIT_RESP);
+	if (resp == NULL) {
+		if_printf(sc->hn_ifp, "exec init failed\n");
+		vmbus_xact_put(xact);
+		return (EIO);
+	}
+
+	status = resp->nvs_status;
+	vmbus_xact_put(xact);
+
+	if (status != HN_NVS_STATUS_OK) {
+		if (bootverbose) {
+			/*
+			 * The caller may try another NVS version, and will
+			 * log an error if there are no more NVS versions to
+			 * try, so don't bark out loud here.
+			 */
+			if_printf(sc->hn_ifp, "nvs init failed for ver 0x%x\n",
+			    nvs_ver);
+		}
+		return (EINVAL);
+	}
+	return (0);
+}
+
+/*
+ * Configure MTU and enable VLAN.
+ */
+static int
+hn_nvs_conf_ndis(struct hn_softc *sc, int mtu)
+{
+	struct hn_nvs_ndis_conf conf;
+	int error;
+
+	memset(&conf, 0, sizeof(conf));
+	conf.nvs_type = HN_NVS_TYPE_NDIS_CONF;
+	conf.nvs_mtu = mtu + ETHER_HDR_LEN;
+	conf.nvs_caps = HN_NVS_NDIS_CONF_VLAN;
+	if (sc->hn_nvs_ver >= HN_NVS_VERSION_5)
+		conf.nvs_caps |= HN_NVS_NDIS_CONF_SRIOV;
+
+	/* NOTE: No response. */
+	error = hn_nvs_req_send(sc, &conf, sizeof(conf));
+	if (error) {
+		if_printf(sc->hn_ifp, "send nvs ndis conf failed: %d\n", error);
+		return (error);
+	}
+
+	if (bootverbose)
+		if_printf(sc->hn_ifp, "nvs ndis conf done\n");
+	sc->hn_caps |= HN_CAP_MTU | HN_CAP_VLAN;
+	return (0);
+}
+
+static int
+hn_nvs_init_ndis(struct hn_softc *sc)
+{
+	struct hn_nvs_ndis_init ndis;
+	int error;
+
+	memset(&ndis, 0, sizeof(ndis));
+	ndis.nvs_type = HN_NVS_TYPE_NDIS_INIT;
+	ndis.nvs_ndis_major = HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver);
+	ndis.nvs_ndis_minor = HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver);
+
+	/* NOTE: No response. */
+	error = hn_nvs_req_send(sc, &ndis, sizeof(ndis));
+	if (error)
+		if_printf(sc->hn_ifp, "send nvs ndis init failed: %d\n", error);
+	return (error);
+}
+
+static int
+hn_nvs_init(struct hn_softc *sc)
+{
+	int i, error;
+
+	if (device_is_attached(sc->hn_dev)) {
+		/*
+		 * NVS version and NDIS version MUST NOT be changed.
+		 */
+		if (bootverbose) {
+			if_printf(sc->hn_ifp, "reinit NVS version 0x%x, "
+			    "NDIS version %u.%u\n", sc->hn_nvs_ver,
+			    HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
+			    HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
+		}
+
+		error = hn_nvs_doinit(sc, sc->hn_nvs_ver);
+		if (error) {
+			if_printf(sc->hn_ifp, "reinit NVS version 0x%x "
+			    "failed: %d\n", sc->hn_nvs_ver, error);
+			return (error);
+		}
+		goto done;
+	}
+
+	/*
+	 * Find the supported NVS version and set NDIS version accordingly.
+	 */
+	for (i = 0; i < nitems(hn_nvs_version); ++i) {
+		error = hn_nvs_doinit(sc, hn_nvs_version[i]);
+		if (!error) {
+			sc->hn_nvs_ver = hn_nvs_version[i];
+
+			/* Set NDIS version according to NVS version. */
+			sc->hn_ndis_ver = HN_NDIS_VERSION_6_30;
+			if (sc->hn_nvs_ver <= HN_NVS_VERSION_4)
+				sc->hn_ndis_ver = HN_NDIS_VERSION_6_1;
+
+			if (bootverbose) {
+				if_printf(sc->hn_ifp, "NVS version 0x%x, "
+				    "NDIS version %u.%u\n", sc->hn_nvs_ver,
+				    HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
+				    HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
+			}
+			goto done;
+		}
+	}
+	if_printf(sc->hn_ifp, "no NVS available\n");
+	return (ENXIO);
+
+done:
+	if (sc->hn_nvs_ver >= HN_NVS_VERSION_5)
+		sc->hn_caps |= HN_CAP_HASHVAL;
+	return (0);
+}
+
+int
+hn_nvs_attach(struct hn_softc *sc, int mtu)
+{
+	int error;
+
+	if (hyperv_ver_major >= 10) {
+		/* UDP 4-tuple hash is enforced. */
+		sc->hn_caps |= HN_CAP_UDPHASH;
+	}
+
+	/*
+	 * Initialize NVS.
+	 */
+	error = hn_nvs_init(sc);
+	if (error)
+		return (error);
+
+	if (sc->hn_nvs_ver >= HN_NVS_VERSION_2) {
+		/*
+		 * Configure NDIS before initializing it.
+		 */
+		error = hn_nvs_conf_ndis(sc, mtu);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Initialize NDIS.
+	 */
+	error = hn_nvs_init_ndis(sc);
+	if (error)
+		return (error);
+
+	/*
+	 * Connect RXBUF.
+	 */
+	error = hn_nvs_conn_rxbuf(sc);
+	if (error)
+		return (error);
+
+	/*
+	 * Connect chimney sending buffer.
+	 */
+	error = hn_nvs_conn_chim(sc);
+	if (error) {
+		hn_nvs_disconn_rxbuf(sc);
+		return (error);
+	}
+	return (0);
+}
+
+void
+hn_nvs_detach(struct hn_softc *sc)
+{
+
+	/* NOTE: there are no requests to stop the NVS. */
+	hn_nvs_disconn_rxbuf(sc);
+	hn_nvs_disconn_chim(sc);
+}
+
+void
+hn_nvs_sent_xact(struct hn_nvs_sendctx *sndc,
+    struct hn_softc *sc __unused, struct vmbus_channel *chan __unused,
+    const void *data, int dlen)
+{
+
+	vmbus_xact_wakeup(sndc->hn_cbarg, data, dlen);
+}
+
+static void
+hn_nvs_sent_none(struct hn_nvs_sendctx *sndc __unused,
+    struct hn_softc *sc __unused, struct vmbus_channel *chan __unused,
+    const void *data __unused, int dlen __unused)
+{
+	/* EMPTY */
+}
+
+int
+hn_nvs_alloc_subchans(struct hn_softc *sc, int *nsubch0)
+{
+	struct vmbus_xact *xact;
+	struct hn_nvs_subch_req *req;
+	const struct hn_nvs_subch_resp *resp;
+	int error, nsubch_req;
+	uint32_t nsubch;
+	size_t resp_len;
+
+	nsubch_req = *nsubch0;
+	KASSERT(nsubch_req > 0, ("invalid # of sub-channels %d", nsubch_req));
+
+	xact = vmbus_xact_get(sc->hn_xact, sizeof(*req));
+	if (xact == NULL) {
+		if_printf(sc->hn_ifp, "no xact for nvs subch alloc\n");
+		return (ENXIO);
+	}
+	req = vmbus_xact_req_data(xact);
+	req->nvs_type = HN_NVS_TYPE_SUBCH_REQ;
+	req->nvs_op = HN_NVS_SUBCH_OP_ALLOC;
+	req->nvs_nsubch = nsubch_req;
+
+	resp_len = sizeof(*resp);
+	resp = hn_nvs_xact_execute(sc, xact, req, sizeof(*req), &resp_len,
+	    HN_NVS_TYPE_SUBCH_RESP);
+	if (resp == NULL) {
+		if_printf(sc->hn_ifp, "exec nvs subch alloc failed\n");
+		error = EIO;
+		goto done;
+	}
+	if (resp->nvs_status != HN_NVS_STATUS_OK) {
+		if_printf(sc->hn_ifp, "nvs subch alloc failed: %x\n",
+		    resp->nvs_status);
+		error = EIO;
+		goto done;
+	}
+
+	nsubch = resp->nvs_nsubch;
+	if (nsubch > nsubch_req) {
+		if_printf(sc->hn_ifp, "%u subchans are allocated, "
+		    "requested %d\n", nsubch, nsubch_req);
+		nsubch = nsubch_req;
+	}
+	*nsubch0 = nsubch;
+	error = 0;
+done:
+	vmbus_xact_put(xact);
+	return (error);
+}
+
+int
+hn_nvs_send_rndis_ctrl(struct vmbus_channel *chan,
+    struct hn_nvs_sendctx *sndc, struct vmbus_gpa *gpa, int gpa_cnt)
+{
+
+	return hn_nvs_send_rndis_sglist(chan, HN_NVS_RNDIS_MTYPE_CTRL,
+	    sndc, gpa, gpa_cnt);
+}
+
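+/*
+ * Select the active data path (e.g. synthetic vs. a VF data path).
+ * Fire and forget: NVS sends no response to this message.
+ */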
+void
+hn_nvs_set_datapath(struct hn_softc *sc, uint32_t path)
+{
+	struct hn_nvs_datapath dp;
+
+	memset(&dp, 0, sizeof(dp));
+	dp.nvs_type = HN_NVS_TYPE_SET_DATAPATH;
+	dp.nvs_active_path = path;
+
+	hn_nvs_req_send(sc, &dp, sizeof(dp));
+}


Property changes on: trunk/sys/dev/hyperv/netvsc/hn_nvs.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/netvsc/hn_nvs.h
===================================================================
--- trunk/sys/dev/hyperv/netvsc/hn_nvs.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/netvsc/hn_nvs.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,108 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/netvsc/hn_nvs.h 324461 2017-10-10 02:22:34Z sephe $
+ */
+
+#ifndef _HN_NVS_H_
+#define _HN_NVS_H_
+
+struct hn_nvs_sendctx;
+struct vmbus_channel;
+struct hn_softc;
+
+typedef void		(*hn_nvs_sent_t)
+			(struct hn_nvs_sendctx *, struct hn_softc *,
+			 struct vmbus_channel *, const void *, int);
+
+struct hn_nvs_sendctx {
+	hn_nvs_sent_t	hn_cb;
+	void		*hn_cbarg;
+};
+
+#define HN_NVS_SENDCTX_INITIALIZER(cb, cbarg)	\
+{						\
+	.hn_cb		= cb,			\
+	.hn_cbarg	= cbarg			\
+}
+
+static __inline void
+hn_nvs_sendctx_init(struct hn_nvs_sendctx *sndc, hn_nvs_sent_t cb, void *cbarg)
+{
+
+	sndc->hn_cb = cb;
+	sndc->hn_cbarg = cbarg;
+}
+
+static __inline int
+hn_nvs_send(struct vmbus_channel *chan, uint16_t flags,
+    void *nvs_msg, int nvs_msglen, struct hn_nvs_sendctx *sndc)
+{
+
+	return (vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND, flags,
+	    nvs_msg, nvs_msglen, (uint64_t)(uintptr_t)sndc));
+}
+
+static __inline int
+hn_nvs_send_sglist(struct vmbus_channel *chan, struct vmbus_gpa sg[], int sglen,
+    void *nvs_msg, int nvs_msglen, struct hn_nvs_sendctx *sndc)
+{
+
+	return (vmbus_chan_send_sglist(chan, sg, sglen, nvs_msg, nvs_msglen,
+	    (uint64_t)(uintptr_t)sndc));
+}
+
+static __inline int
+hn_nvs_send_rndis_sglist(struct vmbus_channel *chan, uint32_t rndis_mtype,
+    struct hn_nvs_sendctx *sndc, struct vmbus_gpa *gpa, int gpa_cnt)
+{
+	struct hn_nvs_rndis rndis;
+
+	rndis.nvs_type = HN_NVS_TYPE_RNDIS;
+	rndis.nvs_rndis_mtype = rndis_mtype;
+	rndis.nvs_chim_idx = HN_NVS_CHIM_IDX_INVALID;
+	rndis.nvs_chim_sz = 0;
+
+	return (hn_nvs_send_sglist(chan, gpa, gpa_cnt,
+	    &rndis, sizeof(rndis), sndc));
+}
+
+int		hn_nvs_attach(struct hn_softc *sc, int mtu);
+void		hn_nvs_detach(struct hn_softc *sc);
+int		hn_nvs_alloc_subchans(struct hn_softc *sc, int *nsubch);
+void		hn_nvs_sent_xact(struct hn_nvs_sendctx *sndc,
+		    struct hn_softc *sc, struct vmbus_channel *chan,
+		    const void *data, int dlen);
+int		hn_nvs_send_rndis_ctrl(struct vmbus_channel *chan,
+		    struct hn_nvs_sendctx *sndc, struct vmbus_gpa *gpa,
+		    int gpa_cnt);
+void		hn_nvs_set_datapath(struct hn_softc *sc, uint32_t path);
+
+extern struct hn_nvs_sendctx	hn_nvs_sendctx_none;
+
+#endif  /* !_HN_NVS_H_ */


Property changes on: trunk/sys/dev/hyperv/netvsc/hn_nvs.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
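
A short sketch of how the send-context machinery in hn_nvs.h above is
meant to be used.  The callback name and body are hypothetical; the
hn_nvs_sendctx type, HN_NVS_SENDCTX_INITIALIZER(), and hn_nvs_send()
come from the header, and hn_nvs_sendctx_none (defined in hn_nvs.c
above) covers the common no-op case.

    /* Hypothetical send-completion callback; the signature is fixed
     * by hn_nvs_sent_t. */
    static void
    my_sent_cb(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
        struct vmbus_channel *chan, const void *data, int dlen)
    {
            /* e.g. release a buffer stashed in sndc->hn_cbarg */
    }

    static struct hn_nvs_sendctx my_sndc =
        HN_NVS_SENDCTX_INITIALIZER(my_sent_cb, NULL);

    /*
     * Later: hn_nvs_send(chan, VMBUS_CHANPKT_FLAG_NONE, msg, msglen,
     * &my_sndc); my_sent_cb() runs when the host completes the send.
     */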
Added: trunk/sys/dev/hyperv/netvsc/hn_rndis.c
===================================================================
--- trunk/sys/dev/hyperv/netvsc/hn_rndis.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/netvsc/hn_rndis.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,1038 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/netvsc/hn_rndis.c 324574 2017-10-13 02:26:39Z sephe $");
+
+#include "opt_inet6.h"
+#include "opt_inet.h"
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+
+#include <machine/atomic.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <net/if_var.h>
+#include <net/if_media.h>
+#include <net/rndis.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp_lro.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/hyperv_busdma.h>
+#include <dev/hyperv/include/vmbus.h>
+#include <dev/hyperv/include/vmbus_xact.h>
+
+#include <dev/hyperv/netvsc/ndis.h>
+#include <dev/hyperv/netvsc/if_hnreg.h>
+#include <dev/hyperv/netvsc/if_hnvar.h>
+#include <dev/hyperv/netvsc/hn_nvs.h>
+#include <dev/hyperv/netvsc/hn_rndis.h>
+
+#define HN_RNDIS_RID_COMPAT_MASK	0xffff
+#define HN_RNDIS_RID_COMPAT_MAX		HN_RNDIS_RID_COMPAT_MASK
+
+#define HN_RNDIS_XFER_SIZE		2048
+
+#define HN_NDIS_TXCSUM_CAP_IP4		\
+	(NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT)
+#define HN_NDIS_TXCSUM_CAP_TCP4		\
+	(NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT)
+#define HN_NDIS_TXCSUM_CAP_TCP6		\
+	(NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT | \
+	 NDIS_TXCSUM_CAP_IP6EXT)
+#define HN_NDIS_TXCSUM_CAP_UDP6		\
+	(NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT)
+#define HN_NDIS_LSOV2_CAP_IP6		\
+	(NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT)
+
+static const void	*hn_rndis_xact_exec1(struct hn_softc *,
+			    struct vmbus_xact *, size_t,
+			    struct hn_nvs_sendctx *, size_t *);
+static const void	*hn_rndis_xact_execute(struct hn_softc *,
+			    struct vmbus_xact *, uint32_t, size_t, size_t *,
+			    uint32_t);
+static int		hn_rndis_query(struct hn_softc *, uint32_t,
+			    const void *, size_t, void *, size_t *);
+static int		hn_rndis_query2(struct hn_softc *, uint32_t,
+			    const void *, size_t, void *, size_t *, size_t);
+static int		hn_rndis_set(struct hn_softc *, uint32_t,
+			    const void *, size_t);
+static int		hn_rndis_init(struct hn_softc *);
+static int		hn_rndis_halt(struct hn_softc *);
+static int		hn_rndis_conf_offload(struct hn_softc *, int);
+static int		hn_rndis_query_hwcaps(struct hn_softc *,
+			    struct ndis_offload *);
+
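+/*
+ * Allocate a fresh RNDIS request id.  Id 0 is skipped, and the low
+ * 16 bits are left to the compat range (HN_RNDIS_RID_COMPAT_MASK),
+ * so new ids always land in the upper 16 bits.
+ */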
+static __inline uint32_t
+hn_rndis_rid(struct hn_softc *sc)
+{
+	uint32_t rid;
+
+again:
+	rid = atomic_fetchadd_int(&sc->hn_rndis_rid, 1);
+	if (rid == 0)
+		goto again;
+
+	/* Use upper 16 bits for non-compat RNDIS messages. */
+	return ((rid & 0xffff) << 16);
+}
+
+void
+hn_rndis_rx_ctrl(struct hn_softc *sc, const void *data, int dlen)
+{
+	const struct rndis_comp_hdr *comp;
+	const struct rndis_msghdr *hdr;
+
+	KASSERT(dlen >= sizeof(*hdr), ("invalid RNDIS msg\n"));
+	hdr = data;
+
+	switch (hdr->rm_type) {
+	case REMOTE_NDIS_INITIALIZE_CMPLT:
+	case REMOTE_NDIS_QUERY_CMPLT:
+	case REMOTE_NDIS_SET_CMPLT:
+	case REMOTE_NDIS_KEEPALIVE_CMPLT:	/* unused */
+		if (dlen < sizeof(*comp)) {
+			if_printf(sc->hn_ifp, "invalid RNDIS cmplt\n");
+			return;
+		}
+		comp = data;
+
+		KASSERT(comp->rm_rid > HN_RNDIS_RID_COMPAT_MAX,
+		    ("invalid RNDIS rid 0x%08x\n", comp->rm_rid));
+		vmbus_xact_ctx_wakeup(sc->hn_xact, comp, dlen);
+		break;
+
+	case REMOTE_NDIS_RESET_CMPLT:
+		/*
+		 * Reset completed, no rid.
+		 *
+		 * NOTE:
+		 * RESET is not issued by hn(4), so this message should
+		 * _not_ be observed.
+		 */
+		if_printf(sc->hn_ifp, "RESET cmplt received\n");
+		break;
+
+	default:
+		if_printf(sc->hn_ifp, "unknown RNDIS msg 0x%x\n",
+		    hdr->rm_type);
+		break;
+	}
+}
+
+int
+hn_rndis_get_eaddr(struct hn_softc *sc, uint8_t *eaddr)
+{
+	size_t eaddr_len;
+	int error;
+
+	eaddr_len = ETHER_ADDR_LEN;
+	error = hn_rndis_query(sc, OID_802_3_PERMANENT_ADDRESS, NULL, 0,
+	    eaddr, &eaddr_len);
+	if (error)
+		return (error);
+	if (eaddr_len != ETHER_ADDR_LEN) {
+		if_printf(sc->hn_ifp, "invalid eaddr len %zu\n", eaddr_len);
+		return (EINVAL);
+	}
+	return (0);
+}
+
+int
+hn_rndis_get_linkstatus(struct hn_softc *sc, uint32_t *link_status)
+{
+	size_t size;
+	int error;
+
+	size = sizeof(*link_status);
+	error = hn_rndis_query(sc, OID_GEN_MEDIA_CONNECT_STATUS, NULL, 0,
+	    link_status, &size);
+	if (error)
+		return (error);
+	if (size != sizeof(uint32_t)) {
+		if_printf(sc->hn_ifp, "invalid link status len %zu\n", size);
+		return (EINVAL);
+	}
+	return (0);
+}
+
+int
+hn_rndis_get_mtu(struct hn_softc *sc, uint32_t *mtu)
+{
+	size_t size;
+	int error;
+
+	size = sizeof(*mtu);
+	error = hn_rndis_query(sc, OID_GEN_MAXIMUM_FRAME_SIZE, NULL, 0,
+	    mtu, &size);
+	if (error)
+		return (error);
+	if (size != sizeof(uint32_t)) {
+		if_printf(sc->hn_ifp, "invalid mtu len %zu\n", size);
+		return (EINVAL);
+	}
+	return (0);
+}
+
+static const void *
+hn_rndis_xact_exec1(struct hn_softc *sc, struct vmbus_xact *xact, size_t reqlen,
+    struct hn_nvs_sendctx *sndc, size_t *comp_len)
+{
+	struct vmbus_gpa gpa[HN_XACT_REQ_PGCNT];
+	int gpa_cnt, error;
+	bus_addr_t paddr;
+
+	KASSERT(reqlen <= HN_XACT_REQ_SIZE && reqlen > 0,
+	    ("invalid request length %zu", reqlen));
+
+	/*
+	 * Setup the SG list.
+	 */
+	paddr = vmbus_xact_req_paddr(xact);
+	KASSERT((paddr & PAGE_MASK) == 0,
+	    ("vmbus xact request is not page aligned 0x%jx", (uintmax_t)paddr));
+	for (gpa_cnt = 0; gpa_cnt < HN_XACT_REQ_PGCNT; ++gpa_cnt) {
+		int len = PAGE_SIZE;
+
+		if (reqlen == 0)
+			break;
+		if (reqlen < len)
+			len = reqlen;
+
+		gpa[gpa_cnt].gpa_page = atop(paddr) + gpa_cnt;
+		gpa[gpa_cnt].gpa_len = len;
+		gpa[gpa_cnt].gpa_ofs = 0;
+
+		reqlen -= len;
+	}
+	KASSERT(reqlen == 0, ("still have %zu request data left", reqlen));
+
+	/*
+	 * Send this RNDIS control message and wait for its completion
+	 * message.
+	 */
+	vmbus_xact_activate(xact);
+	error = hn_nvs_send_rndis_ctrl(sc->hn_prichan, sndc, gpa, gpa_cnt);
+	if (error) {
+		vmbus_xact_deactivate(xact);
+		if_printf(sc->hn_ifp, "RNDIS ctrl send failed: %d\n", error);
+		return (NULL);
+	}
+	return (vmbus_chan_xact_wait(sc->hn_prichan, xact, comp_len,
+	    HN_CAN_SLEEP(sc)));
+}
+
+static const void *
+hn_rndis_xact_execute(struct hn_softc *sc, struct vmbus_xact *xact, uint32_t rid,
+    size_t reqlen, size_t *comp_len0, uint32_t comp_type)
+{
+	const struct rndis_comp_hdr *comp;
+	size_t comp_len, min_complen = *comp_len0;
+
+	KASSERT(rid > HN_RNDIS_RID_COMPAT_MAX, ("invalid rid %u\n", rid));
+	KASSERT(min_complen >= sizeof(*comp),
+	    ("invalid minimum complete len %zu", min_complen));
+
+	/*
+	 * Execute the xact setup by the caller.
+	 */
+	comp = hn_rndis_xact_exec1(sc, xact, reqlen, &hn_nvs_sendctx_none,
+	    &comp_len);
+	if (comp == NULL)
+		return (NULL);
+
+	/*
+	 * Check this RNDIS complete message.
+	 */
+	if (comp_len < min_complen) {
+		if (comp_len >= sizeof(*comp)) {
+			/* rm_status field is valid */
+			if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu, "
+			    "status 0x%08x\n", comp_len, comp->rm_status);
+		} else {
+			if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu\n",
+			    comp_len);
+		}
+		return (NULL);
+	}
+	if (comp->rm_len < min_complen) {
+		if_printf(sc->hn_ifp, "invalid RNDIS comp msglen %u\n",
+		    comp->rm_len);
+		return (NULL);
+	}
+	if (comp->rm_type != comp_type) {
+		if_printf(sc->hn_ifp, "unexpected RNDIS comp 0x%08x, "
+		    "expect 0x%08x\n", comp->rm_type, comp_type);
+		return (NULL);
+	}
+	if (comp->rm_rid != rid) {
+		if_printf(sc->hn_ifp, "RNDIS comp rid mismatch %u, "
+		    "expect %u\n", comp->rm_rid, rid);
+		return (NULL);
+	}
+	/* All pass! */
+	*comp_len0 = comp_len;
+	return (comp);
+}
+
+static int
+hn_rndis_query(struct hn_softc *sc, uint32_t oid,
+    const void *idata, size_t idlen, void *odata, size_t *odlen0)
+{
+
+	return (hn_rndis_query2(sc, oid, idata, idlen, odata, odlen0, *odlen0));
+}
+
+static int
+hn_rndis_query2(struct hn_softc *sc, uint32_t oid,
+    const void *idata, size_t idlen, void *odata, size_t *odlen0,
+    size_t min_odlen)
+{
+	struct rndis_query_req *req;
+	const struct rndis_query_comp *comp;
+	struct vmbus_xact *xact;
+	size_t reqlen, odlen = *odlen0, comp_len;
+	int error, ofs;
+	uint32_t rid;
+
+	reqlen = sizeof(*req) + idlen;
+	xact = vmbus_xact_get(sc->hn_xact, reqlen);
+	if (xact == NULL) {
+		if_printf(sc->hn_ifp, "no xact for RNDIS query 0x%08x\n", oid);
+		return (ENXIO);
+	}
+	rid = hn_rndis_rid(sc);
+	req = vmbus_xact_req_data(xact);
+	req->rm_type = REMOTE_NDIS_QUERY_MSG;
+	req->rm_len = reqlen;
+	req->rm_rid = rid;
+	req->rm_oid = oid;
+	/*
+	 * XXX
+	 * This is _not_ RNDIS Spec conforming:
+	 * "This MUST be set to 0 when there is no input data
+	 *  associated with the OID."
+	 *
+	 * If this field was set to 0 according to the RNDIS Spec,
+	 * Hyper-V would set non-SUCCESS status in the query
+	 * completion.
+	 */
+	req->rm_infobufoffset = RNDIS_QUERY_REQ_INFOBUFOFFSET;
+
+	if (idlen > 0) {
+		req->rm_infobuflen = idlen;
+		/* Input data immediately follows RNDIS query. */
+		memcpy(req + 1, idata, idlen);
+	}
+
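+	/* Expect at least the completion header plus min_odlen of data. */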
+	comp_len = sizeof(*comp) + min_odlen;
+	comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len,
+	    REMOTE_NDIS_QUERY_CMPLT);
+	if (comp == NULL) {
+		if_printf(sc->hn_ifp, "exec RNDIS query 0x%08x failed\n", oid);
+		error = EIO;
+		goto done;
+	}
+
+	if (comp->rm_status != RNDIS_STATUS_SUCCESS) {
+		if_printf(sc->hn_ifp, "RNDIS query 0x%08x failed: "
+		    "status 0x%08x\n", oid, comp->rm_status);
+		error = EIO;
+		goto done;
+	}
+	if (comp->rm_infobuflen == 0 || comp->rm_infobufoffset == 0) {
+		/* No output data! */
+		if_printf(sc->hn_ifp, "RNDIS query 0x%08x, no data\n", oid);
+		*odlen0 = 0;
+		error = 0;
+		goto done;
+	}
+
+	/*
+	 * Check output data length and offset.
+	 */
+	/* ofs is the offset from the beginning of comp. */
+	ofs = RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(comp->rm_infobufoffset);
+	if (ofs < sizeof(*comp) || ofs + comp->rm_infobuflen > comp_len) {
+		if_printf(sc->hn_ifp, "RNDIS query invalid comp ib off/len, "
+		    "%u/%u\n", comp->rm_infobufoffset, comp->rm_infobuflen);
+		error = EINVAL;
+		goto done;
+	}
+
+	/*
+	 * Save output data.
+	 */
+	if (comp->rm_infobuflen < odlen)
+		odlen = comp->rm_infobuflen;
+	memcpy(odata, ((const uint8_t *)comp) + ofs, odlen);
+	*odlen0 = odlen;
+
+	error = 0;
+done:
+	vmbus_xact_put(xact);
+	return (error);
+}
+
+int
+hn_rndis_query_rsscaps(struct hn_softc *sc, int *rxr_cnt0)
+{
+	struct ndis_rss_caps in, caps;
+	size_t caps_len;
+	int error, indsz, rxr_cnt, hash_fnidx;
+	uint32_t hash_func = 0, hash_types = 0;
+
+	*rxr_cnt0 = 0;
+
+	if (sc->hn_ndis_ver < HN_NDIS_VERSION_6_20)
+		return (EOPNOTSUPP);
+
+	memset(&in, 0, sizeof(in));
+	in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS;
+	in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2;
+	in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE;
+
+	caps_len = NDIS_RSS_CAPS_SIZE;
+	error = hn_rndis_query2(sc, OID_GEN_RECEIVE_SCALE_CAPABILITIES,
+	    &in, NDIS_RSS_CAPS_SIZE, &caps, &caps_len, NDIS_RSS_CAPS_SIZE_6_0);
+	if (error)
+		return (error);
+
+	/*
+	 * Preliminary verification.
+	 */
+	if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) {
+		if_printf(sc->hn_ifp, "invalid NDIS objtype 0x%02x\n",
+		    caps.ndis_hdr.ndis_type);
+		return (EINVAL);
+	}
+	if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) {
+		if_printf(sc->hn_ifp, "invalid NDIS objrev 0x%02x\n",
+		    caps.ndis_hdr.ndis_rev);
+		return (EINVAL);
+	}
+	if (caps.ndis_hdr.ndis_size > caps_len) {
+		if_printf(sc->hn_ifp, "invalid NDIS objsize %u, "
+		    "data size %zu\n", caps.ndis_hdr.ndis_size, caps_len);
+		return (EINVAL);
+	} else if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) {
+		if_printf(sc->hn_ifp, "invalid NDIS objsize %u\n",
+		    caps.ndis_hdr.ndis_size);
+		return (EINVAL);
+	}
+
+	/*
+	 * Save information for later RSS configuration.
+	 */
+	if (caps.ndis_nrxr == 0) {
+		if_printf(sc->hn_ifp, "0 RX rings!?\n");
+		return (EINVAL);
+	}
+	if (bootverbose)
+		if_printf(sc->hn_ifp, "%u RX rings\n", caps.ndis_nrxr);
+	rxr_cnt = caps.ndis_nrxr;
+
+	if (caps.ndis_hdr.ndis_size == NDIS_RSS_CAPS_SIZE &&
+	    caps.ndis_hdr.ndis_rev >= NDIS_RSS_CAPS_REV_2) {
+		if (caps.ndis_nind > NDIS_HASH_INDCNT) {
+			if_printf(sc->hn_ifp,
+			    "too many RSS indirect table entries %u\n",
+			    caps.ndis_nind);
+			return (EOPNOTSUPP);
+		}
+		if (!powerof2(caps.ndis_nind)) {
+			if_printf(sc->hn_ifp, "RSS indirect table size is not "
+			    "power-of-2 %u\n", caps.ndis_nind);
+		}
+
+		if (bootverbose) {
+			if_printf(sc->hn_ifp, "RSS indirect table size %u\n",
+			    caps.ndis_nind);
+		}
+		indsz = caps.ndis_nind;
+	} else {
+		indsz = NDIS_HASH_INDCNT;
+	}
+	if (indsz < rxr_cnt) {
+		if_printf(sc->hn_ifp, "# of RX rings (%d) > "
+		    "RSS indirect table size %d\n", rxr_cnt, indsz);
+		rxr_cnt = indsz;
+	}
+
+	/*
+	 * NOTE:
+	 * Toeplitz is at the lowest bit, and it is preferred; so ffs(),
+	 * instead of fls(), is used here.
+	 */
+	hash_fnidx = ffs(caps.ndis_caps & NDIS_RSS_CAP_HASHFUNC_MASK);
+	if (hash_fnidx == 0) {
+		if_printf(sc->hn_ifp, "no hash functions, caps 0x%08x\n",
+		    caps.ndis_caps);
+		return (EOPNOTSUPP);
+	}
+	hash_func = 1 << (hash_fnidx - 1); /* ffs is 1-based */
+
+	if (caps.ndis_caps & NDIS_RSS_CAP_IPV4)
+		hash_types |= NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4;
+	if (caps.ndis_caps & NDIS_RSS_CAP_IPV6)
+		hash_types |= NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6;
+	if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX)
+		hash_types |= NDIS_HASH_IPV6_EX | NDIS_HASH_TCP_IPV6_EX;
+	if (hash_types == 0) {
+		if_printf(sc->hn_ifp, "no hash types, caps 0x%08x\n",
+		    caps.ndis_caps);
+		return (EOPNOTSUPP);
+	}
+	if (bootverbose)
+		if_printf(sc->hn_ifp, "RSS caps %#x\n", caps.ndis_caps);
+
+	/* Commit! */
+	sc->hn_rss_ind_size = indsz;
+	sc->hn_rss_hcap = hash_func | hash_types;
+	if (sc->hn_caps & HN_CAP_UDPHASH) {
+		/* UDP 4-tuple hash is unconditionally enabled. */
+		sc->hn_rss_hcap |= NDIS_HASH_UDP_IPV4_X;
+	}
+	*rxr_cnt0 = rxr_cnt;
+	return (0);
+}
+
+static int
+hn_rndis_set(struct hn_softc *sc, uint32_t oid, const void *data, size_t dlen)
+{
+	struct rndis_set_req *req;
+	const struct rndis_set_comp *comp;
+	struct vmbus_xact *xact;
+	size_t reqlen, comp_len;
+	uint32_t rid;
+	int error;
+
+	KASSERT(dlen > 0, ("invalid dlen %zu", dlen));
+
+	reqlen = sizeof(*req) + dlen;
+	xact = vmbus_xact_get(sc->hn_xact, reqlen);
+	if (xact == NULL) {
+		if_printf(sc->hn_ifp, "no xact for RNDIS set 0x%08x\n", oid);
+		return (ENXIO);
+	}
+	rid = hn_rndis_rid(sc);
+	req = vmbus_xact_req_data(xact);
+	req->rm_type = REMOTE_NDIS_SET_MSG;
+	req->rm_len = reqlen;
+	req->rm_rid = rid;
+	req->rm_oid = oid;
+	req->rm_infobuflen = dlen;
+	req->rm_infobufoffset = RNDIS_SET_REQ_INFOBUFOFFSET;
+	/* Data immediately follows RNDIS set. */
+	memcpy(req + 1, data, dlen);
+
+	comp_len = sizeof(*comp);
+	comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len,
+	    REMOTE_NDIS_SET_CMPLT);
+	if (comp == NULL) {
+		if_printf(sc->hn_ifp, "exec RNDIS set 0x%08x failed\n", oid);
+		error = EIO;
+		goto done;
+	}
+
+	if (comp->rm_status != RNDIS_STATUS_SUCCESS) {
+		if_printf(sc->hn_ifp, "RNDIS set 0x%08x failed: "
+		    "status 0x%08x\n", oid, comp->rm_status);
+		error = EIO;
+		goto done;
+	}
+	error = 0;
+done:
+	vmbus_xact_put(xact);
+	return (error);
+}
+
+static int
+hn_rndis_conf_offload(struct hn_softc *sc, int mtu)
+{
+	struct ndis_offload hwcaps;
+	struct ndis_offload_params params;
+	uint32_t caps = 0;
+	size_t paramsz;
+	int error, tso_maxsz, tso_minsg;
+
+	error = hn_rndis_query_hwcaps(sc, &hwcaps);
+	if (error) {
+		if_printf(sc->hn_ifp, "hwcaps query failed: %d\n", error);
+		return (error);
+	}
+
+	/* NOTE: 0 means "no change" */
+	memset(&params, 0, sizeof(params));
+
+	params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT;
+	if (sc->hn_ndis_ver < HN_NDIS_VERSION_6_30) {
+		params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2;
+		paramsz = NDIS_OFFLOAD_PARAMS_SIZE_6_1;
+	} else {
+		params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3;
+		paramsz = NDIS_OFFLOAD_PARAMS_SIZE;
+	}
+	params.ndis_hdr.ndis_size = paramsz;
+
+	/*
+	 * TSO4/TSO6 setup.
+	 */
+	tso_maxsz = IP_MAXPACKET;
+	tso_minsg = 2;
+	if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) {
+		caps |= HN_CAP_TSO4;
+		params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON;
+
+		if (hwcaps.ndis_lsov2.ndis_ip4_maxsz < tso_maxsz)
+			tso_maxsz = hwcaps.ndis_lsov2.ndis_ip4_maxsz;
+		if (hwcaps.ndis_lsov2.ndis_ip4_minsg > tso_minsg)
+			tso_minsg = hwcaps.ndis_lsov2.ndis_ip4_minsg;
+	}
+	if ((hwcaps.ndis_lsov2.ndis_ip6_encap & NDIS_OFFLOAD_ENCAP_8023) &&
+	    (hwcaps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6) ==
+	    HN_NDIS_LSOV2_CAP_IP6) {
+		caps |= HN_CAP_TSO6;
+		params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON;
+
+		if (hwcaps.ndis_lsov2.ndis_ip6_maxsz < tso_maxsz)
+			tso_maxsz = hwcaps.ndis_lsov2.ndis_ip6_maxsz;
+		if (hwcaps.ndis_lsov2.ndis_ip6_minsg > tso_minsg)
+			tso_minsg = hwcaps.ndis_lsov2.ndis_ip6_minsg;
+	}
+	sc->hn_ndis_tso_szmax = 0;
+	sc->hn_ndis_tso_sgmin = 0;
+	if (caps & (HN_CAP_TSO4 | HN_CAP_TSO6)) {
+		KASSERT(tso_maxsz <= IP_MAXPACKET,
+		    ("invalid NDIS TSO maxsz %d", tso_maxsz));
+		KASSERT(tso_minsg >= 2,
+		    ("invalid NDIS TSO minsg %d", tso_minsg));
+		if (tso_maxsz < tso_minsg * mtu) {
+			if_printf(sc->hn_ifp, "invalid NDIS TSO config: "
+			    "maxsz %d, minsg %d, mtu %d; "
+			    "disable TSO4 and TSO6\n",
+			    tso_maxsz, tso_minsg, mtu);
+			caps &= ~(HN_CAP_TSO4 | HN_CAP_TSO6);
+			params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_OFF;
+			params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_OFF;
+		} else {
+			sc->hn_ndis_tso_szmax = tso_maxsz;
+			sc->hn_ndis_tso_sgmin = tso_minsg;
+			if (bootverbose) {
+				if_printf(sc->hn_ifp, "NDIS TSO "
+				    "szmax %d sgmin %d\n",
+				    sc->hn_ndis_tso_szmax,
+				    sc->hn_ndis_tso_sgmin);
+			}
+		}
+	}
+
+	/* IPv4 checksum */
+	if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_IP4) ==
+	    HN_NDIS_TXCSUM_CAP_IP4) {
+		caps |= HN_CAP_IPCS;
+		params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX;
+	}
+	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) {
+		if (params.ndis_ip4csum == NDIS_OFFLOAD_PARAM_TX)
+			params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TXRX;
+		else
+			params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_RX;
+	}
+
+	/* TCP4 checksum */
+	if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_TCP4) ==
+	    HN_NDIS_TXCSUM_CAP_TCP4) {
+		caps |= HN_CAP_TCP4CS;
+		params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX;
+	}
+	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) {
+		if (params.ndis_tcp4csum == NDIS_OFFLOAD_PARAM_TX)
+			params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TXRX;
+		else
+			params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_RX;
+	}
+
+	/* UDP4 checksum */
+	if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) {
+		caps |= HN_CAP_UDP4CS;
+		params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX;
+	}
+	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) {
+		if (params.ndis_udp4csum == NDIS_OFFLOAD_PARAM_TX)
+			params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TXRX;
+		else
+			params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_RX;
+	}
+
+	/* TCP6 checksum */
+	if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_TCP6) ==
+	    HN_NDIS_TXCSUM_CAP_TCP6) {
+		caps |= HN_CAP_TCP6CS;
+		params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX;
+	}
+	if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6) {
+		if (params.ndis_tcp6csum == NDIS_OFFLOAD_PARAM_TX)
+			params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TXRX;
+		else
+			params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_RX;
+	}
+
+	/* UDP6 checksum */
+	if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_UDP6) ==
+	    HN_NDIS_TXCSUM_CAP_UDP6) {
+		caps |= HN_CAP_UDP6CS;
+		params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX;
+	}
+	if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6) {
+		if (params.ndis_udp6csum == NDIS_OFFLOAD_PARAM_TX)
+			params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TXRX;
+		else
+			params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_RX;
+	}
+
+	if (bootverbose) {
+		if_printf(sc->hn_ifp, "offload csum: "
+		    "ip4 %u, tcp4 %u, udp4 %u, tcp6 %u, udp6 %u\n",
+		    params.ndis_ip4csum,
+		    params.ndis_tcp4csum,
+		    params.ndis_udp4csum,
+		    params.ndis_tcp6csum,
+		    params.ndis_udp6csum);
+		if_printf(sc->hn_ifp, "offload lsov2: ip4 %u, ip6 %u\n",
+		    params.ndis_lsov2_ip4,
+		    params.ndis_lsov2_ip6);
+	}
+
+	error = hn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, &params, paramsz);
+	if (error) {
+		if_printf(sc->hn_ifp, "offload config failed: %d\n", error);
+		return (error);
+	}
+
+	if (bootverbose)
+		if_printf(sc->hn_ifp, "offload config done\n");
+	sc->hn_caps |= caps;
+	return (0);
+}
+
+int
+hn_rndis_conf_rss(struct hn_softc *sc, uint16_t flags)
+{
+	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
+	struct ndis_rss_params *prm = &rss->rss_params;
+	int error, rss_size;
+
+	/*
+	 * Only NDIS 6.20+ is supported:
+	 * only 4-byte elements in the indirect table are supported,
+	 * and those were adopted in NDIS 6.20.
+	 */
+	KASSERT(sc->hn_ndis_ver >= HN_NDIS_VERSION_6_20,
+	    ("NDIS 6.20+ is required, NDIS version 0x%08x", sc->hn_ndis_ver));
+
+	/* XXX only one hash function can be specified, though; popcnt? */
+	KASSERT((sc->hn_rss_hash & NDIS_HASH_FUNCTION_MASK),
+	    ("no hash func %08x", sc->hn_rss_hash));
+	KASSERT((sc->hn_rss_hash & NDIS_HASH_STD),
+	    ("no standard hash types %08x", sc->hn_rss_hash));
+	KASSERT(sc->hn_rss_ind_size > 0, ("no indirect table size"));
+
+	if (bootverbose) {
+		if_printf(sc->hn_ifp, "RSS indirect table size %d, "
+		    "hash 0x%08x\n", sc->hn_rss_ind_size, sc->hn_rss_hash);
+	}
+
+	/*
+	 * NOTE:
+	 * DO NOT whack rss_key and rss_ind, which are setup by the caller.
+	 */
+	memset(prm, 0, sizeof(*prm));
+	rss_size = NDIS_RSSPRM_TOEPLITZ_SIZE(sc->hn_rss_ind_size);
+
+	prm->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS;
+	prm->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2;
+	prm->ndis_hdr.ndis_size = rss_size;
+	prm->ndis_flags = flags;
+	prm->ndis_hash = sc->hn_rss_hash &
+	    (NDIS_HASH_FUNCTION_MASK | NDIS_HASH_STD);
+	prm->ndis_indsize = sizeof(rss->rss_ind[0]) * sc->hn_rss_ind_size;
+	prm->ndis_indoffset =
+	    __offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]);
+	prm->ndis_keysize = sizeof(rss->rss_key);
+	prm->ndis_keyoffset =
+	    __offsetof(struct ndis_rssprm_toeplitz, rss_key[0]);
+
+	error = hn_rndis_set(sc, OID_GEN_RECEIVE_SCALE_PARAMETERS,
+	    rss, rss_size);
+	if (error) {
+		if_printf(sc->hn_ifp, "RSS config failed: %d\n", error);
+	} else {
+		if (bootverbose)
+			if_printf(sc->hn_ifp, "RSS config done\n");
+	}
+	return (error);
+}
+
+int
+hn_rndis_set_rxfilter(struct hn_softc *sc, uint32_t filter)
+{
+	int error;
+
+	error = hn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER,
+	    &filter, sizeof(filter));
+	if (error) {
+		if_printf(sc->hn_ifp, "set RX filter 0x%08x failed: %d\n",
+		    filter, error);
+	} else {
+		if (bootverbose) {
+			if_printf(sc->hn_ifp, "set RX filter 0x%08x done\n",
+			    filter);
+		}
+	}
+	return (error);
+}
+
+static int
+hn_rndis_init(struct hn_softc *sc)
+{
+	struct rndis_init_req *req;
+	const struct rndis_init_comp *comp;
+	struct vmbus_xact *xact;
+	size_t comp_len;
+	uint32_t rid;
+	int error;
+
+	xact = vmbus_xact_get(sc->hn_xact, sizeof(*req));
+	if (xact == NULL) {
+		if_printf(sc->hn_ifp, "no xact for RNDIS init\n");
+		return (ENXIO);
+	}
+	rid = hn_rndis_rid(sc);
+	req = vmbus_xact_req_data(xact);
+	req->rm_type = REMOTE_NDIS_INITIALIZE_MSG;
+	req->rm_len = sizeof(*req);
+	req->rm_rid = rid;
+	req->rm_ver_major = RNDIS_VERSION_MAJOR;
+	req->rm_ver_minor = RNDIS_VERSION_MINOR;
+	req->rm_max_xfersz = HN_RNDIS_XFER_SIZE;
+
+	comp_len = RNDIS_INIT_COMP_SIZE_MIN;
+	comp = hn_rndis_xact_execute(sc, xact, rid, sizeof(*req), &comp_len,
+	    REMOTE_NDIS_INITIALIZE_CMPLT);
+	if (comp == NULL) {
+		if_printf(sc->hn_ifp, "exec RNDIS init failed\n");
+		error = EIO;
+		goto done;
+	}
+
+	if (comp->rm_status != RNDIS_STATUS_SUCCESS) {
+		if_printf(sc->hn_ifp, "RNDIS init failed: status 0x%08x\n",
+		    comp->rm_status);
+		error = EIO;
+		goto done;
+	}
+	sc->hn_rndis_agg_size = comp->rm_pktmaxsz;
+	sc->hn_rndis_agg_pkts = comp->rm_pktmaxcnt;
+	sc->hn_rndis_agg_align = 1U << comp->rm_align;
+
+	if (sc->hn_rndis_agg_align < sizeof(uint32_t)) {
+		/*
+		 * The RNDIS packet message encap assumes that the RNDIS
+		 * packet message is at least 4 bytes aligned.  Fix up the
+		 * alignment here, if the remote side sets the alignment
+		 * too low.
+		 */
+		if_printf(sc->hn_ifp, "fixup RNDIS aggpkt align: %u -> %zu\n",
+		    sc->hn_rndis_agg_align, sizeof(uint32_t));
+		sc->hn_rndis_agg_align = sizeof(uint32_t);
+	}
+
+	if (bootverbose) {
+		if_printf(sc->hn_ifp, "RNDIS ver %u.%u, "
+		    "aggpkt size %u, aggpkt cnt %u, aggpkt align %u\n",
+		    comp->rm_ver_major, comp->rm_ver_minor,
+		    sc->hn_rndis_agg_size, sc->hn_rndis_agg_pkts,
+		    sc->hn_rndis_agg_align);
+	}
+	error = 0;
+done:
+	vmbus_xact_put(xact);
+	return (error);
+}
+
+static int
+hn_rndis_halt(struct hn_softc *sc)
+{
+	struct vmbus_xact *xact;
+	struct rndis_halt_req *halt;
+	struct hn_nvs_sendctx sndc;
+	size_t comp_len;
+
+	xact = vmbus_xact_get(sc->hn_xact, sizeof(*halt));
+	if (xact == NULL) {
+		if_printf(sc->hn_ifp, "no xact for RNDIS halt\n");
+		return (ENXIO);
+	}
+	halt = vmbus_xact_req_data(xact);
+	halt->rm_type = REMOTE_NDIS_HALT_MSG;
+	halt->rm_len = sizeof(*halt);
+	halt->rm_rid = hn_rndis_rid(sc);
+
+	/* No RNDIS completion; rely on NVS message send completion */
+	hn_nvs_sendctx_init(&sndc, hn_nvs_sent_xact, xact);
+	hn_rndis_xact_exec1(sc, xact, sizeof(*halt), &sndc, &comp_len);
+
+	vmbus_xact_put(xact);
+	if (bootverbose)
+		if_printf(sc->hn_ifp, "RNDIS halt done\n");
+	return (0);
+}
+
+static int
+hn_rndis_query_hwcaps(struct hn_softc *sc, struct ndis_offload *caps)
+{
+	struct ndis_offload in;
+	size_t caps_len, size;
+	int error;
+
+	memset(&in, 0, sizeof(in));
+	in.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD;
+	if (sc->hn_ndis_ver >= HN_NDIS_VERSION_6_30) {
+		in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3;
+		size = NDIS_OFFLOAD_SIZE;
+	} else if (sc->hn_ndis_ver >= HN_NDIS_VERSION_6_1) {
+		in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2;
+		size = NDIS_OFFLOAD_SIZE_6_1;
+	} else {
+		in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1;
+		size = NDIS_OFFLOAD_SIZE_6_0;
+	}
+	in.ndis_hdr.ndis_size = size;
+
+	caps_len = NDIS_OFFLOAD_SIZE;
+	error = hn_rndis_query2(sc, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
+	    &in, size, caps, &caps_len, NDIS_OFFLOAD_SIZE_6_0);
+	if (error)
+		return (error);
+
+	/*
+	 * Preliminary verification.
+	 */
+	if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) {
+		if_printf(sc->hn_ifp, "invalid NDIS objtype 0x%02x\n",
+		    caps->ndis_hdr.ndis_type);
+		return (EINVAL);
+	}
+	if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) {
+		if_printf(sc->hn_ifp, "invalid NDIS objrev 0x%02x\n",
+		    caps->ndis_hdr.ndis_rev);
+		return (EINVAL);
+	}
+	if (caps->ndis_hdr.ndis_size > caps_len) {
+		if_printf(sc->hn_ifp, "invalid NDIS objsize %u, "
+		    "data size %zu\n", caps->ndis_hdr.ndis_size, caps_len);
+		return (EINVAL);
+	} else if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) {
+		if_printf(sc->hn_ifp, "invalid NDIS objsize %u\n",
+		    caps->ndis_hdr.ndis_size);
+		return (EINVAL);
+	}
+
+	if (bootverbose) {
+		/*
+		 * NOTE:
+		 * caps->ndis_hdr.ndis_size MUST be checked before accessing
+		 * NDIS 6.1+ specific fields.
+		 */
+		if_printf(sc->hn_ifp, "hwcaps rev %u\n",
+		    caps->ndis_hdr.ndis_rev);
+
+		if_printf(sc->hn_ifp, "hwcaps csum: "
+		    "ip4 tx 0x%x/0x%x rx 0x%x/0x%x, "
+		    "ip6 tx 0x%x/0x%x rx 0x%x/0x%x\n",
+		    caps->ndis_csum.ndis_ip4_txcsum,
+		    caps->ndis_csum.ndis_ip4_txenc,
+		    caps->ndis_csum.ndis_ip4_rxcsum,
+		    caps->ndis_csum.ndis_ip4_rxenc,
+		    caps->ndis_csum.ndis_ip6_txcsum,
+		    caps->ndis_csum.ndis_ip6_txenc,
+		    caps->ndis_csum.ndis_ip6_rxcsum,
+		    caps->ndis_csum.ndis_ip6_rxenc);
+		if_printf(sc->hn_ifp, "hwcaps lsov2: "
+		    "ip4 maxsz %u minsg %u encap 0x%x, "
+		    "ip6 maxsz %u minsg %u encap 0x%x opts 0x%x\n",
+		    caps->ndis_lsov2.ndis_ip4_maxsz,
+		    caps->ndis_lsov2.ndis_ip4_minsg,
+		    caps->ndis_lsov2.ndis_ip4_encap,
+		    caps->ndis_lsov2.ndis_ip6_maxsz,
+		    caps->ndis_lsov2.ndis_ip6_minsg,
+		    caps->ndis_lsov2.ndis_ip6_encap,
+		    caps->ndis_lsov2.ndis_ip6_opts);
+	}
+	return (0);
+}
+
+int
+hn_rndis_attach(struct hn_softc *sc, int mtu, int *init_done)
+{
+	int error;
+
+	*init_done = 0;
+
+	/*
+	 * Initialize RNDIS.
+	 */
+	error = hn_rndis_init(sc);
+	if (error)
+		return (error);
+	*init_done = 1;
+
+	/*
+	 * Configure NDIS offload settings.
+	 */
+	hn_rndis_conf_offload(sc, mtu);
+	return (0);
+}
+
+void
+hn_rndis_detach(struct hn_softc *sc)
+{
+
+	/* Halt the RNDIS. */
+	hn_rndis_halt(sc);
+}


Property changes on: trunk/sys/dev/hyperv/netvsc/hn_rndis.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/netvsc/hn_rndis.h
===================================================================
--- trunk/sys/dev/hyperv/netvsc/hn_rndis.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/netvsc/hn_rndis.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,51 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/netvsc/hn_rndis.h 324466 2017-10-10 03:29:36Z sephe $
+ */
+
+#ifndef _HN_RNDIS_H_
+#define _HN_RNDIS_H_
+
+struct hn_softc;
+
+int		hn_rndis_attach(struct hn_softc *sc, int mtu, int *init_done);
+void		hn_rndis_detach(struct hn_softc *sc);
+int		hn_rndis_conf_rss(struct hn_softc *sc, uint16_t flags);
+int		hn_rndis_query_rsscaps(struct hn_softc *sc, int *rxr_cnt);
+int		hn_rndis_get_eaddr(struct hn_softc *sc, uint8_t *eaddr);
+/* link_status: NDIS_MEDIA_STATE_ */
+int		hn_rndis_get_linkstatus(struct hn_softc *sc,
+		    uint32_t *link_status);
+int		hn_rndis_get_mtu(struct hn_softc *sc, uint32_t *mtu);
+/* filter: NDIS_PACKET_TYPE_. */
+int		hn_rndis_set_rxfilter(struct hn_softc *sc, uint32_t filter);
+void		hn_rndis_rx_ctrl(struct hn_softc *sc, const void *data,
+		    int dlen);
+
+#endif  /* !_HN_RNDIS_H_ */


Property changes on: trunk/sys/dev/hyperv/netvsc/hn_rndis.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/netvsc/if_hn.c
===================================================================
--- trunk/sys/dev/hyperv/netvsc/if_hn.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/netvsc/if_hn.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,7498 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 2004-2006 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/netvsc/if_hn.c 324575 2017-10-13 02:29:43Z sephe $");
+
+#include "opt_inet6.h"
+#include "opt_inet.h"
+#include "opt_hn.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/counter.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/sbuf.h>
+#include <sys/smp.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/buf_ring.h>
+#include <sys/eventhandler.h>
+
+#include <machine/atomic.h>
+#include <machine/in_cksum.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/if_vlan_var.h>
+#include <net/rndis.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_lro.h>
+#include <netinet/udp.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/hyperv_busdma.h>
+#include <dev/hyperv/include/vmbus.h>
+#include <dev/hyperv/include/vmbus_xact.h>
+
+#include <dev/hyperv/netvsc/ndis.h>
+#include <dev/hyperv/netvsc/if_hnreg.h>
+#include <dev/hyperv/netvsc/if_hnvar.h>
+#include <dev/hyperv/netvsc/hn_nvs.h>
+#include <dev/hyperv/netvsc/hn_rndis.h>
+
+#include "vmbus_if.h"
+
+#define HN_IFSTART_SUPPORT
+
+/* NOTE: M_HASHTYPE_RSS_UDP_IPV4 is not available on stable/10. */
+#ifndef M_HASHTYPE_RSS_UDP_IPV4
+#define M_HASHTYPE_RSS_UDP_IPV4		M_HASHTYPE_OPAQUE
+#endif
+
+#define HN_RING_CNT_DEF_MAX		8
+
+#define HN_VFMAP_SIZE_DEF		8
+
+#define HN_XPNT_VF_ATTWAIT_MIN		2	/* seconds */
+
+/* YYY should get it from the underlying channel */
+#define HN_TX_DESC_CNT			512
+
+#define HN_RNDIS_PKT_LEN					\
+	(sizeof(struct rndis_packet_msg) +			\
+	 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) +	\
+	 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +		\
+	 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +		\
+	 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
+#define HN_RNDIS_PKT_BOUNDARY		PAGE_SIZE
+#define HN_RNDIS_PKT_ALIGN		CACHE_LINE_SIZE
+
+#define HN_TX_DATA_BOUNDARY		PAGE_SIZE
+#define HN_TX_DATA_MAXSIZE		IP_MAXPACKET
+#define HN_TX_DATA_SEGSIZE		PAGE_SIZE
+/* -1 for RNDIS packet message */
+#define HN_TX_DATA_SEGCNT_MAX		(HN_GPACNT_MAX - 1)
+
+#define HN_DIRECT_TX_SIZE_DEF		128
+
+#define HN_EARLY_TXEOF_THRESH		8
+
+#define HN_PKTBUF_LEN_DEF		(16 * 1024)
+
+#define HN_LROENT_CNT_DEF		128
+
+#define HN_LRO_LENLIM_MULTIRX_DEF	(12 * ETHERMTU)
+#define HN_LRO_LENLIM_DEF		(25 * ETHERMTU)
+/* YYY 2*MTU is a bit rough, but should be good enough. */
+#define HN_LRO_LENLIM_MIN(ifp)		(2 * (ifp)->if_mtu)
+
+#define HN_LRO_ACKCNT_DEF		1
+
+#define HN_LOCK_INIT(sc)		\
+	sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
+#define HN_LOCK_DESTROY(sc)		sx_destroy(&(sc)->hn_lock)
+#define HN_LOCK_ASSERT(sc)		sx_assert(&(sc)->hn_lock, SA_XLOCKED)
+#define HN_LOCK(sc)					\
+do {							\
+	while (sx_try_xlock(&(sc)->hn_lock) == 0)	\
+		DELAY(1000);				\
+} while (0)
+#define HN_UNLOCK(sc)			sx_xunlock(&(sc)->hn_lock)
+
+#define HN_CSUM_IP_MASK			(CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
+#define HN_CSUM_IP6_MASK		(CSUM_IP6_TCP | CSUM_IP6_UDP)
+#define HN_CSUM_IP_HWASSIST(sc)		\
+	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
+#define HN_CSUM_IP6_HWASSIST(sc)	\
+	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)
+
+#define HN_PKTSIZE_MIN(align)		\
+	roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
+	    HN_RNDIS_PKT_LEN, (align))
+#define HN_PKTSIZE(m, align)		\
+	roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))
+
+#define HN_RING_IDX2CPU(sc, idx)	(((sc)->hn_cpu + (idx)) % mp_ncpus)
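
HN_PKTSIZE() and HN_PKTSIZE_MIN() are plain power-of-two round-up arithmetic via roundup2(). A minimal user-space sketch of the same computation, with illustrative numbers (1514-byte frame, 256-byte RNDIS message, 64-byte cache line):

#include <stdio.h>
#include <stdint.h>

/* Same definition as the kernel macro; y must be a power of 2. */
#define roundup2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))

int
main(void)
{
	uint32_t len = 1514 + 256;	/* packet + RNDIS packet message */

	/* Round up to the next 64-byte boundary: prints 1792. */
	printf("%u\n", roundup2(len, 64));
	return (0);
}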
+
+struct hn_txdesc {
+#ifndef HN_USE_TXDESC_BUFRING
+	SLIST_ENTRY(hn_txdesc)		link;
+#endif
+	STAILQ_ENTRY(hn_txdesc)		agg_link;
+
+	/* Aggregated txdescs, in sending order. */
+	STAILQ_HEAD(, hn_txdesc)	agg_list;
+
+	/* The oldest packet, if transmission aggregation happens. */
+	struct mbuf			*m;
+	struct hn_tx_ring		*txr;
+	int				refs;
+	uint32_t			flags;	/* HN_TXD_FLAG_ */
+	struct hn_nvs_sendctx		send_ctx;
+	uint32_t			chim_index;
+	int				chim_size;
+
+	bus_dmamap_t			data_dmap;
+
+	bus_addr_t			rndis_pkt_paddr;
+	struct rndis_packet_msg		*rndis_pkt;
+	bus_dmamap_t			rndis_pkt_dmap;
+};
+
+#define HN_TXD_FLAG_ONLIST		0x0001
+#define HN_TXD_FLAG_DMAMAP		0x0002
+#define HN_TXD_FLAG_ONAGG		0x0004
+
+struct hn_rxinfo {
+	uint32_t			vlan_info;
+	uint32_t			csum_info;
+	uint32_t			hash_info;
+	uint32_t			hash_value;
+};
+
+struct hn_rxvf_setarg {
+	struct hn_rx_ring	*rxr;
+	struct ifnet		*vf_ifp;
+};
+
+#define HN_RXINFO_VLAN			0x0001
+#define HN_RXINFO_CSUM			0x0002
+#define HN_RXINFO_HASHINF		0x0004
+#define HN_RXINFO_HASHVAL		0x0008
+#define HN_RXINFO_ALL			\
+	(HN_RXINFO_VLAN |		\
+	 HN_RXINFO_CSUM |		\
+	 HN_RXINFO_HASHINF |		\
+	 HN_RXINFO_HASHVAL)
+
+#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
+#define HN_NDIS_RXCSUM_INFO_INVALID	0
+#define HN_NDIS_HASH_INFO_INVALID	0
+
+static int			hn_probe(device_t);
+static int			hn_attach(device_t);
+static int			hn_detach(device_t);
+static int			hn_shutdown(device_t);
+static void			hn_chan_callback(struct vmbus_channel *,
+				    void *);
+
+static void			hn_init(void *);
+static int			hn_ioctl(struct ifnet *, u_long, caddr_t);
+#ifdef HN_IFSTART_SUPPORT
+static void			hn_start(struct ifnet *);
+#endif
+static int			hn_transmit(struct ifnet *, struct mbuf *);
+static void			hn_xmit_qflush(struct ifnet *);
+static int			hn_ifmedia_upd(struct ifnet *);
+static void			hn_ifmedia_sts(struct ifnet *,
+				    struct ifmediareq *);
+
+static void			hn_ifnet_event(void *, struct ifnet *, int);
+static void			hn_ifaddr_event(void *, struct ifnet *);
+static void			hn_ifnet_attevent(void *, struct ifnet *);
+static void			hn_ifnet_detevent(void *, struct ifnet *);
+static void			hn_ifnet_lnkevent(void *, struct ifnet *, int);
+
+static bool			hn_ismyvf(const struct hn_softc *,
+				    const struct ifnet *);
+static void			hn_rxvf_change(struct hn_softc *,
+				    struct ifnet *, bool);
+static void			hn_rxvf_set(struct hn_softc *, struct ifnet *);
+static void			hn_rxvf_set_task(void *, int);
+static void			hn_xpnt_vf_input(struct ifnet *, struct mbuf *);
+static int			hn_xpnt_vf_iocsetflags(struct hn_softc *);
+static int			hn_xpnt_vf_iocsetcaps(struct hn_softc *,
+				    struct ifreq *);
+static void			hn_xpnt_vf_saveifflags(struct hn_softc *);
+static bool			hn_xpnt_vf_isready(struct hn_softc *);
+static void			hn_xpnt_vf_setready(struct hn_softc *);
+static void			hn_xpnt_vf_init_taskfunc(void *, int);
+static void			hn_xpnt_vf_init(struct hn_softc *);
+static void			hn_xpnt_vf_setenable(struct hn_softc *);
+static void			hn_xpnt_vf_setdisable(struct hn_softc *, bool);
+static void			hn_vf_rss_fixup(struct hn_softc *, bool);
+static void			hn_vf_rss_restore(struct hn_softc *);
+
+static int			hn_rndis_rxinfo(const void *, int,
+				    struct hn_rxinfo *);
+static void			hn_rndis_rx_data(struct hn_rx_ring *,
+				    const void *, int);
+static void			hn_rndis_rx_status(struct hn_softc *,
+				    const void *, int);
+static void			hn_rndis_init_fixat(struct hn_softc *, int);
+
+static void			hn_nvs_handle_notify(struct hn_softc *,
+				    const struct vmbus_chanpkt_hdr *);
+static void			hn_nvs_handle_comp(struct hn_softc *,
+				    struct vmbus_channel *,
+				    const struct vmbus_chanpkt_hdr *);
+static void			hn_nvs_handle_rxbuf(struct hn_rx_ring *,
+				    struct vmbus_channel *,
+				    const struct vmbus_chanpkt_hdr *);
+static void			hn_nvs_ack_rxbuf(struct hn_rx_ring *,
+				    struct vmbus_channel *, uint64_t);
+
+#if __FreeBSD_version >= 1100099
+static int			hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
+#endif
+static int			hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
+#if __FreeBSD_version < 1100095
+static int			hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
+#else
+static int			hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
+#endif
+static int			hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_polling_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_vf_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_vflist_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS);
+static int			hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS);
+
+static void			hn_stop(struct hn_softc *, bool);
+static void			hn_init_locked(struct hn_softc *);
+static int			hn_chan_attach(struct hn_softc *,
+				    struct vmbus_channel *);
+static void			hn_chan_detach(struct hn_softc *,
+				    struct vmbus_channel *);
+static int			hn_attach_subchans(struct hn_softc *);
+static void			hn_detach_allchans(struct hn_softc *);
+static void			hn_chan_rollup(struct hn_rx_ring *,
+				    struct hn_tx_ring *);
+static void			hn_set_ring_inuse(struct hn_softc *, int);
+static int			hn_synth_attach(struct hn_softc *, int);
+static void			hn_synth_detach(struct hn_softc *);
+static int			hn_synth_alloc_subchans(struct hn_softc *,
+				    int *);
+static bool			hn_synth_attachable(const struct hn_softc *);
+static void			hn_suspend(struct hn_softc *);
+static void			hn_suspend_data(struct hn_softc *);
+static void			hn_suspend_mgmt(struct hn_softc *);
+static void			hn_resume(struct hn_softc *);
+static void			hn_resume_data(struct hn_softc *);
+static void			hn_resume_mgmt(struct hn_softc *);
+static void			hn_suspend_mgmt_taskfunc(void *, int);
+static void			hn_chan_drain(struct hn_softc *,
+				    struct vmbus_channel *);
+static void			hn_disable_rx(struct hn_softc *);
+static void			hn_drain_rxtx(struct hn_softc *, int);
+static void			hn_polling(struct hn_softc *, u_int);
+static void			hn_chan_polling(struct vmbus_channel *, u_int);
+static void			hn_mtu_change_fixup(struct hn_softc *);
+
+static void			hn_update_link_status(struct hn_softc *);
+static void			hn_change_network(struct hn_softc *);
+static void			hn_link_taskfunc(void *, int);
+static void			hn_netchg_init_taskfunc(void *, int);
+static void			hn_netchg_status_taskfunc(void *, int);
+static void			hn_link_status(struct hn_softc *);
+
+static int			hn_create_rx_data(struct hn_softc *, int);
+static void			hn_destroy_rx_data(struct hn_softc *);
+static int			hn_check_iplen(const struct mbuf *, int);
+static void			hn_rxpkt_proto(const struct mbuf *, int *, int *);
+static int			hn_set_rxfilter(struct hn_softc *, uint32_t);
+static int			hn_rxfilter_config(struct hn_softc *);
+static int			hn_rss_reconfig(struct hn_softc *);
+static void			hn_rss_ind_fixup(struct hn_softc *);
+static void			hn_rss_mbuf_hash(struct hn_softc *, uint32_t);
+static int			hn_rxpkt(struct hn_rx_ring *, const void *,
+				    int, const struct hn_rxinfo *);
+static uint32_t			hn_rss_type_fromndis(uint32_t);
+static uint32_t			hn_rss_type_tondis(uint32_t);
+
+static int			hn_tx_ring_create(struct hn_softc *, int);
+static void			hn_tx_ring_destroy(struct hn_tx_ring *);
+static int			hn_create_tx_data(struct hn_softc *, int);
+static void			hn_fixup_tx_data(struct hn_softc *);
+static void			hn_fixup_rx_data(struct hn_softc *);
+static void			hn_destroy_tx_data(struct hn_softc *);
+static void			hn_txdesc_dmamap_destroy(struct hn_txdesc *);
+static void			hn_txdesc_gc(struct hn_tx_ring *,
+				    struct hn_txdesc *);
+static int			hn_encap(struct ifnet *, struct hn_tx_ring *,
+				    struct hn_txdesc *, struct mbuf **);
+static int			hn_txpkt(struct ifnet *, struct hn_tx_ring *,
+				    struct hn_txdesc *);
+static void			hn_set_chim_size(struct hn_softc *, int);
+static void			hn_set_tso_maxsize(struct hn_softc *, int, int);
+static bool			hn_tx_ring_pending(struct hn_tx_ring *);
+static void			hn_tx_ring_qflush(struct hn_tx_ring *);
+static void			hn_resume_tx(struct hn_softc *, int);
+static void			hn_set_txagg(struct hn_softc *);
+static void			*hn_try_txagg(struct ifnet *,
+				    struct hn_tx_ring *, struct hn_txdesc *,
+				    int);
+static int			hn_get_txswq_depth(const struct hn_tx_ring *);
+static void			hn_txpkt_done(struct hn_nvs_sendctx *,
+				    struct hn_softc *, struct vmbus_channel *,
+				    const void *, int);
+static int			hn_txpkt_sglist(struct hn_tx_ring *,
+				    struct hn_txdesc *);
+static int			hn_txpkt_chim(struct hn_tx_ring *,
+				    struct hn_txdesc *);
+static int			hn_xmit(struct hn_tx_ring *, int);
+static void			hn_xmit_taskfunc(void *, int);
+static void			hn_xmit_txeof(struct hn_tx_ring *);
+static void			hn_xmit_txeof_taskfunc(void *, int);
+#ifdef HN_IFSTART_SUPPORT
+static int			hn_start_locked(struct hn_tx_ring *, int);
+static void			hn_start_taskfunc(void *, int);
+static void			hn_start_txeof(struct hn_tx_ring *);
+static void			hn_start_txeof_taskfunc(void *, int);
+#endif
+
+SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
+    "Hyper-V network interface");
+
+/* Trust tcp segment verification on host side. */
+static int			hn_trust_hosttcp = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
+    &hn_trust_hosttcp, 0,
+    "Trust tcp segment verification on host side, "
+    "when csum info is missing (global setting)");
+
+/* Trust udp datagram verification on host side. */
+static int			hn_trust_hostudp = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
+    &hn_trust_hostudp, 0,
+    "Trust udp datagram verification on host side, "
+    "when csum info is missing (global setting)");
+
+/* Trust ip packet verification on host side. */
+static int			hn_trust_hostip = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
+    &hn_trust_hostip, 0,
+    "Trust ip packet verification on host side, "
+    "when csum info is missing (global setting)");
+
+/*
+ * Offload UDP/IPv4 checksum.
+ */
+static int			hn_enable_udp4cs = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp4cs, CTLFLAG_RDTUN,
+    &hn_enable_udp4cs, 0, "Offload UDP/IPv4 checksum");
+
+/*
+ * Offload UDP/IPv6 checksum.
+ */
+static int			hn_enable_udp6cs = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp6cs, CTLFLAG_RDTUN,
+    &hn_enable_udp6cs, 0, "Offload UDP/IPv6 checksum");
+
+/* Stats. */
+static counter_u64_t		hn_udpcs_fixup;
+SYSCTL_COUNTER_U64(_hw_hn, OID_AUTO, udpcs_fixup, CTLFLAG_RW,
+    &hn_udpcs_fixup, "# of UDP checksum fixups");
+
+/*
+ * See hn_set_hlen().
+ *
+ * This value is for Azure.  For Hyper-V, set this above
+ * 65536 to disable UDP datagram checksum fixup.
+ */
+static int			hn_udpcs_fixup_mtu = 1420;
+SYSCTL_INT(_hw_hn, OID_AUTO, udpcs_fixup_mtu, CTLFLAG_RWTUN,
+    &hn_udpcs_fixup_mtu, 0, "UDP checksum fixup MTU threshold");
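
Since the OID is declared under _hw_hn with CTLFLAG_RWTUN, the threshold is adjustable at runtime or from loader.conf as hw.hn.udpcs_fixup_mtu; per the comment above, setting it past 65536 on plain Hyper-V effectively disables the fixup.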
+
+/* Limit TSO burst size */
+static int			hn_tso_maxlen = IP_MAXPACKET;
+SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
+    &hn_tso_maxlen, 0, "TSO burst limit");
+
+/* Limit chimney send size */
+static int			hn_tx_chimney_size = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
+    &hn_tx_chimney_size, 0, "Chimney send packet size limit");
+
+/* Limit the size of packet for direct transmission */
+static int			hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
+SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
+    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");
+
+/* # of LRO entries per RX ring */
+#if defined(INET) || defined(INET6)
+#if __FreeBSD_version >= 1100095
+static int			hn_lro_entry_count = HN_LROENT_CNT_DEF;
+SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
+    &hn_lro_entry_count, 0, "LRO entry count");
+#endif
+#endif
+
+static int			hn_tx_taskq_cnt = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
+    &hn_tx_taskq_cnt, 0, "# of TX taskqueues");
+
+#define HN_TX_TASKQ_M_INDEP	0
+#define HN_TX_TASKQ_M_GLOBAL	1
+#define HN_TX_TASKQ_M_EVTTQ	2
+
+static int			hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
+    &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
+    "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");
+
+#ifndef HN_USE_TXDESC_BUFRING
+static int			hn_use_txdesc_bufring = 0;
+#else
+static int			hn_use_txdesc_bufring = 1;
+#endif
+SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
+    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");
+
+#ifdef HN_IFSTART_SUPPORT
+/* Use ifnet.if_start instead of ifnet.if_transmit */
+static int			hn_use_if_start = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
+    &hn_use_if_start, 0, "Use if_start TX method");
+#endif
+
+/* # of channels to use */
+static int			hn_chan_cnt = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
+    &hn_chan_cnt, 0,
+    "# of channels to use; each channel has one RX ring and one TX ring");
+
+/* # of transmit rings to use */
+static int			hn_tx_ring_cnt = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
+    &hn_tx_ring_cnt, 0, "# of TX rings to use");
+
+/* Software TX ring depth */
+static int			hn_tx_swq_depth = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
+    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");
+
+/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
+#if __FreeBSD_version >= 1100095
+static u_int			hn_lro_mbufq_depth = 0;
+SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
+    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
+#endif
+
+/* Packet transmission aggregation size limit */
+static int			hn_tx_agg_size = -1;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
+    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");
+
+/* Packet transmission aggregation count limit */
+static int			hn_tx_agg_pkts = -1;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
+    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");
+
+/* VF list */
+SYSCTL_PROC(_hw_hn, OID_AUTO, vflist, CTLFLAG_RD | CTLTYPE_STRING,
+    0, 0, hn_vflist_sysctl, "A", "VF list");
+
+/* VF mapping */
+SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING,
+    0, 0, hn_vfmap_sysctl, "A", "VF mapping");
+
+/* Transparent VF */
+static int			hn_xpnt_vf = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN,
+    &hn_xpnt_vf, 0, "Transparent VF mode");
+
+/* Accurate BPF support for Transparent VF */
+static int			hn_xpnt_vf_accbpf = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN,
+    &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF");
+
+/* Extra wait for transparent VF attach routine; unit seconds. */
+static int			hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
+SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN,
+    &hn_xpnt_vf_attwait, 0,
+    "Extra wait for transparent VF attach routine; unit: seconds");
+
+static u_int			hn_cpu_index;	/* next CPU for channel */
+static struct taskqueue		**hn_tx_taskque; /* shared TX taskqueues */
+
+static struct rmlock		hn_vfmap_lock;
+static int			hn_vfmap_size;
+static struct ifnet		**hn_vfmap;
+
+static const uint8_t
+hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
+	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
+};
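
For reference, a minimal user-space sketch of the Toeplitz hash that this 40-byte key parameterizes. This is the textbook algorithm, not the kernel's implementation, and it assumes key holds at least datalen + 4 bytes (true for this key and any standard RSS input, which is at most 36 bytes):

#include <stdint.h>
#include <stddef.h>

static uint32_t
toeplitz_hash(const uint8_t *key, const uint8_t *data, size_t datalen)
{
	uint32_t hash = 0, v;
	size_t i;
	int b;

	/* Seed the 32-bit sliding key window with the first 4 key bytes. */
	v = ((uint32_t)key[0] << 24) | ((uint32_t)key[1] << 16) |
	    ((uint32_t)key[2] << 8) | key[3];
	for (i = 0; i < datalen; i++) {
		for (b = 0; b < 8; b++) {
			/* XOR in the window for every set input bit. */
			if (data[i] & (0x80 >> b))
				hash ^= v;
			/* Slide the window left by one key bit. */
			v <<= 1;
			if (key[i + 4] & (0x80 >> b))
				v |= 1;
		}
	}
	return (hash);
}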
+
+static const struct hyperv_guid	hn_guid = {
+	.hv_guid = {
+	    0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46,
+	    0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e }
+};
+
+static device_method_t hn_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		hn_probe),
+	DEVMETHOD(device_attach,	hn_attach),
+	DEVMETHOD(device_detach,	hn_detach),
+	DEVMETHOD(device_shutdown,	hn_shutdown),
+	DEVMETHOD_END
+};
+
+static driver_t hn_driver = {
+	"hn",
+	hn_methods,
+	sizeof(struct hn_softc)
+};
+
+static devclass_t hn_devclass;
+
+DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
+MODULE_VERSION(hn, 1);
+MODULE_DEPEND(hn, vmbus, 1, 1, 1);
+
+#if __FreeBSD_version >= 1100099
+static void
+hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
+{
+	int i;
+
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+		sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
+}
+#endif
+
+static int
+hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
+{
+
+	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
+	    txd->chim_size == 0, ("invalid rndis sglist txd"));
+	return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
+	    &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
+}
+
+static int
+hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
+{
+	struct hn_nvs_rndis rndis;
+
+	KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
+	    txd->chim_size > 0, ("invalid rndis chim txd"));
+
+	rndis.nvs_type = HN_NVS_TYPE_RNDIS;
+	rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
+	rndis.nvs_chim_idx = txd->chim_index;
+	rndis.nvs_chim_sz = txd->chim_size;
+
+	return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
+	    &rndis, sizeof(rndis), &txd->send_ctx));
+}
+
+static __inline uint32_t
+hn_chim_alloc(struct hn_softc *sc)
+{
+	int i, bmap_cnt = sc->hn_chim_bmap_cnt;
+	u_long *bmap = sc->hn_chim_bmap;
+	uint32_t ret = HN_NVS_CHIM_IDX_INVALID;
+
+	for (i = 0; i < bmap_cnt; ++i) {
+		int idx;
+
+		idx = ffsl(~bmap[i]);
+		if (idx == 0)
+			continue;
+
+		--idx; /* ffsl is 1-based */
+		KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
+		    ("invalid i %d and idx %d", i, idx));
+
+		if (atomic_testandset_long(&bmap[i], idx))
+			continue;
+
+		ret = i * LONG_BIT + idx;
+		break;
+	}
+	return (ret);
+}
+
+static __inline void
+hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
+{
+	u_long mask;
+	uint32_t idx;
+
+	idx = chim_idx / LONG_BIT;
+	KASSERT(idx < sc->hn_chim_bmap_cnt,
+	    ("invalid chimney index 0x%x", chim_idx));
+
+	mask = 1UL << (chim_idx % LONG_BIT);
+	KASSERT(sc->hn_chim_bmap[idx] & mask,
+	    ("index bitmap 0x%lx, chimney index %u, "
+	     "bitmap idx %d, bitmask 0x%lx",
+	     sc->hn_chim_bmap[idx], chim_idx, idx, mask));
+
+	atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
+}
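
Taken together, hn_chim_alloc() and hn_chim_free() form a lock-free bitmap allocator: scan each word for a clear bit with ffsl(), claim it with an atomic test-and-set, and move on if another CPU wins the race. A user-space sketch of the same pattern using C11 atomics (SLOT_CNT is illustrative; ffsl() is the glibc/BSD extension):

#include <limits.h>
#include <stdatomic.h>
#include <strings.h>			/* ffsl() */

#ifndef LONG_BIT
#define LONG_BIT	((int)(CHAR_BIT * sizeof(long)))
#endif

#define SLOT_CNT	128		/* illustrative pool size */
#define BMAP_CNT	(SLOT_CNT / LONG_BIT)

static _Atomic unsigned long	slot_bmap[BMAP_CNT];

static int
slot_alloc(void)
{
	unsigned long old;
	int i, idx;

	for (i = 0; i < BMAP_CNT; i++) {
		old = atomic_load(&slot_bmap[i]);
		idx = ffsl(~old);	/* first clear bit; 1-based, 0 if none */
		if (idx == 0)
			continue;	/* word full */
		--idx;
		/* Claim the bit; if someone beat us, try the next word. */
		old = atomic_fetch_or(&slot_bmap[i], 1UL << idx);
		if (old & (1UL << idx))
			continue;
		return (i * LONG_BIT + idx);
	}
	return (-1);			/* pool exhausted */
}

static void
slot_free(int slot)
{
	atomic_fetch_and(&slot_bmap[slot / LONG_BIT],
	    ~(1UL << (slot % LONG_BIT)));
}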
+
+#if defined(INET6) || defined(INET)
+
+#define PULLUP_HDR(m, len)				\
+do {							\
+	if (__predict_false((m)->m_len < (len))) {	\
+		(m) = m_pullup((m), (len));		\
+		if ((m) == NULL)			\
+			return (NULL);			\
+	}						\
+} while (0)
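
PULLUP_HDR() relies on m_pullup(9) semantics: the first len bytes of the chain are made contiguous in the leading mbuf, and on failure m_pullup() frees the entire chain and returns NULL. That is why each fixup helper below is documented as consuming m_head when it fails.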
+
+/*
+ * NOTE: If this function fails, m_head will be freed.
+ */
+static __inline struct mbuf *
+hn_tso_fixup(struct mbuf *m_head)
+{
+	struct ether_vlan_header *evl;
+	struct tcphdr *th;
+	int ehlen;
+
+	KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));
+
+	PULLUP_HDR(m_head, sizeof(*evl));
+	evl = mtod(m_head, struct ether_vlan_header *);
+	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
+		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
+	else
+		ehlen = ETHER_HDR_LEN;
+	m_head->m_pkthdr.l2hlen = ehlen;
+
+#ifdef INET
+	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
+		struct ip *ip;
+		int iphlen;
+
+		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
+		ip = mtodo(m_head, ehlen);
+		iphlen = ip->ip_hl << 2;
+		m_head->m_pkthdr.l3hlen = iphlen;
+
+		PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
+		th = mtodo(m_head, ehlen + iphlen);
+
+		ip->ip_len = 0;
+		ip->ip_sum = 0;
+		th->th_sum = in_pseudo(ip->ip_src.s_addr,
+		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
+	}
+#endif
+#if defined(INET6) && defined(INET)
+	else
+#endif
+#ifdef INET6
+	{
+		struct ip6_hdr *ip6;
+
+		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
+		ip6 = mtodo(m_head, ehlen);
+		if (ip6->ip6_nxt != IPPROTO_TCP) {
+			m_freem(m_head);
+			return (NULL);
+		}
+		m_head->m_pkthdr.l3hlen = sizeof(*ip6);
+
+		PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
+		th = mtodo(m_head, ehlen + sizeof(*ip6));
+
+		ip6->ip6_plen = 0;
+		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
+	}
+#endif
+	return (m_head);
+}
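
This is the conventional TSO preparation: ip_len (or ip6_plen) is zeroed and th_sum is pre-seeded with a pseudo-header sum that deliberately excludes the TCP length, which is filled in per generated segment. A rough user-space sketch of the IPv4 seed that in_pseudo() computes here, with byte-order folding details glossed over:

#include <stdint.h>

/*
 * Illustrative one's-complement sum of the IPv4 pseudo-header fields,
 * with the TCP length intentionally omitted.  Not the kernel helper.
 */
static uint16_t
tso_pseudo_seed(uint32_t src, uint32_t dst, uint16_t proto)
{
	uint64_t sum;

	sum = (src & 0xffff) + (src >> 16) +
	    (dst & 0xffff) + (dst >> 16) + proto;
	while (sum > 0xffff)			/* fold the carries */
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)sum);			/* a seed, so not inverted */
}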
+
+/*
+ * NOTE: If this function fails, m_head will be freed.
+ */
+static __inline struct mbuf *
+hn_set_hlen(struct mbuf *m_head)
+{
+	const struct ether_vlan_header *evl;
+	int ehlen;
+
+	PULLUP_HDR(m_head, sizeof(*evl));
+	evl = mtod(m_head, const struct ether_vlan_header *);
+	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
+		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
+	else
+		ehlen = ETHER_HDR_LEN;
+	m_head->m_pkthdr.l2hlen = ehlen;
+
+#ifdef INET
+	if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_UDP)) {
+		const struct ip *ip;
+		int iphlen;
+
+		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
+		ip = mtodo(m_head, ehlen);
+		iphlen = ip->ip_hl << 2;
+		m_head->m_pkthdr.l3hlen = iphlen;
+
+		/*
+		 * UDP checksum offload does not work in Azure if the
+		 * following conditions are met:
+		 * - sizeof(IP hdr + UDP hdr + payload) > 1420.
+		 * - IP_DF is not set in the IP hdr.
+		 *
+		 * Fallback to software checksum for these UDP datagrams.
+		 */
+		if ((m_head->m_pkthdr.csum_flags & CSUM_IP_UDP) &&
+		    m_head->m_pkthdr.len > hn_udpcs_fixup_mtu + ehlen &&
+		    (ntohs(ip->ip_off) & IP_DF) == 0) {
+			uint16_t off = ehlen + iphlen;
+
+			counter_u64_add(hn_udpcs_fixup, 1);
+			PULLUP_HDR(m_head, off + sizeof(struct udphdr));
+			*(uint16_t *)(m_head->m_data + off +
+			    m_head->m_pkthdr.csum_data) = in_cksum_skip(
+			    m_head, m_head->m_pkthdr.len, off);
+			m_head->m_pkthdr.csum_flags &= ~CSUM_IP_UDP;
+		}
+	}
+#endif
+#if defined(INET6) && defined(INET)
+	else
+#endif
+#ifdef INET6
+	{
+		const struct ip6_hdr *ip6;
+
+		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
+		ip6 = mtodo(m_head, ehlen);
+		if (ip6->ip6_nxt != IPPROTO_TCP) {
+			m_freem(m_head);
+			return (NULL);
+		}
+		m_head->m_pkthdr.l3hlen = sizeof(*ip6);
+	}
+#endif
+	return (m_head);
+}
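
With the defaults in this file the fallback condition is concrete: hn_udpcs_fixup_mtu (1420) plus a 14-byte untagged Ethernet header means any UDP/IPv4 packet whose frame exceeds 1434 bytes with IP_DF clear gets its checksum computed in software by in_cksum_skip(), and CSUM_IP_UDP is cleared so the offload request never reaches the host.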
+
+/*
+ * NOTE: If this function fails, m_head will be freed.
+ */
+static __inline struct mbuf *
+hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn)
+{
+	const struct tcphdr *th;
+	int ehlen, iphlen;
+
+	*tcpsyn = 0;
+	ehlen = m_head->m_pkthdr.l2hlen;
+	iphlen = m_head->m_pkthdr.l3hlen;
+
+	PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
+	th = mtodo(m_head, ehlen + iphlen);
+	if (th->th_flags & TH_SYN)
+		*tcpsyn = 1;
+	return (m_head);
+}
+
+#undef PULLUP_HDR
+
+#endif	/* INET6 || INET */
+
+static int
+hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
+{
+	int error = 0;
+
+	HN_LOCK_ASSERT(sc);
+
+	if (sc->hn_rx_filter != filter) {
+		error = hn_rndis_set_rxfilter(sc, filter);
+		if (!error)
+			sc->hn_rx_filter = filter;
+	}
+	return (error);
+}
+
+static int
+hn_rxfilter_config(struct hn_softc *sc)
+{
+	struct ifnet *ifp = sc->hn_ifp;
+	uint32_t filter;
+
+	HN_LOCK_ASSERT(sc);
+
+	/*
+	 * If the non-transparent mode VF is activated, we don't know how
+	 * its RX filter is configured, so stick the synthetic device in
+	 * promiscuous mode.
+	 */
+	if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) {
+		filter = NDIS_PACKET_TYPE_PROMISCUOUS;
+	} else {
+		filter = NDIS_PACKET_TYPE_DIRECTED;
+		if (ifp->if_flags & IFF_BROADCAST)
+			filter |= NDIS_PACKET_TYPE_BROADCAST;
+		/* TODO: support multicast list */
+		if ((ifp->if_flags & IFF_ALLMULTI) ||
+		    !TAILQ_EMPTY(&ifp->if_multiaddrs))
+			filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
+	}
+	return (hn_set_rxfilter(sc, filter));
+}
+
+static void
+hn_set_txagg(struct hn_softc *sc)
+{
+	uint32_t size, pkts;
+	int i;
+
+	/*
+	 * Setup aggregation size.
+	 */
+	if (sc->hn_agg_size < 0)
+		size = UINT32_MAX;
+	else
+		size = sc->hn_agg_size;
+
+	if (sc->hn_rndis_agg_size < size)
+		size = sc->hn_rndis_agg_size;
+
+	/* NOTE: We only aggregate packets using chimney sending buffers. */
+	if (size > (uint32_t)sc->hn_chim_szmax)
+		size = sc->hn_chim_szmax;
+
+	if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
+		/* Disable */
+		size = 0;
+		pkts = 0;
+		goto done;
+	}
+
+	/* NOTE: Type of the per TX ring setting is 'int'. */
+	if (size > INT_MAX)
+		size = INT_MAX;
+
+	/*
+	 * Setup aggregation packet count.
+	 */
+	if (sc->hn_agg_pkts < 0)
+		pkts = UINT32_MAX;
+	else
+		pkts = sc->hn_agg_pkts;
+
+	if (sc->hn_rndis_agg_pkts < pkts)
+		pkts = sc->hn_rndis_agg_pkts;
+
+	if (pkts <= 1) {
+		/* Disable */
+		size = 0;
+		pkts = 0;
+		goto done;
+	}
+
+	/* NOTE: Type of the per TX ring setting is 'short'. */
+	if (pkts > SHRT_MAX)
+		pkts = SHRT_MAX;
+
+done:
+	/* NOTE: Type of the per TX ring setting is 'short'. */
+	if (sc->hn_rndis_agg_align > SHRT_MAX) {
+		/* Disable */
+		size = 0;
+		pkts = 0;
+	}
+
+	if (bootverbose) {
+		if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
+		    size, pkts, sc->hn_rndis_agg_align);
+	}
+
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
+
+		mtx_lock(&txr->hn_tx_lock);
+		txr->hn_agg_szmax = size;
+		txr->hn_agg_pktmax = pkts;
+		txr->hn_agg_align = sc->hn_rndis_agg_align;
+		mtx_unlock(&txr->hn_tx_lock);
+	}
+}
+
+static int
+hn_get_txswq_depth(const struct hn_tx_ring *txr)
+{
+
+	KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
+	if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
+		return txr->hn_txdesc_cnt;
+	return hn_tx_swq_depth;
+}
+
+static int
+hn_rss_reconfig(struct hn_softc *sc)
+{
+	int error;
+
+	HN_LOCK_ASSERT(sc);
+
+	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
+		return (ENXIO);
+
+	/*
+	 * Disable RSS first.
+	 *
+	 * NOTE:
+	 * Direct reconfiguration by setting the UNCHG flags does
+	 * _not_ work properly.
+	 */
+	if (bootverbose)
+		if_printf(sc->hn_ifp, "disable RSS\n");
+	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
+	if (error) {
+		if_printf(sc->hn_ifp, "RSS disable failed\n");
+		return (error);
+	}
+
+	/*
+	 * Reenable the RSS w/ the updated RSS key or indirect
+	 * table.
+	 */
+	if (bootverbose)
+		if_printf(sc->hn_ifp, "reconfig RSS\n");
+	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
+	if (error) {
+		if_printf(sc->hn_ifp, "RSS reconfig failed\n");
+		return (error);
+	}
+	return (0);
+}
+
+static void
+hn_rss_ind_fixup(struct hn_softc *sc)
+{
+	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
+	int i, nchan;
+
+	nchan = sc->hn_rx_ring_inuse;
+	KASSERT(nchan > 1, ("invalid # of channels %d", nchan));
+
+	/*
+	 * Check indirect table to make sure that all channels in it
+	 * can be used.
+	 */
+	for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
+		if (rss->rss_ind[i] >= nchan) {
+			if_printf(sc->hn_ifp,
+			    "RSS indirect table %d fixup: %u -> %d\n",
+			    i, rss->rss_ind[i], nchan - 1);
+			rss->rss_ind[i] = nchan - 1;
+		}
+	}
+}
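
Concretely: if the indirection table was built while 8 channels were in use and a reconfiguration leaves only 4, every table entry referencing rings 4 through 7 is clamped to ring 3, guaranteeing no hash bucket lands on a dead channel.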
+
+static int
+hn_ifmedia_upd(struct ifnet *ifp __unused)
+{
+
+	return EOPNOTSUPP;
+}
+
+static void
+hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
+{
+	struct hn_softc *sc = ifp->if_softc;
+
+	ifmr->ifm_status = IFM_AVALID;
+	ifmr->ifm_active = IFM_ETHER;
+
+	if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
+		ifmr->ifm_active |= IFM_NONE;
+		return;
+	}
+	ifmr->ifm_status |= IFM_ACTIVE;
+	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
+}
+
+static void
+hn_rxvf_set_task(void *xarg, int pending __unused)
+{
+	struct hn_rxvf_setarg *arg = xarg;
+
+	arg->rxr->hn_rxvf_ifp = arg->vf_ifp;
+}
+
+static void
+hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp)
+{
+	struct hn_rx_ring *rxr;
+	struct hn_rxvf_setarg arg;
+	struct task task;
+	int i;
+
+	HN_LOCK_ASSERT(sc);
+
+	TASK_INIT(&task, 0, hn_rxvf_set_task, &arg);
+
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+		rxr = &sc->hn_rx_ring[i];
+
+		if (i < sc->hn_rx_ring_inuse) {
+			arg.rxr = rxr;
+			arg.vf_ifp = vf_ifp;
+			vmbus_chan_run_task(rxr->hn_chan, &task);
+		} else {
+			rxr->hn_rxvf_ifp = vf_ifp;
+		}
+	}
+}
+
+static bool
+hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp)
+{
+	const struct ifnet *hn_ifp;
+
+	hn_ifp = sc->hn_ifp;
+
+	if (ifp == hn_ifp)
+		return (false);
+
+	if (ifp->if_alloctype != IFT_ETHER)
+		return (false);
+
+	/* Ignore lagg/vlan interfaces */
+	if (strcmp(ifp->if_dname, "lagg") == 0 ||
+	    strcmp(ifp->if_dname, "vlan") == 0)
+		return (false);
+
+	if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0)
+		return (false);
+
+	return (true);
+}
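
The lladdr comparison is the actual pairing rule: in Hyper-V SR-IOV setups the VF is assigned the same MAC address as its companion synthetic NIC, so an Ethernet ifnet (excluding lagg/vlan pseudo-interfaces) sharing hn(4)'s address is treated as our VF.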
+
+static void
+hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf)
+{
+	struct ifnet *hn_ifp;
+
+	HN_LOCK(sc);
+
+	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
+		goto out;
+
+	if (!hn_ismyvf(sc, ifp))
+		goto out;
+	hn_ifp = sc->hn_ifp;
+
+	if (rxvf) {
+		if (sc->hn_flags & HN_FLAG_RXVF)
+			goto out;
+
+		sc->hn_flags |= HN_FLAG_RXVF;
+		hn_rxfilter_config(sc);
+	} else {
+		if (!(sc->hn_flags & HN_FLAG_RXVF))
+			goto out;
+
+		sc->hn_flags &= ~HN_FLAG_RXVF;
+		if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
+			hn_rxfilter_config(sc);
+		else
+			hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);
+	}
+
+	hn_nvs_set_datapath(sc,
+	    rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH);
+
+	hn_rxvf_set(sc, rxvf ? ifp : NULL);
+
+	if (rxvf) {
+		hn_vf_rss_fixup(sc, true);
+		hn_suspend_mgmt(sc);
+		sc->hn_link_flags &=
+		    ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG);
+		if_link_state_change(hn_ifp, LINK_STATE_DOWN);
+	} else {
+		hn_vf_rss_restore(sc);
+		hn_resume_mgmt(sc);
+	}
+
+	devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname,
+	    rxvf ? "VF_UP" : "VF_DOWN", NULL);
+
+	if (bootverbose) {
+		if_printf(hn_ifp, "datapath is switched %s %s\n",
+		    rxvf ? "to" : "from", ifp->if_xname);
+	}
+out:
+	HN_UNLOCK(sc);
+}
+
+static void
+hn_ifnet_event(void *arg, struct ifnet *ifp, int event)
+{
+
+	if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN)
+		return;
+	hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP);
+}
+
+static void
+hn_ifaddr_event(void *arg, struct ifnet *ifp)
+{
+
+	hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP);
+}
+
+static int
+hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr)
+{
+	struct ifnet *ifp, *vf_ifp;
+	uint64_t tmp;
+	int error;
+
+	HN_LOCK_ASSERT(sc);
+	ifp = sc->hn_ifp;
+	vf_ifp = sc->hn_vf_ifp;
+
+	/*
+	 * Fix up requested capabilities w/ supported capabilities,
+	 * since the supported capabilities could have been changed.
+	 */
+	ifr->ifr_reqcap &= ifp->if_capabilities;
+	/* Pass SIOCSIFCAP to VF. */
+	error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr);
+
+	/*
+	 * NOTE:
+	 * The error will be propagated to the callers, however, it
+	 * is _not_ useful here.
+	 */
+
+	/*
+	 * Merge VF's enabled capabilities.
+	 */
+	ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities;
+
+	tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc);
+	if (ifp->if_capenable & IFCAP_TXCSUM)
+		ifp->if_hwassist |= tmp;
+	else
+		ifp->if_hwassist &= ~tmp;
+
+	tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc);
+	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
+		ifp->if_hwassist |= tmp;
+	else
+		ifp->if_hwassist &= ~tmp;
+
+	tmp = vf_ifp->if_hwassist & CSUM_IP_TSO;
+	if (ifp->if_capenable & IFCAP_TSO4)
+		ifp->if_hwassist |= tmp;
+	else
+		ifp->if_hwassist &= ~tmp;
+
+	tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO;
+	if (ifp->if_capenable & IFCAP_TSO6)
+		ifp->if_hwassist |= tmp;
+	else
+		ifp->if_hwassist &= ~tmp;
+
+	return (error);
+}
+
+static int
+hn_xpnt_vf_iocsetflags(struct hn_softc *sc)
+{
+	struct ifnet *vf_ifp;
+	struct ifreq ifr;
+
+	HN_LOCK_ASSERT(sc);
+	vf_ifp = sc->hn_vf_ifp;
+
+	memset(&ifr, 0, sizeof(ifr));
+	strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
+	ifr.ifr_flags = vf_ifp->if_flags & 0xffff;
+	ifr.ifr_flagshigh = vf_ifp->if_flags >> 16;
+	return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
+}
+
+static void
+hn_xpnt_vf_saveifflags(struct hn_softc *sc)
+{
+	struct ifnet *ifp = sc->hn_ifp;
+	int allmulti = 0;
+
+	HN_LOCK_ASSERT(sc);
+
+	/* XXX vlan(4) style mcast addr maintenance */
+	if (!TAILQ_EMPTY(&ifp->if_multiaddrs))
+		allmulti = IFF_ALLMULTI;
+
+	/* Always set the VF's if_flags */
+	sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti;
+}
+
+static void
+hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m)
+{
+	struct rm_priotracker pt;
+	struct ifnet *hn_ifp = NULL;
+	struct mbuf *mn;
+
+	/*
+	 * XXX racy, if hn(4) ever detached.
+	 */
+	rm_rlock(&hn_vfmap_lock, &pt);
+	if (vf_ifp->if_index < hn_vfmap_size)
+		hn_ifp = hn_vfmap[vf_ifp->if_index];
+	rm_runlock(&hn_vfmap_lock, &pt);
+
+	if (hn_ifp != NULL) {
+		for (mn = m; mn != NULL; mn = mn->m_nextpkt) {
+			/*
+			 * Allow tapping on the VF.
+			 */
+			ETHER_BPF_MTAP(vf_ifp, mn);
+
+			/*
+			 * Update VF stats.
+			 */
+			if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) {
+				if_inc_counter(vf_ifp, IFCOUNTER_IBYTES,
+				    mn->m_pkthdr.len);
+			}
+			/*
+			 * XXX IFCOUNTER_IMCAST
+			 * This stat updating is kinda invasive, since it
+			 * requires two checks on the mbuf: the length check
+			 * and the ethernet header check.  As of this
+			 * writing, all multicast packets go directly to
+			 * hn(4), which makes imcast stat updating in the
+			 * VF futile.
+			 */
+
+			/*
+			 * Fix up rcvif and increase hn(4)'s ipackets.
+			 */
+			mn->m_pkthdr.rcvif = hn_ifp;
+			if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
+		}
+		/*
+		 * Go through hn(4)'s if_input.
+		 */
+		hn_ifp->if_input(hn_ifp, m);
+	} else {
+		/*
+		 * In the middle of the transition; free this
+		 * mbuf chain.
+		 */
+		while (m != NULL) {
+			mn = m->m_nextpkt;
+			m->m_nextpkt = NULL;
+			m_freem(m);
+			m = mn;
+		}
+	}
+}
+
+static void
+hn_mtu_change_fixup(struct hn_softc *sc)
+{
+	struct ifnet *ifp;
+
+	HN_LOCK_ASSERT(sc);
+	ifp = sc->hn_ifp;
+
+	hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
+#if __FreeBSD_version >= 1100099
+	if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp))
+		hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
+#endif
+}
+
+static uint32_t
+hn_rss_type_fromndis(uint32_t rss_hash)
+{
+	uint32_t types = 0;
+
+	if (rss_hash & NDIS_HASH_IPV4)
+		types |= RSS_TYPE_IPV4;
+	if (rss_hash & NDIS_HASH_TCP_IPV4)
+		types |= RSS_TYPE_TCP_IPV4;
+	if (rss_hash & NDIS_HASH_IPV6)
+		types |= RSS_TYPE_IPV6;
+	if (rss_hash & NDIS_HASH_IPV6_EX)
+		types |= RSS_TYPE_IPV6_EX;
+	if (rss_hash & NDIS_HASH_TCP_IPV6)
+		types |= RSS_TYPE_TCP_IPV6;
+	if (rss_hash & NDIS_HASH_TCP_IPV6_EX)
+		types |= RSS_TYPE_TCP_IPV6_EX;
+	if (rss_hash & NDIS_HASH_UDP_IPV4_X)
+		types |= RSS_TYPE_UDP_IPV4;
+	return (types);
+}
+
+static uint32_t
+hn_rss_type_tondis(uint32_t types)
+{
+	uint32_t rss_hash = 0;
+
+	KASSERT((types & (RSS_TYPE_UDP_IPV6 | RSS_TYPE_UDP_IPV6_EX)) == 0,
+	    ("UDP6 and UDP6EX are not supported"));
+
+	if (types & RSS_TYPE_IPV4)
+		rss_hash |= NDIS_HASH_IPV4;
+	if (types & RSS_TYPE_TCP_IPV4)
+		rss_hash |= NDIS_HASH_TCP_IPV4;
+	if (types & RSS_TYPE_IPV6)
+		rss_hash |= NDIS_HASH_IPV6;
+	if (types & RSS_TYPE_IPV6_EX)
+		rss_hash |= NDIS_HASH_IPV6_EX;
+	if (types & RSS_TYPE_TCP_IPV6)
+		rss_hash |= NDIS_HASH_TCP_IPV6;
+	if (types & RSS_TYPE_TCP_IPV6_EX)
+		rss_hash |= NDIS_HASH_TCP_IPV6_EX;
+	if (types & RSS_TYPE_UDP_IPV4)
+		rss_hash |= NDIS_HASH_UDP_IPV4_X;
+	return (rss_hash);
+}
+
+static void
+hn_rss_mbuf_hash(struct hn_softc *sc, uint32_t mbuf_hash)
+{
+	int i;
+
+	HN_LOCK_ASSERT(sc);
+
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+		sc->hn_rx_ring[i].hn_mbuf_hash = mbuf_hash;
+}
+
+static void
+hn_vf_rss_fixup(struct hn_softc *sc, bool reconf)
+{
+	struct ifnet *ifp, *vf_ifp;
+	struct ifrsshash ifrh;
+	struct ifrsskey ifrk;
+	int error;
+	uint32_t my_types, diff_types, mbuf_types = 0;
+
+	HN_LOCK_ASSERT(sc);
+	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
+	    ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname));
+
+	if (sc->hn_rx_ring_inuse == 1) {
+		/* No RSS on synthetic parts; done. */
+		return;
+	}
+	if ((sc->hn_rss_hcap & NDIS_HASH_FUNCTION_TOEPLITZ) == 0) {
+		/* Synthetic parts do not support Toeplitz; done. */
+		return;
+	}
+
+	ifp = sc->hn_ifp;
+	vf_ifp = sc->hn_vf_ifp;
+
+	/*
+	 * Extract VF's RSS key.  Only a 40-byte Toeplitz key is
+	 * supported.
+	 */
+	memset(&ifrk, 0, sizeof(ifrk));
+	strlcpy(ifrk.ifrk_name, vf_ifp->if_xname, sizeof(ifrk.ifrk_name));
+	error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSKEY, (caddr_t)&ifrk);
+	if (error) {
+		if_printf(ifp, "%s SIOCGRSSKEY failed: %d\n",
+		    vf_ifp->if_xname, error);
+		goto done;
+	}
+	if (ifrk.ifrk_func != RSS_FUNC_TOEPLITZ) {
+		if_printf(ifp, "%s RSS function %u is not Toeplitz\n",
+		    vf_ifp->if_xname, ifrk.ifrk_func);
+		goto done;
+	}
+	if (ifrk.ifrk_keylen != NDIS_HASH_KEYSIZE_TOEPLITZ) {
+		if_printf(ifp, "%s invalid RSS Toeplitz key length %d\n",
+		    vf_ifp->if_xname, ifrk.ifrk_keylen);
+		goto done;
+	}
+
+	/*
+	 * Extract VF's RSS hash.  Only Toeplitz is supported.
+	 */
+	memset(&ifrh, 0, sizeof(ifrh));
+	strlcpy(ifrh.ifrh_name, vf_ifp->if_xname, sizeof(ifrh.ifrh_name));
+	error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSHASH, (caddr_t)&ifrh);
+	if (error) {
+		if_printf(ifp, "%s SIOCGRSSHASH failed: %d\n",
+		    vf_ifp->if_xname, error);
+		goto done;
+	}
+	if (ifrh.ifrh_func != RSS_FUNC_TOEPLITZ) {
+		if_printf(ifp, "%s RSS function %u is not Toeplitz\n",
+		    vf_ifp->if_xname, ifrh.ifrh_func);
+		goto done;
+	}
+
+	my_types = hn_rss_type_fromndis(sc->hn_rss_hcap);
+	if ((ifrh.ifrh_types & my_types) == 0) {
+		/* This would disable RSS; ignore it. */
+		if_printf(ifp, "%s intersection of RSS types failed.  "
+		    "VF %#x, mine %#x\n", vf_ifp->if_xname,
+		    ifrh.ifrh_types, my_types);
+		goto done;
+	}
+
+	diff_types = my_types ^ ifrh.ifrh_types;
+	my_types &= ifrh.ifrh_types;
+	mbuf_types = my_types;
+
+	/*
+	 * Detect RSS hash value/type conflicts.
+	 *
+	 * NOTE:
+	 * We don't disable the hash type, but stop delivering the hash
+	 * value/type through mbufs on the RX path.
+	 *
+	 * XXX If HN_CAP_UDPHASH is set in hn_caps, then UDP 4-tuple
+	 * hash is delivered with type of TCP_IPV4.  This means if
+	 * UDP_IPV4 is enabled, then TCP_IPV4 should be forced, at
+	 * least to hn_mbuf_hash.  However, given that _all_ of the
+	 * NICs implement TCP_IPV4, this will _not_ impose any issues
+	 * here.
+	 */
+	if ((my_types & RSS_TYPE_IPV4) &&
+	    (diff_types & ifrh.ifrh_types &
+	     (RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4))) {
+		/* Conflict; disable IPV4 hash type/value delivery. */
+		if_printf(ifp, "disable IPV4 mbuf hash delivery\n");
+		mbuf_types &= ~RSS_TYPE_IPV4;
+	}
+	if ((my_types & RSS_TYPE_IPV6) &&
+	    (diff_types & ifrh.ifrh_types &
+	     (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 |
+	      RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX |
+	      RSS_TYPE_IPV6_EX))) {
+		/* Conflict; disable IPV6 hash type/value delivery. */
+		if_printf(ifp, "disable IPV6 mbuf hash delivery\n");
+		mbuf_types &= ~RSS_TYPE_IPV6;
+	}
+	if ((my_types & RSS_TYPE_IPV6_EX) &&
+	    (diff_types & ifrh.ifrh_types &
+	     (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 |
+	      RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX |
+	      RSS_TYPE_IPV6))) {
+		/* Conflict; disable IPV6_EX hash type/value delivery. */
+		if_printf(ifp, "disable IPV6_EX mbuf hash delivery\n");
+		mbuf_types &= ~RSS_TYPE_IPV6_EX;
+	}
+	if ((my_types & RSS_TYPE_TCP_IPV6) &&
+	    (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6_EX)) {
+		/* Conflict; disable TCP_IPV6 hash type/value delivery. */
+		if_printf(ifp, "disable TCP_IPV6 mbuf hash delivery\n");
+		mbuf_types &= ~RSS_TYPE_TCP_IPV6;
+	}
+	if ((my_types & RSS_TYPE_TCP_IPV6_EX) &&
+	    (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6)) {
+		/* Conflict; disable TCP_IPV6_EX hash type/value delivery. */
+		if_printf(ifp, "disable TCP_IPV6_EX mbuf hash delivery\n");
+		mbuf_types &= ~RSS_TYPE_TCP_IPV6_EX;
+	}
+	if ((my_types & RSS_TYPE_UDP_IPV6) &&
+	    (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6_EX)) {
+		/* Conflict; disable UDP_IPV6 hash type/value delivery. */
+		if_printf(ifp, "disable UDP_IPV6 mbuf hash delivery\n");
+		mbuf_types &= ~RSS_TYPE_UDP_IPV6;
+	}
+	if ((my_types & RSS_TYPE_UDP_IPV6_EX) &&
+	    (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6)) {
+		/* Conflict; disable UDP_IPV6_EX hash type/value delivery. */
+		if_printf(ifp, "disable UDP_IPV6_EX mbuf hash delivery\n");
+		mbuf_types &= ~RSS_TYPE_UDP_IPV6_EX;
+	}
+
+	/*
+	 * Indirect table does not matter.
+	 */
+
+	sc->hn_rss_hash = (sc->hn_rss_hcap & NDIS_HASH_FUNCTION_MASK) |
+	    hn_rss_type_tondis(my_types);
+	memcpy(sc->hn_rss.rss_key, ifrk.ifrk_key, sizeof(sc->hn_rss.rss_key));
+	sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
+
+	if (reconf) {
+		error = hn_rss_reconfig(sc);
+		if (error) {
+			/* XXX roll-back? */
+			if_printf(ifp, "hn_rss_reconfig failed: %d\n", error);
+			/* XXX keep going. */
+		}
+	}
+done:
+	/* Hash deliverability for mbufs. */
+	hn_rss_mbuf_hash(sc, hn_rss_type_tondis(mbuf_types));
+}
+
+static void
+hn_vf_rss_restore(struct hn_softc *sc)
+{
+
+	HN_LOCK_ASSERT(sc);
+	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
+	    ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname));
+
+	if (sc->hn_rx_ring_inuse == 1)
+		goto done;
+
+	/*
+	 * Restore hash types.  Key does _not_ matter.
+	 */
+	if (sc->hn_rss_hash != sc->hn_rss_hcap) {
+		int error;
+
+		sc->hn_rss_hash = sc->hn_rss_hcap;
+		error = hn_rss_reconfig(sc);
+		if (error) {
+			if_printf(sc->hn_ifp, "hn_rss_reconfig failed: %d\n",
+			    error);
+			/* XXX keep going. */
+		}
+	}
+done:
+	/* Hash deliverability for mbufs. */
+	hn_rss_mbuf_hash(sc, NDIS_HASH_ALL);
+}
+
+static void
+hn_xpnt_vf_setready(struct hn_softc *sc)
+{
+	struct ifnet *ifp, *vf_ifp;
+	struct ifreq ifr;
+
+	HN_LOCK_ASSERT(sc);
+	ifp = sc->hn_ifp;
+	vf_ifp = sc->hn_vf_ifp;
+
+	/*
+	 * Mark the VF ready.
+	 */
+	sc->hn_vf_rdytick = 0;
+
+	/*
+	 * Save information for restoration.
+	 */
+	sc->hn_saved_caps = ifp->if_capabilities;
+	sc->hn_saved_tsomax = ifp->if_hw_tsomax;
+	sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount;
+	sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize;
+
+	/*
+	 * Intersect supported/enabled capabilities.
+	 *
+	 * NOTE:
+	 * if_hwassist is not changed here.
+	 */
+	ifp->if_capabilities &= vf_ifp->if_capabilities;
+	ifp->if_capenable &= ifp->if_capabilities;
+
+	/*
+	 * Fix TSO settings.
+	 */
+	if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax)
+		ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax;
+	if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount)
+		ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount;
+	if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize)
+		ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize;
+
+	/*
+	 * Change VF's enabled capabilities.
+	 */
+	memset(&ifr, 0, sizeof(ifr));
+	strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
+	ifr.ifr_reqcap = ifp->if_capenable;
+	hn_xpnt_vf_iocsetcaps(sc, &ifr);
+
+	if (ifp->if_mtu != ETHERMTU) {
+		int error;
+
+		/*
+		 * Change VF's MTU.
+		 */
+		memset(&ifr, 0, sizeof(ifr));
+		strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
+		ifr.ifr_mtu = ifp->if_mtu;
+		error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr);
+		if (error) {
+			if_printf(ifp, "%s SIOCSIFMTU %lu failed\n",
+			    vf_ifp->if_xname, ifp->if_mtu);
+			if (ifp->if_mtu > ETHERMTU) {
+				if_printf(ifp, "change MTU to %d\n", ETHERMTU);
+
+				/*
+				 * XXX
+				 * No need to adjust the synthetic parts' MTU;
+				 * failure of the adjustment will cause us
+				 * infinite headache.
+				 */
+				ifp->if_mtu = ETHERMTU;
+				hn_mtu_change_fixup(sc);
+			}
+		}
+	}
+}
+
+static bool
+hn_xpnt_vf_isready(struct hn_softc *sc)
+{
+
+	HN_LOCK_ASSERT(sc);
+
+	if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL)
+		return (false);
+
+	if (sc->hn_vf_rdytick == 0)
+		return (true);
+
+	if (sc->hn_vf_rdytick > ticks)
+		return (false);
+
+	/* Mark VF as ready. */
+	hn_xpnt_vf_setready(sc);
+	return (true);
+}
+
+static void
+hn_xpnt_vf_setenable(struct hn_softc *sc)
+{
+	int i;
+
+	HN_LOCK_ASSERT(sc);
+
+	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
+	rm_wlock(&sc->hn_vf_lock);
+	sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED;
+	rm_wunlock(&sc->hn_vf_lock);
+
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+		sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF;
+}
+
+static void
+hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf)
+{
+	int i;
+
+	HN_LOCK_ASSERT(sc);
+
+	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
+	rm_wlock(&sc->hn_vf_lock);
+	sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED;
+	if (clear_vf)
+		sc->hn_vf_ifp = NULL;
+	rm_wunlock(&sc->hn_vf_lock);
+
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+		sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF;
+}
+
+static void
+hn_xpnt_vf_init(struct hn_softc *sc)
+{
+	int error;
+
+	HN_LOCK_ASSERT(sc);
+
+	KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
+	    ("%s: transparent VF was enabled", sc->hn_ifp->if_xname));
+
+	if (bootverbose) {
+		if_printf(sc->hn_ifp, "try bringing up %s\n",
+		    sc->hn_vf_ifp->if_xname);
+	}
+
+	/*
+	 * Bring the VF up.
+	 */
+	hn_xpnt_vf_saveifflags(sc);
+	sc->hn_vf_ifp->if_flags |= IFF_UP;
+	error = hn_xpnt_vf_iocsetflags(sc);
+	if (error) {
+		if_printf(sc->hn_ifp, "bringing up %s failed: %d\n",
+		    sc->hn_vf_ifp->if_xname, error);
+		return;
+	}
+
+	/*
+	 * NOTE:
+	 * Datapath setting must happen _after_ bringing the VF up.
+	 */
+	hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
+
+	/*
+	 * NOTE:
+	 * Fixup RSS related bits _after_ the VF is brought up, since
+	 * many VFs generate their RSS key during initialization.
+	 */
+	hn_vf_rss_fixup(sc, true);
+
+	/* Mark transparent mode VF as enabled. */
+	hn_xpnt_vf_setenable(sc);
+}
+
+static void
+hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused)
+{
+	struct hn_softc *sc = xsc;
+
+	HN_LOCK(sc);
+
+	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
+		goto done;
+	if (sc->hn_vf_ifp == NULL)
+		goto done;
+	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
+		goto done;
+
+	if (sc->hn_vf_rdytick != 0) {
+		/* Mark VF as ready. */
+		hn_xpnt_vf_setready(sc);
+	}
+
+	if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) {
+		/*
+		 * Delayed VF initialization.
+		 */
+		if (bootverbose) {
+			if_printf(sc->hn_ifp, "delayed initialize %s\n",
+			    sc->hn_vf_ifp->if_xname);
+		}
+		hn_xpnt_vf_init(sc);
+	}
+done:
+	HN_UNLOCK(sc);
+}
+
+static void
+hn_ifnet_attevent(void *xsc, struct ifnet *ifp)
+{
+	struct hn_softc *sc = xsc;
+
+	HN_LOCK(sc);
+
+	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
+		goto done;
+
+	if (!hn_ismyvf(sc, ifp))
+		goto done;
+
+	if (sc->hn_vf_ifp != NULL) {
+		if_printf(sc->hn_ifp, "%s was attached as VF\n",
+		    sc->hn_vf_ifp->if_xname);
+		goto done;
+	}
+
+	if (hn_xpnt_vf && ifp->if_start != NULL) {
+		/*
+		 * ifnet.if_start is _not_ supported by transparent
+		 * mode VF; mainly due to the IFF_DRV_OACTIVE flag.
+		 */
+		if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported "
+		    "in transparent VF mode.\n", ifp->if_xname);
+		goto done;
+	}
+
+	rm_wlock(&hn_vfmap_lock);
+
+	if (ifp->if_index >= hn_vfmap_size) {
+		struct ifnet **newmap;
+		int newsize;
+
+		newsize = ifp->if_index + HN_VFMAP_SIZE_DEF;
+		newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF,
+		    M_WAITOK | M_ZERO);
+
+		memcpy(newmap, hn_vfmap,
+		    sizeof(struct ifnet *) * hn_vfmap_size);
+		free(hn_vfmap, M_DEVBUF);
+		hn_vfmap = newmap;
+		hn_vfmap_size = newsize;
+	}
+	KASSERT(hn_vfmap[ifp->if_index] == NULL,
+	    ("%s: ifindex %d was mapped to %s",
+	     ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname));
+	hn_vfmap[ifp->if_index] = sc->hn_ifp;
+
+	rm_wunlock(&hn_vfmap_lock);
+
+	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
+	rm_wlock(&sc->hn_vf_lock);
+	KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
+	    ("%s: transparent VF was enabled", sc->hn_ifp->if_xname));
+	sc->hn_vf_ifp = ifp;
+	rm_wunlock(&sc->hn_vf_lock);
+
+	if (hn_xpnt_vf) {
+		int wait_ticks;
+
+		/*
+		 * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp.
+		 * Save vf_ifp's current if_input for later restoration.
+		 */
+		sc->hn_vf_input = ifp->if_input;
+		ifp->if_input = hn_xpnt_vf_input;
+
+		/*
+		 * Stop link status management; use the VF's.
+		 */
+		hn_suspend_mgmt(sc);
+
+		/*
+		 * Give the VF some time to complete its attach routine.
+		 */
+		wait_ticks = hn_xpnt_vf_attwait * hz;
+		sc->hn_vf_rdytick = ticks + wait_ticks;
+
+		taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init,
+		    wait_ticks);
+	}
+done:
+	HN_UNLOCK(sc);
+}
+
+static void
+hn_ifnet_detevent(void *xsc, struct ifnet *ifp)
+{
+	struct hn_softc *sc = xsc;
+
+	HN_LOCK(sc);
+
+	if (sc->hn_vf_ifp == NULL)
+		goto done;
+
+	if (!hn_ismyvf(sc, ifp))
+		goto done;
+
+	if (hn_xpnt_vf) {
+		/*
+		 * Make sure that the delayed initialization is not running.
+		 *
+		 * NOTE:
+		 * - This lock _must_ be released, since the hn_vf_init task
+		 *   will try holding this lock.
+		 * - It is safe to release this lock here, since
+		 *   hn_ifnet_attevent() is interlocked by hn_vf_ifp.
+		 *
+		 * XXX racy, if hn(4) ever detached.
+		 */
+		HN_UNLOCK(sc);
+		taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init);
+		HN_LOCK(sc);
+
+		KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved",
+		    sc->hn_ifp->if_xname));
+		ifp->if_input = sc->hn_vf_input;
+		sc->hn_vf_input = NULL;
+
+		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) &&
+		    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED))
+			hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);
+
+		if (sc->hn_vf_rdytick == 0) {
+			/*
+			 * The VF was ready; restore some settings.
+			 */
+			sc->hn_ifp->if_capabilities = sc->hn_saved_caps;
+			/*
+			 * NOTE:
+			 * There is _no_ need to fixup if_capenable and
+			 * if_hwassist, since the if_capabilities before
+			 * restoration was an intersection of the VF's
+			 * if_capabilities and the synthetic device's
+			 * if_capabilities.
+			 */
+			sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax;
+			sc->hn_ifp->if_hw_tsomaxsegcount =
+			    sc->hn_saved_tsosegcnt;
+			sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz;
+		}
+
+		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
+			/*
+			 * Restore RSS settings.
+			 */
+			hn_vf_rss_restore(sc);
+
+			/*
+			 * Resume link status management, which was suspended
+			 * by hn_ifnet_attevent().
+			 */
+			hn_resume_mgmt(sc);
+		}
+	}
+
+	/* Mark transparent mode VF as disabled. */
+	hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */);
+
+	rm_wlock(&hn_vfmap_lock);
+
+	KASSERT(ifp->if_index < hn_vfmap_size,
+	    ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size));
+	if (hn_vfmap[ifp->if_index] != NULL) {
+		KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp,
+		    ("%s: ifindex %d was mapped to %s",
+		     ifp->if_xname, ifp->if_index,
+		     hn_vfmap[ifp->if_index]->if_xname));
+		hn_vfmap[ifp->if_index] = NULL;
+	}
+
+	rm_wunlock(&hn_vfmap_lock);
+done:
+	HN_UNLOCK(sc);
+}
+
+static void
+hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state)
+{
+	struct hn_softc *sc = xsc;
+
+	if (sc->hn_vf_ifp == ifp)
+		if_link_state_change(sc->hn_ifp, link_state);
+}
+
+static int
+hn_probe(device_t dev)
+{
+
+	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) {
+		device_set_desc(dev, "Hyper-V Network Interface");
+		return BUS_PROBE_DEFAULT;
+	}
+	return ENXIO;
+}
+
+static int
+hn_attach(device_t dev)
+{
+	struct hn_softc *sc = device_get_softc(dev);
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
+	uint8_t eaddr[ETHER_ADDR_LEN];
+	struct ifnet *ifp = NULL;
+	int error, ring_cnt, tx_ring_cnt;
+	uint32_t mtu;
+
+	sc->hn_dev = dev;
+	sc->hn_prichan = vmbus_get_channel(dev);
+	HN_LOCK_INIT(sc);
+	rm_init(&sc->hn_vf_lock, "hnvf");
+	if (hn_xpnt_vf && hn_xpnt_vf_accbpf)
+		sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
+
+	/*
+	 * Initialize these tunables once.
+	 */
+	sc->hn_agg_size = hn_tx_agg_size;
+	sc->hn_agg_pkts = hn_tx_agg_pkts;
+
+	/*
+	 * Setup taskqueue for transmission.
+	 */
+	if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
+		int i;
+
+		sc->hn_tx_taskqs =
+		    malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
+		    M_DEVBUF, M_WAITOK);
+		for (i = 0; i < hn_tx_taskq_cnt; ++i) {
+			sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
+			    M_WAITOK, taskqueue_thread_enqueue,
+			    &sc->hn_tx_taskqs[i]);
+			taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
+			    "%s tx%d", device_get_nameunit(dev), i);
+		}
+	} else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
+		sc->hn_tx_taskqs = hn_tx_taskque;
+	}
+
+	/*
+	 * Setup taskqueue for management tasks, e.g. link status.
+	 */
+	sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
+	    taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
+	taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
+	    device_get_nameunit(dev));
+	TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
+	TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
+	TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
+	    hn_netchg_status_taskfunc, sc);
+
+	if (hn_xpnt_vf) {
+		/*
+		 * Setup taskqueue for VF tasks, e.g. delayed VF bring-up.
+		 */
+		sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK,
+		    taskqueue_thread_enqueue, &sc->hn_vf_taskq);
+		taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf",
+		    device_get_nameunit(dev));
+		TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0,
+		    hn_xpnt_vf_init_taskfunc, sc);
+	}
+
+	/*
+	 * Allocate ifnet and setup its name earlier, so that if_printf
+	 * can be used by functions that will be called after
+	 * ether_ifattach().
+	 */
+	ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
+	ifp->if_softc = sc;
+	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+
+	/*
+	 * Initialize ifmedia earlier so that it can be unconditionally
+	 * destroyed, if an error happens later on.
+	 */
+	ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);
+
+	/*
+	 * Figure out the # of RX rings (ring_cnt) and the # of TX rings
+	 * to use (tx_ring_cnt).
+	 *
+	 * NOTE:
+	 * The # of RX rings to use is the same as the # of channels to use.
+	 */
+	ring_cnt = hn_chan_cnt;
+	if (ring_cnt <= 0) {
+		/* Default */
+		ring_cnt = mp_ncpus;
+		if (ring_cnt > HN_RING_CNT_DEF_MAX)
+			ring_cnt = HN_RING_CNT_DEF_MAX;
+	} else if (ring_cnt > mp_ncpus) {
+		ring_cnt = mp_ncpus;
+	}
+
+	tx_ring_cnt = hn_tx_ring_cnt;
+	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
+		tx_ring_cnt = ring_cnt;
+#ifdef HN_IFSTART_SUPPORT
+	if (hn_use_if_start) {
+		/* ifnet.if_start only needs one TX ring. */
+		tx_ring_cnt = 1;
+	}
+#endif
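+
+	/*
+	 * Example (illustrative only): on an 8-vCPU guest with the
+	 * hn_chan_cnt and hn_tx_ring_cnt tunables left at 0, and
+	 * assuming HN_RING_CNT_DEF_MAX >= 8, the logic above yields
+	 * ring_cnt = tx_ring_cnt = 8, i.e. one RX/TX ring pair per
+	 * CPU (unless hn_use_if_start forced tx_ring_cnt to 1).
+	 */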
+
+	/*
+	 * Set the leader CPU for channels.
+	 */
+	sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;
+
+	/*
+	 * Create enough TX/RX rings, even if only a limited number of
+	 * channels can be allocated.
+	 */
+	error = hn_create_tx_data(sc, tx_ring_cnt);
+	if (error)
+		goto failed;
+	error = hn_create_rx_data(sc, ring_cnt);
+	if (error)
+		goto failed;
+
+	/*
+	 * Create transaction context for NVS and RNDIS transactions.
+	 */
+	sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
+	    HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
+	if (sc->hn_xact == NULL) {
+		error = ENXIO;
+		goto failed;
+	}
+
+	/*
+	 * Install orphan handler for the revocation of this device's
+	 * primary channel.
+	 *
+	 * NOTE:
+	 * The processing order is critical here:
+	 * Install the orphan handler, _before_ testing whether this
+	 * device's primary channel has been revoked or not.
+	 */
+	vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact);
+	if (vmbus_chan_is_revoked(sc->hn_prichan)) {
+		error = ENXIO;
+		goto failed;
+	}
+
+	/*
+	 * Attach the synthetic parts, i.e. NVS and RNDIS.
+	 */
+	error = hn_synth_attach(sc, ETHERMTU);
+	if (error)
+		goto failed;
+
+	error = hn_rndis_get_eaddr(sc, eaddr);
+	if (error)
+		goto failed;
+
+	error = hn_rndis_get_mtu(sc, &mtu);
+	if (error)
+		mtu = ETHERMTU;
+	else if (bootverbose)
+		device_printf(dev, "RNDIS mtu %u\n", mtu);
+
+#if __FreeBSD_version >= 1100099
+	if (sc->hn_rx_ring_inuse > 1) {
+		/*
+		 * Reduce TCP segment aggregation limit for multiple
+		 * RX rings to increase ACK timeliness.
+		 */
+		hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
+	}
+#endif
+
+	/*
+	 * Fix up TX/RX settings after the synthetic parts are attached.
+	 */
+	hn_fixup_tx_data(sc);
+	hn_fixup_rx_data(sc);
+
+	ctx = device_get_sysctl_ctx(dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
+	    &sc->hn_nvs_ver, 0, "NVS version");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    hn_ndis_version_sysctl, "A", "NDIS version");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    hn_caps_sysctl, "A", "capabilities");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    hn_hwassist_sysctl, "A", "hwassist");
+	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max",
+	    CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size");
+	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt",
+	    CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0,
+	    "max # of TSO segments");
+	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz",
+	    CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0,
+	    "max size of TSO segment");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    hn_rxfilter_sysctl, "A", "rxfilter");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    hn_rss_hash_sysctl, "A", "RSS hash");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hashcap",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    hn_rss_hcap_sysctl, "A", "RSS hash capabilities");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "mbuf_hash",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    hn_rss_mbuf_sysctl, "A", "RSS hash for mbufs");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
+	    CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
+	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+	    hn_rss_key_sysctl, "IU", "RSS key");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
+	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+	    hn_rss_ind_sysctl, "IU", "RSS indirect table");
+	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size",
+	    CTLFLAG_RD, &sc->hn_rndis_agg_size, 0,
+	    "RNDIS offered packet transmission aggregation size limit");
+	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts",
+	    CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0,
+	    "RNDIS offered packet transmission aggregation count limit");
+	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align",
+	    CTLFLAG_RD, &sc->hn_rndis_agg_align, 0,
+	    "RNDIS packet transmission aggregation alignment");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size",
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+	    hn_txagg_size_sysctl, "I",
+	    "Packet transmission aggregation size, 0 -- disable, -1 -- auto");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts",
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+	    hn_txagg_pkts_sysctl, "I",
+	    "Packet transmission aggregation packets, "
+	    "0 -- disable, -1 -- auto");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling",
+	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+	    hn_polling_sysctl, "I",
+	    "Polling frequency: [100,1000000], 0 disable polling");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    hn_vf_sysctl, "A", "Virtual Function's name");
+	if (!hn_xpnt_vf) {
+		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf",
+		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+		    hn_rxvf_sysctl, "A", "activated Virtual Function's name");
+	} else {
+		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled",
+		    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+		    hn_xpnt_vf_enabled_sysctl, "I",
+		    "Transparent VF enabled");
+		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf",
+		    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+		    hn_xpnt_vf_accbpf_sysctl, "I",
+		    "Accurate BPF for transparent VF");
+	}
+
+	/*
+	 * Setup the ifmedia, which has been initialized earlier.
+	 */
+	ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+	ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
+	/* XXX ifmedia_set really should do this for us */
+	sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;
+
+	/*
+	 * Setup the ifnet for this interface.
+	 */
+
+#ifdef __LP64__
+	ifp->if_baudrate = IF_Gbps(10);
+#else
+	/* if_baudrate is 32 bits on 32-bit systems. */
+	ifp->if_baudrate = IF_Gbps(1);
+#endif
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_ioctl = hn_ioctl;
+	ifp->if_init = hn_init;
+#ifdef HN_IFSTART_SUPPORT
+	if (hn_use_if_start) {
+		int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);
+
+		ifp->if_start = hn_start;
+		IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
+		ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
+		IFQ_SET_READY(&ifp->if_snd);
+	} else
+#endif
+	{
+		ifp->if_transmit = hn_transmit;
+		ifp->if_qflush = hn_xmit_qflush;
+	}
+
+	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE;
+#ifdef foo
+	/* We can't distinguish IPv6 from IPv4 packets on the RX path. */
+	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
+#endif
+	if (sc->hn_caps & HN_CAP_VLAN) {
+		/* XXX not sure about VLAN_MTU. */
+		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
+	}
+
+	ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
+	if (ifp->if_hwassist & HN_CSUM_IP_MASK)
+		ifp->if_capabilities |= IFCAP_TXCSUM;
+	if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
+		ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
+	if (sc->hn_caps & HN_CAP_TSO4) {
+		ifp->if_capabilities |= IFCAP_TSO4;
+		ifp->if_hwassist |= CSUM_IP_TSO;
+	}
+	if (sc->hn_caps & HN_CAP_TSO6) {
+		ifp->if_capabilities |= IFCAP_TSO6;
+		ifp->if_hwassist |= CSUM_IP6_TSO;
+	}
+
+	/* Enable all available capabilities by default. */
+	ifp->if_capenable = ifp->if_capabilities;
+
+	/*
+	 * Disable IPv6 TSO and TXCSUM by default, they still can
+	 * be enabled through SIOCSIFCAP.
+	 */
+	ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
+	ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);
+
+	if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
+		/*
+		 * Lock hn_set_tso_maxsize() to simplify its
+		 * internal logic.
+		 */
+		HN_LOCK(sc);
+		hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
+		HN_UNLOCK(sc);
+		ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
+		ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
+	}
+
+	ether_ifattach(ifp, eaddr);
+
+	if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
+		if_printf(ifp, "TSO segcnt %u segsz %u\n",
+		    ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
+	}
+	if (mtu < ETHERMTU) {
+		if_printf(ifp, "fixup mtu %lu -> %u\n", ifp->if_mtu, mtu);
+		ifp->if_mtu = mtu;
+	}
+
+	/* Inform the upper layer about the long frame support. */
+	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
+
+	/*
+	 * Kick off link status check.
+	 */
+	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
+	hn_update_link_status(sc);
+
+	if (!hn_xpnt_vf) {
+		sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
+		    hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);
+		sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
+		    hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);
+	} else {
+		sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event,
+		    hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY);
+	}
+
+	/*
+	 * NOTE:
+	 * Subscribe to the ether_ifattach event, instead of the
+	 * ifnet_arrival event, since the interface's LLADDR is needed;
+	 * the LLADDR is not available when ifnet_arrival is triggered.
+	 */
+	sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event,
+	    hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY);
+	sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event,
+	    hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY);
+
+	return (0);
+failed:
+	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
+		hn_synth_detach(sc);
+	hn_detach(dev);
+	return (error);
+}
+
+static int
+hn_detach(device_t dev)
+{
+	struct hn_softc *sc = device_get_softc(dev);
+	struct ifnet *ifp = sc->hn_ifp, *vf_ifp;
+
+	if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
+		/*
+		 * In case the vmbus missed the orphan handler
+		 * installation.
+		 */
+		vmbus_xact_ctx_orphan(sc->hn_xact);
+	}
+
+	if (sc->hn_ifaddr_evthand != NULL)
+		EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
+	if (sc->hn_ifnet_evthand != NULL)
+		EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand);
+	if (sc->hn_ifnet_atthand != NULL) {
+		EVENTHANDLER_DEREGISTER(ether_ifattach_event,
+		    sc->hn_ifnet_atthand);
+	}
+	if (sc->hn_ifnet_dethand != NULL) {
+		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+		    sc->hn_ifnet_dethand);
+	}
+	if (sc->hn_ifnet_lnkhand != NULL)
+		EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand);
+
+	vf_ifp = sc->hn_vf_ifp;
+	__compiler_membar();
+	if (vf_ifp != NULL)
+		hn_ifnet_detevent(sc, vf_ifp);
+
+	if (device_is_attached(dev)) {
+		HN_LOCK(sc);
+		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
+			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+				hn_stop(sc, true);
+			/*
+			 * NOTE:
+			 * hn_stop() only suspends data, so management
+			 * tasks have to be suspended manually here.
+			 */
+			hn_suspend_mgmt(sc);
+			hn_synth_detach(sc);
+		}
+		HN_UNLOCK(sc);
+		ether_ifdetach(ifp);
+	}
+
+	ifmedia_removeall(&sc->hn_media);
+	hn_destroy_rx_data(sc);
+	hn_destroy_tx_data(sc);
+
+	if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) {
+		int i;
+
+		for (i = 0; i < hn_tx_taskq_cnt; ++i)
+			taskqueue_free(sc->hn_tx_taskqs[i]);
+		free(sc->hn_tx_taskqs, M_DEVBUF);
+	}
+	taskqueue_free(sc->hn_mgmt_taskq0);
+	if (sc->hn_vf_taskq != NULL)
+		taskqueue_free(sc->hn_vf_taskq);
+
+	if (sc->hn_xact != NULL) {
+		/*
+		 * Uninstall the orphan handler _before_ the xact is
+		 * destructed.
+		 */
+		vmbus_chan_unset_orphan(sc->hn_prichan);
+		vmbus_xact_ctx_destroy(sc->hn_xact);
+	}
+
+	if_free(ifp);
+
+	HN_LOCK_DESTROY(sc);
+	rm_destroy(&sc->hn_vf_lock);
+	return (0);
+}
+
+static int
+hn_shutdown(device_t dev)
+{
+
+	return (0);
+}
+
+static void
+hn_link_status(struct hn_softc *sc)
+{
+	uint32_t link_status;
+	int error;
+
+	error = hn_rndis_get_linkstatus(sc, &link_status);
+	if (error) {
+		/* XXX what to do? */
+		return;
+	}
+
+	if (link_status == NDIS_MEDIA_STATE_CONNECTED)
+		sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
+	else
+		sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
+	if_link_state_change(sc->hn_ifp,
+	    (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
+	    LINK_STATE_UP : LINK_STATE_DOWN);
+}
+
+static void
+hn_link_taskfunc(void *xsc, int pending __unused)
+{
+	struct hn_softc *sc = xsc;
+
+	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
+		return;
+	hn_link_status(sc);
+}
+
+static void
+hn_netchg_init_taskfunc(void *xsc, int pending __unused)
+{
+	struct hn_softc *sc = xsc;
+
+	/* Prevent any link status checks from running. */
+	sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;
+
+	/*
+	 * Fake up a [link down --> link up] state change; a 5 second
+	 * delay is used, which closely simulates the miibus reaction
+	 * to a link down event.
+	 */
+	sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
+	if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
+	taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
+	    &sc->hn_netchg_status, 5 * hz);
+}
+
+static void
+hn_netchg_status_taskfunc(void *xsc, int pending __unused)
+{
+	struct hn_softc *sc = xsc;
+
+	/* Re-allow link status checks. */
+	sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
+	hn_link_status(sc);
+}
+
+static void
+hn_update_link_status(struct hn_softc *sc)
+{
+
+	if (sc->hn_mgmt_taskq != NULL)
+		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
+}
+
+static void
+hn_change_network(struct hn_softc *sc)
+{
+
+	if (sc->hn_mgmt_taskq != NULL)
+		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
+}
+
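+/*
+ * Load the mbuf chain into the txdesc's DMA map.  If the chain needs
+ * more segments than the map allows (EFBIG), m_collapse() it down to
+ * at most HN_TX_DATA_SEGCNT_MAX segments and retry the load once.
+ */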
+static __inline int
+hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
+    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
+{
+	struct mbuf *m = *m_head;
+	int error;
+
+	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));
+
+	error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
+	    m, segs, nsegs, BUS_DMA_NOWAIT);
+	if (error == EFBIG) {
+		struct mbuf *m_new;
+
+		m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
+		if (m_new == NULL)
+			return ENOBUFS;
+		else
+			*m_head = m = m_new;
+		txr->hn_tx_collapsed++;
+
+		error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
+		    txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
+	}
+	if (!error) {
+		bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
+		    BUS_DMASYNC_PREWRITE);
+		txd->flags |= HN_TXD_FLAG_DMAMAP;
+	}
+	return error;
+}
+
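+/*
+ * Drop a reference on the txdesc; the last reference frees it.  Any
+ * txdescs aggregated onto this one are recursively freed first, then
+ * the chimney sending buffer or the DMA map is released, the mbuf is
+ * freed, and the txdesc is returned to the ring's free list.  Returns
+ * 1 if the txdesc was actually freed, 0 if references remain.
+ */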
+static __inline int
+hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
+{
+
+	KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
+	    ("put an onlist txd %#x", txd->flags));
+	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
+	    ("put an onagg txd %#x", txd->flags));
+
+	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
+	if (atomic_fetchadd_int(&txd->refs, -1) != 1)
+		return 0;
+
+	if (!STAILQ_EMPTY(&txd->agg_list)) {
+		struct hn_txdesc *tmp_txd;
+
+		while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
+			int freed;
+
+			KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
+			    ("resursive aggregation on aggregated txdesc"));
+			KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
+			    ("not aggregated txdesc"));
+			KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
+			    ("aggregated txdesc uses dmamap"));
+			KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
+			    ("aggregated txdesc consumes "
+			     "chimney sending buffer"));
+			KASSERT(tmp_txd->chim_size == 0,
+			    ("aggregated txdesc has non-zero "
+			     "chimney sending size"));
+
+			STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
+			tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
+			freed = hn_txdesc_put(txr, tmp_txd);
+			KASSERT(freed, ("failed to free aggregated txdesc"));
+		}
+	}
+
+	if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
+		KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
+		    ("chim txd uses dmamap"));
+		hn_chim_free(txr->hn_sc, txd->chim_index);
+		txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
+		txd->chim_size = 0;
+	} else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
+		bus_dmamap_sync(txr->hn_tx_data_dtag,
+		    txd->data_dmap, BUS_DMASYNC_POSTWRITE);
+		bus_dmamap_unload(txr->hn_tx_data_dtag,
+		    txd->data_dmap);
+		txd->flags &= ~HN_TXD_FLAG_DMAMAP;
+	}
+
+	if (txd->m != NULL) {
+		m_freem(txd->m);
+		txd->m = NULL;
+	}
+
+	txd->flags |= HN_TXD_FLAG_ONLIST;
+#ifndef HN_USE_TXDESC_BUFRING
+	mtx_lock_spin(&txr->hn_txlist_spin);
+	KASSERT(txr->hn_txdesc_avail >= 0 &&
+	    txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
+	    ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
+	txr->hn_txdesc_avail++;
+	SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
+	mtx_unlock_spin(&txr->hn_txlist_spin);
+#else	/* HN_USE_TXDESC_BUFRING */
+#ifdef HN_DEBUG
+	atomic_add_int(&txr->hn_txdesc_avail, 1);
+#endif
+	buf_ring_enqueue(txr->hn_txdesc_br, txd);
+#endif	/* !HN_USE_TXDESC_BUFRING */
+
+	return 1;
+}
+
+static __inline struct hn_txdesc *
+hn_txdesc_get(struct hn_tx_ring *txr)
+{
+	struct hn_txdesc *txd;
+
+#ifndef HN_USE_TXDESC_BUFRING
+	mtx_lock_spin(&txr->hn_txlist_spin);
+	txd = SLIST_FIRST(&txr->hn_txlist);
+	if (txd != NULL) {
+		KASSERT(txr->hn_txdesc_avail > 0,
+		    ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
+		txr->hn_txdesc_avail--;
+		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
+	}
+	mtx_unlock_spin(&txr->hn_txlist_spin);
+#else
+	txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
+#endif
+
+	if (txd != NULL) {
+#ifdef HN_USE_TXDESC_BUFRING
+#ifdef HN_DEBUG
+		atomic_subtract_int(&txr->hn_txdesc_avail, 1);
+#endif
+#endif	/* HN_USE_TXDESC_BUFRING */
+		KASSERT(txd->m == NULL && txd->refs == 0 &&
+		    STAILQ_EMPTY(&txd->agg_list) &&
+		    txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
+		    txd->chim_size == 0 &&
+		    (txd->flags & HN_TXD_FLAG_ONLIST) &&
+		    (txd->flags & HN_TXD_FLAG_ONAGG) == 0 &&
+		    (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
+		txd->flags &= ~HN_TXD_FLAG_ONLIST;
+		txd->refs = 1;
+	}
+	return txd;
+}
+
+static __inline void
+hn_txdesc_hold(struct hn_txdesc *txd)
+{
+
+	/* 0->1 transition will never work */
+	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
+	atomic_add_int(&txd->refs, 1);
+}
+
+static __inline void
+hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd)
+{
+
+	KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0,
+	    ("recursive aggregation on aggregating txdesc"));
+
+	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
+	    ("already aggregated"));
+	KASSERT(STAILQ_EMPTY(&txd->agg_list),
+	    ("recursive aggregation on to-be-aggregated txdesc"));
+
+	txd->flags |= HN_TXD_FLAG_ONAGG;
+	STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link);
+}
+
+static bool
+hn_tx_ring_pending(struct hn_tx_ring *txr)
+{
+	bool pending = false;
+
+#ifndef HN_USE_TXDESC_BUFRING
+	mtx_lock_spin(&txr->hn_txlist_spin);
+	if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
+		pending = true;
+	mtx_unlock_spin(&txr->hn_txlist_spin);
+#else
+	if (!buf_ring_full(txr->hn_txdesc_br))
+		pending = true;
+#endif
+	return (pending);
+}
+
+static __inline void
+hn_txeof(struct hn_tx_ring *txr)
+{
+	txr->hn_has_txeof = 0;
+	txr->hn_txeof(txr);
+}
+
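+/*
+ * TX completion callback.  Completions are batched: while the ring is
+ * marked oactive, the pending txeof only runs after
+ * HN_EARLY_TXEOF_THRESH completions have accumulated.
+ */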
+static void
+hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
+    struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
+{
+	struct hn_txdesc *txd = sndc->hn_cbarg;
+	struct hn_tx_ring *txr;
+
+	txr = txd->txr;
+	KASSERT(txr->hn_chan == chan,
+	    ("channel mismatch, on chan%u, should be chan%u",
+	     vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan)));
+
+	txr->hn_has_txeof = 1;
+	hn_txdesc_put(txr, txd);
+
+	++txr->hn_txdone_cnt;
+	if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
+		txr->hn_txdone_cnt = 0;
+		if (txr->hn_oactive)
+			hn_txeof(txr);
+	}
+}
+
+static void
+hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
+{
+#if defined(INET) || defined(INET6)
+	struct lro_ctrl *lro = &rxr->hn_lro;
+	struct lro_entry *queued;
+
+	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
+		SLIST_REMOVE_HEAD(&lro->lro_active, next);
+		tcp_lro_flush(lro, queued);
+	}
+#endif
+
+	/*
+	 * NOTE:
+	 * 'txr' could be NULL, if multiple channels are used and
+	 * the ifnet.if_start method is enabled.
+	 */
+	if (txr == NULL || !txr->hn_has_txeof)
+		return;
+
+	txr->hn_txdone_cnt = 0;
+	hn_txeof(txr);
+}
+
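+/*
+ * Convert an offset counted from the beginning of rndis_packet_msg
+ * into one counted from the rm_dataoffset field, as the host expects.
+ * E.g. (a sketch, with the eleven 32-bit header fields set up by
+ * hn_encap() below) hn_rndis_pktmsg_offset(sizeof(struct
+ * rndis_packet_msg)) returns sizeof(struct rndis_packet_msg) - 8,
+ * since rm_dataoffset sits after the 8 bytes of rm_type and rm_len.
+ */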
+static __inline uint32_t
+hn_rndis_pktmsg_offset(uint32_t ofs)
+{
+
+	KASSERT(ofs >= sizeof(struct rndis_packet_msg),
+	    ("invalid RNDIS packet msg offset %u", ofs));
+	return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset));
+}
+
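+/*
+ * Append pi_dlen bytes of per-packet-info of the given type at the
+ * tail of the RNDIS packet message's pktinfo area and return a
+ * pointer to its data portion; see hn_encap() below for typical use
+ * (hash value, VLAN, LSO and checksum pktinfo).
+ */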
+static __inline void *
+hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
+    size_t pi_dlen, uint32_t pi_type)
+{
+	const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen);
+	struct rndis_pktinfo *pi;
+
+	KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0,
+	    ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen));
+
+	/*
+	 * Per-packet-info does not move; it only grows.
+	 *
+	 * NOTE:
+	 * rm_pktinfooffset in this phase counts from the beginning
+	 * of rndis_packet_msg.
+	 */
+	KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize,
+	    ("%u pktinfo overflows RNDIS packet msg", pi_type));
+	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset +
+	    pkt->rm_pktinfolen);
+	pkt->rm_pktinfolen += pi_size;
+
+	pi->rm_size = pi_size;
+	pi->rm_type = pi_type;
+	pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET;
+
+	return (pi->rm_data);
+}
+
+static __inline int
+hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr)
+{
+	struct hn_txdesc *txd;
+	struct mbuf *m;
+	int error, pkts;
+
+	txd = txr->hn_agg_txd;
+	KASSERT(txd != NULL, ("no aggregate txdesc"));
+
+	/*
+	 * Since hn_txpkt() will reset this temporary stat, save
+	 * it now so that oerrors can be updated properly if
+	 * hn_txpkt() ever fails.
+	 */
+	pkts = txr->hn_stat_pkts;
+
+	/*
+	 * Since txd's mbuf will _not_ be freed upon hn_txpkt()
+	 * failure, save it here so that it can be freed if hn_txpkt()
+	 * ever fails.
+	 */
+	m = txd->m;
+	error = hn_txpkt(ifp, txr, txd);
+	if (__predict_false(error)) {
+		/* txd is freed, but m is not. */
+		m_freem(m);
+
+		txr->hn_flush_failed++;
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts);
+	}
+
+	/* Reset all aggregation states. */
+	txr->hn_agg_txd = NULL;
+	txr->hn_agg_szleft = 0;
+	txr->hn_agg_pktleft = 0;
+	txr->hn_agg_prevpkt = NULL;
+
+	return (error);
+}
+
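+/*
+ * Try to place the packet into the chimney sending buffer, possibly
+ * aggregating it with previously queued packets.  A rough sketch of
+ * the resulting chimney buffer layout, with padding dictated by
+ * txr->hn_agg_align:
+ *
+ *	|RNDIS pkt #0|pad|RNDIS pkt #1|pad| ... |
+ *
+ * Returns a pointer into the chimney buffer at which the caller
+ * should build its RNDIS packet, or NULL if no chimney sending
+ * buffer is available.
+ */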
+static void *
+hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
+    int pktsize)
+{
+	void *chim;
+
+	if (txr->hn_agg_txd != NULL) {
+		if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) {
+			struct hn_txdesc *agg_txd = txr->hn_agg_txd;
+			struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt;
+			int olen;
+
+			/*
+			 * Update the previous RNDIS packet's total length;
+			 * it can be increased due to the mandatory alignment
+			 * padding for this RNDIS packet.  And update the
+			 * aggregating txdesc's chimney sending buffer size
+			 * accordingly.
+			 *
+			 * XXX
+			 * Zero-out the padding, as required by the RNDIS spec.
+			 */
+			olen = pkt->rm_len;
+			pkt->rm_len = roundup2(olen, txr->hn_agg_align);
+			agg_txd->chim_size += pkt->rm_len - olen;
+
+			/* Link this txdesc to the parent. */
+			hn_txdesc_agg(agg_txd, txd);
+
+			chim = (uint8_t *)pkt + pkt->rm_len;
+			/* Save the current packet for later fixup. */
+			txr->hn_agg_prevpkt = chim;
+
+			txr->hn_agg_pktleft--;
+			txr->hn_agg_szleft -= pktsize;
+			if (txr->hn_agg_szleft <=
+			    HN_PKTSIZE_MIN(txr->hn_agg_align)) {
+				/*
+				 * Probably can't aggregate more packets,
+				 * flush this aggregating txdesc proactively.
+				 */
+				txr->hn_agg_pktleft = 0;
+			}
+			/* Done! */
+			return (chim);
+		}
+		hn_flush_txagg(ifp, txr);
+	}
+	KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
+
+	txr->hn_tx_chimney_tried++;
+	txd->chim_index = hn_chim_alloc(txr->hn_sc);
+	if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID)
+		return (NULL);
+	txr->hn_tx_chimney++;
+
+	chim = txr->hn_sc->hn_chim +
+	    (txd->chim_index * txr->hn_sc->hn_chim_szmax);
+
+	if (txr->hn_agg_pktmax > 1 &&
+	    txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) {
+		txr->hn_agg_txd = txd;
+		txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1;
+		txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize;
+		txr->hn_agg_prevpkt = chim;
+	}
+	return (chim);
+}
+
+/*
+ * NOTE:
+ * If this function fails, then both txd and m_head0 will be freed.
+ */
+static int
+hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
+    struct mbuf **m_head0)
+{
+	bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
+	int error, nsegs, i;
+	struct mbuf *m_head = *m_head0;
+	struct rndis_packet_msg *pkt;
+	uint32_t *pi_data;
+	void *chim = NULL;
+	int pkt_hlen, pkt_size;
+
+	pkt = txd->rndis_pkt;
+	pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align);
+	if (pkt_size < txr->hn_chim_size) {
+		chim = hn_try_txagg(ifp, txr, txd, pkt_size);
+		if (chim != NULL)
+			pkt = chim;
+	} else {
+		if (txr->hn_agg_txd != NULL)
+			hn_flush_txagg(ifp, txr);
+	}
+
+	pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
+	pkt->rm_len = m_head->m_pkthdr.len;
+	pkt->rm_dataoffset = 0;
+	pkt->rm_datalen = m_head->m_pkthdr.len;
+	pkt->rm_oobdataoffset = 0;
+	pkt->rm_oobdatalen = 0;
+	pkt->rm_oobdataelements = 0;
+	pkt->rm_pktinfooffset = sizeof(*pkt);
+	pkt->rm_pktinfolen = 0;
+	pkt->rm_vchandle = 0;
+	pkt->rm_reserved = 0;
+
+	if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
+		/*
+		 * Set the hash value for this packet, so that the host could
+		 * dispatch the TX done event for this packet back to this TX
+		 * ring's channel.
+		 */
+		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
+		    HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
+		*pi_data = txr->hn_tx_idx;
+	}
+
+	if (m_head->m_flags & M_VLANTAG) {
+		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
+		    NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
+		*pi_data = NDIS_VLAN_INFO_MAKE(
+		    EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag),
+		    EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag),
+		    EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag));
+	}
+
+	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
+#if defined(INET6) || defined(INET)
+		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
+		    NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO);
+#ifdef INET
+		if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
+			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(
+			    m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen,
+			    m_head->m_pkthdr.tso_segsz);
+		}
+#endif
+#if defined(INET6) && defined(INET)
+		else
+#endif
+#ifdef INET6
+		{
+			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(
+			    m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen,
+			    m_head->m_pkthdr.tso_segsz);
+		}
+#endif
+#endif	/* INET6 || INET */
+	} else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
+		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
+		    NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
+		if (m_head->m_pkthdr.csum_flags &
+		    (CSUM_IP6_TCP | CSUM_IP6_UDP)) {
+			*pi_data = NDIS_TXCSUM_INFO_IPV6;
+		} else {
+			*pi_data = NDIS_TXCSUM_INFO_IPV4;
+			if (m_head->m_pkthdr.csum_flags & CSUM_IP)
+				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
+		}
+
+		if (m_head->m_pkthdr.csum_flags &
+		    (CSUM_IP_TCP | CSUM_IP6_TCP)) {
+			*pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(
+			    m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen);
+		} else if (m_head->m_pkthdr.csum_flags &
+		    (CSUM_IP_UDP | CSUM_IP6_UDP)) {
+			*pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(
+			    m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen);
+		}
+	}
+
+	pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
+	/* Fixup RNDIS packet message total length */
+	pkt->rm_len += pkt_hlen;
+	/* Convert RNDIS packet message offsets */
+	pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
+	pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);
+
+	/*
+	 * Fast path: Chimney sending.
+	 */
+	if (chim != NULL) {
+		struct hn_txdesc *tgt_txd = txd;
+
+		if (txr->hn_agg_txd != NULL) {
+			tgt_txd = txr->hn_agg_txd;
+#ifdef INVARIANTS
+			*m_head0 = NULL;
+#endif
+		}
+
+		KASSERT(pkt == chim,
+		    ("RNDIS pkt not in chimney sending buffer"));
+		KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
+		    ("chimney sending buffer is not used"));
+		tgt_txd->chim_size += pkt->rm_len;
+
+		m_copydata(m_head, 0, m_head->m_pkthdr.len,
+		    ((uint8_t *)chim) + pkt_hlen);
+
+		txr->hn_gpa_cnt = 0;
+		txr->hn_sendpkt = hn_txpkt_chim;
+		goto done;
+	}
+
+	KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc"));
+	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
+	    ("chimney buffer is used"));
+	KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc"));
+
+	error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
+	if (__predict_false(error)) {
+		int freed;
+
+		/*
+		 * This mbuf is not linked w/ the txd yet, so free it now.
+		 */
+		m_freem(m_head);
+		*m_head0 = NULL;
+
+		freed = hn_txdesc_put(txr, txd);
+		KASSERT(freed != 0,
+		    ("fail to free txd upon txdma error"));
+
+		txr->hn_txdma_failed++;
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+		return error;
+	}
+	*m_head0 = m_head;
+
+	/* +1 RNDIS packet message */
+	txr->hn_gpa_cnt = nsegs + 1;
+
+	/* send packet with page buffer */
+	txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
+	txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
+	txr->hn_gpa[0].gpa_len = pkt_hlen;
+
+	/*
+	 * Fill the page buffers with mbuf info after the page
+	 * buffer for RNDIS packet message.
+	 */
+	for (i = 0; i < nsegs; ++i) {
+		struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1];
+
+		gpa->gpa_page = atop(segs[i].ds_addr);
+		gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK;
+		gpa->gpa_len = segs[i].ds_len;
+	}
+
+	txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
+	txd->chim_size = 0;
+	txr->hn_sendpkt = hn_txpkt_sglist;
+done:
+	txd->m = m_head;
+
+	/* Set the completion routine */
+	hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);
+
+	/* Update temporary stats for later use. */
+	txr->hn_stat_pkts++;
+	txr->hn_stat_size += m_head->m_pkthdr.len;
+	if (m_head->m_flags & M_MCAST)
+		txr->hn_stat_mcasts++;
+
+	return 0;
+}
+
+/*
+ * NOTE:
+ * If this function fails, then txd will be freed, but the mbuf
+ * associated w/ the txd will _not_ be freed.
+ */
+static int
+hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
+{
+	int error, send_failed = 0, has_bpf;
+
+again:
+	has_bpf = bpf_peers_present(ifp->if_bpf);
+	if (has_bpf) {
+		/*
+		 * Make sure that this txd and any aggregated txds are not
+		 * freed before ETHER_BPF_MTAP.
+		 */
+		hn_txdesc_hold(txd);
+	}
+	error = txr->hn_sendpkt(txr, txd);
+	if (!error) {
+		if (has_bpf) {
+			const struct hn_txdesc *tmp_txd;
+
+			ETHER_BPF_MTAP(ifp, txd->m);
+			STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link)
+				ETHER_BPF_MTAP(ifp, tmp_txd->m);
+		}
+
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts);
+#ifdef HN_IFSTART_SUPPORT
+		if (!hn_use_if_start)
+#endif
+		{
+			if_inc_counter(ifp, IFCOUNTER_OBYTES,
+			    txr->hn_stat_size);
+			if (txr->hn_stat_mcasts != 0) {
+				if_inc_counter(ifp, IFCOUNTER_OMCASTS,
+				    txr->hn_stat_mcasts);
+			}
+		}
+		txr->hn_pkts += txr->hn_stat_pkts;
+		txr->hn_sends++;
+	}
+	if (has_bpf)
+		hn_txdesc_put(txr, txd);
+
+	if (__predict_false(error)) {
+		int freed;
+
+		/*
+		 * This should "really rarely" happen.
+		 *
+		 * XXX Too many RX to be acked or too many sideband
+		 * commands to run?  Ask netvsc_channel_rollup()
+		 * to kick start later.
+		 */
+		txr->hn_has_txeof = 1;
+		if (!send_failed) {
+			txr->hn_send_failed++;
+			send_failed = 1;
+			/*
+			 * Try sending again after setting hn_has_txeof,
+			 * in case we missed the last
+			 * netvsc_channel_rollup().
+			 */
+			goto again;
+		}
+		if_printf(ifp, "send failed\n");
+
+		/*
+		 * Caller will perform further processing on the
+		 * associated mbuf, so don't free it in hn_txdesc_put();
+		 * only unload it from the DMA map in hn_txdesc_put(),
+		 * if it was loaded.
+		 */
+		txd->m = NULL;
+		freed = hn_txdesc_put(txr, txd);
+		KASSERT(freed != 0,
+		    ("fail to free txd upon send error"));
+
+		txr->hn_send_failed++;
+	}
+
+	/* Reset temporary stats, after this sending is done. */
+	txr->hn_stat_size = 0;
+	txr->hn_stat_pkts = 0;
+	txr->hn_stat_mcasts = 0;
+
+	return (error);
+}
+
+/*
+ * Append the specified data to the indicated mbuf chain.
+ * Extend the mbuf chain if the new data does not fit in
+ * existing space.
+ *
+ * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
+ * There should be an equivalent in the kernel mbuf code,
+ * but there does not appear to be one yet.
+ *
+ * Differs from m_append() in that additional mbufs are
+ * allocated with cluster size MJUMPAGESIZE, and filled
+ * accordingly.
+ *
+ * Return 1 if able to complete the job; otherwise 0.
+ */
+static int
+hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
+{
+	struct mbuf *m, *n;
+	int remainder, space;
+
+	for (m = m0; m->m_next != NULL; m = m->m_next)
+		;
+	remainder = len;
+	space = M_TRAILINGSPACE(m);
+	if (space > 0) {
+		/*
+		 * Copy into available space.
+		 */
+		if (space > remainder)
+			space = remainder;
+		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
+		m->m_len += space;
+		cp += space;
+		remainder -= space;
+	}
+	while (remainder > 0) {
+		/*
+		 * Allocate a new mbuf; could check space
+		 * and allocate a cluster instead.
+		 */
+		n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE);
+		if (n == NULL)
+			break;
+		n->m_len = min(MJUMPAGESIZE, remainder);
+		bcopy(cp, mtod(n, caddr_t), n->m_len);
+		cp += n->m_len;
+		remainder -= n->m_len;
+		m->m_next = n;
+		m = n;
+	}
+	if (m0->m_flags & M_PKTHDR)
+		m0->m_pkthdr.len += len - remainder;
+
+	return (remainder == 0);
+}
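+
+/*
+ * Typical use of hv_m_append() (a sketch; see hn_rxpkt() below):
+ *
+ *	m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
+ *	if (m_new != NULL)
+ *		hv_m_append(m_new, dlen, data);
+ */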
+
+#if defined(INET) || defined(INET6)
+static __inline int
+hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
+{
+#if __FreeBSD_version >= 1100095
+	if (hn_lro_mbufq_depth) {
+		tcp_lro_queue_mbuf(lc, m);
+		return 0;
+	}
+#endif
+	return tcp_lro_rx(lc, m, 0);
+}
+#endif
+
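+/*
+ * RX path for a single packet: copy the data into an mbuf (chain),
+ * apply the RX checksum offload / trust-host-csum policy, attach
+ * VLAN tag and RSS hash metadata, then hand the mbuf to LRO or to
+ * ifp->if_input().
+ */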
+static int
+hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
+    const struct hn_rxinfo *info)
+{
+	struct ifnet *ifp, *hn_ifp = rxr->hn_ifp;
+	struct mbuf *m_new;
+	int size, do_lro = 0, do_csum = 1, is_vf = 0;
+	int hash_type = M_HASHTYPE_NONE;
+	int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE;
+
+	ifp = hn_ifp;
+	if (rxr->hn_rxvf_ifp != NULL) {
+		/*
+		 * Non-transparent mode VF; pretend this packet is from
+		 * the VF.
+		 */
+		ifp = rxr->hn_rxvf_ifp;
+		is_vf = 1;
+	} else if (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF) {
+		/* Transparent mode VF. */
+		is_vf = 1;
+	}
+
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+		/*
+		 * NOTE:
+		 * See the NOTE of hn_rndis_init_fixat().  This
+		 * function can be reached immediately after the
+		 * RNDIS is initialized, but before the ifnet is
+		 * set up on the hn_attach() path; drop the unexpected
+		 * packets.
+		 */
+		return (0);
+	}
+
+	if (__predict_false(dlen < ETHER_HDR_LEN)) {
+		if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1);
+		return (0);
+	}
+
+	if (dlen <= MHLEN) {
+		m_new = m_gethdr(M_NOWAIT, MT_DATA);
+		if (m_new == NULL) {
+			if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1);
+			return (0);
+		}
+		memcpy(mtod(m_new, void *), data, dlen);
+		m_new->m_pkthdr.len = m_new->m_len = dlen;
+		rxr->hn_small_pkts++;
+	} else {
+		/*
+		 * Get an mbuf with a cluster.  For packets 2K or less,
+		 * get a standard 2K cluster.  For anything larger, get a
+		 * 4K cluster.  Any buffers larger than 4K can cause problems
+		 * if looped around to the Hyper-V TX channel, so avoid them.
+		 */
+		size = MCLBYTES;
+		if (dlen > MCLBYTES) {
+			/* 4096 */
+			size = MJUMPAGESIZE;
+		}
+
+		m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
+		if (m_new == NULL) {
+			if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1);
+			return (0);
+		}
+
+		hv_m_append(m_new, dlen, data);
+	}
+	m_new->m_pkthdr.rcvif = ifp;
+
+	if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0))
+		do_csum = 0;
+
+	/* receive side checksum offload */
+	if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
+		/* IP csum offload */
+		if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
+			m_new->m_pkthdr.csum_flags |=
+			    (CSUM_IP_CHECKED | CSUM_IP_VALID);
+			rxr->hn_csum_ip++;
+		}
+
+		/* TCP/UDP csum offload */
+		if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK |
+		     NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
+			m_new->m_pkthdr.csum_flags |=
+			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+			m_new->m_pkthdr.csum_data = 0xffff;
+			if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK)
+				rxr->hn_csum_tcp++;
+			else
+				rxr->hn_csum_udp++;
+		}
+
+		/*
+		 * XXX
+		 * As of this writing (Oct 28th, 2016), the host side will
+		 * turn on only TCPCS_OK and IPCS_OK even for UDP datagrams,
+		 * so the do_lro setting here is actually _not_ accurate.  We
+		 * depend on the RSS hash type check to reset do_lro.
+		 */
+		if ((info->csum_info &
+		     (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
+		    (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
+			do_lro = 1;
+	} else {
+		hn_rxpkt_proto(m_new, &l3proto, &l4proto);
+		if (l3proto == ETHERTYPE_IP) {
+			if (l4proto == IPPROTO_TCP) {
+				if (do_csum &&
+				    (rxr->hn_trust_hcsum &
+				     HN_TRUST_HCSUM_TCP)) {
+					rxr->hn_csum_trusted++;
+					m_new->m_pkthdr.csum_flags |=
+					   (CSUM_IP_CHECKED | CSUM_IP_VALID |
+					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+					m_new->m_pkthdr.csum_data = 0xffff;
+				}
+				do_lro = 1;
+			} else if (l4proto == IPPROTO_UDP) {
+				if (do_csum &&
+				    (rxr->hn_trust_hcsum &
+				     HN_TRUST_HCSUM_UDP)) {
+					rxr->hn_csum_trusted++;
+					m_new->m_pkthdr.csum_flags |=
+					   (CSUM_IP_CHECKED | CSUM_IP_VALID |
+					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+					m_new->m_pkthdr.csum_data = 0xffff;
+				}
+			} else if (l4proto != IPPROTO_DONE && do_csum &&
+			    (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
+				rxr->hn_csum_trusted++;
+				m_new->m_pkthdr.csum_flags |=
+				    (CSUM_IP_CHECKED | CSUM_IP_VALID);
+			}
+		}
+	}
+
+	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
+		m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
+		    NDIS_VLAN_INFO_ID(info->vlan_info),
+		    NDIS_VLAN_INFO_PRI(info->vlan_info),
+		    NDIS_VLAN_INFO_CFI(info->vlan_info));
+		m_new->m_flags |= M_VLANTAG;
+	}
+
+	/*
+	 * If the VF is activated (transparent/non-transparent mode does not
+	 * matter here).
+	 *
+	 * - Disable LRO
+	 *
+	 *   hn(4) will only receive broadcast packets, multicast packets,
+	 *   TCP SYN and SYN|ACK (in Azure); LRO is useless for these
+	 *   packet types.
+	 *
+	 *   For non-transparent, we definitely _cannot_ enable LRO at
+	 *   all, since the LRO flush will use hn(4) as the receiving
+	 *   interface; i.e. hn_ifp->if_input(hn_ifp, m).
+	 */
+	if (is_vf)
+		do_lro = 0;
+
+	/*
+	 * If the VF is activated (transparent/non-transparent mode does not
+	 * matter here), do _not_ mess with unsupported hash types or
+	 * functions.
+	 */
+	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
+		rxr->hn_rss_pkts++;
+		m_new->m_pkthdr.flowid = info->hash_value;
+		if (!is_vf)
+			hash_type = M_HASHTYPE_OPAQUE;
+		if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
+		    NDIS_HASH_FUNCTION_TOEPLITZ) {
+			uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK &
+			    rxr->hn_mbuf_hash);
+
+			/*
+			 * NOTE:
+			 * do_lro is reset, if the hash types are not TCP
+			 * related.  See the comment in the above csum_flags
+			 * setup section.
+			 */
+			switch (type) {
+			case NDIS_HASH_IPV4:
+				hash_type = M_HASHTYPE_RSS_IPV4;
+				do_lro = 0;
+				break;
+
+			case NDIS_HASH_TCP_IPV4:
+				hash_type = M_HASHTYPE_RSS_TCP_IPV4;
+				if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) {
+					int def_htype = M_HASHTYPE_OPAQUE;
+
+					if (is_vf)
+						def_htype = M_HASHTYPE_NONE;
+
+					/*
+					 * UDP 4-tuple hash is delivered as
+					 * TCP 4-tuple hash.
+					 */
+					if (l3proto == ETHERTYPE_MAX) {
+						hn_rxpkt_proto(m_new,
+						    &l3proto, &l4proto);
+					}
+					if (l3proto == ETHERTYPE_IP) {
+						if (l4proto == IPPROTO_UDP &&
+						    (rxr->hn_mbuf_hash &
+						     NDIS_HASH_UDP_IPV4_X)) {
+							hash_type =
+							M_HASHTYPE_RSS_UDP_IPV4;
+							do_lro = 0;
+						} else if (l4proto !=
+						    IPPROTO_TCP) {
+							hash_type = def_htype;
+							do_lro = 0;
+						}
+					} else {
+						hash_type = def_htype;
+						do_lro = 0;
+					}
+				}
+				break;
+
+			case NDIS_HASH_IPV6:
+				hash_type = M_HASHTYPE_RSS_IPV6;
+				do_lro = 0;
+				break;
+
+			case NDIS_HASH_IPV6_EX:
+				hash_type = M_HASHTYPE_RSS_IPV6_EX;
+				do_lro = 0;
+				break;
+
+			case NDIS_HASH_TCP_IPV6:
+				hash_type = M_HASHTYPE_RSS_TCP_IPV6;
+				break;
+
+			case NDIS_HASH_TCP_IPV6_EX:
+				hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX;
+				break;
+			}
+		}
+	} else if (!is_vf) {
+		m_new->m_pkthdr.flowid = rxr->hn_rx_idx;
+	}
+	M_HASHTYPE_SET(m_new, hash_type);
+
+	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+	if (hn_ifp != ifp) {
+		const struct ether_header *eh;
+
+		/*
+		 * Non-transparent mode VF is activated.
+		 */
+
+		/*
+		 * Allow tapping on hn(4).
+		 */
+		ETHER_BPF_MTAP(hn_ifp, m_new);
+
+		/*
+		 * Update hn(4)'s stats.
+		 */
+		if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
+		if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len);
+		/* Checked at the beginning of this function. */
+		KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame"));
+		eh = mtod(m_new, struct ether_header *);
+		if (ETHER_IS_MULTICAST(eh->ether_dhost))
+			if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1);
+	}
+	rxr->hn_pkts++;
+
+	if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) {
+#if defined(INET) || defined(INET6)
+		struct lro_ctrl *lro = &rxr->hn_lro;
+
+		if (lro->lro_cnt) {
+			rxr->hn_lro_tried++;
+			if (hn_lro_rx(lro, m_new) == 0) {
+				/* DONE! */
+				return 0;
+			}
+		}
+#endif
+	}
+	ifp->if_input(ifp, m_new);
+
+	return (0);
+}
+
+static int
+hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct hn_softc *sc = ifp->if_softc;
+	struct ifreq *ifr = (struct ifreq *)data, ifr_vf;
+	struct ifnet *vf_ifp;
+	int mask, error = 0;
+	struct ifrsskey *ifrk;
+	struct ifrsshash *ifrh;
+	uint32_t mtu;
+
+	switch (cmd) {
+	case SIOCSIFMTU:
+		if (ifr->ifr_mtu > HN_MTU_MAX) {
+			error = EINVAL;
+			break;
+		}
+
+		HN_LOCK(sc);
+
+		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
+			HN_UNLOCK(sc);
+			break;
+		}
+
+		if ((sc->hn_caps & HN_CAP_MTU) == 0) {
+			/* Can't change MTU */
+			HN_UNLOCK(sc);
+			error = EOPNOTSUPP;
+			break;
+		}
+
+		if (ifp->if_mtu == ifr->ifr_mtu) {
+			HN_UNLOCK(sc);
+			break;
+		}
+
+		if (hn_xpnt_vf_isready(sc)) {
+			vf_ifp = sc->hn_vf_ifp;
+			ifr_vf = *ifr;
+			strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname,
+			    sizeof(ifr_vf.ifr_name));
+			error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU,
+			    (caddr_t)&ifr_vf);
+			if (error) {
+				HN_UNLOCK(sc);
+				if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n",
+				    vf_ifp->if_xname, ifr->ifr_mtu, error);
+				break;
+			}
+		}
+
+		/*
+		 * Suspend this interface before the synthetic parts
+		 * are ripped.
+		 */
+		hn_suspend(sc);
+
+		/*
+		 * Detach the synthetic parts, i.e. NVS and RNDIS.
+		 */
+		hn_synth_detach(sc);
+
+		/*
+		 * Reattach the synthetic parts, i.e. NVS and RNDIS,
+		 * with the new MTU setting.
+		 */
+		error = hn_synth_attach(sc, ifr->ifr_mtu);
+		if (error) {
+			HN_UNLOCK(sc);
+			break;
+		}
+
+		error = hn_rndis_get_mtu(sc, &mtu);
+		if (error)
+			mtu = ifr->ifr_mtu;
+		else if (bootverbose)
+			if_printf(ifp, "RNDIS mtu %u\n", mtu);
+
+		/*
+		 * Commit the requested MTU, after the synthetic parts
+		 * have been successfully attached.
+		 */
+		if (mtu >= ifr->ifr_mtu) {
+			mtu = ifr->ifr_mtu;
+		} else {
+			if_printf(ifp, "fixup mtu %d -> %u\n",
+			    ifr->ifr_mtu, mtu);
+		}
+		ifp->if_mtu = mtu;
+
+		/*
+		 * Synthetic parts' reattach may change the chimney
+		 * sending size; update it.
+		 */
+		if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
+			hn_set_chim_size(sc, sc->hn_chim_szmax);
+
+		/*
+		 * Make sure that various parameters based on MTU are
+		 * still valid, after the MTU change.
+		 */
+		hn_mtu_change_fixup(sc);
+
+		/*
+		 * All done!  Resume the interface now.
+		 */
+		hn_resume(sc);
+
+		if ((sc->hn_flags & HN_FLAG_RXVF) ||
+		    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
+			/*
+			 * Since we have reattached the NVS part,
+			 * change the datapath to VF again, in case
+			 * it was lost when the NVS was detached.
+			 */
+			hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
+		}
+
+		HN_UNLOCK(sc);
+		break;
+
+	case SIOCSIFFLAGS:
+		HN_LOCK(sc);
+
+		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
+			HN_UNLOCK(sc);
+			break;
+		}
+
+		if (hn_xpnt_vf_isready(sc))
+			hn_xpnt_vf_saveifflags(sc);
+
+		if (ifp->if_flags & IFF_UP) {
+			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+				/*
+				 * Caller might hold a mutex, e.g.
+				 * bpf; use busy-wait for the RNDIS
+				 * reply.
+				 */
+				HN_NO_SLEEPING(sc);
+				hn_rxfilter_config(sc);
+				HN_SLEEPING_OK(sc);
+
+				if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
+					error = hn_xpnt_vf_iocsetflags(sc);
+			} else {
+				hn_init_locked(sc);
+			}
+		} else {
+			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+				hn_stop(sc, false);
+		}
+		sc->hn_if_flags = ifp->if_flags;
+
+		HN_UNLOCK(sc);
+		break;
+
+	case SIOCSIFCAP:
+		HN_LOCK(sc);
+
+		if (hn_xpnt_vf_isready(sc)) {
+			ifr_vf = *ifr;
+			strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname,
+			    sizeof(ifr_vf.ifr_name));
+			error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf);
+			HN_UNLOCK(sc);
+			break;
+		}
+
+		/*
+		 * Fix up requested capabilities w/ supported capabilities,
+		 * since the supported capabilities could have been changed.
+		 */
+		mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^
+		    ifp->if_capenable;
+
+		if (mask & IFCAP_TXCSUM) {
+			ifp->if_capenable ^= IFCAP_TXCSUM;
+			if (ifp->if_capenable & IFCAP_TXCSUM)
+				ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc);
+			else
+				ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc);
+		}
+		if (mask & IFCAP_TXCSUM_IPV6) {
+			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
+			if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
+				ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc);
+			else
+				ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc);
+		}
+
+		/* TODO: flip RNDIS offload parameters for RXCSUM. */
+		if (mask & IFCAP_RXCSUM)
+			ifp->if_capenable ^= IFCAP_RXCSUM;
+#ifdef foo
+		/* We can't distinguish IPv6 from IPv4 packets on the RX path. */
+		if (mask & IFCAP_RXCSUM_IPV6)
+			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
+#endif
+
+		if (mask & IFCAP_LRO)
+			ifp->if_capenable ^= IFCAP_LRO;
+
+		if (mask & IFCAP_TSO4) {
+			ifp->if_capenable ^= IFCAP_TSO4;
+			if (ifp->if_capenable & IFCAP_TSO4)
+				ifp->if_hwassist |= CSUM_IP_TSO;
+			else
+				ifp->if_hwassist &= ~CSUM_IP_TSO;
+		}
+		if (mask & IFCAP_TSO6) {
+			ifp->if_capenable ^= IFCAP_TSO6;
+			if (ifp->if_capenable & IFCAP_TSO6)
+				ifp->if_hwassist |= CSUM_IP6_TSO;
+			else
+				ifp->if_hwassist &= ~CSUM_IP6_TSO;
+		}
+
+		HN_UNLOCK(sc);
+		break;
+
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		HN_LOCK(sc);
+
+		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
+			HN_UNLOCK(sc);
+			break;
+		}
+		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+			/*
+			 * Multicast uses mutex; use busy-wait for
+			 * the RNDIS reply.
+			 */
+			HN_NO_SLEEPING(sc);
+			hn_rxfilter_config(sc);
+			HN_SLEEPING_OK(sc);
+		}
+
+		/* XXX vlan(4) style mcast addr maintenance */
+		if (hn_xpnt_vf_isready(sc)) {
+			int old_if_flags;
+
+			old_if_flags = sc->hn_vf_ifp->if_flags;
+			hn_xpnt_vf_saveifflags(sc);
+
+			if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) &&
+			    ((old_if_flags ^ sc->hn_vf_ifp->if_flags) &
+			     IFF_ALLMULTI))
+				error = hn_xpnt_vf_iocsetflags(sc);
+		}
+
+		HN_UNLOCK(sc);
+		break;
+
+	case SIOCSIFMEDIA:
+	case SIOCGIFMEDIA:
+		HN_LOCK(sc);
+		if (hn_xpnt_vf_isready(sc)) {
+			/*
+			 * SIOCGIFMEDIA expects ifmediareq, so don't
+			 * create and pass ifr_vf to the VF here; just
+			 * replace the ifr_name.
+			 */
+			vf_ifp = sc->hn_vf_ifp;
+			strlcpy(ifr->ifr_name, vf_ifp->if_xname,
+			    sizeof(ifr->ifr_name));
+			error = vf_ifp->if_ioctl(vf_ifp, cmd, data);
+			/* Restore the ifr_name. */
+			strlcpy(ifr->ifr_name, ifp->if_xname,
+			    sizeof(ifr->ifr_name));
+			HN_UNLOCK(sc);
+			break;
+		}
+		HN_UNLOCK(sc);
+		error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
+		break;
+
+	case SIOCGIFRSSHASH:
+		ifrh = (struct ifrsshash *)data;
+		HN_LOCK(sc);
+		if (sc->hn_rx_ring_inuse == 1) {
+			HN_UNLOCK(sc);
+			ifrh->ifrh_func = RSS_FUNC_NONE;
+			ifrh->ifrh_types = 0;
+			break;
+		}
+
+		if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ)
+			ifrh->ifrh_func = RSS_FUNC_TOEPLITZ;
+		else
+			ifrh->ifrh_func = RSS_FUNC_PRIVATE;
+		ifrh->ifrh_types = hn_rss_type_fromndis(sc->hn_rss_hash);
+		HN_UNLOCK(sc);
+		break;
+
+	case SIOCGIFRSSKEY:
+		ifrk = (struct ifrsskey *)data;
+		HN_LOCK(sc);
+		if (sc->hn_rx_ring_inuse == 1) {
+			HN_UNLOCK(sc);
+			ifrk->ifrk_func = RSS_FUNC_NONE;
+			ifrk->ifrk_keylen = 0;
+			break;
+		}
+		if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ)
+			ifrk->ifrk_func = RSS_FUNC_TOEPLITZ;
+		else
+			ifrk->ifrk_func = RSS_FUNC_PRIVATE;
+		ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ;
+		memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key,
+		    NDIS_HASH_KEYSIZE_TOEPLITZ);
+		HN_UNLOCK(sc);
+		break;
+
+	default:
+		error = ether_ioctl(ifp, cmd, data);
+		break;
+	}
+	return (error);
+}
+
+static void
+hn_stop(struct hn_softc *sc, bool detaching)
+{
+	struct ifnet *ifp = sc->hn_ifp;
+	int i;
+
+	HN_LOCK_ASSERT(sc);
+
+	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
+	    ("synthetic parts were not attached"));
+
+	/* Clear RUNNING bit ASAP. */
+	atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
+
+	/* Disable polling. */
+	hn_polling(sc, 0);
+
+	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
+		KASSERT(sc->hn_vf_ifp != NULL,
+		    ("%s: VF is not attached", ifp->if_xname));
+
+		/* Mark transparent mode VF as disabled. */
+		hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */);
+
+		/*
+		 * NOTE:
+		 * Datapath setting must happen _before_ bringing
+		 * the VF down.
+		 */
+		hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);
+
+		/*
+		 * Bring the VF down.
+		 */
+		hn_xpnt_vf_saveifflags(sc);
+		sc->hn_vf_ifp->if_flags &= ~IFF_UP;
+		hn_xpnt_vf_iocsetflags(sc);
+	}
+
+	/* Suspend data transfers. */
+	hn_suspend_data(sc);
+
+	/* Clear OACTIVE bit. */
+	atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
+	for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
+		sc->hn_tx_ring[i].hn_oactive = 0;
+
+	/*
+	 * If the non-transparent mode VF is active, make sure
+	 * that the RX filter still allows packet reception.
+	 */
+	if (!detaching && (sc->hn_flags & HN_FLAG_RXVF))
+		hn_rxfilter_config(sc);
+}
+
+static void
+hn_init_locked(struct hn_softc *sc)
+{
+	struct ifnet *ifp = sc->hn_ifp;
+	int i;
+
+	HN_LOCK_ASSERT(sc);
+
+	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
+		return;
+
+	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+		return;
+
+	/* Configure RX filter */
+	hn_rxfilter_config(sc);
+
+	/* Clear OACTIVE bit. */
+	atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
+	for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
+		sc->hn_tx_ring[i].hn_oactive = 0;
+
+	/* Clear TX 'suspended' bit. */
+	hn_resume_tx(sc, sc->hn_tx_ring_inuse);
+
+	if (hn_xpnt_vf_isready(sc)) {
+		/* Initialize transparent VF. */
+		hn_xpnt_vf_init(sc);
+	}
+
+	/* Everything is ready; unleash! */
+	atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
+
+	/* Re-enable polling if requested. */
+	if (sc->hn_pollhz > 0)
+		hn_polling(sc, sc->hn_pollhz);
+}
+
+static void
+hn_init(void *xsc)
+{
+	struct hn_softc *sc = xsc;
+
+	HN_LOCK(sc);
+	hn_init_locked(sc);
+	HN_UNLOCK(sc);
+}
+
+#if __FreeBSD_version >= 1100099
+
+static int
+hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	unsigned int lenlim;
+	int error;
+
+	lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
+	error = sysctl_handle_int(oidp, &lenlim, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	HN_LOCK(sc);
+	if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
+	    lenlim > TCP_LRO_LENGTH_MAX) {
+		HN_UNLOCK(sc);
+		return EINVAL;
+	}
+	hn_set_lro_lenlim(sc, lenlim);
+	HN_UNLOCK(sc);
+
+	return 0;
+}
+
+static int
+hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int ackcnt, error, i;
+
+	/*
+	 * lro_ackcnt_lim is append count limit,
+	 * +1 to turn it into aggregation limit.
+	 */
+	ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1;
+	error = sysctl_handle_int(oidp, &ackcnt, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
+		return EINVAL;
+
+	/*
+	 * Convert aggregation limit back to append
+	 * count limit.
+	 */
+	--ackcnt;
+	HN_LOCK(sc);
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+		sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
+	HN_UNLOCK(sc);
+	return 0;
+}
+
+#endif
+
+static int
+hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int hcsum = arg2;
+	int on, error, i;
+
+	on = 0;
+	if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum)
+		on = 1;
+
+	error = sysctl_handle_int(oidp, &on, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
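+	/*
+	 * arg2 selects the HN_TRUST_HCSUM_* bit this sysctl controls;
+	 * fan the new setting out to every RX ring.
+	 */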
+	HN_LOCK(sc);
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+		struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
+
+		if (on)
+			rxr->hn_trust_hcsum |= hcsum;
+		else
+			rxr->hn_trust_hcsum &= ~hcsum;
+	}
+	HN_UNLOCK(sc);
+	return 0;
+}
+
+static int
+hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int chim_size, error;
+
+	chim_size = sc->hn_tx_ring[0].hn_chim_size;
+	error = sysctl_handle_int(oidp, &chim_size, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	if (chim_size > sc->hn_chim_szmax || chim_size <= 0)
+		return EINVAL;
+
+	HN_LOCK(sc);
+	hn_set_chim_size(sc, chim_size);
+	HN_UNLOCK(sc);
+	return 0;
+}
+
+#if __FreeBSD_version < 1100095
+static int
+hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int ofs = arg2, i, error;
+	struct hn_rx_ring *rxr;
+	uint64_t stat;
+
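+	/*
+	 * arg2 is the byte offset (set up with __offsetof at sysctl
+	 * creation time) of the per-ring counter to be summed.
+	 */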
+	stat = 0;
+	for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
+		rxr = &sc->hn_rx_ring[i];
+		stat += *((int *)((uint8_t *)rxr + ofs));
+	}
+
+	error = sysctl_handle_64(oidp, &stat, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	/* Zero out this stat. */
+	for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
+		rxr = &sc->hn_rx_ring[i];
+		*((int *)((uint8_t *)rxr + ofs)) = 0;
+	}
+	return 0;
+}
+#else
+static int
+hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int ofs = arg2, i, error;
+	struct hn_rx_ring *rxr;
+	uint64_t stat;
+
+	stat = 0;
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+		rxr = &sc->hn_rx_ring[i];
+		stat += *((uint64_t *)((uint8_t *)rxr + ofs));
+	}
+
+	error = sysctl_handle_64(oidp, &stat, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	/* Zero out this stat. */
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+		rxr = &sc->hn_rx_ring[i];
+		*((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
+	}
+	return 0;
+}
+
+#endif
+
+static int
+hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int ofs = arg2, i, error;
+	struct hn_rx_ring *rxr;
+	u_long stat;
+
+	stat = 0;
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+		rxr = &sc->hn_rx_ring[i];
+		stat += *((u_long *)((uint8_t *)rxr + ofs));
+	}
+
+	error = sysctl_handle_long(oidp, &stat, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	/* Zero out this stat. */
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+		rxr = &sc->hn_rx_ring[i];
+		*((u_long *)((uint8_t *)rxr + ofs)) = 0;
+	}
+	return 0;
+}
+
+static int
+hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int ofs = arg2, i, error;
+	struct hn_tx_ring *txr;
+	u_long stat;
+
+	stat = 0;
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		txr = &sc->hn_tx_ring[i];
+		stat += *((u_long *)((uint8_t *)txr + ofs));
+	}
+
+	error = sysctl_handle_long(oidp, &stat, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	/* Zero out this stat. */
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		txr = &sc->hn_tx_ring[i];
+		*((u_long *)((uint8_t *)txr + ofs)) = 0;
+	}
+	return 0;
+}
+
+static int
+hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int ofs = arg2, i, error, conf;
+	struct hn_tx_ring *txr;
+
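+	/* All TX rings share this setting; ring0 holds the reference copy. */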
+	txr = &sc->hn_tx_ring[0];
+	conf = *((int *)((uint8_t *)txr + ofs));
+
+	error = sysctl_handle_int(oidp, &conf, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	HN_LOCK(sc);
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		txr = &sc->hn_tx_ring[i];
+		*((int *)((uint8_t *)txr + ofs)) = conf;
+	}
+	HN_UNLOCK(sc);
+
+	return 0;
+}
+
+static int
+hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int error, size;
+
+	size = sc->hn_agg_size;
+	error = sysctl_handle_int(oidp, &size, 0, req);
+	if (error || req->newptr == NULL)
+		return (error);
+
+	HN_LOCK(sc);
+	sc->hn_agg_size = size;
+	hn_set_txagg(sc);
+	HN_UNLOCK(sc);
+
+	return (0);
+}
+
+static int
+hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int error, pkts;
+
+	pkts = sc->hn_agg_pkts;
+	error = sysctl_handle_int(oidp, &pkts, 0, req);
+	if (error || req->newptr == NULL)
+		return (error);
+
+	HN_LOCK(sc);
+	sc->hn_agg_pkts = pkts;
+	hn_set_txagg(sc);
+	HN_UNLOCK(sc);
+
+	return (0);
+}
+
+static int
+hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int pkts;
+
+	pkts = sc->hn_tx_ring[0].hn_agg_pktmax;
+	return (sysctl_handle_int(oidp, &pkts, 0, req));
+}
+
+static int
+hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int align;
+
+	align = sc->hn_tx_ring[0].hn_agg_align;
+	return (sysctl_handle_int(oidp, &align, 0, req));
+}
+
+static void
+hn_chan_polling(struct vmbus_channel *chan, u_int pollhz)
+{
+	if (pollhz == 0)
+		vmbus_chan_poll_disable(chan);
+	else
+		vmbus_chan_poll_enable(chan, pollhz);
+}
+
+static void
+hn_polling(struct hn_softc *sc, u_int pollhz)
+{
+	int nsubch = sc->hn_rx_ring_inuse - 1;
+
+	HN_LOCK_ASSERT(sc);
+
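+	/*
+	 * Apply the polling rate to the sub-channels first, then to
+	 * the primary channel.
+	 */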
+	if (nsubch > 0) {
+		struct vmbus_channel **subch;
+		int i;
+
+		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
+		for (i = 0; i < nsubch; ++i)
+			hn_chan_polling(subch[i], pollhz);
+		vmbus_subchan_rel(subch, nsubch);
+	}
+	hn_chan_polling(sc->hn_prichan, pollhz);
+}
+
+static int
+hn_polling_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int pollhz, error;
+
+	pollhz = sc->hn_pollhz;
+	error = sysctl_handle_int(oidp, &pollhz, 0, req);
+	if (error || req->newptr == NULL)
+		return (error);
+
+	if (pollhz != 0 &&
+	    (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX))
+		return (EINVAL);
+
+	HN_LOCK(sc);
+	if (sc->hn_pollhz != pollhz) {
+		sc->hn_pollhz = pollhz;
+		if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) &&
+		    (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
+			hn_polling(sc, sc->hn_pollhz);
+	}
+	HN_UNLOCK(sc);
+
+	return (0);
+}
+
+static int
+hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	char verstr[16];
+
+	snprintf(verstr, sizeof(verstr), "%u.%u",
+	    HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
+	    HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
+	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
+}
+
+static int
+hn_caps_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	char caps_str[128];
+	uint32_t caps;
+
+	HN_LOCK(sc);
+	caps = sc->hn_caps;
+	HN_UNLOCK(sc);
+	snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS);
+	return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req);
+}
+
+static int
+hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	char assist_str[128];
+	uint32_t hwassist;
+
+	HN_LOCK(sc);
+	hwassist = sc->hn_ifp->if_hwassist;
+	HN_UNLOCK(sc);
+	snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS);
+	return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req);
+}
+
+static int
+hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	char filter_str[128];
+	uint32_t filter;
+
+	HN_LOCK(sc);
+	filter = sc->hn_rx_filter;
+	HN_UNLOCK(sc);
+	snprintf(filter_str, sizeof(filter_str), "%b", filter,
+	    NDIS_PACKET_TYPES);
+	return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req);
+}
+
+static int
+hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int error;
+
+	HN_LOCK(sc);
+
+	error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
+	if (error || req->newptr == NULL)
+		goto back;
+
+	if ((sc->hn_flags & HN_FLAG_RXVF) ||
+	    (hn_xpnt_vf && sc->hn_vf_ifp != NULL)) {
+		/*
+		 * RSS key is synchronized with the VF's; don't allow
+		 * users to change it.
+		 */
+		error = EBUSY;
+		goto back;
+	}
+
+	error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
+	if (error)
+		goto back;
+	sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
+
+	if (sc->hn_rx_ring_inuse > 1) {
+		error = hn_rss_reconfig(sc);
+	} else {
+		/* Not RSS capable, at least for now; just save the RSS key. */
+		error = 0;
+	}
+back:
+	HN_UNLOCK(sc);
+	return (error);
+}
+
+static int
+hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int error;
+
+	HN_LOCK(sc);
+
+	error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
+	if (error || req->newptr == NULL)
+		goto back;
+
+	/*
+	 * Don't allow the RSS indirect table to be changed if this
+	 * interface is not currently RSS capable.
+	 */
+	if (sc->hn_rx_ring_inuse == 1) {
+		error = EOPNOTSUPP;
+		goto back;
+	}
+
+	error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
+	if (error)
+		goto back;
+	sc->hn_flags |= HN_FLAG_HAS_RSSIND;
+
+	hn_rss_ind_fixup(sc);
+	error = hn_rss_reconfig(sc);
+back:
+	HN_UNLOCK(sc);
+	return (error);
+}
+
+static int
+hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	char hash_str[128];
+	uint32_t hash;
+
+	HN_LOCK(sc);
+	hash = sc->hn_rss_hash;
+	HN_UNLOCK(sc);
+	snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
+	return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
+}
+
+static int
+hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	char hash_str[128];
+	uint32_t hash;
+
+	HN_LOCK(sc);
+	hash = sc->hn_rss_hcap;
+	HN_UNLOCK(sc);
+	snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
+	return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
+}
+
+static int
+hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	char hash_str[128];
+	uint32_t hash;
+
+	HN_LOCK(sc);
+	hash = sc->hn_rx_ring[0].hn_mbuf_hash;
+	HN_UNLOCK(sc);
+	snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
+	return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
+}
+
+static int
+hn_vf_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	char vf_name[IFNAMSIZ + 1];
+	struct ifnet *vf_ifp;
+
+	HN_LOCK(sc);
+	vf_name[0] = '\0';
+	vf_ifp = sc->hn_vf_ifp;
+	if (vf_ifp != NULL)
+		snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname);
+	HN_UNLOCK(sc);
+	return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req);
+}
+
+static int
+hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	char vf_name[IFNAMSIZ + 1];
+	struct ifnet *vf_ifp;
+
+	HN_LOCK(sc);
+	vf_name[0] = '\0';
+	vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp;
+	if (vf_ifp != NULL)
+		snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname);
+	HN_UNLOCK(sc);
+	return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req);
+}
+
+static int
+hn_vflist_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct rm_priotracker pt;
+	struct sbuf *sb;
+	int error, i;
+	bool first;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+
+	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
+	if (sb == NULL)
+		return (ENOMEM);
+
+	rm_rlock(&hn_vfmap_lock, &pt);
+
+	first = true;
+	for (i = 0; i < hn_vfmap_size; ++i) {
+		struct ifnet *ifp;
+
+		if (hn_vfmap[i] == NULL)
+			continue;
+
+		ifp = ifnet_byindex(i);
+		if (ifp != NULL) {
+			if (first)
+				sbuf_printf(sb, "%s", ifp->if_xname);
+			else
+				sbuf_printf(sb, " %s", ifp->if_xname);
+			first = false;
+		}
+	}
+
+	rm_runlock(&hn_vfmap_lock, &pt);
+
+	error = sbuf_finish(sb);
+	sbuf_delete(sb);
+	return (error);
+}
+
+static int
+hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct rm_priotracker pt;
+	struct sbuf *sb;
+	int error, i;
+	bool first;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+
+	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
+	if (sb == NULL)
+		return (ENOMEM);
+
+	rm_rlock(&hn_vfmap_lock, &pt);
+
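+	/*
+	 * hn_vfmap is indexed by the VF's if_index; each non-NULL entry
+	 * is the hn(4) ifnet that VF is mapped to.
+	 */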
+	first = true;
+	for (i = 0; i < hn_vfmap_size; ++i) {
+		struct ifnet *ifp, *hn_ifp;
+
+		hn_ifp = hn_vfmap[i];
+		if (hn_ifp == NULL)
+			continue;
+
+		ifp = ifnet_byindex(i);
+		if (ifp != NULL) {
+			if (first) {
+				sbuf_printf(sb, "%s:%s", ifp->if_xname,
+				    hn_ifp->if_xname);
+			} else {
+				sbuf_printf(sb, " %s:%s", ifp->if_xname,
+				    hn_ifp->if_xname);
+			}
+			first = false;
+		}
+	}
+
+	rm_runlock(&hn_vfmap_lock, &pt);
+
+	error = sbuf_finish(sb);
+	sbuf_delete(sb);
+	return (error);
+}
+
+static int
+hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int error, onoff = 0;
+
+	if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF)
+		onoff = 1;
+	error = sysctl_handle_int(oidp, &onoff, 0, req);
+	if (error || req->newptr == NULL)
+		return (error);
+
+	HN_LOCK(sc);
+	/* NOTE: hn_vf_lock for hn_transmit() */
+	rm_wlock(&sc->hn_vf_lock);
+	if (onoff)
+		sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
+	else
+		sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF;
+	rm_wunlock(&sc->hn_vf_lock);
+	HN_UNLOCK(sc);
+
+	return (0);
+}
+
+static int
+hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int enabled = 0;
+
+	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
+		enabled = 1;
+	return (sysctl_handle_int(oidp, &enabled, 0, req));
+}
+
+static int
+hn_check_iplen(const struct mbuf *m, int hoff)
+{
+	const struct ip *ip;
+	int len, iphlen, iplen;
+	const struct tcphdr *th;
+	int thoff;				/* TCP data offset */
+
+	len = hoff + sizeof(struct ip);
+
+	/* The packet must be at least the size of an IP header. */
+	if (m->m_pkthdr.len < len)
+		return IPPROTO_DONE;
+
+	/* The fixed IP header must reside completely in the first mbuf. */
+	if (m->m_len < len)
+		return IPPROTO_DONE;
+
+	ip = mtodo(m, hoff);
+
+	/* Bound check the packet's stated IP header length. */
+	iphlen = ip->ip_hl << 2;
+	if (iphlen < sizeof(struct ip))		/* minimum header length */
+		return IPPROTO_DONE;
+
+	/* The full IP header must reside completely in the one mbuf. */
+	if (m->m_len < hoff + iphlen)
+		return IPPROTO_DONE;
+
+	iplen = ntohs(ip->ip_len);
+
+	/*
+	 * Check that the amount of data in the buffers is at
+	 * least as much as the IP header would have us expect.
+	 */
+	if (m->m_pkthdr.len < hoff + iplen)
+		return IPPROTO_DONE;
+
+	/*
+	 * Ignore IP fragments.
+	 */
+	if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
+		return IPPROTO_DONE;
+
+	/*
+	 * The TCP/IP or UDP/IP header must be entirely contained within
+	 * the first fragment of a packet.
+	 */
+	switch (ip->ip_p) {
+	case IPPROTO_TCP:
+		if (iplen < iphlen + sizeof(struct tcphdr))
+			return IPPROTO_DONE;
+		if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
+			return IPPROTO_DONE;
+		th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
+		thoff = th->th_off << 2;
+		if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
+			return IPPROTO_DONE;
+		if (m->m_len < hoff + iphlen + thoff)
+			return IPPROTO_DONE;
+		break;
+	case IPPROTO_UDP:
+		if (iplen < iphlen + sizeof(struct udphdr))
+			return IPPROTO_DONE;
+		if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
+			return IPPROTO_DONE;
+		break;
+	default:
+		if (iplen < iphlen)
+			return IPPROTO_DONE;
+		break;
+	}
+	return ip->ip_p;
+}
+
+static void
+hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto)
+{
+	const struct ether_header *eh;
+	uint16_t etype;
+	int hoff;
+
+	hoff = sizeof(*eh);
+	/* Checked at the beginning of this function. */
+	KASSERT(m_new->m_len >= hoff, ("not ethernet frame"));
+
+	eh = mtod(m_new, const struct ether_header *);
+	etype = ntohs(eh->ether_type);
+	if (etype == ETHERTYPE_VLAN) {
+		const struct ether_vlan_header *evl;
+
+		hoff = sizeof(*evl);
+		if (m_new->m_len < hoff)
+			return;
+		evl = mtod(m_new, const struct ether_vlan_header *);
+		etype = ntohs(evl->evl_proto);
+	}
+	*l3proto = etype;
+
+	if (etype == ETHERTYPE_IP)
+		*l4proto = hn_check_iplen(m_new, hoff);
+	else
+		*l4proto = IPPROTO_DONE;
+}
+
+static int
+hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
+{
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
+	device_t dev = sc->hn_dev;
+#if defined(INET) || defined(INET6)
+#if __FreeBSD_version >= 1100095
+	int lroent_cnt;
+#endif
+#endif
+	int i;
+
+	/*
+	 * Create RXBUF for reception.
+	 *
+	 * NOTE:
+	 * - It is shared by all channels.
+	 * - A large enough buffer is allocated; certain versions of the
+	 *   NVS may further limit the usable space.
+	 */
+	sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
+	    PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma,
+	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
+	if (sc->hn_rxbuf == NULL) {
+		device_printf(sc->hn_dev, "allocate rxbuf failed\n");
+		return (ENOMEM);
+	}
+
+	sc->hn_rx_ring_cnt = ring_cnt;
+	sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt;
+
+	sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
+	    M_DEVBUF, M_WAITOK | M_ZERO);
+
+#if defined(INET) || defined(INET6)
+#if __FreeBSD_version >= 1100095
+	lroent_cnt = hn_lro_entry_count;
+	if (lroent_cnt < TCP_LRO_ENTRIES)
+		lroent_cnt = TCP_LRO_ENTRIES;
+	if (bootverbose)
+		device_printf(dev, "LRO: entry count %d\n", lroent_cnt);
+#endif
+#endif	/* INET || INET6 */
+
+	ctx = device_get_sysctl_ctx(dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+	/* Create dev.hn.UNIT.rx sysctl tree */
+	sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+		struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
+
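+		/*
+		 * One DMA block per channel backs both its TX and RX
+		 * bufrings (HN_TXBR_SIZE + HN_RXBR_SIZE); the split is
+		 * done when the channel is opened in hn_chan_attach().
+		 */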
+		rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
+		    PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE,
+		    &rxr->hn_br_dma, BUS_DMA_WAITOK);
+		if (rxr->hn_br == NULL) {
+			device_printf(dev, "allocate bufring failed\n");
+			return (ENOMEM);
+		}
+
+		if (hn_trust_hosttcp)
+			rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
+		if (hn_trust_hostudp)
+			rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
+		if (hn_trust_hostip)
+			rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
+		rxr->hn_mbuf_hash = NDIS_HASH_ALL;
+		rxr->hn_ifp = sc->hn_ifp;
+		if (i < sc->hn_tx_ring_cnt)
+			rxr->hn_txr = &sc->hn_tx_ring[i];
+		rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF;
+		rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK);
+		rxr->hn_rx_idx = i;
+		rxr->hn_rxbuf = sc->hn_rxbuf;
+
+		/*
+		 * Initialize LRO.
+		 */
+#if defined(INET) || defined(INET6)
+#if __FreeBSD_version >= 1100095
+		tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt,
+		    hn_lro_mbufq_depth);
+#else
+		tcp_lro_init(&rxr->hn_lro);
+		rxr->hn_lro.ifp = sc->hn_ifp;
+#endif
+#if __FreeBSD_version >= 1100099
+		rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
+		rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
+#endif
+#endif	/* INET || INET6 */
+
+		if (sc->hn_rx_sysctl_tree != NULL) {
+			char name[16];
+
+			/*
+			 * Create per RX ring sysctl tree:
+			 * dev.hn.UNIT.rx.RINGID
+			 */
+			snprintf(name, sizeof(name), "%d", i);
+			rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
+			    SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
+			    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+
+			if (rxr->hn_rx_sysctl_tree != NULL) {
+				SYSCTL_ADD_ULONG(ctx,
+				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
+				    OID_AUTO, "packets", CTLFLAG_RW,
+				    &rxr->hn_pkts, "# of packets received");
+				SYSCTL_ADD_ULONG(ctx,
+				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
+				    OID_AUTO, "rss_pkts", CTLFLAG_RW,
+				    &rxr->hn_rss_pkts,
+				    "# of packets w/ RSS info received");
+				SYSCTL_ADD_INT(ctx,
+				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
+				    OID_AUTO, "pktbuf_len", CTLFLAG_RD,
+				    &rxr->hn_pktbuf_len, 0,
+				    "Temporary channel packet buffer length");
+			}
+		}
+	}
+
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
+	    CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_rx_ring, hn_lro.lro_queued),
+#if __FreeBSD_version < 1100095
+	    hn_rx_stat_int_sysctl,
+#else
+	    hn_rx_stat_u64_sysctl,
+#endif
+	    "LU", "LRO queued");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
+	    CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
+#if __FreeBSD_version < 1100095
+	    hn_rx_stat_int_sysctl,
+#else
+	    hn_rx_stat_u64_sysctl,
+#endif
+	    "LU", "LRO flushed");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_rx_ring, hn_lro_tried),
+	    hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
+#if __FreeBSD_version >= 1100099
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
+	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+	    hn_lro_lenlim_sysctl, "IU",
+	    "Max # of data bytes to be aggregated by LRO");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+	    hn_lro_ackcnt_sysctl, "I",
+	    "Max # of ACKs to be aggregated by LRO");
+#endif
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
+	    hn_trust_hcsum_sysctl, "I",
+	    "Trust tcp segment verification on host side, "
+	    "when csum info is missing");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
+	    hn_trust_hcsum_sysctl, "I",
+	    "Trust udp datagram verification on host side, "
+	    "when csum info is missing");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP,
+	    hn_trust_hcsum_sysctl, "I",
+	    "Trust ip packet verification on host side, "
+	    "when csum info is missing");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_rx_ring, hn_csum_ip),
+	    hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_rx_ring, hn_csum_tcp),
+	    hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_rx_ring, hn_csum_udp),
+	    hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_rx_ring, hn_csum_trusted),
+	    hn_rx_stat_ulong_sysctl, "LU",
+	    "# of packets that we trust host's csum verification");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_rx_ring, hn_small_pkts),
+	    hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_rx_ring, hn_ack_failed),
+	    hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt",
+	    CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse",
+	    CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings");
+
+	return (0);
+}
+
+static void
+hn_destroy_rx_data(struct hn_softc *sc)
+{
+	int i;
+
+	if (sc->hn_rxbuf != NULL) {
+		if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0)
+			hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf);
+		else
+			device_printf(sc->hn_dev, "RXBUF is referenced\n");
+		sc->hn_rxbuf = NULL;
+	}
+
+	if (sc->hn_rx_ring_cnt == 0)
+		return;
+
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+		struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
+
+		if (rxr->hn_br == NULL)
+			continue;
+		if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) {
+			hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br);
+		} else {
+			device_printf(sc->hn_dev,
+			    "%dth channel bufring is referenced\n", i);
+		}
+		rxr->hn_br = NULL;
+
+#if defined(INET) || defined(INET6)
+		tcp_lro_free(&rxr->hn_lro);
+#endif
+		free(rxr->hn_pktbuf, M_DEVBUF);
+	}
+	free(sc->hn_rx_ring, M_DEVBUF);
+	sc->hn_rx_ring = NULL;
+
+	sc->hn_rx_ring_cnt = 0;
+	sc->hn_rx_ring_inuse = 0;
+}
+
+static int
+hn_tx_ring_create(struct hn_softc *sc, int id)
+{
+	struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
+	device_t dev = sc->hn_dev;
+	bus_dma_tag_t parent_dtag;
+	int error, i;
+
+	txr->hn_sc = sc;
+	txr->hn_tx_idx = id;
+
+#ifndef HN_USE_TXDESC_BUFRING
+	mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
+#endif
+	mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
+
+	txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
+	txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
+	    M_DEVBUF, M_WAITOK | M_ZERO);
+#ifndef HN_USE_TXDESC_BUFRING
+	SLIST_INIT(&txr->hn_txlist);
+#else
+	txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF,
+	    M_WAITOK, &txr->hn_tx_lock);
+#endif
+
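+	/*
+	 * Pick the TX taskqueue: either the VMBus event taskqueue of
+	 * the CPU this ring is bound to, or one of the driver's own
+	 * TX taskqueues.
+	 */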
+	if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) {
+		txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ(
+		    device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id));
+	} else {
+		txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt];
+	}
+
+#ifdef HN_IFSTART_SUPPORT
+	if (hn_use_if_start) {
+		txr->hn_txeof = hn_start_txeof;
+		TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
+		TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
+	} else
+#endif
+	{
+		int br_depth;
+
+		txr->hn_txeof = hn_xmit_txeof;
+		TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr);
+		TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr);
+
+		br_depth = hn_get_txswq_depth(txr);
+		txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF,
+		    M_WAITOK, &txr->hn_tx_lock);
+	}
+
+	txr->hn_direct_tx_size = hn_direct_tx_size;
+
+	/*
+	 * Always schedule transmission instead of trying to do direct
+	 * transmission.  This gives the best performance so far.
+	 */
+	txr->hn_sched_tx = 1;
+
+	parent_dtag = bus_get_dma_tag(dev);
+
+	/* DMA tag for RNDIS packet messages. */
+	error = bus_dma_tag_create(parent_dtag, /* parent */
+	    HN_RNDIS_PKT_ALIGN,		/* alignment */
+	    HN_RNDIS_PKT_BOUNDARY,	/* boundary */
+	    BUS_SPACE_MAXADDR,		/* lowaddr */
+	    BUS_SPACE_MAXADDR,		/* highaddr */
+	    NULL, NULL,			/* filter, filterarg */
+	    HN_RNDIS_PKT_LEN,		/* maxsize */
+	    1,				/* nsegments */
+	    HN_RNDIS_PKT_LEN,		/* maxsegsize */
+	    0,				/* flags */
+	    NULL,			/* lockfunc */
+	    NULL,			/* lockfuncarg */
+	    &txr->hn_tx_rndis_dtag);
+	if (error) {
+		device_printf(dev, "failed to create rndis dmatag\n");
+		return error;
+	}
+
+	/* DMA tag for data. */
+	error = bus_dma_tag_create(parent_dtag, /* parent */
+	    1,				/* alignment */
+	    HN_TX_DATA_BOUNDARY,	/* boundary */
+	    BUS_SPACE_MAXADDR,		/* lowaddr */
+	    BUS_SPACE_MAXADDR,		/* highaddr */
+	    NULL, NULL,			/* filter, filterarg */
+	    HN_TX_DATA_MAXSIZE,		/* maxsize */
+	    HN_TX_DATA_SEGCNT_MAX,	/* nsegments */
+	    HN_TX_DATA_SEGSIZE,		/* maxsegsize */
+	    0,				/* flags */
+	    NULL,			/* lockfunc */
+	    NULL,			/* lockfuncarg */
+	    &txr->hn_tx_data_dtag);
+	if (error) {
+		device_printf(dev, "failed to create data dmatag\n");
+		return error;
+	}
+
+	for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
+		struct hn_txdesc *txd = &txr->hn_txdesc[i];
+
+		txd->txr = txr;
+		txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
+		STAILQ_INIT(&txd->agg_list);
+
+		/*
+		 * Allocate and load RNDIS packet message.
+		 */
+		error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
+		    (void **)&txd->rndis_pkt,
+		    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
+		    &txd->rndis_pkt_dmap);
+		if (error) {
+			device_printf(dev,
+			    "failed to allocate rndis_packet_msg, %d\n", i);
+			return error;
+		}
+
+		error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
+		    txd->rndis_pkt_dmap,
+		    txd->rndis_pkt, HN_RNDIS_PKT_LEN,
+		    hyperv_dma_map_paddr, &txd->rndis_pkt_paddr,
+		    BUS_DMA_NOWAIT);
+		if (error) {
+			device_printf(dev,
+			    "failed to load rndis_packet_msg, %d\n", i);
+			bus_dmamem_free(txr->hn_tx_rndis_dtag,
+			    txd->rndis_pkt, txd->rndis_pkt_dmap);
+			return error;
+		}
+
+		/* DMA map for TX data. */
+		error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
+		    &txd->data_dmap);
+		if (error) {
+			device_printf(dev,
+			    "failed to allocate tx data dmamap\n");
+			bus_dmamap_unload(txr->hn_tx_rndis_dtag,
+			    txd->rndis_pkt_dmap);
+			bus_dmamem_free(txr->hn_tx_rndis_dtag,
+			    txd->rndis_pkt, txd->rndis_pkt_dmap);
+			return error;
+		}
+
+		/* All set, put it to list */
+		txd->flags |= HN_TXD_FLAG_ONLIST;
+#ifndef HN_USE_TXDESC_BUFRING
+		SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
+#else
+		buf_ring_enqueue(txr->hn_txdesc_br, txd);
+#endif
+	}
+	txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
+
+	if (sc->hn_tx_sysctl_tree != NULL) {
+		struct sysctl_oid_list *child;
+		struct sysctl_ctx_list *ctx;
+		char name[16];
+
+		/*
+		 * Create per TX ring sysctl tree:
+		 * dev.hn.UNIT.tx.RINGID
+		 */
+		ctx = device_get_sysctl_ctx(dev);
+		child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
+
+		snprintf(name, sizeof(name), "%d", id);
+		txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
+		    name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+
+		if (txr->hn_tx_sysctl_tree != NULL) {
+			child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
+
+#ifdef HN_DEBUG
+			SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
+			    CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
+			    "# of available TX descs");
+#endif
+#ifdef HN_IFSTART_SUPPORT
+			if (!hn_use_if_start)
+#endif
+			{
+				SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive",
+				    CTLFLAG_RD, &txr->hn_oactive, 0,
+				    "over active");
+			}
+			SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
+			    CTLFLAG_RW, &txr->hn_pkts,
+			    "# of packets transmitted");
+			SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends",
+			    CTLFLAG_RW, &txr->hn_sends, "# of sends");
+		}
+	}
+
+	return 0;
+}
+
+static void
+hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
+{
+	struct hn_tx_ring *txr = txd->txr;
+
+	KASSERT(txd->m == NULL, ("still has mbuf installed"));
+	KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped"));
+
+	bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap);
+	bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt,
+	    txd->rndis_pkt_dmap);
+	bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
+}
+
+static void
+hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd)
+{
+
+	KASSERT(txd->refs == 0 || txd->refs == 1,
+	    ("invalid txd refs %d", txd->refs));
+
+	/* Aggregated txds will be freed by their aggregating txd. */
+	if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) {
+		int freed;
+
+		freed = hn_txdesc_put(txr, txd);
+		KASSERT(freed, ("can't free txdesc"));
+	}
+}
+
+static void
+hn_tx_ring_destroy(struct hn_tx_ring *txr)
+{
+	int i;
+
+	if (txr->hn_txdesc == NULL)
+		return;
+
+	/*
+	 * NOTE:
+	 * Because the freeing of aggregated txds will be deferred
+	 * to the aggregating txd, two passes are used here:
+	 * - The first pass GCes any pending txds.  This GC is necessary,
+	 *   since if the channels are revoked, hypervisor will not
+	 *   deliver send-done for all pending txds.
+	 * - The second pass frees the busdma resources, i.e. after all
+	 *   txds have been freed.
+	 */
+	for (i = 0; i < txr->hn_txdesc_cnt; ++i)
+		hn_txdesc_gc(txr, &txr->hn_txdesc[i]);
+	for (i = 0; i < txr->hn_txdesc_cnt; ++i)
+		hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]);
+
+	if (txr->hn_tx_data_dtag != NULL)
+		bus_dma_tag_destroy(txr->hn_tx_data_dtag);
+	if (txr->hn_tx_rndis_dtag != NULL)
+		bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
+
+#ifdef HN_USE_TXDESC_BUFRING
+	buf_ring_free(txr->hn_txdesc_br, M_DEVBUF);
+#endif
+
+	free(txr->hn_txdesc, M_DEVBUF);
+	txr->hn_txdesc = NULL;
+
+	if (txr->hn_mbuf_br != NULL)
+		buf_ring_free(txr->hn_mbuf_br, M_DEVBUF);
+
+#ifndef HN_USE_TXDESC_BUFRING
+	mtx_destroy(&txr->hn_txlist_spin);
+#endif
+	mtx_destroy(&txr->hn_tx_lock);
+}
+
+static int
+hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
+{
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
+	int i;
+
+	/*
+	 * Create TXBUF for chimney sending.
+	 *
+	 * NOTE: It is shared by all channels.
+	 */
+	sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev),
+	    PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma,
+	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
+	if (sc->hn_chim == NULL) {
+		device_printf(sc->hn_dev, "allocate txbuf failed\n");
+		return (ENOMEM);
+	}
+
+	sc->hn_tx_ring_cnt = ring_cnt;
+	sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
+
+	sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
+	    M_DEVBUF, M_WAITOK | M_ZERO);
+
+	ctx = device_get_sysctl_ctx(sc->hn_dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
+
+	/* Create dev.hn.UNIT.tx sysctl tree */
+	sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		int error;
+
+		error = hn_tx_ring_create(sc, i);
+		if (error)
+			return error;
+	}
+
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_tx_ring, hn_no_txdescs),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_tx_ring, hn_send_failed),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failures");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_tx_ring, hn_txdma_failed),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failures");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_tx_ring, hn_flush_failed),
+	    hn_tx_stat_ulong_sysctl, "LU",
+	    "# of packet transmission aggregation flush failures");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_tx_ring, hn_tx_collapsed),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_tx_ring, hn_tx_chimney),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried",
+	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_tx_ring, hn_tx_chimney_tried),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
+	    CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
+	    "# of total TX descs");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
+	    CTLFLAG_RD, &sc->hn_chim_szmax, 0,
+	    "Chimney send packet size upper boundary");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+	    hn_chim_size_sysctl, "I", "Chimney send packet size limit");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_tx_ring, hn_direct_tx_size),
+	    hn_tx_conf_int_sysctl, "I",
+	    "Size of the packet for direct transmission");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+	    __offsetof(struct hn_tx_ring, hn_sched_tx),
+	    hn_tx_conf_int_sysctl, "I",
+	    "Always schedule transmission "
+	    "instead of doing direct transmission");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt",
+	    CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
+	    CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax",
+	    CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0,
+	    "Applied packet transmission aggregation size");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax",
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    hn_txagg_pktmax_sysctl, "I",
+	    "Applied packet transmission aggregation packets");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align",
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    hn_txagg_align_sysctl, "I",
+	    "Applied packet transmission aggregation alignment");
+
+	return 0;
+}
+
+static void
+hn_set_chim_size(struct hn_softc *sc, int chim_size)
+{
+	int i;
+
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
+		sc->hn_tx_ring[i].hn_chim_size = chim_size;
+}
+
+static void
+hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu)
+{
+	struct ifnet *ifp = sc->hn_ifp;
+	u_int hw_tsomax;
+	int tso_minlen;
+
+	HN_LOCK_ASSERT(sc);
+
+	if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0)
+		return;
+
+	KASSERT(sc->hn_ndis_tso_sgmin >= 2,
+	    ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin));
+	tso_minlen = sc->hn_ndis_tso_sgmin * mtu;
+
+	KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen &&
+	    sc->hn_ndis_tso_szmax <= IP_MAXPACKET,
+	    ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax));
+
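+	/*
+	 * Clamp tso_maxlen to [tso_minlen, min(IP_MAXPACKET,
+	 * hn_ndis_tso_szmax)], then subtract the L2 header length.
+	 */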
+	if (tso_maxlen < tso_minlen)
+		tso_maxlen = tso_minlen;
+	else if (tso_maxlen > IP_MAXPACKET)
+		tso_maxlen = IP_MAXPACKET;
+	if (tso_maxlen > sc->hn_ndis_tso_szmax)
+		tso_maxlen = sc->hn_ndis_tso_szmax;
+	hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
+
+	if (hn_xpnt_vf_isready(sc)) {
+		if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax)
+			hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax;
+	}
+	ifp->if_hw_tsomax = hw_tsomax;
+	if (bootverbose)
+		if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax);
+}
+
+static void
+hn_fixup_tx_data(struct hn_softc *sc)
+{
+	uint64_t csum_assist;
+	int i;
+
+	hn_set_chim_size(sc, sc->hn_chim_szmax);
+	if (hn_tx_chimney_size > 0 &&
+	    hn_tx_chimney_size < sc->hn_chim_szmax)
+		hn_set_chim_size(sc, hn_tx_chimney_size);
+
+	csum_assist = 0;
+	if (sc->hn_caps & HN_CAP_IPCS)
+		csum_assist |= CSUM_IP;
+	if (sc->hn_caps & HN_CAP_TCP4CS)
+		csum_assist |= CSUM_IP_TCP;
+	if ((sc->hn_caps & HN_CAP_UDP4CS) && hn_enable_udp4cs)
+		csum_assist |= CSUM_IP_UDP;
+	if (sc->hn_caps & HN_CAP_TCP6CS)
+		csum_assist |= CSUM_IP6_TCP;
+	if ((sc->hn_caps & HN_CAP_UDP6CS) && hn_enable_udp6cs)
+		csum_assist |= CSUM_IP6_UDP;
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
+		sc->hn_tx_ring[i].hn_csum_assist = csum_assist;
+
+	if (sc->hn_caps & HN_CAP_HASHVAL) {
+		/*
+		 * Support HASHVAL pktinfo on TX path.
+		 */
+		if (bootverbose)
+			if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n");
+		for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
+			sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL;
+	}
+}
+
+static void
+hn_fixup_rx_data(struct hn_softc *sc)
+{
+
+	if (sc->hn_caps & HN_CAP_UDPHASH) {
+		int i;
+
+		for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+			sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_UDP_HASH;
+	}
+}
+
+static void
+hn_destroy_tx_data(struct hn_softc *sc)
+{
+	int i;
+
+	if (sc->hn_chim != NULL) {
+		if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) {
+			hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim);
+		} else {
+			device_printf(sc->hn_dev,
+			    "chimney sending buffer is referenced\n");
+		}
+		sc->hn_chim = NULL;
+	}
+
+	if (sc->hn_tx_ring_cnt == 0)
+		return;
+
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
+		hn_tx_ring_destroy(&sc->hn_tx_ring[i]);
+
+	free(sc->hn_tx_ring, M_DEVBUF);
+	sc->hn_tx_ring = NULL;
+
+	sc->hn_tx_ring_cnt = 0;
+	sc->hn_tx_ring_inuse = 0;
+}
+
+#ifdef HN_IFSTART_SUPPORT
+
+static void
+hn_start_taskfunc(void *xtxr, int pending __unused)
+{
+	struct hn_tx_ring *txr = xtxr;
+
+	mtx_lock(&txr->hn_tx_lock);
+	hn_start_locked(txr, 0);
+	mtx_unlock(&txr->hn_tx_lock);
+}
+
+static int
+hn_start_locked(struct hn_tx_ring *txr, int len)
+{
+	struct hn_softc *sc = txr->hn_sc;
+	struct ifnet *ifp = sc->hn_ifp;
+	int sched = 0;
+
+	KASSERT(hn_use_if_start,
+	    ("hn_start_locked is called while if_start is disabled"));
+	KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
+	mtx_assert(&txr->hn_tx_lock, MA_OWNED);
+	KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
+
+	if (__predict_false(txr->hn_suspended))
+		return (0);
+
+	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
+	    IFF_DRV_RUNNING)
+		return (0);
+
+	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
+		struct hn_txdesc *txd;
+		struct mbuf *m_head;
+		int error;
+
+		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
+		if (m_head == NULL)
+			break;
+
+		if (len > 0 && m_head->m_pkthdr.len > len) {
+			/*
+			 * This send could be time consuming; let callers
+			 * dispatch this packet (and any following packets)
+			 * to the TX taskqueue.
+			 */
+			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+			sched = 1;
+			break;
+		}
+
+#if defined(INET6) || defined(INET)
+		if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
+			m_head = hn_tso_fixup(m_head);
+			if (__predict_false(m_head == NULL)) {
+				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+				continue;
+			}
+		} else if (m_head->m_pkthdr.csum_flags &
+		    (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) {
+			m_head = hn_set_hlen(m_head);
+			if (__predict_false(m_head == NULL)) {
+				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+				continue;
+			}
+		}
+#endif
+
+		txd = hn_txdesc_get(txr);
+		if (txd == NULL) {
+			txr->hn_no_txdescs++;
+			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+			atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
+			break;
+		}
+
+		error = hn_encap(ifp, txr, txd, &m_head);
+		if (error) {
+			/* Both txd and m_head are freed */
+			KASSERT(txr->hn_agg_txd == NULL,
+			    ("encap failed w/ pending aggregating txdesc"));
+			continue;
+		}
+
+		if (txr->hn_agg_pktleft == 0) {
+			if (txr->hn_agg_txd != NULL) {
+				KASSERT(m_head == NULL,
+				    ("pending mbuf for aggregating txdesc"));
+				error = hn_flush_txagg(ifp, txr);
+				if (__predict_false(error)) {
+					atomic_set_int(&ifp->if_drv_flags,
+					    IFF_DRV_OACTIVE);
+					break;
+				}
+			} else {
+				KASSERT(m_head != NULL, ("mbuf was freed"));
+				error = hn_txpkt(ifp, txr, txd);
+				if (__predict_false(error)) {
+					/* txd is freed, but m_head is not */
+					IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+					atomic_set_int(&ifp->if_drv_flags,
+					    IFF_DRV_OACTIVE);
+					break;
+				}
+			}
+		}
+#ifdef INVARIANTS
+		else {
+			KASSERT(txr->hn_agg_txd != NULL,
+			    ("no aggregating txdesc"));
+			KASSERT(m_head == NULL,
+			    ("pending mbuf for aggregating txdesc"));
+		}
+#endif
+	}
+
+	/* Flush pending aggregated transmission. */
+	if (txr->hn_agg_txd != NULL)
+		hn_flush_txagg(ifp, txr);
+	return (sched);
+}
+
+static void
+hn_start(struct ifnet *ifp)
+{
+	struct hn_softc *sc = ifp->if_softc;
+	struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
+
+	if (txr->hn_sched_tx)
+		goto do_sched;
+
+	if (mtx_trylock(&txr->hn_tx_lock)) {
+		int sched;
+
+		sched = hn_start_locked(txr, txr->hn_direct_tx_size);
+		mtx_unlock(&txr->hn_tx_lock);
+		if (!sched)
+			return;
+	}
+do_sched:
+	taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
+}
+
+static void
+hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
+{
+	struct hn_tx_ring *txr = xtxr;
+
+	mtx_lock(&txr->hn_tx_lock);
+	atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
+	hn_start_locked(txr, 0);
+	mtx_unlock(&txr->hn_tx_lock);
+}
+
+static void
+hn_start_txeof(struct hn_tx_ring *txr)
+{
+	struct hn_softc *sc = txr->hn_sc;
+	struct ifnet *ifp = sc->hn_ifp;
+
+	KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
+
+	if (txr->hn_sched_tx)
+		goto do_sched;
+
+	if (mtx_trylock(&txr->hn_tx_lock)) {
+		int sched;
+
+		atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
+		sched = hn_start_locked(txr, txr->hn_direct_tx_size);
+		mtx_unlock(&txr->hn_tx_lock);
+		if (sched) {
+			taskqueue_enqueue(txr->hn_tx_taskq,
+			    &txr->hn_tx_task);
+		}
+	} else {
+do_sched:
+		/*
+		 * Release OACTIVE early, in the hope that others can
+		 * catch up.  The task will clear the flag again while
+		 * holding hn_tx_lock to avoid possible races.
+		 */
+		atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
+		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
+	}
+}
+
+#endif	/* HN_IFSTART_SUPPORT */
+
+static int
+hn_xmit(struct hn_tx_ring *txr, int len)
+{
+	struct hn_softc *sc = txr->hn_sc;
+	struct ifnet *ifp = sc->hn_ifp;
+	struct mbuf *m_head;
+	int sched = 0;
+
+	mtx_assert(&txr->hn_tx_lock, MA_OWNED);
+#ifdef HN_IFSTART_SUPPORT
+	KASSERT(hn_use_if_start == 0,
+	    ("hn_xmit is called while if_start is enabled"));
+#endif
+	KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
+
+	if (__predict_false(txr->hn_suspended))
+		return (0);
+
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
+		return (0);
+
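+	/*
+	 * drbr_peek() leaves the mbuf at the ring head; it is consumed
+	 * with drbr_advance() once sent, or requeued with drbr_putback()
+	 * when the transmission must be retried.
+	 */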
+	while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
+		struct hn_txdesc *txd;
+		int error;
+
+		if (len > 0 && m_head->m_pkthdr.len > len) {
+			/*
+			 * This send could be time consuming; let callers
+			 * dispatch this packet (and any following packets)
+			 * to the TX taskqueue.
+			 */
+			drbr_putback(ifp, txr->hn_mbuf_br, m_head);
+			sched = 1;
+			break;
+		}
+
+		txd = hn_txdesc_get(txr);
+		if (txd == NULL) {
+			txr->hn_no_txdescs++;
+			drbr_putback(ifp, txr->hn_mbuf_br, m_head);
+			txr->hn_oactive = 1;
+			break;
+		}
+
+		error = hn_encap(ifp, txr, txd, &m_head);
+		if (error) {
+			/* Both txd and m_head are freed; discard */
+			KASSERT(txr->hn_agg_txd == NULL,
+			    ("encap failed w/ pending aggregating txdesc"));
+			drbr_advance(ifp, txr->hn_mbuf_br);
+			continue;
+		}
+
+		if (txr->hn_agg_pktleft == 0) {
+			if (txr->hn_agg_txd != NULL) {
+				KASSERT(m_head == NULL,
+				    ("pending mbuf for aggregating txdesc"));
+				error = hn_flush_txagg(ifp, txr);
+				if (__predict_false(error)) {
+					txr->hn_oactive = 1;
+					break;
+				}
+			} else {
+				KASSERT(m_head != NULL, ("mbuf was freed"));
+				error = hn_txpkt(ifp, txr, txd);
+				if (__predict_false(error)) {
+					/* txd is freed, but m_head is not */
+					drbr_putback(ifp, txr->hn_mbuf_br,
+					    m_head);
+					txr->hn_oactive = 1;
+					break;
+				}
+			}
+		}
+#ifdef INVARIANTS
+		else {
+			KASSERT(txr->hn_agg_txd != NULL,
+			    ("no aggregating txdesc"));
+			KASSERT(m_head == NULL,
+			    ("pending mbuf for aggregating txdesc"));
+		}
+#endif
+
+		/* Sent */
+		drbr_advance(ifp, txr->hn_mbuf_br);
+	}
+
+	/* Flush pending aggregated transmission. */
+	if (txr->hn_agg_txd != NULL)
+		hn_flush_txagg(ifp, txr);
+	return (sched);
+}
+
+static int
+hn_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+	struct hn_softc *sc = ifp->if_softc;
+	struct hn_tx_ring *txr;
+	int error, idx = 0;
+
+	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
+		struct rm_priotracker pt;
+
+		rm_rlock(&sc->hn_vf_lock, &pt);
+		if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
+			struct mbuf *m_bpf = NULL;
+			int obytes, omcast;
+
+			obytes = m->m_pkthdr.len;
+			omcast = 0;
+			if (m->m_flags & M_MCAST)
+				omcast = 1;
+
+			if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) {
+				if (bpf_peers_present(ifp->if_bpf)) {
+					m_bpf = m_copypacket(m, M_NOWAIT);
+					if (m_bpf == NULL) {
+						/*
+						 * Failed to grab a shallow
+						 * copy; tap now.
+						 */
+						ETHER_BPF_MTAP(ifp, m);
+					}
+				}
+			} else {
+				ETHER_BPF_MTAP(ifp, m);
+			}
+
+			error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m);
+			rm_runlock(&sc->hn_vf_lock, &pt);
+
+			if (m_bpf != NULL) {
+				if (!error)
+					ETHER_BPF_MTAP(ifp, m_bpf);
+				m_freem(m_bpf);
+			}
+
+			if (error == ENOBUFS) {
+				if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
+			} else if (error) {
+				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+			} else {
+				if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+				if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes);
+				if (omcast) {
+					if_inc_counter(ifp, IFCOUNTER_OMCASTS,
+					    omcast);
+				}
+			}
+			return (error);
+		}
+		rm_runlock(&sc->hn_vf_lock, &pt);
+	}
+
+#if defined(INET6) || defined(INET)
+	/*
+	 * Perform TSO packet header fixup or get l2/l3 header length now,
+	 * since packet headers should be cache-hot.
+	 */
+	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
+		m = hn_tso_fixup(m);
+		if (__predict_false(m == NULL)) {
+			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+			return EIO;
+		}
+	} else if (m->m_pkthdr.csum_flags &
+	    (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) {
+		m = hn_set_hlen(m);
+		if (__predict_false(m == NULL)) {
+			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+			return EIO;
+		}
+	}
+#endif
+
+	/*
+	 * Select the TX ring based on flowid
+	 */
+	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
+#if defined(INET6) || defined(INET)
+		int tcpsyn = 0;
+
+		if (m->m_pkthdr.len < 128 &&
+		    (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) &&
+		    (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) {
+			m = hn_check_tcpsyn(m, &tcpsyn);
+			if (__predict_false(m == NULL)) {
+				if_inc_counter(ifp,
+				    IFCOUNTER_OERRORS, 1);
+				return (EIO);
+			}
+		}
+#else
+		const int tcpsyn = 0;
+#endif
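+		/*
+		 * TCP SYN segments are pinned to ring 0, presumably so
+		 * that connection setup always uses the primary channel;
+		 * other flows are spread by flowid.
+		 */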
+		if (tcpsyn)
+			idx = 0;
+		else
+			idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
+	}
+	txr = &sc->hn_tx_ring[idx];
+
+	error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
+	if (error) {
+		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
+		return error;
+	}
+
+	if (txr->hn_oactive)
+		return 0;
+
+	if (txr->hn_sched_tx)
+		goto do_sched;
+
+	if (mtx_trylock(&txr->hn_tx_lock)) {
+		int sched;
+
+		sched = hn_xmit(txr, txr->hn_direct_tx_size);
+		mtx_unlock(&txr->hn_tx_lock);
+		if (!sched)
+			return 0;
+	}
+do_sched:
+	taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
+	return 0;
+}
+
+static void
+hn_tx_ring_qflush(struct hn_tx_ring *txr)
+{
+	struct mbuf *m;
+
+	mtx_lock(&txr->hn_tx_lock);
+	while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
+		m_freem(m);
+	mtx_unlock(&txr->hn_tx_lock);
+}
+
+static void
+hn_xmit_qflush(struct ifnet *ifp)
+{
+	struct hn_softc *sc = ifp->if_softc;
+	struct rm_priotracker pt;
+	int i;
+
+	for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
+		hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
+	if_qflush(ifp);
+
+	rm_rlock(&sc->hn_vf_lock, &pt);
+	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
+		sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp);
+	rm_runlock(&sc->hn_vf_lock, &pt);
+}
+
+static void
+hn_xmit_txeof(struct hn_tx_ring *txr)
+{
+
+	if (txr->hn_sched_tx)
+		goto do_sched;
+
+	if (mtx_trylock(&txr->hn_tx_lock)) {
+		int sched;
+
+		txr->hn_oactive = 0;
+		sched = hn_xmit(txr, txr->hn_direct_tx_size);
+		mtx_unlock(&txr->hn_tx_lock);
+		if (sched) {
+			taskqueue_enqueue(txr->hn_tx_taskq,
+			    &txr->hn_tx_task);
+		}
+	} else {
+do_sched:
+		/*
+		 * Release oactive early, in the hope that others can
+		 * catch up.  The task will clear oactive again while
+		 * holding hn_tx_lock to avoid possible races.
+		 */
+		txr->hn_oactive = 0;
+		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
+	}
+}
+
+static void
+hn_xmit_taskfunc(void *xtxr, int pending __unused)
+{
+	struct hn_tx_ring *txr = xtxr;
+
+	mtx_lock(&txr->hn_tx_lock);
+	hn_xmit(txr, 0);
+	mtx_unlock(&txr->hn_tx_lock);
+}
+
+static void
+hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
+{
+	struct hn_tx_ring *txr = xtxr;
+
+	mtx_lock(&txr->hn_tx_lock);
+	txr->hn_oactive = 0;
+	hn_xmit(txr, 0);
+	mtx_unlock(&txr->hn_tx_lock);
+}
+
+static int
+hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
+{
+	struct vmbus_chan_br cbr;
+	struct hn_rx_ring *rxr;
+	struct hn_tx_ring *txr = NULL;
+	int idx, error;
+
+	idx = vmbus_chan_subidx(chan);
+
+	/*
+	 * Link this channel to RX/TX ring.
+	 */
+	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
+	    ("invalid channel index %d, should be >= 0 && < %d",
+	     idx, sc->hn_rx_ring_inuse));
+	rxr = &sc->hn_rx_ring[idx];
+	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
+	    ("RX ring %d already attached", idx));
+	rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
+	rxr->hn_chan = chan;
+
+	if (bootverbose) {
+		if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
+		    idx, vmbus_chan_id(chan));
+	}
+
+	if (idx < sc->hn_tx_ring_inuse) {
+		txr = &sc->hn_tx_ring[idx];
+		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
+		    ("TX ring %d already attached", idx));
+		txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
+
+		txr->hn_chan = chan;
+		if (bootverbose) {
+			if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
+			    idx, vmbus_chan_id(chan));
+		}
+	}
+
+	/* Bind this channel to a proper CPU. */
+	vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx));
+
+	/*
+	 * Open this channel
+	 */
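+	/*
+	 * The shared hn_br block: the first HN_TXBR_SIZE bytes back
+	 * the TX bufring, the remainder backs the RX bufring.
+	 */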
+	cbr.cbr = rxr->hn_br;
+	cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
+	cbr.cbr_txsz = HN_TXBR_SIZE;
+	cbr.cbr_rxsz = HN_RXBR_SIZE;
+	error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
+	if (error) {
+		if (error == EISCONN) {
+			if_printf(sc->hn_ifp, "bufring is connected after "
+			    "chan%u open failure\n", vmbus_chan_id(chan));
+			rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
+		} else {
+			if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
+			    vmbus_chan_id(chan), error);
+		}
+	}
+	return (error);
+}
+
+static void
+hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
+{
+	struct hn_rx_ring *rxr;
+	int idx, error;
+
+	idx = vmbus_chan_subidx(chan);
+
+	/*
+	 * Unlink this channel from the RX/TX ring.
+	 */
+	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
+	    ("invalid channel index %d, should be >= 0 && < %d",
+	     idx, sc->hn_rx_ring_inuse));
+	rxr = &sc->hn_rx_ring[idx];
+	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
+	    ("RX ring %d is not attached", idx));
+	rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
+
+	if (idx < sc->hn_tx_ring_inuse) {
+		struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
+
+		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
+		    ("TX ring %d is not attached", idx));
+		txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
+	}
+
+	/*
+	 * Close this channel.
+	 *
+	 * NOTE:
+	 * Channel closing does _not_ destroy the target channel.
+	 */
+	error = vmbus_chan_close_direct(chan);
+	if (error == EISCONN) {
+		if_printf(sc->hn_ifp, "chan%u bufring is connected "
+		    "after being closed\n", vmbus_chan_id(chan));
+		rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
+	} else if (error) {
+		if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
+		    vmbus_chan_id(chan), error);
+	}
+}
+
+static int
+hn_attach_subchans(struct hn_softc *sc)
+{
+	struct vmbus_channel **subchans;
+	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
+	int i, error = 0;
+
+	KASSERT(subchan_cnt > 0, ("no sub-channels"));
+
+	/* Attach the sub-channels. */
+	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
+	for (i = 0; i < subchan_cnt; ++i) {
+		int error1;
+
+		error1 = hn_chan_attach(sc, subchans[i]);
+		if (error1) {
+			error = error1;
+			/* Move on; all channels will be detached later. */
+		}
+	}
+	vmbus_subchan_rel(subchans, subchan_cnt);
+
+	if (error) {
+		if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
+	} else {
+		if (bootverbose) {
+			if_printf(sc->hn_ifp, "%d sub-channels attached\n",
+			    subchan_cnt);
+		}
+	}
+	return (error);
+}
+
+static void
+hn_detach_allchans(struct hn_softc *sc)
+{
+	struct vmbus_channel **subchans;
+	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
+	int i;
+
+	if (subchan_cnt == 0)
+		goto back;
+
+	/* Detach the sub-channels. */
+	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
+	for (i = 0; i < subchan_cnt; ++i)
+		hn_chan_detach(sc, subchans[i]);
+	vmbus_subchan_rel(subchans, subchan_cnt);
+
+back:
+	/*
+	 * Detach the primary channel, _after_ all sub-channels
+	 * are detached.
+	 */
+	hn_chan_detach(sc, sc->hn_prichan);
+
+	/* Wait for sub-channels to be destroyed, if any. */
+	vmbus_subchan_drain(sc->hn_prichan);
+
+#ifdef INVARIANTS
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+		KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
+		    HN_RX_FLAG_ATTACHED) == 0,
+		    ("%dth RX ring is still attached", i));
+	}
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
+		    HN_TX_FLAG_ATTACHED) == 0,
+		    ("%dth TX ring is still attached", i));
+	}
+#endif
+}
+
+static int
+hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
+{
+	struct vmbus_channel **subchans;
+	int nchan, rxr_cnt, error;
+
+	nchan = *nsubch + 1;
+	if (nchan == 1) {
+		/*
+		 * Multiple RX/TX rings are not requested.
+		 */
+		*nsubch = 0;
+		return (0);
+	}
+
+	/*
+	 * Query RSS capabilities, e.g. # of RX rings and # of indirect
+	 * table entries.
+	 */
+	error = hn_rndis_query_rsscaps(sc, &rxr_cnt);
+	if (error) {
+		/* No RSS; this is benign. */
+		*nsubch = 0;
+		return (0);
+	}
+	if (bootverbose) {
+		if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
+		    rxr_cnt, nchan);
+	}
+
+	if (nchan > rxr_cnt)
+		nchan = rxr_cnt;
+	if (nchan == 1) {
+		if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
+		*nsubch = 0;
+		return (0);
+	}
+
+	/*
+	 * Allocate sub-channels from NVS.
+	 */
+	*nsubch = nchan - 1;
+	error = hn_nvs_alloc_subchans(sc, nsubch);
+	if (error || *nsubch == 0) {
+		/* Failed to allocate sub-channels. */
+		*nsubch = 0;
+		return (0);
+	}
+
+	/*
+	 * Wait for all sub-channels to become ready before moving on.
+	 */
+	subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
+	vmbus_subchan_rel(subchans, *nsubch);
+	return (0);
+}
+
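+/*
+ * Editor's sketch (not part of this commit) of the negotiation in
+ * hn_synth_alloc_subchans() above: the usable channel count is the
+ * smaller of what the driver requested and what the host offers, and
+ * one channel total means the primary only, i.e. no sub-channels.
+ * negotiate_nsubch() is hypothetical.
+ */
+#if 0
+static int
+negotiate_nsubch(int requested, int offered)
+{
+	int nchan = (requested < offered) ? requested : offered;
+
+	return ((nchan > 1) ? nchan - 1 : 0);	/* # of sub-channels */
+}
+#endif
+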
+static bool
+hn_synth_attachable(const struct hn_softc *sc)
+{
+	int i;
+
+	if (sc->hn_flags & HN_FLAG_ERRORS)
+		return (false);
+
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+		const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
+
+		if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF)
+			return (false);
+	}
+	return (true);
+}
+
+/*
+ * Make sure that the RX filter is zero after the successful
+ * RNDIS initialization.
+ *
+ * NOTE:
+ * Under certain conditions on certain versions of Hyper-V,
+ * the RNDIS rxfilter is _not_ zero on the hypervisor side
+ * after the successful RNDIS initialization, which breaks
+ * the assumption of any following code (well, it breaks the
+ * RNDIS API contract actually).  Clear the RNDIS rxfilter
+ * explicitly, drain packets sneaking through, and drain the
+ * interrupt taskqueues scheduled due to the stealth packets.
+ */
+static void
+hn_rndis_init_fixat(struct hn_softc *sc, int nchan)
+{
+
+	hn_disable_rx(sc);
+	hn_drain_rxtx(sc, nchan);
+}
+
+static int
+hn_synth_attach(struct hn_softc *sc, int mtu)
+{
+#define ATTACHED_NVS		0x0002
+#define ATTACHED_RNDIS		0x0004
+
+	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
+	int error, nsubch, nchan = 1, i, rndis_inited;
+	uint32_t old_caps, attached = 0;
+
+	KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
+	    ("synthetic parts were attached"));
+
+	if (!hn_synth_attachable(sc))
+		return (ENXIO);
+
+	/* Save capabilities for later verification. */
+	old_caps = sc->hn_caps;
+	sc->hn_caps = 0;
+
+	/* Clear RSS stuffs. */
+	sc->hn_rss_ind_size = 0;
+	sc->hn_rss_hash = 0;
+	sc->hn_rss_hcap = 0;
+
+	/*
+	 * Attach the primary channel _before_ attaching NVS and RNDIS.
+	 */
+	error = hn_chan_attach(sc, sc->hn_prichan);
+	if (error)
+		goto failed;
+
+	/*
+	 * Attach NVS.
+	 */
+	error = hn_nvs_attach(sc, mtu);
+	if (error)
+		goto failed;
+	attached |= ATTACHED_NVS;
+
+	/*
+	 * Attach RNDIS _after_ NVS is attached.
+	 */
+	error = hn_rndis_attach(sc, mtu, &rndis_inited);
+	if (rndis_inited)
+		attached |= ATTACHED_RNDIS;
+	if (error)
+		goto failed;
+
+	/*
+	 * Make sure capabilities are not changed.
+	 */
+	if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
+		if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
+		    old_caps, sc->hn_caps);
+		error = ENXIO;
+		goto failed;
+	}
+
+	/*
+	 * Allocate sub-channels for multi-TX/RX rings.
+	 *
+	 * NOTE:
+	 * The # of RX rings that can be used is equal to the # of
+	 * channels to be requested.
+	 */
+	nsubch = sc->hn_rx_ring_cnt - 1;
+	error = hn_synth_alloc_subchans(sc, &nsubch);
+	if (error)
+		goto failed;
+	/* NOTE: _Full_ synthetic parts detach is required now. */
+	sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;
+
+	/*
+	 * Set the # of TX/RX rings that could be used according to
+	 * the # of channels that NVS offered.
+	 */
+	nchan = nsubch + 1;
+	hn_set_ring_inuse(sc, nchan);
+	if (nchan == 1) {
+		/* Only the primary channel can be used; done */
+		goto back;
+	}
+
+	/*
+	 * Attach the sub-channels.
+	 *
+	 * NOTE: hn_set_ring_inuse() _must_ have been called.
+	 */
+	error = hn_attach_subchans(sc);
+	if (error)
+		goto failed;
+
+	/*
+	 * Configure RSS key and indirect table _after_ all sub-channels
+	 * are attached.
+	 */
+	if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
+		/*
+		 * RSS key is not set yet; set it to the default RSS key.
+		 */
+		if (bootverbose)
+			if_printf(sc->hn_ifp, "setup default RSS key\n");
+		memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
+		sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
+	}
+
+	if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
+		/*
+		 * RSS indirect table is not set yet; set it up in round-
+		 * robin fashion.
+		 */
+		if (bootverbose) {
+			if_printf(sc->hn_ifp, "setup default RSS indirect "
+			    "table\n");
+		}
+		for (i = 0; i < NDIS_HASH_INDCNT; ++i)
+			rss->rss_ind[i] = i % nchan;
+		sc->hn_flags |= HN_FLAG_HAS_RSSIND;
+	} else {
+		/*
+		 * The # of usable channels may have changed, so we
+		 * have to make sure that all entries in the RSS
+		 * indirect table are valid.
+		 *
+		 * NOTE: hn_set_ring_inuse() _must_ have been called.
+		 */
+		hn_rss_ind_fixup(sc);
+	}
+
+	sc->hn_rss_hash = sc->hn_rss_hcap;
+	if ((sc->hn_flags & HN_FLAG_RXVF) ||
+	    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
+		/* NOTE: Don't reconfigure RSS here; done immediately below. */
+		hn_vf_rss_fixup(sc, false);
+	}
+	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
+	if (error)
+		goto failed;
+back:
+	/*
+	 * Fixup transmission aggregation setup.
+	 */
+	hn_set_txagg(sc);
+	hn_rndis_init_fixat(sc, nchan);
+	return (0);
+
+failed:
+	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
+		hn_rndis_init_fixat(sc, nchan);
+		hn_synth_detach(sc);
+	} else {
+		if (attached & ATTACHED_RNDIS) {
+			hn_rndis_init_fixat(sc, nchan);
+			hn_rndis_detach(sc);
+		}
+		if (attached & ATTACHED_NVS)
+			hn_nvs_detach(sc);
+		hn_chan_detach(sc, sc->hn_prichan);
+		/* Restore old capabilities. */
+		sc->hn_caps = old_caps;
+	}
+	return (error);
+
+#undef ATTACHED_RNDIS
+#undef ATTACHED_NVS
+}
+
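+/*
+ * Editor's sketch (not part of this commit) of the rollback pattern
+ * used by hn_synth_attach() above: each layer that attaches
+ * successfully sets a bit, and the failure path tears down, in
+ * reverse order, only the layers that actually came up.  attach_a(),
+ * attach_b() and the A/B bits are hypothetical.
+ */
+#if 0
+	if ((error = attach_a()) != 0)
+		goto failed;
+	done |= A;
+	if ((error = attach_b()) != 0)
+		goto failed;
+	done |= B;
+	return (0);
+failed:
+	if (done & B)
+		detach_b();
+	if (done & A)
+		detach_a();
+	return (error);
+#endif
+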
+/*
+ * NOTE:
+ * The interface must have been suspended through hn_suspend() before
+ * this function gets called.
+ */
+static void
+hn_synth_detach(struct hn_softc *sc)
+{
+
+	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
+	    ("synthetic parts were not attached"));
+
+	/* Detach the RNDIS first. */
+	hn_rndis_detach(sc);
+
+	/* Detach NVS. */
+	hn_nvs_detach(sc);
+
+	/* Detach all of the channels. */
+	hn_detach_allchans(sc);
+
+	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
+}
+
+static void
+hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
+{
+	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
+	    ("invalid ring count %d", ring_cnt));
+
+	if (sc->hn_tx_ring_cnt > ring_cnt)
+		sc->hn_tx_ring_inuse = ring_cnt;
+	else
+		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
+	sc->hn_rx_ring_inuse = ring_cnt;
+
+	if (bootverbose) {
+		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
+		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
+	}
+}
+
+static void
+hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
+{
+
+	/*
+	 * NOTE:
+	 * The TX bufring will not be drained by the hypervisor if
+	 * the primary channel is revoked.
+	 */
+	while (!vmbus_chan_rx_empty(chan) ||
+	    (!vmbus_chan_is_revoked(sc->hn_prichan) &&
+	     !vmbus_chan_tx_empty(chan)))
+		pause("waitch", 1);
+	vmbus_chan_intr_drain(chan);
+}
+
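+/*
+ * Editor's sketch (not part of this commit) of the drain idiom in
+ * hn_chan_drain() above: poll the resource until it is idle, sleeping
+ * one tick between polls via pause(9), then flush any deferred work.
+ * struct my_res, resource_is_idle() and drain_deferred_work() are
+ * hypothetical.
+ */
+#if 0
+static void
+poll_drain(struct my_res *res)
+{
+	while (!resource_is_idle(res))
+		pause("wait", 1);	/* sleep 1 tick; do not busy-spin */
+	drain_deferred_work(res);
+}
+#endif
+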
+static void
+hn_disable_rx(struct hn_softc *sc)
+{
+
+	/*
+	 * Disable RX by forcefully clearing the RX filter.
+	 */
+	sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
+	hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */
+
+	/*
+	 * Give RNDIS enough time to flush all pending data packets.
+	 */
+	pause("waitrx", (200 * hz) / 1000);
+}
+
+/*
+ * NOTE:
+ * RX/TX _must_ have been suspended/disabled, before this function
+ * is called.
+ */
+static void
+hn_drain_rxtx(struct hn_softc *sc, int nchan)
+{
+	struct vmbus_channel **subch = NULL;
+	int nsubch;
+
+	/*
+	 * Drain RX/TX bufrings and interrupts.
+	 */
+	nsubch = nchan - 1;
+	if (nsubch > 0)
+		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
+
+	if (subch != NULL) {
+		int i;
+
+		for (i = 0; i < nsubch; ++i)
+			hn_chan_drain(sc, subch[i]);
+	}
+	hn_chan_drain(sc, sc->hn_prichan);
+
+	if (subch != NULL)
+		vmbus_subchan_rel(subch, nsubch);
+}
+
+static void
+hn_suspend_data(struct hn_softc *sc)
+{
+	struct hn_tx_ring *txr;
+	int i;
+
+	HN_LOCK_ASSERT(sc);
+
+	/*
+	 * Suspend TX.
+	 */
+	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
+		txr = &sc->hn_tx_ring[i];
+
+		mtx_lock(&txr->hn_tx_lock);
+		txr->hn_suspended = 1;
+		mtx_unlock(&txr->hn_tx_lock);
+		/* No one is able to send more packets now. */
+
+		/*
+		 * Wait for all pending sends to finish.
+		 *
+		 * NOTE:
+		 * We will _not_ receive all pending send-dones if the
+		 * primary channel is revoked.
+		 */
+		while (hn_tx_ring_pending(txr) &&
+		    !vmbus_chan_is_revoked(sc->hn_prichan))
+			pause("hnwtx", 1 /* 1 tick */);
+	}
+
+	/*
+	 * Disable RX.
+	 */
+	hn_disable_rx(sc);
+
+	/*
+	 * Drain RX/TX.
+	 */
+	hn_drain_rxtx(sc, sc->hn_rx_ring_inuse);
+
+	/*
+	 * Drain any pending TX tasks.
+	 *
+	 * NOTE:
+	 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX
+	 * tasks must be drained _after_ it has completed.
+	 */
+	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
+		txr = &sc->hn_tx_ring[i];
+
+		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
+		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
+	}
+}
+
+static void
+hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
+{
+
+	((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
+}
+
+static void
+hn_suspend_mgmt(struct hn_softc *sc)
+{
+	struct task task;
+
+	HN_LOCK_ASSERT(sc);
+
+	/*
+	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
+	 * through hn_mgmt_taskq.
+	 */
+	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
+	vmbus_chan_run_task(sc->hn_prichan, &task);
+
+	/*
+	 * Make sure that all pending management tasks are completed.
+	 */
+	taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
+	taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
+	taskqueue_drain_all(sc->hn_mgmt_taskq0);
+}
+
+static void
+hn_suspend(struct hn_softc *sc)
+{
+
+	/* Disable polling. */
+	hn_polling(sc, 0);
+
+	/*
+	 * If the non-transparent mode VF is activated, the synthetic
+	 * device is receiving packets, so the data path of the
+	 * synthetic device must be suspended.
+	 */
+	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
+	    (sc->hn_flags & HN_FLAG_RXVF))
+		hn_suspend_data(sc);
+	hn_suspend_mgmt(sc);
+}
+
+static void
+hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
+{
+	int i;
+
+	KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
+	    ("invalid TX ring count %d", tx_ring_cnt));
+
+	for (i = 0; i < tx_ring_cnt; ++i) {
+		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
+
+		mtx_lock(&txr->hn_tx_lock);
+		txr->hn_suspended = 0;
+		mtx_unlock(&txr->hn_tx_lock);
+	}
+}
+
+static void
+hn_resume_data(struct hn_softc *sc)
+{
+	int i;
+
+	HN_LOCK_ASSERT(sc);
+
+	/*
+	 * Re-enable RX.
+	 */
+	hn_rxfilter_config(sc);
+
+	/*
+	 * Make sure to clear suspend status on "all" TX rings,
+	 * since hn_tx_ring_inuse can be changed after
+	 * hn_suspend_data().
+	 */
+	hn_resume_tx(sc, sc->hn_tx_ring_cnt);
+
+#ifdef HN_IFSTART_SUPPORT
+	if (!hn_use_if_start)
+#endif
+	{
+		/*
+		 * Flush unused drbrs, since hn_tx_ring_inuse may be
+		 * reduced.
+		 */
+		for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
+			hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
+	}
+
+	/*
+	 * Kick start TX.
+	 */
+	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
+		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
+
+		/*
+		 * Use txeof task, so that any pending oactive can be
+		 * cleared properly.
+		 */
+		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
+	}
+}
+
+static void
+hn_resume_mgmt(struct hn_softc *sc)
+{
+
+	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
+
+	/*
+	 * Kick off network change detection, if it was pending.
+	 * If no network change was pending, start link status
+	 * checks, which is more lightweight than network change
+	 * detection.
+	 */
+	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
+		hn_change_network(sc);
+	else
+		hn_update_link_status(sc);
+}
+
+static void
+hn_resume(struct hn_softc *sc)
+{
+
+	/*
+	 * If the non-transparent mode VF is activated, the synthetic
+	 * device has to receive packets, so the data path of the
+	 * synthetic device must be resumed.
+	 */
+	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
+	    (sc->hn_flags & HN_FLAG_RXVF))
+		hn_resume_data(sc);
+
+	/*
+	 * Don't resume link status change if VF is attached/activated.
+	 * - In the non-transparent VF mode, the synthetic device marks
+	 *   link down until the VF is deactivated; i.e. VF is down.
+	 * - In transparent VF mode, VF's media status is used until
+	 *   the VF is detached.
+	 */
+	if ((sc->hn_flags & HN_FLAG_RXVF) == 0 &&
+	    !(hn_xpnt_vf && sc->hn_vf_ifp != NULL))
+		hn_resume_mgmt(sc);
+
+	/*
+	 * Re-enable polling if this interface is running and
+	 * the polling is requested.
+	 */
+	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
+		hn_polling(sc, sc->hn_pollhz);
+}
+
+static void
+hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
+{
+	const struct rndis_status_msg *msg;
+	int ofs;
+
+	if (dlen < sizeof(*msg)) {
+		if_printf(sc->hn_ifp, "invalid RNDIS status\n");
+		return;
+	}
+	msg = data;
+
+	switch (msg->rm_status) {
+	case RNDIS_STATUS_MEDIA_CONNECT:
+	case RNDIS_STATUS_MEDIA_DISCONNECT:
+		hn_update_link_status(sc);
+		break;
+
+	case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
+	case RNDIS_STATUS_LINK_SPEED_CHANGE:
+		/* Not really useful; ignore. */
+		break;
+
+	case RNDIS_STATUS_NETWORK_CHANGE:
+		ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
+		if (dlen < ofs + msg->rm_stbuflen ||
+		    msg->rm_stbuflen < sizeof(uint32_t)) {
+			if_printf(sc->hn_ifp, "network changed\n");
+		} else {
+			uint32_t change;
+
+			memcpy(&change, ((const uint8_t *)msg) + ofs,
+			    sizeof(change));
+			if_printf(sc->hn_ifp, "network changed, change %u\n",
+			    change);
+		}
+		hn_change_network(sc);
+		break;
+
+	default:
+		if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
+		    msg->rm_status);
+		break;
+	}
+}
+
+static int
+hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
+{
+	const struct rndis_pktinfo *pi = info_data;
+	uint32_t mask = 0;
+
+	while (info_dlen != 0) {
+		const void *data;
+		uint32_t dlen;
+
+		if (__predict_false(info_dlen < sizeof(*pi)))
+			return (EINVAL);
+		if (__predict_false(info_dlen < pi->rm_size))
+			return (EINVAL);
+		info_dlen -= pi->rm_size;
+
+		if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
+			return (EINVAL);
+		if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
+			return (EINVAL);
+		dlen = pi->rm_size - pi->rm_pktinfooffset;
+		data = pi->rm_data;
+
+		switch (pi->rm_type) {
+		case NDIS_PKTINFO_TYPE_VLAN:
+			if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
+				return (EINVAL);
+			info->vlan_info = *((const uint32_t *)data);
+			mask |= HN_RXINFO_VLAN;
+			break;
+
+		case NDIS_PKTINFO_TYPE_CSUM:
+			if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
+				return (EINVAL);
+			info->csum_info = *((const uint32_t *)data);
+			mask |= HN_RXINFO_CSUM;
+			break;
+
+		case HN_NDIS_PKTINFO_TYPE_HASHVAL:
+			if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
+				return (EINVAL);
+			info->hash_value = *((const uint32_t *)data);
+			mask |= HN_RXINFO_HASHVAL;
+			break;
+
+		case HN_NDIS_PKTINFO_TYPE_HASHINF:
+			if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
+				return (EINVAL);
+			info->hash_info = *((const uint32_t *)data);
+			mask |= HN_RXINFO_HASHINF;
+			break;
+
+		default:
+			goto next;
+		}
+
+		if (mask == HN_RXINFO_ALL) {
+			/* All found; done */
+			break;
+		}
+next:
+		pi = (const struct rndis_pktinfo *)
+		    ((const uint8_t *)pi + pi->rm_size);
+	}
+
+	/*
+	 * Final fixup.
+	 * - If there is no hash value, invalidate the hash info.
+	 */
+	if ((mask & HN_RXINFO_HASHVAL) == 0)
+		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
+	return (0);
+}
+
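+/*
+ * Editor's sketch (not part of this commit) of the record walk done
+ * by hn_rndis_rxinfo() above: RNDIS pktinfo is a sequence of
+ * variable-sized records, each carrying its own rm_size, so the
+ * cursor advances by that size after every record and the remaining
+ * length is validated before each dereference.  consume() is
+ * hypothetical, and the bounds checks here are a simplification of
+ * the full set performed above.
+ */
+#if 0
+	const struct rndis_pktinfo *pi = info_data;
+
+	while (info_dlen >= sizeof(*pi) &&
+	    pi->rm_size >= sizeof(*pi) && pi->rm_size <= info_dlen) {
+		consume(pi->rm_type, pi->rm_data);
+		info_dlen -= pi->rm_size;
+		pi = (const struct rndis_pktinfo *)
+		    ((const uint8_t *)pi + pi->rm_size);
+	}
+#endif
+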
+static __inline bool
+hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
+{
+
+	if (off < check_off) {
+		if (__predict_true(off + len <= check_off))
+			return (false);
+	} else if (off > check_off) {
+		if (__predict_true(check_off + check_len <= off))
+			return (false);
+	}
+	return (true);
+}
+
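+/*
+ * Editor's note: hn_rndis_check_overlap() above treats [off, off+len)
+ * as a half-open interval, so ranges that merely touch do not count
+ * as overlapping.  For example, (0, 10, 10, 5) is false (adjacent),
+ * while (0, 10, 9, 5) is true (byte 9 is shared).
+ */
+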
+static void
+hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
+{
+	const struct rndis_packet_msg *pkt;
+	struct hn_rxinfo info;
+	int data_off, pktinfo_off, data_len, pktinfo_len;
+
+	/*
+	 * Check length.
+	 */
+	if (__predict_false(dlen < sizeof(*pkt))) {
+		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
+		return;
+	}
+	pkt = data;
+
+	if (__predict_false(dlen < pkt->rm_len)) {
+		if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
+		    "dlen %d, msglen %u\n", dlen, pkt->rm_len);
+		return;
+	}
+	if (__predict_false(pkt->rm_len <
+	    pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
+		if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
+		    "msglen %u, data %u, oob %u, pktinfo %u\n",
+		    pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
+		    pkt->rm_pktinfolen);
+		return;
+	}
+	if (__predict_false(pkt->rm_datalen == 0)) {
+		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
+		return;
+	}
+
+	/*
+	 * Check offsets.
+	 */
+#define IS_OFFSET_INVALID(ofs)			\
+	((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN ||	\
+	 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))
+
+	/* XXX Hyper-V does not meet data offset alignment requirement */
+	if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
+		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
+		    "data offset %u\n", pkt->rm_dataoffset);
+		return;
+	}
+	if (__predict_false(pkt->rm_oobdataoffset > 0 &&
+	    IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
+		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
+		    "oob offset %u\n", pkt->rm_oobdataoffset);
+		return;
+	}
+	if (__predict_true(pkt->rm_pktinfooffset > 0) &&
+	    __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
+		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
+		    "pktinfo offset %u\n", pkt->rm_pktinfooffset);
+		return;
+	}
+
+#undef IS_OFFSET_INVALID
+
+	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
+	data_len = pkt->rm_datalen;
+	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
+	pktinfo_len = pkt->rm_pktinfolen;
+
+	/*
+	 * Check OOB coverage.
+	 */
+	if (__predict_false(pkt->rm_oobdatalen != 0)) {
+		int oob_off, oob_len;
+
+		if_printf(rxr->hn_ifp, "got oobdata\n");
+		oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
+		oob_len = pkt->rm_oobdatalen;
+
+		if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
+			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
+			    "oob overflow, msglen %u, oob abs %d len %d\n",
+			    pkt->rm_len, oob_off, oob_len);
+			return;
+		}
+
+		/*
+		 * Check against data.
+		 */
+		if (hn_rndis_check_overlap(oob_off, oob_len,
+		    data_off, data_len)) {
+			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
+			    "oob overlaps data, oob abs %d len %d, "
+			    "data abs %d len %d\n",
+			    oob_off, oob_len, data_off, data_len);
+			return;
+		}
+
+		/*
+		 * Check against pktinfo.
+		 */
+		if (pktinfo_len != 0 &&
+		    hn_rndis_check_overlap(oob_off, oob_len,
+		    pktinfo_off, pktinfo_len)) {
+			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
+			    "oob overlaps pktinfo, oob abs %d len %d, "
+			    "pktinfo abs %d len %d\n",
+			    oob_off, oob_len, pktinfo_off, pktinfo_len);
+			return;
+		}
+	}
+
+	/*
+	 * Check per-packet-info coverage and find useful per-packet-info.
+	 */
+	info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
+	info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
+	info.hash_info = HN_NDIS_HASH_INFO_INVALID;
+	if (__predict_true(pktinfo_len != 0)) {
+		bool overlap;
+		int error;
+
+		if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
+			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
+			    "pktinfo overflow, msglen %u, "
+			    "pktinfo abs %d len %d\n",
+			    pkt->rm_len, pktinfo_off, pktinfo_len);
+			return;
+		}
+
+		/*
+		 * Check packet info coverage.
+		 */
+		overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
+		    data_off, data_len);
+		if (__predict_false(overlap)) {
+			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
+			    "pktinfo overlaps data, pktinfo abs %d len %d, "
+			    "data abs %d len %d\n",
+			    pktinfo_off, pktinfo_len, data_off, data_len);
+			return;
+		}
+
+		/*
+		 * Find useful per-packet-info.
+		 */
+		error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
+		    pktinfo_len, &info);
+		if (__predict_false(error)) {
+			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
+			    "pktinfo\n");
+			return;
+		}
+	}
+
+	if (__predict_false(data_off + data_len > pkt->rm_len)) {
+		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
+		    "data overflow, msglen %u, data abs %d len %d\n",
+		    pkt->rm_len, data_off, data_len);
+		return;
+	}
+	hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
+}
+
+static __inline void
+hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
+{
+	const struct rndis_msghdr *hdr;
+
+	if (__predict_false(dlen < sizeof(*hdr))) {
+		if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
+		return;
+	}
+	hdr = data;
+
+	if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
+		/* Hot data path. */
+		hn_rndis_rx_data(rxr, data, dlen);
+		/* Done! */
+		return;
+	}
+
+	if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
+		hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
+	else
+		hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
+}
+
+static void
+hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
+{
+	const struct hn_nvs_hdr *hdr;
+
+	if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
+		if_printf(sc->hn_ifp, "invalid nvs notify\n");
+		return;
+	}
+	hdr = VMBUS_CHANPKT_CONST_DATA(pkt);
+
+	if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
+		/* Useless; ignore */
+		return;
+	}
+	if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
+}
+
+static void
+hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
+    const struct vmbus_chanpkt_hdr *pkt)
+{
+	struct hn_nvs_sendctx *sndc;
+
+	sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
+	sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
+	    VMBUS_CHANPKT_DATALEN(pkt));
+	/*
+	 * NOTE:
+	 * 'sndc' CAN NOT be accessed anymore, since it can be freed by
+	 * its callback.
+	 */
+}
+
+static void
+hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
+    const struct vmbus_chanpkt_hdr *pkthdr)
+{
+	const struct vmbus_chanpkt_rxbuf *pkt;
+	const struct hn_nvs_hdr *nvs_hdr;
+	int count, i, hlen;
+
+	if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
+		if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
+		return;
+	}
+	nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);
+
+	/* Make sure that this is an RNDIS message. */
+	if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
+		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
+		    nvs_hdr->nvs_type);
+		return;
+	}
+
+	hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
+	if (__predict_false(hlen < sizeof(*pkt))) {
+		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
+		return;
+	}
+	pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;
+
+	if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
+		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
+		    pkt->cp_rxbuf_id);
+		return;
+	}
+
+	count = pkt->cp_rxbuf_cnt;
+	if (__predict_false(hlen <
+	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
+		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
+		return;
+	}
+
+	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
+	for (i = 0; i < count; ++i) {
+		int ofs, len;
+
+		ofs = pkt->cp_rxbuf[i].rb_ofs;
+		len = pkt->cp_rxbuf[i].rb_len;
+		if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
+			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflows rxbuf, "
+			    "ofs %d, len %d\n", i, ofs, len);
+			continue;
+		}
+		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
+	}
+
+	/*
+	 * Ack the consumed RXBUF associated w/ this channel packet,
+	 * so that this RXBUF can be recycled by the hypervisor.
+	 */
+	hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
+}
+
+static void
+hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
+    uint64_t tid)
+{
+	struct hn_nvs_rndis_ack ack;
+	int retries, error;
+
+	ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
+	ack.nvs_status = HN_NVS_STATUS_OK;
+
+	retries = 0;
+again:
+	error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
+	    VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
+	if (__predict_false(error == EAGAIN)) {
+		/*
+		 * NOTE:
+		 * This should _not_ happen in the real world, since the
+		 * consumption of the TX bufring from the TX path is
+		 * controlled.
+		 */
+		if (rxr->hn_ack_failed == 0)
+			if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
+		rxr->hn_ack_failed++;
+		retries++;
+		if (retries < 10) {
+			DELAY(100);
+			goto again;
+		}
+		/* RXBUF leaks! */
+		if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
+	}
+}
+
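+/*
+ * Editor's sketch (not part of this commit) of the bounded retry in
+ * hn_nvs_ack_rxbuf() above: a short busy-wait retry is tolerable only
+ * because EAGAIN is not expected in practice, and after a fixed number
+ * of attempts the buffer is deliberately leaked instead of stalling
+ * the RX path forever.  try_send() is hypothetical.
+ */
+#if 0
+static void
+ack_with_retry(void)
+{
+	int tries;
+
+	for (tries = 0; tries < 10; ++tries) {
+		if (try_send() != EAGAIN)
+			return;
+		DELAY(100);	/* spin 100us between attempts */
+	}
+	/* Give up; the buffer leaks. */
+}
+#endif
+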
+static void
+hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
+{
+	struct hn_rx_ring *rxr = xrxr;
+	struct hn_softc *sc = rxr->hn_ifp->if_softc;
+
+	for (;;) {
+		struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
+		int error, pktlen;
+
+		pktlen = rxr->hn_pktbuf_len;
+		error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
+		if (__predict_false(error == ENOBUFS)) {
+			void *nbuf;
+			int nlen;
+
+			/*
+			 * Expand channel packet buffer.
+			 *
+			 * XXX
+			 * Use M_WAITOK here, since allocation failure
+			 * is fatal.
+			 */
+			nlen = rxr->hn_pktbuf_len * 2;
+			while (nlen < pktlen)
+				nlen *= 2;
+			nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);
+
+			if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
+			    rxr->hn_pktbuf_len, nlen);
+
+			free(rxr->hn_pktbuf, M_DEVBUF);
+			rxr->hn_pktbuf = nbuf;
+			rxr->hn_pktbuf_len = nlen;
+			/* Retry! */
+			continue;
+		} else if (__predict_false(error == EAGAIN)) {
+			/* No more channel packets; done! */
+			break;
+		}
+		KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));
+
+		switch (pkt->cph_type) {
+		case VMBUS_CHANPKT_TYPE_COMP:
+			hn_nvs_handle_comp(sc, chan, pkt);
+			break;
+
+		case VMBUS_CHANPKT_TYPE_RXBUF:
+			hn_nvs_handle_rxbuf(rxr, chan, pkt);
+			break;
+
+		case VMBUS_CHANPKT_TYPE_INBAND:
+			hn_nvs_handle_notify(sc, pkt);
+			break;
+
+		default:
+			if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
+			    pkt->cph_type);
+			break;
+		}
+	}
+	hn_chan_rollup(rxr, rxr->hn_txr);
+}
+
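+/*
+ * Editor's sketch (not part of this commit) of the growth policy in
+ * hn_chan_callback() above: on ENOBUFS the packet buffer is at least
+ * doubled until it can hold the reported length, then the receive is
+ * retried.  Doubling keeps the number of reallocations logarithmic in
+ * the final buffer size.  "buf", "buflen" and "needed" are
+ * hypothetical.
+ */
+#if 0
+	nlen = buflen * 2;
+	while (nlen < needed)
+		nlen *= 2;
+	nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);	/* M_WAITOK cannot fail */
+	free(buf, M_DEVBUF);
+	buf = nbuf;
+	buflen = nlen;
+#endif
+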
+static void
+hn_sysinit(void *arg __unused)
+{
+	int i;
+
+	hn_udpcs_fixup = counter_u64_alloc(M_WAITOK);
+
+#ifdef HN_IFSTART_SUPPORT
+	/*
+	 * Don't use ifnet.if_start if transparent VF mode is requested;
+	 * mainly due to the IFF_DRV_OACTIVE flag.
+	 */
+	if (hn_xpnt_vf && hn_use_if_start) {
+		hn_use_if_start = 0;
+		printf("hn: transparent VF mode, if_transmit will be used "
+		    "instead of if_start\n");
+	}
+#endif
+	if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) {
+		printf("hn: invalid transparent VF attach routine "
+		    "wait timeout %d, reset to %d\n",
+		    hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN);
+		hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
+	}
+
+	/*
+	 * Initialize VF map.
+	 */
+	rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE);
+	hn_vfmap_size = HN_VFMAP_SIZE_DEF;
+	hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF,
+	    M_WAITOK | M_ZERO);
+
+	/*
+	 * Fix the # of TX taskqueues.
+	 */
+	if (hn_tx_taskq_cnt <= 0)
+		hn_tx_taskq_cnt = 1;
+	else if (hn_tx_taskq_cnt > mp_ncpus)
+		hn_tx_taskq_cnt = mp_ncpus;
+
+	/*
+	 * Fix the TX taskqueue mode.
+	 */
+	switch (hn_tx_taskq_mode) {
+	case HN_TX_TASKQ_M_INDEP:
+	case HN_TX_TASKQ_M_GLOBAL:
+	case HN_TX_TASKQ_M_EVTTQ:
+		break;
+	default:
+		hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
+		break;
+	}
+
+	if (vm_guest != VM_GUEST_HV)
+		return;
+
+	if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
+		return;
+
+	hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
+	    M_DEVBUF, M_WAITOK);
+	for (i = 0; i < hn_tx_taskq_cnt; ++i) {
+		hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
+		    taskqueue_thread_enqueue, &hn_tx_taskque[i]);
+		taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
+		    "hn tx%d", i);
+	}
+}
+SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL);
+
+static void
+hn_sysuninit(void *arg __unused)
+{
+
+	if (hn_tx_taskque != NULL) {
+		int i;
+
+		for (i = 0; i < hn_tx_taskq_cnt; ++i)
+			taskqueue_free(hn_tx_taskque[i]);
+		free(hn_tx_taskque, M_DEVBUF);
+	}
+
+	if (hn_vfmap != NULL)
+		free(hn_vfmap, M_DEVBUF);
+	rm_destroy(&hn_vfmap_lock);
+
+	counter_u64_free(hn_udpcs_fixup);
+}
+SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL);


Property changes on: trunk/sys/dev/hyperv/netvsc/if_hn.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/netvsc/if_hnreg.h
===================================================================
--- trunk/sys/dev/hyperv/netvsc/if_hnreg.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/netvsc/if_hnreg.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,257 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016-2017 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/netvsc/if_hnreg.h 324461 2017-10-10 02:22:34Z sephe $
+ */
+
+#ifndef _IF_HNREG_H_
+#define _IF_HNREG_H_
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+/*
+ * NDIS protocol version numbers
+ */
+#define HN_NDIS_VERSION_6_1		0x00060001
+#define HN_NDIS_VERSION_6_20		0x00060014
+#define HN_NDIS_VERSION_6_30		0x0006001e
+#define HN_NDIS_VERSION_MAJOR(ver)	(((ver) & 0xffff0000) >> 16)
+#define HN_NDIS_VERSION_MINOR(ver)	((ver) & 0xffff)
+
+/*
+ * NVS versions.
+ */
+#define HN_NVS_VERSION_1		0x00002
+#define HN_NVS_VERSION_2		0x30002
+#define HN_NVS_VERSION_4		0x40000
+#define HN_NVS_VERSION_5		0x50000
+
+#define HN_NVS_RXBUF_SIG		0xcafe
+#define HN_NVS_CHIM_SIG			0xface
+
+#define HN_NVS_CHIM_IDX_INVALID		0xffffffff
+
+#define HN_NVS_RNDIS_MTYPE_DATA		0
+#define HN_NVS_RNDIS_MTYPE_CTRL		1
+
+/*
+ * NVS message transaction status codes.
+ */
+#define HN_NVS_STATUS_OK		1
+#define HN_NVS_STATUS_FAILED		2
+
+/*
+ * NVS request/response message types.
+ */
+#define HN_NVS_TYPE_INIT		1
+#define HN_NVS_TYPE_INIT_RESP		2
+#define HN_NVS_TYPE_NDIS_INIT		100
+#define HN_NVS_TYPE_RXBUF_CONN		101
+#define HN_NVS_TYPE_RXBUF_CONNRESP	102
+#define HN_NVS_TYPE_RXBUF_DISCONN	103
+#define HN_NVS_TYPE_CHIM_CONN		104
+#define HN_NVS_TYPE_CHIM_CONNRESP	105
+#define HN_NVS_TYPE_CHIM_DISCONN	106
+#define HN_NVS_TYPE_RNDIS		107
+#define HN_NVS_TYPE_RNDIS_ACK		108
+#define HN_NVS_TYPE_NDIS_CONF		125
+#define HN_NVS_TYPE_VFASSOC_NOTE	128	/* notification */
+#define HN_NVS_TYPE_SET_DATAPATH	129
+#define HN_NVS_TYPE_SUBCH_REQ		133
+#define HN_NVS_TYPE_SUBCH_RESP		133	/* same as SUBCH_REQ */
+#define HN_NVS_TYPE_TXTBL_NOTE		134	/* notification */
+
+/*
+ * Any size less than this one will _not_ work; e.g. hn_nvs_init
+ * has only 12B of valid data, but if only those 12B were sent, the
+ * hypervisor would never reply.
+ */
+#define HN_NVS_REQSIZE_MIN		32
+
+/* NVS message common header */
+struct hn_nvs_hdr {
+	uint32_t	nvs_type;
+} __packed;
+
+struct hn_nvs_init {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_INIT */
+	uint32_t	nvs_ver_min;
+	uint32_t	nvs_ver_max;
+	uint8_t		nvs_rsvd[20];
+} __packed;
+CTASSERT(sizeof(struct hn_nvs_init) >= HN_NVS_REQSIZE_MIN);
+
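+/*
+ * Editor's note on the assertion above: hn_nvs_init carries only 12
+ * bytes of meaningful data (three uint32_t fields); nvs_rsvd[20] pads
+ * the request to the 32-byte HN_NVS_REQSIZE_MIN so that the
+ * hypervisor actually replies.
+ */
+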
+struct hn_nvs_init_resp {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_INIT_RESP */
+	uint32_t	nvs_ver;	/* deprecated */
+	uint32_t	nvs_rsvd;
+	uint32_t	nvs_status;	/* HN_NVS_STATUS_ */
+} __packed;
+
+/* No response */
+struct hn_nvs_ndis_conf {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_NDIS_CONF */
+	uint32_t	nvs_mtu;
+	uint32_t	nvs_rsvd;
+	uint64_t	nvs_caps;	/* HN_NVS_NDIS_CONF_ */
+	uint8_t		nvs_rsvd1[12];
+} __packed;
+CTASSERT(sizeof(struct hn_nvs_ndis_conf) >= HN_NVS_REQSIZE_MIN);
+
+#define HN_NVS_NDIS_CONF_SRIOV		0x0004
+#define HN_NVS_NDIS_CONF_VLAN		0x0008
+
+/* No response */
+struct hn_nvs_ndis_init {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_NDIS_INIT */
+	uint32_t	nvs_ndis_major;	/* NDIS_VERSION_MAJOR_ */
+	uint32_t	nvs_ndis_minor;	/* NDIS_VERSION_MINOR_ */
+	uint8_t		nvs_rsvd[20];
+} __packed;
+CTASSERT(sizeof(struct hn_nvs_ndis_init) >= HN_NVS_REQSIZE_MIN);
+
+#define HN_NVS_DATAPATH_SYNTH		0
+#define HN_NVS_DATAPATH_VF		1
+
+/* No response */
+struct hn_nvs_datapath {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_SET_DATAPATH */
+	uint32_t	nvs_active_path;/* HN_NVS_DATAPATH_* */
+	uint32_t	nvs_rsvd[6];
+} __packed;
+CTASSERT(sizeof(struct hn_nvs_datapath) >= HN_NVS_REQSIZE_MIN);
+
+struct hn_nvs_rxbuf_conn {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_RXBUF_CONN */
+	uint32_t	nvs_gpadl;	/* RXBUF vmbus GPADL */
+	uint16_t	nvs_sig;	/* HN_NVS_RXBUF_SIG */
+	uint8_t		nvs_rsvd[22];
+} __packed;
+CTASSERT(sizeof(struct hn_nvs_rxbuf_conn) >= HN_NVS_REQSIZE_MIN);
+
+struct hn_nvs_rxbuf_sect {
+	uint32_t	nvs_start;
+	uint32_t	nvs_slotsz;
+	uint32_t	nvs_slotcnt;
+	uint32_t	nvs_end;
+} __packed;
+
+struct hn_nvs_rxbuf_connresp {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_RXBUF_CONNRESP */
+	uint32_t	nvs_status;	/* HN_NVS_STATUS_ */
+	uint32_t	nvs_nsect;	/* # of elem in nvs_sect */
+	struct hn_nvs_rxbuf_sect nvs_sect[];
+} __packed;
+
+/* No response */
+struct hn_nvs_rxbuf_disconn {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_RXBUF_DISCONN */
+	uint16_t	nvs_sig;	/* HN_NVS_RXBUF_SIG */
+	uint8_t		nvs_rsvd[26];
+} __packed;
+CTASSERT(sizeof(struct hn_nvs_rxbuf_disconn) >= HN_NVS_REQSIZE_MIN);
+
+struct hn_nvs_chim_conn {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_CHIM_CONN */
+	uint32_t	nvs_gpadl;	/* chimney buf vmbus GPADL */
+	uint16_t	nvs_sig;	/* HN_NVS_CHIM_SIG */
+	uint8_t		nvs_rsvd[22];
+} __packed;
+CTASSERT(sizeof(struct hn_nvs_chim_conn) >= HN_NVS_REQSIZE_MIN);
+
+struct hn_nvs_chim_connresp {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_CHIM_CONNRESP */
+	uint32_t	nvs_status;	/* HN_NVS_STATUS_ */
+	uint32_t	nvs_sectsz;	/* section size */
+} __packed;
+
+/* No response */
+struct hn_nvs_chim_disconn {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_CHIM_DISCONN */
+	uint16_t	nvs_sig;	/* HN_NVS_CHIM_SIG */
+	uint8_t		nvs_rsvd[26];
+} __packed;
+CTASSERT(sizeof(struct hn_nvs_chim_disconn) >= HN_NVS_REQSIZE_MIN);
+
+#define HN_NVS_SUBCH_OP_ALLOC		1
+
+struct hn_nvs_subch_req {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_SUBCH_REQ */
+	uint32_t	nvs_op;		/* HN_NVS_SUBCH_OP_ */
+	uint32_t	nvs_nsubch;
+	uint8_t		nvs_rsvd[20];
+} __packed;
+CTASSERT(sizeof(struct hn_nvs_subch_req) >= HN_NVS_REQSIZE_MIN);
+
+struct hn_nvs_subch_resp {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_SUBCH_RESP */
+	uint32_t	nvs_status;	/* HN_NVS_STATUS_ */
+	uint32_t	nvs_nsubch;
+} __packed;
+
+struct hn_nvs_rndis {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_RNDIS */
+	uint32_t	nvs_rndis_mtype;/* HN_NVS_RNDIS_MTYPE_ */
+	/*
+	 * Chimney sending buffer index and size.
+	 *
+	 * NOTE:
+	 * If nvs_chim_idx is set to HN_NVS_CHIM_IDX_INVALID
+	 * and nvs_chim_sz is set to 0, then chimney sending
+	 * buffer is _not_ used by this RNDIS message.
+	 */
+	uint32_t	nvs_chim_idx;
+	uint32_t	nvs_chim_sz;
+	uint8_t		nvs_rsvd[16];
+} __packed;
+CTASSERT(sizeof(struct hn_nvs_rndis) >= HN_NVS_REQSIZE_MIN);
+
+struct hn_nvs_rndis_ack {
+	uint32_t	nvs_type;	/* HN_NVS_TYPE_RNDIS_ACK */
+	uint32_t	nvs_status;	/* HN_NVS_STATUS_ */
+	uint8_t		nvs_rsvd[24];
+} __packed;
+CTASSERT(sizeof(struct hn_nvs_rndis_ack) >= HN_NVS_REQSIZE_MIN);
+
+/*
+ * RNDIS extension
+ */
+
+/* Per-packet hash info */
+#define HN_NDIS_HASH_INFO_SIZE		sizeof(uint32_t)
+#define HN_NDIS_PKTINFO_TYPE_HASHINF	NDIS_PKTINFO_TYPE_ORIG_NBLIST
+/* NDIS_HASH_ */
+
+/* Per-packet hash value */
+#define HN_NDIS_HASH_VALUE_SIZE		sizeof(uint32_t)
+#define HN_NDIS_PKTINFO_TYPE_HASHVAL	NDIS_PKTINFO_TYPE_PKT_CANCELID
+
+/* Per-packet-info size */
+#define HN_RNDIS_PKTINFO_SIZE(dlen)	\
+	__offsetof(struct rndis_pktinfo, rm_data[dlen])
+
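+/*
+ * Editor's example: HN_RNDIS_PKTINFO_SIZE(sizeof(uint32_t)) is the
+ * size of one pktinfo record carrying a single 32-bit payload, i.e.
+ * the fixed rndis_pktinfo header plus four data bytes.
+ */
+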
+#endif	/* !_IF_HNREG_H_ */


Property changes on: trunk/sys/dev/hyperv/netvsc/if_hnreg.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/netvsc/if_hnvar.h
===================================================================
--- trunk/sys/dev/hyperv/netvsc/if_hnvar.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/netvsc/if_hnvar.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,320 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016-2017 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/netvsc/if_hnvar.h 324574 2017-10-13 02:26:39Z sephe $
+ */
+
+#ifndef _IF_HNVAR_H_
+#define _IF_HNVAR_H_
+
+#define HN_USE_TXDESC_BUFRING
+
+#define HN_CHIM_SIZE			(15 * 1024 * 1024)
+
+#define HN_RXBUF_SIZE			(16 * 1024 * 1024)
+#define HN_RXBUF_SIZE_COMPAT		(15 * 1024 * 1024)
+
+#define HN_MTU_MAX			(65535 - ETHER_ADDR_LEN)
+
+#define HN_TXBR_SIZE			(128 * PAGE_SIZE)
+#define HN_RXBR_SIZE			(128 * PAGE_SIZE)
+
+#define HN_XACT_REQ_PGCNT		2
+#define HN_XACT_RESP_PGCNT		2
+#define HN_XACT_REQ_SIZE		(HN_XACT_REQ_PGCNT * PAGE_SIZE)
+#define HN_XACT_RESP_SIZE		(HN_XACT_RESP_PGCNT * PAGE_SIZE)
+
+#define HN_GPACNT_MAX			32
+
+struct hn_txdesc;
+#ifndef HN_USE_TXDESC_BUFRING
+SLIST_HEAD(hn_txdesc_list, hn_txdesc);
+#else
+struct buf_ring;
+#endif
+struct hn_tx_ring;
+
+struct hn_rx_ring {
+	struct ifnet	*hn_ifp;
+	struct ifnet	*hn_rxvf_ifp;	/* SR-IOV VF for RX */
+	struct hn_tx_ring *hn_txr;
+	void		*hn_pktbuf;
+	int		hn_pktbuf_len;
+	int		hn_rx_flags;	/* HN_RX_FLAG_ */
+	uint32_t	hn_mbuf_hash;	/* NDIS_HASH_ */
+	uint8_t		*hn_rxbuf;	/* shadow sc->hn_rxbuf */
+	int		hn_rx_idx;
+
+	/* Trust csum verification on host side */
+	int		hn_trust_hcsum;	/* HN_TRUST_HCSUM_ */
+	struct lro_ctrl	hn_lro;
+
+	u_long		hn_csum_ip;
+	u_long		hn_csum_tcp;
+	u_long		hn_csum_udp;
+	u_long		hn_csum_trusted;
+	u_long		hn_lro_tried;
+	u_long		hn_small_pkts;
+	u_long		hn_pkts;
+	u_long		hn_rss_pkts;
+	u_long		hn_ack_failed;
+
+	/* Rarely used fields */
+	struct sysctl_oid *hn_rx_sysctl_tree;
+
+	void		*hn_br;		/* TX/RX bufring */
+	struct hyperv_dma hn_br_dma;
+
+	struct vmbus_channel *hn_chan;
+} __aligned(CACHE_LINE_SIZE);
+
+#define HN_TRUST_HCSUM_IP	0x0001
+#define HN_TRUST_HCSUM_TCP	0x0002
+#define HN_TRUST_HCSUM_UDP	0x0004
+
+#define HN_RX_FLAG_ATTACHED	0x0001
+#define HN_RX_FLAG_BR_REF	0x0002
+#define HN_RX_FLAG_XPNT_VF	0x0004
+#define HN_RX_FLAG_UDP_HASH	0x0008
+
+struct hn_tx_ring {
+#ifndef HN_USE_TXDESC_BUFRING
+	struct mtx	hn_txlist_spin;
+	struct hn_txdesc_list hn_txlist;
+#else
+	struct buf_ring	*hn_txdesc_br;
+#endif
+	int		hn_txdesc_cnt;
+	int		hn_txdesc_avail;
+	u_short		hn_has_txeof;
+	u_short		hn_txdone_cnt;
+
+	int		hn_sched_tx;
+	void		(*hn_txeof)(struct hn_tx_ring *);
+	struct taskqueue *hn_tx_taskq;
+	struct task	hn_tx_task;
+	struct task	hn_txeof_task;
+
+	struct buf_ring	*hn_mbuf_br;
+	int		hn_oactive;
+	int		hn_tx_idx;
+	int		hn_tx_flags;
+
+	struct mtx	hn_tx_lock;
+	struct hn_softc	*hn_sc;
+	struct vmbus_channel *hn_chan;
+
+	int		hn_direct_tx_size;
+	int		hn_chim_size;
+	bus_dma_tag_t	hn_tx_data_dtag;
+	uint64_t	hn_csum_assist;
+
+	/* Applied packet transmission aggregation limits. */
+	int		hn_agg_szmax;
+	short		hn_agg_pktmax;
+	short		hn_agg_align;
+
+	/* Packet transmission aggregation states. */
+	struct hn_txdesc *hn_agg_txd;
+	int		hn_agg_szleft;
+	short		hn_agg_pktleft;
+	struct rndis_packet_msg *hn_agg_prevpkt;
+
+	/* Temporary stats for each send. */
+	int		hn_stat_size;
+	short		hn_stat_pkts;
+	short		hn_stat_mcasts;
+
+	int		(*hn_sendpkt)(struct hn_tx_ring *, struct hn_txdesc *);
+	int		hn_suspended;
+	int		hn_gpa_cnt;
+	struct vmbus_gpa hn_gpa[HN_GPACNT_MAX];
+
+	u_long		hn_no_txdescs;
+	u_long		hn_send_failed;
+	u_long		hn_txdma_failed;
+	u_long		hn_tx_collapsed;
+	u_long		hn_tx_chimney_tried;
+	u_long		hn_tx_chimney;
+	u_long		hn_pkts;
+	u_long		hn_sends;
+	u_long		hn_flush_failed;
+
+	/* Rarely used fields */
+	struct hn_txdesc *hn_txdesc;
+	bus_dma_tag_t	hn_tx_rndis_dtag;
+	struct sysctl_oid *hn_tx_sysctl_tree;
+} __aligned(CACHE_LINE_SIZE);
+
+#define HN_TX_FLAG_ATTACHED	0x0001
+#define HN_TX_FLAG_HASHVAL	0x0002	/* support HASHVAL pktinfo */
+
+/*
+ * Device-specific softc structure
+ */
+struct hn_softc {
+	struct ifnet    *hn_ifp;
+	struct arpcom   arpcom;
+	struct ifmedia	hn_media;
+	device_t        hn_dev;
+	int             hn_if_flags;
+	struct sx	hn_lock;
+	struct vmbus_channel *hn_prichan;
+
+	int		hn_rx_ring_cnt;
+	int		hn_rx_ring_inuse;
+	struct hn_rx_ring *hn_rx_ring;
+
+	struct rmlock	hn_vf_lock;
+	struct ifnet	*hn_vf_ifp;	/* SR-IOV VF */
+	uint32_t	hn_xvf_flags;	/* transparent VF flags */
+
+	int		hn_tx_ring_cnt;
+	int		hn_tx_ring_inuse;
+	struct hn_tx_ring *hn_tx_ring;
+
+	uint8_t		*hn_chim;
+	u_long		*hn_chim_bmap;
+	int		hn_chim_bmap_cnt;
+	int		hn_chim_cnt;
+	int		hn_chim_szmax;
+
+	int		hn_cpu;
+	struct taskqueue **hn_tx_taskqs;
+	struct sysctl_oid *hn_tx_sysctl_tree;
+	struct sysctl_oid *hn_rx_sysctl_tree;
+	struct vmbus_xact_ctx *hn_xact;
+	uint32_t	hn_nvs_ver;
+	uint32_t	hn_rx_filter;
+
+	/* Packet transmission aggregation user settings. */
+	int			hn_agg_size;
+	int			hn_agg_pkts;
+
+	struct taskqueue	*hn_mgmt_taskq;
+	struct taskqueue	*hn_mgmt_taskq0;
+	struct task		hn_link_task;
+	struct task		hn_netchg_init;
+	struct timeout_task	hn_netchg_status;
+	uint32_t		hn_link_flags;	/* HN_LINK_FLAG_ */
+
+	uint32_t		hn_caps;	/* HN_CAP_ */
+	uint32_t		hn_flags;	/* HN_FLAG_ */
+	u_int			hn_pollhz;
+
+	void			*hn_rxbuf;
+	uint32_t		hn_rxbuf_gpadl;
+	struct hyperv_dma	hn_rxbuf_dma;
+
+	uint32_t		hn_chim_gpadl;
+	struct hyperv_dma	hn_chim_dma;
+
+	uint32_t		hn_rndis_rid;
+	uint32_t		hn_ndis_ver;
+	int			hn_ndis_tso_szmax;
+	int			hn_ndis_tso_sgmin;
+	uint32_t		hn_rndis_agg_size;
+	uint32_t		hn_rndis_agg_pkts;
+	uint32_t		hn_rndis_agg_align;
+
+	int			hn_rss_ind_size;
+	uint32_t		hn_rss_hash;	/* setting, NDIS_HASH_ */
+	uint32_t		hn_rss_hcap;	/* caps, NDIS_HASH_ */
+	struct ndis_rssprm_toeplitz hn_rss;
+
+	eventhandler_tag	hn_ifaddr_evthand;
+	eventhandler_tag	hn_ifnet_evthand;
+	eventhandler_tag	hn_ifnet_atthand;
+	eventhandler_tag	hn_ifnet_dethand;
+	eventhandler_tag	hn_ifnet_lnkhand;
+
+	/*
+	 * Transparent VF delayed initialization.
+	 */
+	int			hn_vf_rdytick;	/* ticks, 0 == ready */
+	struct taskqueue	*hn_vf_taskq;
+	struct timeout_task	hn_vf_init;
+
+	/*
+	 * Saved information for VF under transparent mode.
+	 */
+	void			(*hn_vf_input)
+				(struct ifnet *, struct mbuf *);
+	int			hn_saved_caps;
+	u_int			hn_saved_tsomax;
+	u_int			hn_saved_tsosegcnt;
+	u_int			hn_saved_tsosegsz;
+};
+
+#define HN_FLAG_RXBUF_CONNECTED		0x0001
+#define HN_FLAG_CHIM_CONNECTED		0x0002
+#define HN_FLAG_HAS_RSSKEY		0x0004
+#define HN_FLAG_HAS_RSSIND		0x0008
+#define HN_FLAG_SYNTH_ATTACHED		0x0010
+#define HN_FLAG_NO_SLEEPING		0x0020
+#define HN_FLAG_RXBUF_REF		0x0040
+#define HN_FLAG_CHIM_REF		0x0080
+#define HN_FLAG_RXVF			0x0100
+
+#define HN_FLAG_ERRORS			(HN_FLAG_RXBUF_REF | HN_FLAG_CHIM_REF)
+
+#define HN_XVFFLAG_ENABLED		0x0001
+#define HN_XVFFLAG_ACCBPF		0x0002
+
+#define HN_NO_SLEEPING(sc)			\
+do {						\
+	(sc)->hn_flags |= HN_FLAG_NO_SLEEPING;	\
+} while (0)
+
+#define HN_SLEEPING_OK(sc)			\
+do {						\
+	(sc)->hn_flags &= ~HN_FLAG_NO_SLEEPING;	\
+} while (0)
+
+#define HN_CAN_SLEEP(sc)		\
+	(((sc)->hn_flags & HN_FLAG_NO_SLEEPING) == 0)
+
+#define HN_CAP_VLAN			0x0001
+#define HN_CAP_MTU			0x0002
+#define HN_CAP_IPCS			0x0004
+#define HN_CAP_TCP4CS			0x0008
+#define HN_CAP_TCP6CS			0x0010
+#define HN_CAP_UDP4CS			0x0020
+#define HN_CAP_UDP6CS			0x0040
+#define HN_CAP_TSO4			0x0080
+#define HN_CAP_TSO6			0x0100
+#define HN_CAP_HASHVAL			0x0200
+#define HN_CAP_UDPHASH			0x0400
+
+/* Capability description for use with printf(9) %b identifier. */
+#define HN_CAP_BITS				\
+	"\020\1VLAN\2MTU\3IPCS\4TCP4CS\5TCP6CS"	\
+	"\6UDP4CS\7UDP6CS\10TSO4\11TSO6\12HASHVAL\13UDPHASH"
+
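+/*
+ * Editor's example (not part of this commit): with the printf(9) %b
+ * identifier the capability word prints symbolically, e.g.
+ *
+ *	printf("caps: %b\n", sc->hn_caps, HN_CAP_BITS);
+ *
+ * renders a value of 0x0003 as "caps: 3<VLAN,MTU>".
+ */
+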
+#define HN_LINK_FLAG_LINKUP		0x0001
+#define HN_LINK_FLAG_NETCHG		0x0002
+
+#endif	/* !_IF_HNVAR_H_ */


Property changes on: trunk/sys/dev/hyperv/netvsc/if_hnvar.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/netvsc/ndis.h
===================================================================
--- trunk/sys/dev/hyperv/netvsc/ndis.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/netvsc/ndis.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,424 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016-2017 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/netvsc/ndis.h 324574 2017-10-13 02:26:39Z sephe $
+ */
+
+#ifndef _NET_NDIS_H_
+#define _NET_NDIS_H_
+
+#define	NDIS_MEDIA_STATE_CONNECTED	0
+#define	NDIS_MEDIA_STATE_DISCONNECTED	1
+
+#define	NDIS_NETCHANGE_TYPE_POSSIBLE	1
+#define	NDIS_NETCHANGE_TYPE_DEFINITE	2
+#define	NDIS_NETCHANGE_TYPE_FROMMEDIA	3
+
+#define	NDIS_OFFLOAD_SET_NOCHG		0
+#define	NDIS_OFFLOAD_SET_ON		1
+#define	NDIS_OFFLOAD_SET_OFF		2
+
+/* a.k.a. GRE MAC */
+#define	NDIS_ENCAP_TYPE_NVGRE		0x00000001
+
+#define	NDIS_HASH_FUNCTION_MASK		0x000000FF	/* see hash function */
+#define	NDIS_HASH_TYPE_MASK		0x00FFFF00	/* see hash type */
+
+/* hash function */
+#define	NDIS_HASH_FUNCTION_TOEPLITZ	0x00000001
+
+/* hash type */
+#define	NDIS_HASH_IPV4			0x00000100
+#define	NDIS_HASH_TCP_IPV4		0x00000200
+#define	NDIS_HASH_IPV6			0x00000400
+#define	NDIS_HASH_IPV6_EX		0x00000800
+#define	NDIS_HASH_TCP_IPV6		0x00001000
+#define	NDIS_HASH_TCP_IPV6_EX		0x00002000
+#define	NDIS_HASH_UDP_IPV4_X		0x00004000	/* XXX non-standard */
+
+#define	NDIS_HASH_ALL			(NDIS_HASH_IPV4 |	\
+					 NDIS_HASH_TCP_IPV4 |	\
+					 NDIS_HASH_IPV6 |	\
+					 NDIS_HASH_IPV6_EX |	\
+					 NDIS_HASH_TCP_IPV6 |	\
+					 NDIS_HASH_TCP_IPV6_EX |\
+					 NDIS_HASH_UDP_IPV4_X)
+
+#define	NDIS_HASH_STD			(NDIS_HASH_IPV4 |	\
+					 NDIS_HASH_TCP_IPV4 |	\
+					 NDIS_HASH_IPV6 |	\
+					 NDIS_HASH_IPV6_EX |	\
+					 NDIS_HASH_TCP_IPV6 |	\
+					 NDIS_HASH_TCP_IPV6_EX)
+
+/* Hash description for use with printf(9) %b identifier. */
+#define	NDIS_HASH_BITS			\
+	"\20\1TOEPLITZ\11IP4\12TCP4\13IP6\14IP6EX\15TCP6\16TCP6EX\17UDP4_X"
+
+#define	NDIS_HASH_KEYSIZE_TOEPLITZ	40
+#define	NDIS_HASH_INDCNT		128
+
+#define	NDIS_OBJTYPE_DEFAULT		0x80
+#define	NDIS_OBJTYPE_RSS_CAPS		0x88
+#define	NDIS_OBJTYPE_RSS_PARAMS		0x89
+#define	NDIS_OBJTYPE_OFFLOAD		0xa7
+
+struct ndis_object_hdr {
+	uint8_t			ndis_type;	/* NDIS_OBJTYPE_ */
+	uint8_t			ndis_rev;	/* type specific */
+	uint16_t		ndis_size;	/* incl. this hdr */
+};
+
+/*
+ * OID_TCP_OFFLOAD_PARAMETERS
+ * ndis_type: NDIS_OBJTYPE_DEFAULT
+ */
+struct ndis_offload_params {
+	struct ndis_object_hdr	ndis_hdr;
+	uint8_t			ndis_ip4csum;	/* NDIS_OFFLOAD_PARAM_ */
+	uint8_t			ndis_tcp4csum;	/* NDIS_OFFLOAD_PARAM_ */
+	uint8_t			ndis_udp4csum;	/* NDIS_OFFLOAD_PARAM_ */
+	uint8_t			ndis_tcp6csum;	/* NDIS_OFFLOAD_PARAM_ */
+	uint8_t			ndis_udp6csum;	/* NDIS_OFFLOAD_PARAM_ */
+	uint8_t			ndis_lsov1;	/* NDIS_OFFLOAD_PARAM_ */
+	uint8_t			ndis_ipsecv1;	/* NDIS_OFFLOAD_IPSECV1_ */
+	uint8_t			ndis_lsov2_ip4;	/* NDIS_OFFLOAD_LSOV2_ */
+	uint8_t			ndis_lsov2_ip6;	/* NDIS_OFFLOAD_LSOV2_ */
+	uint8_t			ndis_tcp4conn;	/* 0 */
+	uint8_t			ndis_tcp6conn;	/* 0 */
+	uint32_t		ndis_flags;	/* 0 */
+	/* NDIS >= 6.1 */
+	uint8_t			ndis_ipsecv2;	/* NDIS_OFFLOAD_IPSECV2_ */
+	uint8_t			ndis_ipsecv2_ip4;/* NDIS_OFFLOAD_IPSECV2_ */
+	/* NDIS >= 6.30 */
+	uint8_t			ndis_rsc_ip4;	/* NDIS_OFFLOAD_RSC_ */
+	uint8_t			ndis_rsc_ip6;	/* NDIS_OFFLOAD_RSC_ */
+	uint8_t			ndis_encap;	/* NDIS_OFFLOAD_SET_ */
+	uint8_t			ndis_encap_types;/* NDIS_ENCAP_TYPE_ */
+};
+
+#define	NDIS_OFFLOAD_PARAMS_SIZE	sizeof(struct ndis_offload_params)
+#define	NDIS_OFFLOAD_PARAMS_SIZE_6_1	\
+	__offsetof(struct ndis_offload_params, ndis_rsc_ip4)
+
+#define	NDIS_OFFLOAD_PARAMS_REV_2	2	/* NDIS 6.1 */
+#define	NDIS_OFFLOAD_PARAMS_REV_3	3	/* NDIS 6.30 */
+
+#define	NDIS_OFFLOAD_PARAM_NOCHG	0	/* common */
+#define	NDIS_OFFLOAD_PARAM_OFF		1
+#define	NDIS_OFFLOAD_PARAM_TX		2
+#define	NDIS_OFFLOAD_PARAM_RX		3
+#define	NDIS_OFFLOAD_PARAM_TXRX		4
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define	NDIS_OFFLOAD_LSOV1_OFF		1
+#define	NDIS_OFFLOAD_LSOV1_ON		2
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define	NDIS_OFFLOAD_IPSECV1_OFF	1
+#define	NDIS_OFFLOAD_IPSECV1_AH		2
+#define	NDIS_OFFLOAD_IPSECV1_ESP	3
+#define	NDIS_OFFLOAD_IPSECV1_AH_ESP	4
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define	NDIS_OFFLOAD_LSOV2_OFF		1
+#define	NDIS_OFFLOAD_LSOV2_ON		2
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define	NDIS_OFFLOAD_IPSECV2_OFF	1
+#define	NDIS_OFFLOAD_IPSECV2_AH		2
+#define	NDIS_OFFLOAD_IPSECV2_ESP	3
+#define	NDIS_OFFLOAD_IPSECV2_AH_ESP	4
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define	NDIS_OFFLOAD_RSC_OFF		1
+#define	NDIS_OFFLOAD_RSC_ON		2
+
+/*
+ * OID_GEN_RECEIVE_SCALE_CAPABILITIES
+ * ndis_type: NDIS_OBJTYPE_RSS_CAPS
+ */
+struct ndis_rss_caps {
+	struct ndis_object_hdr		ndis_hdr;
+	uint32_t			ndis_caps;	/* NDIS_RSS_CAP_ */
+	uint32_t			ndis_nmsi;	/* # of MSIs */
+	uint32_t			ndis_nrxr;	/* # of RX rings */
+	/* NDIS >= 6.30 */
+	uint16_t			ndis_nind;	/* # of indtbl ent. */
+	uint16_t			ndis_pad;
+};
+
+#define	NDIS_RSS_CAPS_SIZE		\
+	__offsetof(struct ndis_rss_caps, ndis_pad)
+#define	NDIS_RSS_CAPS_SIZE_6_0		\
+	__offsetof(struct ndis_rss_caps, ndis_nind)
+
+#define	NDIS_RSS_CAPS_REV_1		1	/* NDIS 6.{0,1,20} */
+#define	NDIS_RSS_CAPS_REV_2		2	/* NDIS 6.30 */
+
+#define	NDIS_RSS_CAP_MSI		0x01000000
+#define	NDIS_RSS_CAP_CLASSIFY_ISR	0x02000000
+#define	NDIS_RSS_CAP_CLASSIFY_DPC	0x04000000
+#define	NDIS_RSS_CAP_MSIX		0x08000000
+#define	NDIS_RSS_CAP_IPV4		0x00000100
+#define	NDIS_RSS_CAP_IPV6		0x00000200
+#define	NDIS_RSS_CAP_IPV6_EX		0x00000400
+#define	NDIS_RSS_CAP_HASH_TOEPLITZ	NDIS_HASH_FUNCTION_TOEPLITZ
+#define	NDIS_RSS_CAP_HASHFUNC_MASK	NDIS_HASH_FUNCTION_MASK
+
+/*
+ * OID_GEN_RECEIVE_SCALE_PARAMETERS
+ * ndis_type: NDIS_OBJTYPE_RSS_PARAMS
+ */
+struct ndis_rss_params {
+	struct ndis_object_hdr		ndis_hdr;
+	uint16_t			ndis_flags;	/* NDIS_RSS_FLAG_ */
+	uint16_t			ndis_bcpu;	/* base cpu 0 */
+	uint32_t			ndis_hash;	/* NDIS_HASH_ */
+	uint16_t			ndis_indsize;	/* indirect table */
+	uint32_t			ndis_indoffset;
+	uint16_t			ndis_keysize;	/* hash key */
+	uint32_t			ndis_keyoffset;
+	/* NDIS >= 6.20 */
+	uint32_t			ndis_cpumaskoffset;
+	uint32_t			ndis_cpumaskcnt;
+	uint32_t			ndis_cpumaskentsz;
+};
+
+#define	NDIS_RSS_PARAMS_SIZE		sizeof(struct ndis_rss_params)
+#define	NDIS_RSS_PARAMS_SIZE_6_0	\
+	__offsetof(struct ndis_rss_params, ndis_cpumaskoffset)
+
+#define	NDIS_RSS_PARAMS_REV_1		1	/* NDIS 6.0 */
+#define	NDIS_RSS_PARAMS_REV_2		2	/* NDIS 6.20 */
+
+#define	NDIS_RSS_FLAG_NONE		0x0000
+#define	NDIS_RSS_FLAG_BCPU_UNCHG	0x0001
+#define	NDIS_RSS_FLAG_HASH_UNCHG	0x0002
+#define	NDIS_RSS_FLAG_IND_UNCHG		0x0004
+#define	NDIS_RSS_FLAG_KEY_UNCHG		0x0008
+#define	NDIS_RSS_FLAG_DISABLE		0x0010
+
+/* Non-standard convenience struct. */
+struct ndis_rssprm_toeplitz {
+	struct ndis_rss_params		rss_params;
+	/* Toeplitz hash key */
+	uint8_t				rss_key[NDIS_HASH_KEYSIZE_TOEPLITZ];
+	/* Indirect table */
+	uint32_t			rss_ind[NDIS_HASH_INDCNT];
+};
+
+#define	NDIS_RSSPRM_TOEPLITZ_SIZE(nind)	\
+	__offsetof(struct ndis_rssprm_toeplitz, rss_ind[nind])
+
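+/*
+ * For example, NDIS_RSSPRM_TOEPLITZ_SIZE(NDIS_HASH_INDCNT) covers the RSS
+ * parameters, the 40-byte Toeplitz key and the full 128-entry indirection
+ * table.
+ */
+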
+/*
+ * OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES
+ * ndis_type: NDIS_OBJTYPE_OFFLOAD
+ */
+
+#define	NDIS_OFFLOAD_ENCAP_NONE		0x0000
+#define	NDIS_OFFLOAD_ENCAP_NULL		0x0001
+#define	NDIS_OFFLOAD_ENCAP_8023		0x0002
+#define	NDIS_OFFLOAD_ENCAP_8023PQ	0x0004
+#define	NDIS_OFFLOAD_ENCAP_8023PQ_OOB	0x0008
+#define	NDIS_OFFLOAD_ENCAP_RFC1483	0x0010
+
+struct ndis_csum_offload {
+	uint32_t			ndis_ip4_txenc;	/*NDIS_OFFLOAD_ENCAP_*/
+	uint32_t			ndis_ip4_txcsum;
+#define	NDIS_TXCSUM_CAP_IP4OPT		0x001
+#define	NDIS_TXCSUM_CAP_TCP4OPT		0x004
+#define	NDIS_TXCSUM_CAP_TCP4		0x010
+#define	NDIS_TXCSUM_CAP_UDP4		0x040
+#define	NDIS_TXCSUM_CAP_IP4		0x100
+	uint32_t			ndis_ip4_rxenc;	/*NDIS_OFFLOAD_ENCAP_*/
+	uint32_t			ndis_ip4_rxcsum;
+#define	NDIS_RXCSUM_CAP_IP4OPT		0x001
+#define	NDIS_RXCSUM_CAP_TCP4OPT		0x004
+#define	NDIS_RXCSUM_CAP_TCP4		0x010
+#define	NDIS_RXCSUM_CAP_UDP4		0x040
+#define	NDIS_RXCSUM_CAP_IP4		0x100
+	uint32_t			ndis_ip6_txenc;	/*NDIS_OFFLOAD_ENCAP_*/
+	uint32_t			ndis_ip6_txcsum;
+#define	NDIS_TXCSUM_CAP_IP6EXT		0x001
+#define	NDIS_TXCSUM_CAP_TCP6OPT		0x004
+#define	NDIS_TXCSUM_CAP_TCP6		0x010
+#define	NDIS_TXCSUM_CAP_UDP6		0x040
+	uint32_t			ndis_ip6_rxenc;	/*NDIS_OFFLOAD_ENCAP_*/
+	uint32_t			ndis_ip6_rxcsum;
+#define	NDIS_RXCSUM_CAP_IP6EXT		0x001
+#define	NDIS_RXCSUM_CAP_TCP6OPT		0x004
+#define	NDIS_RXCSUM_CAP_TCP6		0x010
+#define	NDIS_RXCSUM_CAP_UDP6		0x040
+};
+
+struct ndis_lsov1_offload {
+	uint32_t			ndis_encap;	/*NDIS_OFFLOAD_ENCAP_*/
+	uint32_t			ndis_maxsize;
+	uint32_t			ndis_minsegs;
+	uint32_t			ndis_opts;
+};
+
+struct ndis_ipsecv1_offload {
+	uint32_t			ndis_encap;	/*NDIS_OFFLOAD_ENCAP_*/
+	uint32_t			ndis_ah_esp;
+	uint32_t			ndis_xport_tun;
+	uint32_t			ndis_ip4_opts;
+	uint32_t			ndis_flags;
+	uint32_t			ndis_ip4_ah;
+	uint32_t			ndis_ip4_esp;
+};
+
+struct ndis_lsov2_offload {
+	uint32_t			ndis_ip4_encap;	/*NDIS_OFFLOAD_ENCAP_*/
+	uint32_t			ndis_ip4_maxsz;
+	uint32_t			ndis_ip4_minsg;
+	uint32_t			ndis_ip6_encap;	/*NDIS_OFFLOAD_ENCAP_*/
+	uint32_t			ndis_ip6_maxsz;
+	uint32_t			ndis_ip6_minsg;
+	uint32_t			ndis_ip6_opts;
+#define	NDIS_LSOV2_CAP_IP6EXT		0x001
+#define	NDIS_LSOV2_CAP_TCP6OPT		0x004
+};
+
+struct ndis_ipsecv2_offload {
+	uint32_t			ndis_encap;	/*NDIS_OFFLOAD_ENCAP_*/
+	uint16_t			ndis_ip6;
+	uint16_t			ndis_ip4opt;
+	uint16_t			ndis_ip6ext;
+	uint16_t			ndis_ah;
+	uint16_t			ndis_esp;
+	uint16_t			ndis_ah_esp;
+	uint16_t			ndis_xport;
+	uint16_t			ndis_tun;
+	uint16_t			ndis_xport_tun;
+	uint16_t			ndis_lso;
+	uint16_t			ndis_extseq;
+	uint32_t			ndis_udp_esp;
+	uint32_t			ndis_auth;
+	uint32_t			ndis_crypto;
+	uint32_t			ndis_sa_caps;
+};
+
+struct ndis_rsc_offload {
+	uint16_t			ndis_ip4;
+	uint16_t			ndis_ip6;
+};
+
+struct ndis_encap_offload {
+	uint32_t			ndis_flags;
+	uint32_t			ndis_maxhdr;
+};
+
+struct ndis_offload {
+	struct ndis_object_hdr		ndis_hdr;
+	struct ndis_csum_offload	ndis_csum;
+	struct ndis_lsov1_offload	ndis_lsov1;
+	struct ndis_ipsecv1_offload	ndis_ipsecv1;
+	struct ndis_lsov2_offload	ndis_lsov2;
+	uint32_t			ndis_flags;
+	/* NDIS >= 6.1 */
+	struct ndis_ipsecv2_offload	ndis_ipsecv2;
+	/* NDIS >= 6.30 */
+	struct ndis_rsc_offload		ndis_rsc;
+	struct ndis_encap_offload	ndis_encap_gre;
+};
+
+#define	NDIS_OFFLOAD_SIZE		sizeof(struct ndis_offload)
+#define	NDIS_OFFLOAD_SIZE_6_0		\
+	__offsetof(struct ndis_offload, ndis_ipsecv2)
+#define	NDIS_OFFLOAD_SIZE_6_1		\
+	__offsetof(struct ndis_offload, ndis_rsc)
+
+#define	NDIS_OFFLOAD_REV_1		1	/* NDIS 6.0 */
+#define	NDIS_OFFLOAD_REV_2		2	/* NDIS 6.1 */
+#define	NDIS_OFFLOAD_REV_3		3	/* NDIS 6.30 */
+
+/*
+ * Per-packet-info
+ */
+
+/* VLAN */
+#define	NDIS_VLAN_INFO_SIZE		sizeof(uint32_t)
+#define	NDIS_VLAN_INFO_PRI_MASK		0x0007
+#define	NDIS_VLAN_INFO_CFI_MASK		0x0008
+#define	NDIS_VLAN_INFO_ID_MASK		0xfff0
+#define	NDIS_VLAN_INFO_MAKE(id, pri, cfi)	\
+        (((pri) & NDIS_VLAN_INFO_PRI_MASK) |	\
+	 (((cfi) & 0x1) << 3) | (((id) & 0xfff) << 4))
+#define	NDIS_VLAN_INFO_ID(inf)		(((inf) & NDIS_VLAN_INFO_ID_MASK) >> 4)
+#define	NDIS_VLAN_INFO_CFI(inf)		(((inf) & NDIS_VLAN_INFO_CFI_MASK) >> 3)
+#define	NDIS_VLAN_INFO_PRI(inf)		((inf) & NDIS_VLAN_INFO_PRI_MASK)
+
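+/*
+ * Example: priority sits in bits 0-2, CFI in bit 3 and the VLAN id in
+ * bits 4-15, so NDIS_VLAN_INFO_MAKE(100, 5, 0) yields 0x645, from which
+ * NDIS_VLAN_INFO_ID() recovers 100 and NDIS_VLAN_INFO_PRI() recovers 5.
+ */
+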
+/* Reception checksum */
+#define	NDIS_RXCSUM_INFO_SIZE		sizeof(uint32_t)
+#define	NDIS_RXCSUM_INFO_TCPCS_FAILED	0x0001
+#define	NDIS_RXCSUM_INFO_UDPCS_FAILED	0x0002
+#define	NDIS_RXCSUM_INFO_IPCS_FAILED	0x0004
+#define	NDIS_RXCSUM_INFO_TCPCS_OK	0x0008
+#define	NDIS_RXCSUM_INFO_UDPCS_OK	0x0010
+#define	NDIS_RXCSUM_INFO_IPCS_OK	0x0020
+#define	NDIS_RXCSUM_INFO_LOOPBACK	0x0040
+#define	NDIS_RXCSUM_INFO_TCPCS_INVAL	0x0080
+#define	NDIS_RXCSUM_INFO_IPCS_INVAL	0x0100
+
+/* LSOv2 */
+#define	NDIS_LSO2_INFO_SIZE		sizeof(uint32_t)
+#define	NDIS_LSO2_INFO_MSS_MASK		0x000fffff
+#define	NDIS_LSO2_INFO_THOFF_MASK	0x3ff00000
+#define	NDIS_LSO2_INFO_ISLSO2		0x40000000
+#define	NDIS_LSO2_INFO_ISIPV6		0x80000000
+
+#define	NDIS_LSO2_INFO_MAKE(thoff, mss)				\
+	((((uint32_t)(mss)) & NDIS_LSO2_INFO_MSS_MASK) |	\
+	 ((((uint32_t)(thoff)) & 0x3ff) << 20) |		\
+	 NDIS_LSO2_INFO_ISLSO2)
+
+#define	NDIS_LSO2_INFO_MAKEIPV4(thoff, mss)			\
+	NDIS_LSO2_INFO_MAKE((thoff), (mss))
+
+#define	NDIS_LSO2_INFO_MAKEIPV6(thoff, mss)			\
+	(NDIS_LSO2_INFO_MAKE((thoff), (mss)) | NDIS_LSO2_INFO_ISIPV6)
+
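+/*
+ * Illustrative example, assuming a plain Ethernet + IPv4 frame: the TCP
+ * header starts at offset 14 + 20 = 34, so an LSOv2 send with a 1460-byte
+ * MSS would use NDIS_LSO2_INFO_MAKEIPV4(34, 1460), placing the MSS in
+ * bits 0-19 and the offset in bits 20-29, with NDIS_LSO2_INFO_ISLSO2 set.
+ */
+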
+/* Transmission checksum */
+#define	NDIS_TXCSUM_INFO_SIZE		sizeof(uint32_t)
+#define	NDIS_TXCSUM_INFO_IPV4		0x00000001
+#define	NDIS_TXCSUM_INFO_IPV6		0x00000002
+#define	NDIS_TXCSUM_INFO_TCPCS		0x00000004
+#define	NDIS_TXCSUM_INFO_UDPCS		0x00000008
+#define	NDIS_TXCSUM_INFO_IPCS		0x00000010
+#define	NDIS_TXCSUM_INFO_THOFF		0x03ff0000
+
+#define	NDIS_TXCSUM_INFO_MKL4CS(thoff, flag)			\
+	((((uint32_t)(thoff)) << 16) | (flag))
+
+#define	NDIS_TXCSUM_INFO_MKTCPCS(thoff)				\
+	NDIS_TXCSUM_INFO_MKL4CS((thoff), NDIS_TXCSUM_INFO_TCPCS)
+
+#define	NDIS_TXCSUM_INFO_MKUDPCS(thoff)				\
+	NDIS_TXCSUM_INFO_MKL4CS((thoff), NDIS_TXCSUM_INFO_UDPCS)
+
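+/*
+ * Assumed usage sketch, mirroring the LSOv2 example above: requesting TCP
+ * checksum offload for the same frame would combine NDIS_TXCSUM_INFO_IPV4
+ * with NDIS_TXCSUM_INFO_MKTCPCS(34), which places the transport header
+ * offset in the NDIS_TXCSUM_INFO_THOFF bits.
+ */
+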
+#endif	/* !_NET_NDIS_H_ */


Property changes on: trunk/sys/dev/hyperv/netvsc/ndis.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/pcib/vmbus_pcib.c
===================================================================
--- trunk/sys/dev/hyperv/pcib/vmbus_pcib.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/pcib/vmbus_pcib.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,1798 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016-2017 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/pcib/vmbus_pcib.c 324461 2017-10-10 02:22:34Z sephe $");
+
+#ifdef NEW_PCIB
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/sx.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/mutex.h>
+#include <sys/errno.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/pmap.h>
+
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/frame.h>
+#include <machine/pci_cfgreg.h>
+#include <machine/resource.h>
+
+#include <sys/pciio.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pci_private.h>
+#include <dev/pci/pcib_private.h>
+#include "pcib_if.h"
+
+#include <machine/intr_machdep.h>
+#include <x86/apicreg.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/hyperv_busdma.h>
+#include <dev/hyperv/include/vmbus_xact.h>
+#include <dev/hyperv/vmbus/vmbus_reg.h>
+#include <dev/hyperv/vmbus/vmbus_chanvar.h>
+
+#include "vmbus_if.h"
+
+#if __FreeBSD_version < 1100000
+typedef u_long rman_res_t;
+#define RM_MAX_END	(~(rman_res_t)0)
+#endif
+
+struct completion {
+	unsigned int done;
+	struct mtx lock;
+};
+
+static void
+init_completion(struct completion *c)
+{
+	memset(c, 0, sizeof(*c));
+	mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF);
+	c->done = 0;
+}
+
+static void
+free_completion(struct completion *c)
+{
+	mtx_destroy(&c->lock);
+}
+
+static void
+complete(struct completion *c)
+{
+	mtx_lock(&c->lock);
+	c->done++;
+	mtx_unlock(&c->lock);
+	wakeup(c);
+}
+
+static void
+wait_for_completion(struct completion *c)
+{
+	mtx_lock(&c->lock);
+	while (c->done == 0)
+		mtx_sleep(c, &c->lock, 0, "hvwfc", 0);
+	c->done--;
+	mtx_unlock(&c->lock);
+}
+
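+/*
+ * Typical usage pattern for the completion helpers above (sketch; the
+ * real call sites appear later in this file):
+ *
+ *	struct completion c;
+ *
+ *	init_completion(&c);
+ *	... send a request whose response handler calls complete(&c) ...
+ *	wait_for_completion(&c);
+ *	free_completion(&c);
+ */
+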
+#define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (minor)))
+
+enum {
+	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),
+	PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1
+};
+
+#define PCI_CONFIG_MMIO_LENGTH	0x2000
+#define CFG_PAGE_OFFSET 0x1000
+#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
+
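+/*
+ * Note (inferred from the config accessors further below): the 8KB MMIO
+ * window is split into an index page at offset 0, into which the target
+ * function's wslot is written, and a 4KB config page at CFG_PAGE_OFFSET
+ * through which that function's config space is then accessed.
+ */
+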
+/*
+ * Message Types
+ */
+
+enum pci_message_type {
+	/*
+	 * Version 1.1
+	 */
+	PCI_MESSAGE_BASE                = 0x42490000,
+	PCI_BUS_RELATIONS               = PCI_MESSAGE_BASE + 0,
+	PCI_QUERY_BUS_RELATIONS         = PCI_MESSAGE_BASE + 1,
+	PCI_POWER_STATE_CHANGE          = PCI_MESSAGE_BASE + 4,
+	PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5,
+	PCI_QUERY_RESOURCE_RESOURCES    = PCI_MESSAGE_BASE + 6,
+	PCI_BUS_D0ENTRY                 = PCI_MESSAGE_BASE + 7,
+	PCI_BUS_D0EXIT                  = PCI_MESSAGE_BASE + 8,
+	PCI_READ_BLOCK                  = PCI_MESSAGE_BASE + 9,
+	PCI_WRITE_BLOCK                 = PCI_MESSAGE_BASE + 0xA,
+	PCI_EJECT                       = PCI_MESSAGE_BASE + 0xB,
+	PCI_QUERY_STOP                  = PCI_MESSAGE_BASE + 0xC,
+	PCI_REENABLE                    = PCI_MESSAGE_BASE + 0xD,
+	PCI_QUERY_STOP_FAILED           = PCI_MESSAGE_BASE + 0xE,
+	PCI_EJECTION_COMPLETE           = PCI_MESSAGE_BASE + 0xF,
+	PCI_RESOURCES_ASSIGNED          = PCI_MESSAGE_BASE + 0x10,
+	PCI_RESOURCES_RELEASED          = PCI_MESSAGE_BASE + 0x11,
+	PCI_INVALIDATE_BLOCK            = PCI_MESSAGE_BASE + 0x12,
+	PCI_QUERY_PROTOCOL_VERSION      = PCI_MESSAGE_BASE + 0x13,
+	PCI_CREATE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x14,
+	PCI_DELETE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x15,
+	PCI_MESSAGE_MAXIMUM
+};
+
+/*
+ * Structures defining the virtual PCI Express protocol.
+ */
+
+union pci_version {
+	struct {
+		uint16_t minor_version;
+		uint16_t major_version;
+	} parts;
+	uint32_t version;
+} __packed;
+
+/*
+ * This representation is the one used in Windows, which is
+ * what is expected when sending this back and forth with
+ * the Hyper-V parent partition.
+ */
+union win_slot_encoding {
+	struct {
+		uint32_t	slot:5;
+		uint32_t	func:3;
+		uint32_t	reserved:24;
+	} bits;
+	uint32_t val;
+} __packed;
+
+struct pci_func_desc {
+	uint16_t	v_id;	/* vendor ID */
+	uint16_t	d_id;	/* device ID */
+	uint8_t		rev;
+	uint8_t		prog_intf;
+	uint8_t		subclass;
+	uint8_t		base_class;
+	uint32_t	subsystem_id;
+	union win_slot_encoding wslot;
+	uint32_t	ser;	/* serial number */
+} __packed;
+
+struct hv_msi_desc {
+	uint8_t		vector;
+	uint8_t		delivery_mode;
+	uint16_t	vector_count;
+	uint32_t	reserved;
+	uint64_t	cpu_mask;
+} __packed;
+
+struct tran_int_desc {
+	uint16_t	reserved;
+	uint16_t	vector_count;
+	uint32_t	data;
+	uint64_t	address;
+} __packed;
+
+struct pci_message {
+	uint32_t type;
+} __packed;
+
+struct pci_child_message {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+} __packed;
+
+struct pci_incoming_message {
+	struct vmbus_chanpkt_hdr hdr;
+	struct pci_message message_type;
+} __packed;
+
+struct pci_response {
+	struct vmbus_chanpkt_hdr hdr;
+	int32_t status;	/* negative values are failures */
+} __packed;
+
+struct pci_packet {
+	void (*completion_func)(void *context, struct pci_response *resp,
+	    int resp_packet_size);
+	void *compl_ctxt;
+
+	struct pci_message message[0];
+};
+
+/*
+ * Specific message types supporting the PCI protocol.
+ */
+
+struct pci_version_request {
+	struct pci_message message_type;
+	uint32_t protocol_version;
+	uint32_t is_last_attempt:1;
+	uint32_t reservedz:31;
+} __packed;
+
+struct pci_bus_d0_entry {
+	struct pci_message message_type;
+	uint32_t reserved;
+	uint64_t mmio_base;
+} __packed;
+
+struct pci_bus_relations {
+	struct pci_incoming_message incoming;
+	uint32_t device_count;
+	struct pci_func_desc func[0];
+} __packed;
+
+#define MAX_NUM_BARS	(PCIR_MAX_BAR_0 + 1)
+struct pci_q_res_req_response {
+	struct vmbus_chanpkt_hdr hdr;
+	int32_t status; /* negative values are failures */
+	uint32_t probed_bar[MAX_NUM_BARS];
+} __packed;
+
+struct pci_resources_assigned {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	uint8_t memory_range[0x14][MAX_NUM_BARS]; /* unused here */
+	uint32_t msi_descriptors;
+	uint32_t reserved[4];
+} __packed;
+
+struct pci_create_interrupt {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	struct hv_msi_desc int_desc;
+} __packed;
+
+struct pci_create_int_response {
+	struct pci_response response;
+	uint32_t reserved;
+	struct tran_int_desc int_desc;
+} __packed;
+
+struct pci_delete_interrupt {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	struct tran_int_desc int_desc;
+} __packed;
+
+struct pci_dev_incoming {
+	struct pci_incoming_message incoming;
+	union win_slot_encoding wslot;
+} __packed;
+
+struct pci_eject_response {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	uint32_t status;
+} __packed;
+
+/*
+ * Driver specific state.
+ */
+
+enum hv_pcibus_state {
+	hv_pcibus_init = 0,
+	hv_pcibus_installed,
+};
+
+struct hv_pcibus {
+	device_t pcib;
+	device_t pci_bus;
+	struct vmbus_pcib_softc *sc;
+
+	uint16_t pci_domain;
+
+	enum hv_pcibus_state state;
+
+	struct resource *cfg_res;
+
+	struct completion query_completion, *query_comp;
+
+	struct mtx config_lock; /* Avoid two threads writing index page */
+	struct mtx device_list_lock;    /* Protect lists below */
+	TAILQ_HEAD(, hv_pci_dev) children;
+	TAILQ_HEAD(, hv_dr_state) dr_list;
+
+	volatile int detaching;
+};
+
+struct hv_pci_dev {
+	TAILQ_ENTRY(hv_pci_dev) link;
+
+	struct pci_func_desc desc;
+
+	bool reported_missing;
+
+	struct hv_pcibus *hbus;
+	struct task eject_task;
+
+	TAILQ_HEAD(, hv_irq_desc) irq_desc_list;
+
+	/*
+	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
+	 * read it back, for each of the BAR offsets within config space.
+	 */
+	uint32_t probed_bar[MAX_NUM_BARS];
+};
+
+/*
+ * Tracks "Device Relations" messages from the host, which must be
+ * processed in order.
+ */
+struct hv_dr_work {
+	struct task task;
+	struct hv_pcibus *bus;
+};
+
+struct hv_dr_state {
+	TAILQ_ENTRY(hv_dr_state) link;
+	uint32_t device_count;
+	struct pci_func_desc func[0];
+};
+
+struct hv_irq_desc {
+	TAILQ_ENTRY(hv_irq_desc) link;
+	struct tran_int_desc desc;
+	int irq;
+};
+
+#define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
+#define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
+#define PCI_FUNC(devfn)         ((devfn) & 0x07)
+
+static uint32_t
+devfn_to_wslot(unsigned int devfn)
+{
+	union win_slot_encoding wslot;
+
+	wslot.val = 0;
+	wslot.bits.slot = PCI_SLOT(devfn);
+	wslot.bits.func = PCI_FUNC(devfn);
+
+	return (wslot.val);
+}
+
+static unsigned int
+wslot_to_devfn(uint32_t wslot)
+{
+	union win_slot_encoding encoding;
+	unsigned int slot;
+	unsigned int func;
+
+	encoding.val = wslot;
+
+	slot = encoding.bits.slot;
+	func = encoding.bits.func;
+
+	return (PCI_DEVFN(slot, func));
+}
+
+struct vmbus_pcib_softc {
+	struct vmbus_channel	*chan;
+	void *rx_buf;
+
+	struct taskqueue	*taskq;
+
+	struct hv_pcibus	*hbus;
+};
+
+/* {44C4F61D-4444-4400-9D52-802E27EDE19F} */
+static const struct hyperv_guid g_pass_through_dev_type = {
+	.hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44,
+	    0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F}
+};
+
+struct hv_pci_compl {
+	struct completion host_event;
+	int32_t completion_status;
+};
+
+struct q_res_req_compl {
+	struct completion host_event;
+	struct hv_pci_dev *hpdev;
+};
+
+struct compose_comp_ctxt {
+	struct hv_pci_compl comp_pkt;
+	struct tran_int_desc int_desc;
+};
+
+static void
+hv_pci_generic_compl(void *context, struct pci_response *resp,
+    int resp_packet_size)
+{
+	struct hv_pci_compl *comp_pkt = context;
+
+	if (resp_packet_size >= sizeof(struct pci_response))
+		comp_pkt->completion_status = resp->status;
+	else
+		comp_pkt->completion_status = -1;
+
+	complete(&comp_pkt->host_event);
+}
+
+static void
+q_resource_requirements(void *context, struct pci_response *resp,
+    int resp_packet_size)
+{
+	struct q_res_req_compl *completion = context;
+	struct pci_q_res_req_response *q_res_req =
+	    (struct pci_q_res_req_response *)resp;
+	int i;
+
+	if (resp->status < 0) {
+		printf("vmbus_pcib: failed to query resource requirements\n");
+	} else {
+		for (i = 0; i < MAX_NUM_BARS; i++)
+			completion->hpdev->probed_bar[i] =
+			    q_res_req->probed_bar[i];
+	}
+
+	complete(&completion->host_event);
+}
+
+static void
+hv_pci_compose_compl(void *context, struct pci_response *resp,
+    int resp_packet_size)
+{
+	struct compose_comp_ctxt *comp_pkt = context;
+	struct pci_create_int_response *int_resp =
+	    (struct pci_create_int_response *)resp;
+
+	comp_pkt->comp_pkt.completion_status = resp->status;
+	comp_pkt->int_desc = int_resp->int_desc;
+	complete(&comp_pkt->comp_pkt.host_event);
+}
+
+static void
+hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid)
+{
+	struct pci_delete_interrupt *int_pkt;
+	struct {
+		struct pci_packet pkt;
+		uint8_t buffer[sizeof(struct pci_delete_interrupt)];
+	} ctxt;
+
+	memset(&ctxt, 0, sizeof(ctxt));
+	int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
+	int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE;
+	int_pkt->wslot.val = hpdev->desc.wslot.val;
+	int_pkt->int_desc = hid->desc;
+
+	vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
+	    int_pkt, sizeof(*int_pkt), 0);
+
+	free(hid, M_DEVBUF);
+}
+
+static void
+hv_pci_delete_device(struct hv_pci_dev *hpdev)
+{
+	struct hv_pcibus *hbus = hpdev->hbus;
+	struct hv_irq_desc *hid, *tmp_hid;
+	device_t pci_dev;
+	int devfn;
+
+	devfn = wslot_to_devfn(hpdev->desc.wslot.val);
+
+	mtx_lock(&Giant);
+
+	pci_dev = pci_find_dbsf(hbus->pci_domain,
+	    0, PCI_SLOT(devfn), PCI_FUNC(devfn));
+	if (pci_dev)
+		device_delete_child(hbus->pci_bus, pci_dev);
+
+	mtx_unlock(&Giant);
+
+	mtx_lock(&hbus->device_list_lock);
+	TAILQ_REMOVE(&hbus->children, hpdev, link);
+	mtx_unlock(&hbus->device_list_lock);
+
+	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid)
+		hv_int_desc_free(hpdev, hid);
+
+	free(hpdev, M_DEVBUF);
+}
+
+static struct hv_pci_dev *
+new_pcichild_device(struct hv_pcibus *hbus, struct pci_func_desc *desc)
+{
+	struct hv_pci_dev *hpdev;
+	struct pci_child_message *res_req;
+	struct q_res_req_compl comp_pkt;
+	struct {
+		struct pci_packet pkt;
+		uint8_t buffer[sizeof(struct pci_child_message)];
+	} ctxt;
+	int ret;
+
+	hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO);
+	hpdev->hbus = hbus;
+
+	TAILQ_INIT(&hpdev->irq_desc_list);
+
+	init_completion(&comp_pkt.host_event);
+	comp_pkt.hpdev = hpdev;
+
+	ctxt.pkt.compl_ctxt = &comp_pkt;
+	ctxt.pkt.completion_func = q_resource_requirements;
+
+	res_req = (struct pci_child_message *)&ctxt.pkt.message;
+	res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
+	res_req->wslot.val = desc->wslot.val;
+
+	ret = vmbus_chan_send(hbus->sc->chan,
+	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
+	    res_req, sizeof(*res_req), (uint64_t)&ctxt.pkt);
+	if (ret)
+		goto err;
+
+	wait_for_completion(&comp_pkt.host_event);
+	free_completion(&comp_pkt.host_event);
+
+	hpdev->desc = *desc;
+
+	mtx_lock(&hbus->device_list_lock);
+	if (TAILQ_EMPTY(&hbus->children))
+		hbus->pci_domain = desc->ser & 0xFFFF;
+	TAILQ_INSERT_TAIL(&hbus->children, hpdev, link);
+	mtx_unlock(&hbus->device_list_lock);
+	return (hpdev);
+err:
+	free_completion(&comp_pkt.host_event);
+	free(hpdev, M_DEVBUF);
+	return (NULL);
+}
+
+#if __FreeBSD_version < 1100000
+
+/*
+ * Older FreeBSD versions lack BUS_RESCAN(), so copy the implementation
+ * from FreeBSD 11.
+ */
+
+static struct pci_devinfo *
+pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
+    int slot, int func, size_t dinfo_size)
+{
+	struct pci_devinfo *dinfo;
+
+	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
+	if (dinfo != NULL)
+		pci_add_child(dev, dinfo);
+
+	return (dinfo);
+}
+
+static int
+pci_rescan(device_t dev)
+{
+#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
+	device_t pcib = device_get_parent(dev);
+	struct pci_softc *sc;
+	device_t child, *devlist, *unchanged;
+	int devcount, error, i, j, maxslots, oldcount;
+	int busno, domain, s, f, pcifunchigh;
+	uint8_t hdrtype;
+
+	/* No need to check for ARI on a rescan. */
+	error = device_get_children(dev, &devlist, &devcount);
+	if (error)
+		return (error);
+	if (devcount != 0) {
+		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
+		    M_NOWAIT | M_ZERO);
+		if (unchanged == NULL) {
+			free(devlist, M_TEMP);
+			return (ENOMEM);
+		}
+	} else
+		unchanged = NULL;
+
+	sc = device_get_softc(dev);
+	domain = pcib_get_domain(dev);
+	busno = pcib_get_bus(dev);
+	maxslots = PCIB_MAXSLOTS(pcib);
+	for (s = 0; s <= maxslots; s++) {
+		/* If function 0 is not present, skip to the next slot. */
+		f = 0;
+		if (REG(PCIR_VENDOR, 2) == 0xffff)
+			continue;
+		pcifunchigh = 0;
+		hdrtype = REG(PCIR_HDRTYPE, 1);
+		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
+			continue;
+		if (hdrtype & PCIM_MFDEV)
+			pcifunchigh = PCIB_MAXFUNCS(pcib);
+		for (f = 0; f <= pcifunchigh; f++) {
+			if (REG(PCIR_VENDOR, 2) == 0xffff)
+				continue;
+
+			/*
+			 * Found a valid function.  Check if a
+			 * device_t for this device already exists.
+			 */
+			for (i = 0; i < devcount; i++) {
+				child = devlist[i];
+				if (child == NULL)
+					continue;
+				if (pci_get_slot(child) == s &&
+				    pci_get_function(child) == f) {
+					unchanged[i] = child;
+					goto next_func;
+				}
+			}
+
+			pci_identify_function(pcib, dev, domain, busno, s, f,
+			    sizeof(struct pci_devinfo));
+		next_func:;
+		}
+	}
+
+	/* Remove devices that are no longer present. */
+	for (i = 0; i < devcount; i++) {
+		if (unchanged[i] != NULL)
+			continue;
+		device_delete_child(dev, devlist[i]);
+	}
+
+	free(devlist, M_TEMP);
+	oldcount = devcount;
+
+	/* Try to attach the devices just added. */
+	error = device_get_children(dev, &devlist, &devcount);
+	if (error) {
+		free(unchanged, M_TEMP);
+		return (error);
+	}
+
+	for (i = 0; i < devcount; i++) {
+		for (j = 0; j < oldcount; j++) {
+			if (devlist[i] == unchanged[j])
+				goto next_device;
+		}
+
+		device_probe_and_attach(devlist[i]);
+	next_device:;
+	}
+
+	free(unchanged, M_TEMP);
+	free(devlist, M_TEMP);
+	return (0);
+#undef REG
+}
+
+#else
+
+static int
+pci_rescan(device_t dev)
+{
+	return (BUS_RESCAN(dev));
+}
+
+#endif
+
+static void
+pci_devices_present_work(void *arg, int pending __unused)
+{
+	struct hv_dr_work *dr_wrk = arg;
+	struct hv_dr_state *dr = NULL;
+	struct hv_pcibus *hbus;
+	uint32_t child_no;
+	bool found;
+	struct pci_func_desc *new_desc;
+	struct hv_pci_dev *hpdev, *tmp_hpdev;
+	struct completion *query_comp;
+	bool need_rescan = false;
+
+	hbus = dr_wrk->bus;
+	free(dr_wrk, M_DEVBUF);
+
+	/* Pull this off the queue and process it if it was the last one. */
+	mtx_lock(&hbus->device_list_lock);
+	while (!TAILQ_EMPTY(&hbus->dr_list)) {
+		dr = TAILQ_FIRST(&hbus->dr_list);
+		TAILQ_REMOVE(&hbus->dr_list, dr, link);
+
+		/* Throw this away if the list still has stuff in it. */
+		if (!TAILQ_EMPTY(&hbus->dr_list)) {
+			free(dr, M_DEVBUF);
+			continue;
+		}
+	}
+	mtx_unlock(&hbus->device_list_lock);
+
+	if (!dr)
+		return;
+
+	/* First, mark all existing children as reported missing. */
+	mtx_lock(&hbus->device_list_lock);
+	TAILQ_FOREACH(hpdev, &hbus->children, link)
+		hpdev->reported_missing = true;
+	mtx_unlock(&hbus->device_list_lock);
+
+	/* Next, add back any reported devices. */
+	for (child_no = 0; child_no < dr->device_count; child_no++) {
+		found = false;
+		new_desc = &dr->func[child_no];
+
+		mtx_lock(&hbus->device_list_lock);
+		TAILQ_FOREACH(hpdev, &hbus->children, link) {
+			if ((hpdev->desc.wslot.val ==
+			    new_desc->wslot.val) &&
+			    (hpdev->desc.v_id == new_desc->v_id) &&
+			    (hpdev->desc.d_id == new_desc->d_id) &&
+			    (hpdev->desc.ser == new_desc->ser)) {
+				hpdev->reported_missing = false;
+				found = true;
+				break;
+			}
+		}
+		mtx_unlock(&hbus->device_list_lock);
+
+		if (!found) {
+			if (!need_rescan)
+				need_rescan = true;
+
+			hpdev = new_pcichild_device(hbus, new_desc);
+			if (!hpdev)
+				printf("vmbus_pcib: failed to add a child\n");
+		}
+	}
+
+	/* Remove missing device(s), if any */
+	TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) {
+		if (hpdev->reported_missing)
+			hv_pci_delete_device(hpdev);
+	}
+
+	/* Rescan the bus to find any new device, if necessary. */
+	if (hbus->state == hv_pcibus_installed && need_rescan)
+		pci_rescan(hbus->pci_bus);
+
+	/* Wake up hv_pci_query_relations(), if it's waiting. */
+	query_comp = hbus->query_comp;
+	if (query_comp) {
+		hbus->query_comp = NULL;
+		complete(query_comp);
+	}
+
+	free(dr, M_DEVBUF);
+}
+
+static struct hv_pci_dev *
+get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot)
+{
+	struct hv_pci_dev *hpdev, *ret = NULL;
+
+	mtx_lock(&hbus->device_list_lock);
+	TAILQ_FOREACH(hpdev, &hbus->children, link) {
+		if (hpdev->desc.wslot.val == wslot) {
+			ret = hpdev;
+			break;
+		}
+	}
+	mtx_unlock(&hbus->device_list_lock);
+
+	return (ret);
+}
+
+static void
+hv_pci_devices_present(struct hv_pcibus *hbus,
+    struct pci_bus_relations *relations)
+{
+	struct hv_dr_state *dr;
+	struct hv_dr_work *dr_wrk;
+	unsigned long dr_size;
+
+	if (hbus->detaching && relations->device_count > 0)
+		return;
+
+	dr_size = offsetof(struct hv_dr_state, func) +
+	    (sizeof(struct pci_func_desc) * relations->device_count);
+	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
+
+	dr->device_count = relations->device_count;
+	if (dr->device_count != 0)
+		memcpy(dr->func, relations->func,
+		    sizeof(struct pci_func_desc) * dr->device_count);
+
+	mtx_lock(&hbus->device_list_lock);
+	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
+	mtx_unlock(&hbus->device_list_lock);
+
+	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
+	dr_wrk->bus = hbus;
+	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
+	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
+}
+
+static void
+hv_eject_device_work(void *arg, int pending __unused)
+{
+	struct hv_pci_dev *hpdev = arg;
+	union win_slot_encoding wslot = hpdev->desc.wslot;
+	struct hv_pcibus *hbus = hpdev->hbus;
+	struct pci_eject_response *eject_pkt;
+	struct {
+		struct pci_packet pkt;
+		uint8_t buffer[sizeof(struct pci_eject_response)];
+	} ctxt;
+
+	hv_pci_delete_device(hpdev);
+
+	memset(&ctxt, 0, sizeof(ctxt));
+	eject_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
+	eject_pkt->message_type.type = PCI_EJECTION_COMPLETE;
+	eject_pkt->wslot.val = wslot.val;
+	vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
+	    eject_pkt, sizeof(*eject_pkt), 0);
+}
+
+static void
+hv_pci_eject_device(struct hv_pci_dev *hpdev)
+{
+	struct hv_pcibus *hbus = hpdev->hbus;
+	struct taskqueue *taskq;
+
+	if (hbus->detaching)
+		return;
+
+	/*
+	 * Push this task into the same taskqueue on which
+	 * vmbus_pcib_attach() runs, so we're sure this task can't run
+	 * concurrently with vmbus_pcib_attach().
+	 */
+	TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev);
+	taskq = vmbus_chan_mgmt_tq(hbus->sc->chan);
+	taskqueue_enqueue(taskq, &hpdev->eject_task);
+}
+
+#define PCIB_PACKET_SIZE	0x100
+
+static void
+vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg)
+{
+	struct vmbus_pcib_softc *sc = arg;
+	struct hv_pcibus *hbus = sc->hbus;
+
+	void *buffer;
+	int bufferlen = PCIB_PACKET_SIZE;
+
+	struct pci_packet *comp_packet;
+	struct pci_response *response;
+	struct pci_incoming_message *new_msg;
+	struct pci_bus_relations *bus_rel;
+	struct pci_dev_incoming *dev_msg;
+	struct hv_pci_dev *hpdev;
+
+	buffer = sc->rx_buf;
+	do {
+		struct vmbus_chanpkt_hdr *pkt = buffer;
+		uint32_t bytes_rxed;
+		int ret;
+
+		bytes_rxed = bufferlen;
+		ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed);
+
+		if (ret == ENOBUFS) {
+			/* Handle large packet */
+			if (bufferlen > PCIB_PACKET_SIZE) {
+				free(buffer, M_DEVBUF);
+				buffer = NULL;
+			}
+
+			/* alloc new buffer */
+			buffer = malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO);
+			bufferlen = bytes_rxed;
+
+			continue;
+		}
+
+		if (ret != 0) {
+			/* ignore EIO or EAGAIN */
+			break;
+		}
+
+		if (bytes_rxed <= sizeof(struct pci_response))
+			continue;
+
+		switch (pkt->cph_type) {
+		case VMBUS_CHANPKT_TYPE_COMP:
+			comp_packet = (struct pci_packet *)pkt->cph_xactid;
+			response = (struct pci_response *)pkt;
+			comp_packet->completion_func(comp_packet->compl_ctxt,
+			    response, bytes_rxed);
+			break;
+		case VMBUS_CHANPKT_TYPE_INBAND:
+			new_msg = (struct pci_incoming_message *)buffer;
+
+			switch (new_msg->message_type.type) {
+			case PCI_BUS_RELATIONS:
+				bus_rel = (struct pci_bus_relations *)buffer;
+
+				if (bus_rel->device_count == 0)
+					break;
+
+				if (bytes_rxed <
+				    offsetof(struct pci_bus_relations, func) +
+				        (sizeof(struct pci_func_desc) *
+				            (bus_rel->device_count)))
+					break;
+
+				hv_pci_devices_present(hbus, bus_rel);
+				break;
+
+			case PCI_EJECT:
+				dev_msg = (struct pci_dev_incoming *)buffer;
+				hpdev = get_pcichild_wslot(hbus,
+				    dev_msg->wslot.val);
+
+				if (hpdev)
+					hv_pci_eject_device(hpdev);
+
+				break;
+			default:
+				printf("vmbus_pcib: Unknown msg type 0x%x\n",
+				    new_msg->message_type.type);
+				break;
+			}
+			break;
+		default:
+			printf("vmbus_pcib: Unknown VMBus msg type %hu\n",
+			    pkt->cph_type);
+			break;
+		}
+	} while (1);
+
+	if (bufferlen > PCIB_PACKET_SIZE)
+		free(buffer, M_DEVBUF);
+}
+
+static int
+hv_pci_protocol_negotiation(struct hv_pcibus *hbus)
+{
+	struct pci_version_request *version_req;
+	struct hv_pci_compl comp_pkt;
+	struct {
+		struct pci_packet pkt;
+		uint8_t buffer[sizeof(struct pci_version_request)];
+	} ctxt;
+	int ret;
+
+	init_completion(&comp_pkt.host_event);
+
+	ctxt.pkt.completion_func = hv_pci_generic_compl;
+	ctxt.pkt.compl_ctxt = &comp_pkt;
+	version_req = (struct pci_version_request *)&ctxt.pkt.message;
+	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
+	version_req->protocol_version = PCI_PROTOCOL_VERSION_CURRENT;
+	version_req->is_last_attempt = 1;
+
+	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
+	    VMBUS_CHANPKT_FLAG_RC, version_req, sizeof(*version_req),
+	    (uint64_t)&ctxt.pkt);
+	if (ret)
+		goto out;
+
+	wait_for_completion(&comp_pkt.host_event);
+
+	if (comp_pkt.completion_status < 0) {
+		device_printf(hbus->pcib,
+		    "vmbus_pcib version negotiation failed: %x\n",
+		    comp_pkt.completion_status);
+		ret = EPROTO;
+	} else {
+		ret = 0;
+	}
+out:
+	free_completion(&comp_pkt.host_event);
+	return (ret);
+}
+
+/* Ask the host to send along the list of child devices */
+static int
+hv_pci_query_relations(struct hv_pcibus *hbus)
+{
+	struct pci_message message;
+	int ret;
+
+	message.type = PCI_QUERY_BUS_RELATIONS;
+	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
+	    &message, sizeof(message), 0);
+	return (ret);
+}
+
+static int
+hv_pci_enter_d0(struct hv_pcibus *hbus)
+{
+	struct pci_bus_d0_entry *d0_entry;
+	struct hv_pci_compl comp_pkt;
+	struct {
+		struct pci_packet pkt;
+		uint8_t buffer[sizeof(struct pci_bus_d0_entry)];
+	} ctxt;
+	int ret;
+
+	/*
+	 * Tell the host that the bus is ready to use and has moved into the
+	 * powered-on state.  This includes telling the host which region
+	 * of memory-mapped I/O space has been chosen for configuration space
+	 * access.
+	 */
+	init_completion(&comp_pkt.host_event);
+
+	ctxt.pkt.completion_func = hv_pci_generic_compl;
+	ctxt.pkt.compl_ctxt = &comp_pkt;
+
+	d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message;
+	memset(d0_entry, 0, sizeof(*d0_entry));
+	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
+	d0_entry->mmio_base = rman_get_start(hbus->cfg_res);
+
+	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
+	    VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry),
+	    (uint64_t)&ctxt.pkt);
+	if (ret)
+		goto out;
+
+	wait_for_completion(&comp_pkt.host_event);
+
+	if (comp_pkt.completion_status < 0) {
+		device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n");
+		ret = EPROTO;
+	} else {
+		ret = 0;
+	}
+
+out:
+	free_completion(&comp_pkt.host_event);
+	return (ret);
+}
+
+/*
+ * It looks like this is only needed by Windows VMs, but send the message
+ * anyway to keep the host happy.
+ */
+static int
+hv_send_resources_allocated(struct hv_pcibus *hbus)
+{
+	struct pci_resources_assigned *res_assigned;
+	struct hv_pci_compl comp_pkt;
+	struct hv_pci_dev *hpdev;
+	struct pci_packet *pkt;
+	uint32_t wslot;
+	int ret = 0;
+
+	pkt = malloc(sizeof(*pkt) + sizeof(*res_assigned),
+	    M_DEVBUF, M_WAITOK | M_ZERO);
+
+	for (wslot = 0; wslot < 256; wslot++) {
+		hpdev = get_pcichild_wslot(hbus, wslot);
+		if (!hpdev)
+			continue;
+
+		init_completion(&comp_pkt.host_event);
+
+		memset(pkt, 0, sizeof(*pkt) + sizeof(*res_assigned));
+		pkt->completion_func = hv_pci_generic_compl;
+		pkt->compl_ctxt = &comp_pkt;
+
+		res_assigned = (struct pci_resources_assigned *)&pkt->message;
+		res_assigned->message_type.type = PCI_RESOURCES_ASSIGNED;
+		res_assigned->wslot.val = hpdev->desc.wslot.val;
+
+		ret = vmbus_chan_send(hbus->sc->chan,
+		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
+		    &pkt->message, sizeof(*res_assigned), (uint64_t)pkt);
+		if (ret) {
+			free_completion(&comp_pkt.host_event);
+			break;
+		}
+
+		wait_for_completion(&comp_pkt.host_event);
+		free_completion(&comp_pkt.host_event);
+
+		if (comp_pkt.completion_status < 0) {
+			ret = EPROTO;
+			device_printf(hbus->pcib,
+			    "failed to send PCI_RESOURCES_ASSIGNED\n");
+			break;
+		}
+	}
+
+	free(pkt, M_DEVBUF);
+	return (ret);
+}
+
+static int
+hv_send_resources_released(struct hv_pcibus *hbus)
+{
+	struct pci_child_message pkt;
+	struct hv_pci_dev *hpdev;
+	uint32_t wslot;
+	int ret;
+
+	for (wslot = 0; wslot < 256; wslot++) {
+		hpdev = get_pcichild_wslot(hbus, wslot);
+		if (!hpdev)
+			continue;
+
+		pkt.message_type.type = PCI_RESOURCES_RELEASED;
+		pkt.wslot.val = hpdev->desc.wslot.val;
+
+		ret = vmbus_chan_send(hbus->sc->chan,
+		    VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0);
+		if (ret)
+			return (ret);
+	}
+
+	return (0);
+}
+
+#define hv_cfg_read(x, s)						\
+static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus,	\
+    bus_size_t offset)							\
+{									\
+	return (bus_read_##s(bus->cfg_res, offset));			\
+}
+
+#define hv_cfg_write(x, s)						\
+static inline void hv_cfg_write_##s(struct hv_pcibus *bus,		\
+    bus_size_t offset, uint##x##_t val)					\
+{									\
+	return (bus_write_##s(bus->cfg_res, offset, val));		\
+}
+
+hv_cfg_read(8, 1)
+hv_cfg_read(16, 2)
+hv_cfg_read(32, 4)
+
+hv_cfg_write(8, 1)
+hv_cfg_write(16, 2)
+hv_cfg_write(32, 4)
+
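+/*
+ * Note: each instantiation above expands to a small inline accessor;
+ * e.g. hv_cfg_read(8, 1) generates the equivalent of:
+ *
+ *	static inline uint8_t
+ *	hv_cfg_read_1(struct hv_pcibus *bus, bus_size_t offset)
+ *	{
+ *		return (bus_read_1(bus->cfg_res, offset));
+ *	}
+ */
+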
+static void
+_hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size,
+    uint32_t *val)
+{
+	struct hv_pcibus *hbus = hpdev->hbus;
+	bus_size_t addr = CFG_PAGE_OFFSET + where;
+
+	/*
+	 * If the attempt is to read the IDs or the ROM BAR, simulate that.
+	 */
+	if (where + size <= PCIR_COMMAND) {
+		memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size);
+	} else if (where >= PCIR_REVID && where + size <=
+		   PCIR_CACHELNSZ) {
+		memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where -
+		       PCIR_REVID, size);
+	} else if (where >= PCIR_SUBVEND_0 && where + size <=
+		   PCIR_BIOS) {
+		memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where -
+		       PCIR_SUBVEND_0, size);
+	} else if (where >= PCIR_BIOS && where + size <=
+		   PCIR_CAP_PTR) {
+		/* ROM BARs are unimplemented */
+		*val = 0;
+	} else if ((where >= PCIR_INTLINE && where + size <=
+		   PCIR_INTPIN) || (where == PCIR_INTPIN && size == 1)) {
+		/*
+		 * Interrupt Line and Interrupt PIN are hard-wired to zero
+		 * because this front-end only supports message-signaled
+		 * interrupts.
+		 */
+		*val = 0;
+	} else if (where + size <= CFG_PAGE_SIZE) {
+		mtx_lock(&hbus->config_lock);
+
+		/* Choose the function to be read. */
+		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);
+
+		/* Make sure the function was chosen before we start reading. */
+		mb();
+
+		/* Read from that function's config space. */
+		switch (size) {
+		case 1:
+			*((uint8_t *)val) = hv_cfg_read_1(hbus, addr);
+			break;
+		case 2:
+			*((uint16_t *)val) = hv_cfg_read_2(hbus, addr);
+			break;
+		default:
+			*((uint32_t *)val) = hv_cfg_read_4(hbus, addr);
+			break;
+		}
+		/*
+		 * Make sure the read was done before we release the lock,
+		 * allowing consecutive reads/writes.
+		 */
+		mb();
+
+		mtx_unlock(&hbus->config_lock);
+	} else {
+		/* Invalid config read: it's unlikely to reach here. */
+		memset(val, 0, size);
+	}
+}
+
+static void
+_hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size,
+    uint32_t val)
+{
+	struct hv_pcibus *hbus = hpdev->hbus;
+	bus_size_t addr = CFG_PAGE_OFFSET + where;
+
+	/* SSIDs and ROM BARs are read-only */
+	if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR)
+		return;
+
+	if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) {
+		mtx_lock(&hbus->config_lock);
+
+		/* Choose the function to be written. */
+		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);
+
+		/* Make sure the function was chosen before we start writing. */
+		wmb();
+
+		/* Write to that function's config space. */
+		switch (size) {
+		case 1:
+			hv_cfg_write_1(hbus, addr, (uint8_t)val);
+			break;
+		case 2:
+			hv_cfg_write_2(hbus, addr, (uint16_t)val);
+			break;
+		default:
+			hv_cfg_write_4(hbus, addr, (uint32_t)val);
+			break;
+		}
+
+		/*
+		 * Make sure the write was done before we release the lock,
+		 * allowing consecutive reads/writes.
+		 */
+		mb();
+
+		mtx_unlock(&hbus->config_lock);
+	} else {
+		/* Invalid config write: it's unlikely to reach here. */
+		return;
+	}
+}
+
+static void
+vmbus_pcib_set_detaching(void *arg, int pending __unused)
+{
+	struct hv_pcibus *hbus = arg;
+
+	atomic_set_int(&hbus->detaching, 1);
+}
+
+static void
+vmbus_pcib_pre_detach(struct hv_pcibus *hbus)
+{
+	struct task task;
+
+	TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus);
+
+	/*
+	 * Make sure the channel callback won't push any possible new
+	 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq.
+	 */
+	vmbus_chan_run_task(hbus->sc->chan, &task);
+
+	taskqueue_drain_all(hbus->sc->taskq);
+}
+
+/*
+ * Standard probe entry point.
+ */
+static int
+vmbus_pcib_probe(device_t dev)
+{
+	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
+	    &g_pass_through_dev_type) == 0) {
+		device_set_desc(dev, "Hyper-V PCI Express Pass Through");
+		return (BUS_PROBE_DEFAULT);
+	}
+	return (ENXIO);
+}
+
+/*
+ * Standard attach entry point.
+ */
+static int
+vmbus_pcib_attach(device_t dev)
+{
+	const int pci_ring_size = (4 * PAGE_SIZE);
+	const struct hyperv_guid *inst_guid;
+	struct vmbus_channel *channel;
+	struct vmbus_pcib_softc *sc;
+	struct hv_pcibus *hbus;
+	int rid = 0;
+	int ret;
+
+	hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO);
+	hbus->pcib = dev;
+
+	channel = vmbus_get_channel(dev);
+	inst_guid = vmbus_chan_guid_inst(channel);
+	hbus->pci_domain = inst_guid->hv_guid[9] |
+			  (inst_guid->hv_guid[8] << 8);
+
+	mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF);
+	mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF);
+	TAILQ_INIT(&hbus->children);
+	TAILQ_INIT(&hbus->dr_list);
+
+	hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid,
+	    0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH,
+	    RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));
+
+	if (!hbus->cfg_res) {
+		device_printf(dev, "failed to get resource for cfg window\n");
+		ret = ENXIO;
+		goto free_bus;
+	}
+
+	sc = device_get_softc(dev);
+	sc->chan = channel;
+	sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
+	sc->hbus = hbus;
+
+	/*
+	 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT
+	 * messages. NB: we can't handle the messages in the channel callback
+	 * directly, because the message handlers need to send new messages
+	 * to the host and wait for the host's completion messages, which
+	 * must also be handled by the channel callback.
+	 */
+	sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK,
+	    taskqueue_thread_enqueue, &sc->taskq);
+	taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq");
+
+	hbus->sc = sc;
+
+	init_completion(&hbus->query_completion);
+	hbus->query_comp = &hbus->query_completion;
+
+	ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size,
+		NULL, 0, vmbus_pcib_on_channel_callback, sc);
+	if (ret)
+		goto free_res;
+
+	ret = hv_pci_protocol_negotiation(hbus);
+	if (ret)
+		goto vmbus_close;
+
+	ret = hv_pci_query_relations(hbus);
+	if (ret)
+		goto vmbus_close;
+	wait_for_completion(hbus->query_comp);
+
+	ret = hv_pci_enter_d0(hbus);
+	if (ret)
+		goto vmbus_close;
+
+	ret = hv_send_resources_allocated(hbus);
+	if (ret)
+		goto vmbus_close;
+
+	hbus->pci_bus = device_add_child(dev, "pci", -1);
+	if (!hbus->pci_bus) {
+		device_printf(dev, "failed to create pci bus\n");
+		ret = ENXIO;
+		goto vmbus_close;
+	}
+
+	bus_generic_attach(dev);
+
+	hbus->state = hv_pcibus_installed;
+
+	return (0);
+
+vmbus_close:
+	vmbus_pcib_pre_detach(hbus);
+	vmbus_chan_close(sc->chan);
+free_res:
+	taskqueue_free(sc->taskq);
+	free_completion(&hbus->query_completion);
+	free(sc->rx_buf, M_DEVBUF);
+	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
+free_bus:
+	mtx_destroy(&hbus->device_list_lock);
+	mtx_destroy(&hbus->config_lock);
+	free(hbus, M_DEVBUF);
+	return (ret);
+}
+
+/*
+ * Standard detach entry point
+ */
+static int
+vmbus_pcib_detach(device_t dev)
+{
+	struct vmbus_pcib_softc *sc = device_get_softc(dev);
+	struct hv_pcibus *hbus = sc->hbus;
+	struct pci_message teardown_packet;
+	struct pci_bus_relations relations;
+	int ret;
+
+	vmbus_pcib_pre_detach(hbus);
+
+	if (hbus->state == hv_pcibus_installed)
+		bus_generic_detach(dev);
+
+	/* Delete any children which might still exist. */
+	memset(&relations, 0, sizeof(relations));
+	hv_pci_devices_present(hbus, &relations);
+
+	ret = hv_send_resources_released(hbus);
+	if (ret)
+		device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n");
+
+	teardown_packet.type = PCI_BUS_D0EXIT;
+	ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
+	    &teardown_packet, sizeof(struct pci_message), 0);
+	if (ret)
+		device_printf(dev, "failed to send PCI_BUS_D0EXIT\n");
+
+	taskqueue_drain_all(hbus->sc->taskq);
+	vmbus_chan_close(sc->chan);
+	taskqueue_free(sc->taskq);
+
+	free_completion(&hbus->query_completion);
+	free(sc->rx_buf, M_DEVBUF);
+	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
+
+	mtx_destroy(&hbus->device_list_lock);
+	mtx_destroy(&hbus->config_lock);
+	free(hbus, M_DEVBUF);
+
+	return (0);
+}
+
+static int
+vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val)
+{
+	struct vmbus_pcib_softc *sc = device_get_softc(dev);
+
+	switch (which) {
+	case PCIB_IVAR_DOMAIN:
+		*val = sc->hbus->pci_domain;
+		return (0);
+
+	case PCIB_IVAR_BUS:
+		/* There is only bus 0. */
+		*val = 0;
+		return (0);
+	}
+	return (ENOENT);
+}
+
+static int
+vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val)
+{
+	return (ENOENT);
+}
+
+static struct resource *
+vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
+	rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
+{
+	unsigned int bar_no;
+	struct hv_pci_dev *hpdev;
+	struct vmbus_pcib_softc *sc = device_get_softc(dev);
+	struct resource *res;
+	unsigned int devfn;
+
+	if (type == PCI_RES_BUS)
+		return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid,
+		    start, end, count, flags));
+
+	/* Devices with port I/O BAR are not supported. */
+	if (type == SYS_RES_IOPORT)
+		return (NULL);
+
+	if (type == SYS_RES_MEMORY) {
+		devfn = PCI_DEVFN(pci_get_slot(child),
+		    pci_get_function(child));
+		hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
+		if (!hpdev)
+			return (NULL);
+
+		bar_no = PCI_RID2BAR(*rid);
+		if (bar_no >= MAX_NUM_BARS)
+			return (NULL);
+
+		/* Make sure a 32-bit BAR gets a 32-bit address */
+		if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64))
+			end = ulmin(end, 0xFFFFFFFF);
+	}
+
+	res = bus_generic_alloc_resource(dev, child, type, rid,
+		start, end, count, flags);
+	/*
+	 * If this is a request for a specific range, assume it is
+	 * correct and pass it up to the parent.
+	 */
+	if (res == NULL && start + count - 1 == end)
+		res = bus_generic_alloc_resource(dev, child, type, rid,
+		    start, end, count, flags);
+	return (res);
+}
+
+static int
+vmbus_pcib_release_resource(device_t dev, device_t child, int type, int rid,
+    struct resource *r)
+{
+	struct vmbus_pcib_softc *sc = device_get_softc(dev);
+
+	if (type == PCI_RES_BUS)
+		return (pci_domain_release_bus(sc->hbus->pci_domain, child,
+		    rid, r));
+
+	if (type == SYS_RES_IOPORT)
+		return (EINVAL);
+
+	return (bus_generic_release_resource(dev, child, type, rid, r));
+}
+
+#if __FreeBSD_version >= 1100000
+static int
+vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op,
+    size_t setsize, cpuset_t *cpuset)
+{
+	return (bus_get_cpus(pcib, op, setsize, cpuset));
+}
+#endif
+
+static uint32_t
+vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
+    u_int reg, int bytes)
+{
+	struct vmbus_pcib_softc *sc = device_get_softc(dev);
+	struct hv_pci_dev *hpdev;
+	unsigned int devfn = PCI_DEVFN(slot, func);
+	uint32_t data = 0;
+
+	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
+
+	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
+	if (!hpdev)
+		return (~0);
+
+	_hv_pcifront_read_config(hpdev, reg, bytes, &data);
+
+	return (data);
+}
+
+static void
+vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
+    u_int reg, uint32_t data, int bytes)
+{
+	struct vmbus_pcib_softc *sc = device_get_softc(dev);
+	struct hv_pci_dev *hpdev;
+	unsigned int devfn = PCI_DEVFN(slot, func);
+
+	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
+
+	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
+	if (!hpdev)
+		return;
+
+	_hv_pcifront_write_config(hpdev, reg, bytes, data);
+}
+
+static int
+vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin)
+{
+	/* We only support MSI/MSI-X; INTx interrupts are not supported. */
+	return (PCI_INVALID_IRQ);
+}
+
+static int
+vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count,
+    int maxcount, int *irqs)
+{
+	return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount,
+	    irqs));
+}
+
+static int
+vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
+{
+	return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs));
+}
+
+static int
+vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
+{
+	return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq));
+}
+
+static int
+vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq)
+{
+	return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq));
+}
+
+#define	MSI_INTEL_ADDR_DEST	0x000ff000
+#define	MSI_INTEL_DATA_INTVEC	IOART_INTVEC	/* Interrupt vector. */
+#define	MSI_INTEL_DATA_DELFIXED	IOART_DELFIXED
+
+static int
+vmbus_pcib_map_msi(device_t pcib, device_t child, int irq,
+    uint64_t *addr, uint32_t *data)
+{
+	unsigned int devfn;
+	struct hv_pci_dev *hpdev;
+
+	uint64_t v_addr;
+	uint32_t v_data;
+	struct hv_irq_desc *hid, *tmp_hid;
+	unsigned int cpu, vcpu_id;
+	unsigned int vector;
+
+	struct vmbus_pcib_softc *sc = device_get_softc(pcib);
+	struct pci_create_interrupt *int_pkt;
+	struct compose_comp_ctxt comp;
+	struct {
+		struct pci_packet pkt;
+		uint8_t buffer[sizeof(struct pci_create_interrupt)];
+	} ctxt;
+
+	int ret;
+
+	devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child));
+	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
+	if (!hpdev)
+		return (ENOENT);
+
+	ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq,
+	    &v_addr, &v_data);
+	if (ret)
+		return (ret);
+
+	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) {
+		if (hid->irq == irq) {
+			TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link);
+			hv_int_desc_free(hpdev, hid);
+			break;
+		}
+	}
+
+	cpu = (v_addr & MSI_INTEL_ADDR_DEST) >> 12;
+	vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
+	vector = v_data & MSI_INTEL_DATA_INTVEC;
+
+	init_completion(&comp.comp_pkt.host_event);
+
+	memset(&ctxt, 0, sizeof(ctxt));
+	ctxt.pkt.completion_func = hv_pci_compose_compl;
+	ctxt.pkt.compl_ctxt = ∁
+
+	int_pkt = (struct pci_create_interrupt *)&ctxt.pkt.message;
+	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
+	int_pkt->wslot.val = hpdev->desc.wslot.val;
+	int_pkt->int_desc.vector = vector;
+	int_pkt->int_desc.vector_count = 1;
+	int_pkt->int_desc.delivery_mode = MSI_INTEL_DATA_DELFIXED;
+	int_pkt->int_desc.cpu_mask = 1ULL << vcpu_id;
+
+	ret = vmbus_chan_send(sc->chan,	VMBUS_CHANPKT_TYPE_INBAND,
+	    VMBUS_CHANPKT_FLAG_RC, int_pkt, sizeof(*int_pkt),
+	    (uint64_t)&ctxt.pkt);
+	if (ret) {
+		free_completion(&comp.comp_pkt.host_event);
+		return (ret);
+	}
+
+	wait_for_completion(&comp.comp_pkt.host_event);
+	free_completion(&comp.comp_pkt.host_event);
+
+	if (comp.comp_pkt.completion_status < 0)
+		return (EPROTO);
+
+	*addr = comp.int_desc.address;
+	*data = comp.int_desc.data;
+
+	hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO);
+	hid->irq = irq;
+	hid->desc = comp.int_desc;
+	TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link);
+
+	return (0);
+}
+
+static device_method_t vmbus_pcib_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,         vmbus_pcib_probe),
+	DEVMETHOD(device_attach,        vmbus_pcib_attach),
+	DEVMETHOD(device_detach,        vmbus_pcib_detach),
+	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
+	DEVMETHOD(device_suspend,	bus_generic_suspend),
+	DEVMETHOD(device_resume,	bus_generic_resume),
+
+	/* Bus interface */
+	DEVMETHOD(bus_read_ivar,		vmbus_pcib_read_ivar),
+	DEVMETHOD(bus_write_ivar,		vmbus_pcib_write_ivar),
+	DEVMETHOD(bus_alloc_resource,		vmbus_pcib_alloc_resource),
+	DEVMETHOD(bus_release_resource,		vmbus_pcib_release_resource),
+	DEVMETHOD(bus_activate_resource,   bus_generic_activate_resource),
+	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
+	DEVMETHOD(bus_setup_intr,	   bus_generic_setup_intr),
+	DEVMETHOD(bus_teardown_intr,	   bus_generic_teardown_intr),
+#if __FreeBSD_version >= 1100000
+	DEVMETHOD(bus_get_cpus,			vmbus_pcib_get_cpus),
+#endif
+
+	/* pcib interface */
+	DEVMETHOD(pcib_maxslots,		pcib_maxslots),
+	DEVMETHOD(pcib_read_config,		vmbus_pcib_read_config),
+	DEVMETHOD(pcib_write_config,		vmbus_pcib_write_config),
+	DEVMETHOD(pcib_route_interrupt,		vmbus_pcib_route_intr),
+	DEVMETHOD(pcib_alloc_msi,		vmbus_pcib_alloc_msi),
+	DEVMETHOD(pcib_release_msi,		vmbus_pcib_release_msi),
+	DEVMETHOD(pcib_alloc_msix,		vmbus_pcib_alloc_msix),
+	DEVMETHOD(pcib_release_msix,		vmbus_pcib_release_msix),
+	DEVMETHOD(pcib_map_msi,			vmbus_pcib_map_msi),
+
+	DEVMETHOD_END
+};
+
+static devclass_t pcib_devclass;
+
+DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods,
+		sizeof(struct vmbus_pcib_softc));
+DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, pcib_devclass, 0, 0);
+MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1);
+MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1);
+
+#endif /* NEW_PCIB */


Property changes on: trunk/sys/dev/hyperv/pcib/vmbus_pcib.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
===================================================================
--- trunk/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,2390 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
+ * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs) are
+ * converted into VSCSI protocol messages which are delivered to the parent
+ * partition StorVSP driver over the Hyper-V VMBUS.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c 332904 2018-04-24 03:07:49Z dexuan $");
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/condvar.h>
+#include <sys/time.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/sx.h>
+#include <sys/taskqueue.h>
+#include <sys/bus.h>
+#include <sys/mutex.h>
+#include <sys/callout.h>
+#include <sys/smp.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/uma.h>
+#include <sys/lock.h>
+#include <sys/sema.h>
+#include <sys/sglist.h>
+#include <sys/eventhandler.h>
+#include <machine/bus.h>
+#include <sys/bus_dma.h>
+
+#include <cam/cam.h>
+#include <cam/cam_ccb.h>
+#include <cam/cam_periph.h>
+#include <cam/cam_sim.h>
+#include <cam/cam_xpt_sim.h>
+#include <cam/cam_xpt_internal.h>
+#include <cam/cam_debug.h>
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_message.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/vmbus.h>
+#include "hv_vstorage.h"
+#include "vmbus_if.h"
+
+#define STORVSC_MAX_LUNS_PER_TARGET	(64)
+#define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
+#define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
+#define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
+#define STORVSC_MAX_TARGETS		(2)
+
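+/*
+ * On-the-wire packet size: vmscsi_size_delta (set during protocol
+ * negotiation below) is subtracted because pre-Win8 hosts do not
+ * understand the win8 extension of the request structure.
+ */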
+#define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)
+
+/*
+ * 33 segments are needed to allow 128KB maxio, in case the data
+ * in the first page is _not_ PAGE_SIZE aligned, e.g.
+ *
+ *     |<----------- 128KB ----------->|
+ *     |                               |
+ *  0  2K 4K    8K   16K   124K  128K  130K
+ *  |  |  |     |     |       |     |  |
+ *  +--+--+-----+-----+.......+-----+--+--+
+ *  |  |  |     |     |       |     |  |  | DATA
+ *  |  |  |     |     |       |     |  |  |
+ *  +--+--+-----+-----+.......------+--+--+
+ *     |  |                         |  |
+ *     | 1|            31           | 1| ...... # of segments
+ */
+#define STORVSC_DATA_SEGCNT_MAX		33
+#define STORVSC_DATA_SEGSZ_MAX		PAGE_SIZE
+#define STORVSC_DATA_SIZE_MAX		\
+	((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX)
+
+struct storvsc_softc;
+
+struct hv_sgl_node {
+	LIST_ENTRY(hv_sgl_node) link;
+	struct sglist *sgl_data;
+};
+
+struct hv_sgl_page_pool{
+	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
+	LIST_HEAD(, hv_sgl_node) free_sgl_list;
+	boolean_t                is_init;
+} g_hv_sgl_page_pool;
+
+enum storvsc_request_type {
+	WRITE_TYPE,
+	READ_TYPE,
+	UNKNOWN_TYPE
+};
+
+SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
+	"Hyper-V storage interface");
+
+static u_int hv_storvsc_use_win8ext_flags = 1;
+SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_win8ext_flags, CTLFLAG_RW,
+	&hv_storvsc_use_win8ext_flags, 0,
+	"Use win8 extension flags or not");
+
+static u_int hv_storvsc_use_pim_unmapped = 1;
+SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
+	&hv_storvsc_use_pim_unmapped, 0,
+	"Optimize storvsc by using unmapped I/O");
+
+static u_int hv_storvsc_ringbuffer_size = (64 * PAGE_SIZE);
+SYSCTL_UINT(_hw_storvsc, OID_AUTO, ringbuffer_size, CTLFLAG_RDTUN,
+	&hv_storvsc_ringbuffer_size, 0, "Hyper-V storage ringbuffer size");
+
+static u_int hv_storvsc_max_io = 512;
+SYSCTL_UINT(_hw_storvsc, OID_AUTO, max_io, CTLFLAG_RDTUN,
+	&hv_storvsc_max_io, 0, "Hyper-V storage max io limit");
+
+static int hv_storvsc_chan_cnt = 0;
+SYSCTL_INT(_hw_storvsc, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
+	&hv_storvsc_chan_cnt, 0, "# of channels to use");
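+
+/*
+ * The CTLFLAG_RDTUN knobs above are boot-time tunables.  A sketch of how
+ * they might be set from /boot/loader.conf (values purely illustrative):
+ *
+ *	hw.storvsc.ringbuffer_size="262144"
+ *	hw.storvsc.max_io="256"
+ *	hw.storvsc.chan_cnt="4"
+ */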
+
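+/*
+ * Upper limit on in-flight requests: the number of PRP-list packets of
+ * VSTOR_PKT_SIZE bytes carrying STORVSC_DATA_SEGCNT_MAX pages each that
+ * fit into a ring buffer of hv_storvsc_ringbuffer_size bytes.
+ */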
+#define STORVSC_MAX_IO						\
+	vmbus_chan_prplist_nelem(hv_storvsc_ringbuffer_size,	\
+	   STORVSC_DATA_SEGCNT_MAX, VSTOR_PKT_SIZE)
+
+struct hv_storvsc_sysctl {
+	u_long		data_bio_cnt;
+	u_long		data_vaddr_cnt;
+	u_long		data_sg_cnt;
+	u_long		chan_send_cnt[MAXCPU];
+};
+
+struct storvsc_gpa_range {
+	struct vmbus_gpa_range	gpa_range;
+	uint64_t		gpa_page[STORVSC_DATA_SEGCNT_MAX];
+} __packed;
+
+struct hv_storvsc_request {
+	LIST_ENTRY(hv_storvsc_request)	link;
+	struct vstor_packet		vstor_packet;
+	int				prp_cnt;
+	struct storvsc_gpa_range	prp_list;
+	void				*sense_data;
+	uint8_t				sense_info_len;
+	uint8_t				retries;
+	union ccb			*ccb;
+	struct storvsc_softc		*softc;
+	struct callout			callout;
+	struct sema			synch_sema; /* Synchronize the request/response if needed */
+	struct sglist			*bounce_sgl;
+	unsigned int			bounce_sgl_count;
+	uint64_t			not_aligned_seg_bits;
+	bus_dmamap_t			data_dmap;
+};
+
+struct storvsc_softc {
+	struct vmbus_channel		*hs_chan;
+	LIST_HEAD(, hv_storvsc_request)	hs_free_list;
+	struct mtx			hs_lock;
+	struct storvsc_driver_props	*hs_drv_props;
+	int 				hs_unit;
+	uint32_t			hs_frozen;
+	struct cam_sim			*hs_sim;
+	struct cam_path 		*hs_path;
+	uint32_t			hs_num_out_reqs;
+	boolean_t			hs_destroy;
+	boolean_t			hs_drain_notify;
+	struct sema 			hs_drain_sema;	
+	struct hv_storvsc_request	hs_init_req;
+	struct hv_storvsc_request	hs_reset_req;
+	device_t			hs_dev;
+	bus_dma_tag_t			storvsc_req_dtag;
+	struct hv_storvsc_sysctl	sysctl_data;
+	uint32_t			hs_nchan;
+	struct vmbus_channel		*hs_sel_chan[MAXCPU];
+};
+
+static eventhandler_tag storvsc_handler_tag;
+/*
+ * The size of the vmscsi_request has changed in win8. The
+ * additional size is for the newly added elements in the
+ * structure. These elements are valid only when we are talking
+ * to a win8 host.
+ * Track the correct size we need to apply.
+ */
+static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
+
+/**
+ * HyperV storvsc timeout testing cases:
+ * a. IO returned after first timeout;
+ * b. IO returned after second timeout and queue freeze;
+ * c. IO returned while timer handler is running
+ * The first can be tested by "sg_senddiag -vv /dev/daX",
+ * and the second and third can be done by
+ * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
+ */
+#define HVS_TIMEOUT_TEST 0
+
+/*
+ * Bus/adapter reset functionality on the Hyper-V host is
+ * buggy, so it is disabled until it can be further tested.
+ */
+#define HVS_HOST_RESET 0
+
+struct storvsc_driver_props {
+	char		*drv_name;
+	char		*drv_desc;
+	uint8_t		drv_max_luns_per_target;
+	uint32_t	drv_max_ios_per_target;
+	uint32_t	drv_ringbuffer_size;
+};
+
+enum hv_storage_type {
+	DRIVER_BLKVSC,
+	DRIVER_STORVSC,
+	DRIVER_UNKNOWN
+};
+
+#define HS_MAX_ADAPTERS 10
+
+#define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
+
+/* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
+static const struct hyperv_guid gStorVscDeviceType={
+	.hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
+		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
+};
+
+/* {32412632-86cb-44a2-9b5c-50d1417354f5} */
+static const struct hyperv_guid gBlkVscDeviceType={
+	.hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
+		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
+};
+
+static struct storvsc_driver_props g_drv_props_table[] = {
+	{"blkvsc", "Hyper-V IDE",
+	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
+	 20*PAGE_SIZE},
+	{"storvsc", "Hyper-V SCSI",
+	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
+	 20*PAGE_SIZE}
+};
+
+/*
+ * Sense buffer size changed in win8; have a run-time
+ * variable to track the size we should use.
+ */
+static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
+
+/*
+ * The storage protocol version is determined during the
+ * initial exchange with the host.  It will indicate which
+ * storage functionality is available in the host.
+ */
+static int vmstor_proto_version;
+
+struct vmstor_proto {
+        int proto_version;
+        int sense_buffer_size;
+        int vmscsi_size_delta;
+};
+
+static const struct vmstor_proto vmstor_proto_list[] = {
+        {
+                VMSTOR_PROTOCOL_VERSION_WIN10,
+                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
+                0
+        },
+        {
+                VMSTOR_PROTOCOL_VERSION_WIN8_1,
+                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
+                0
+        },
+        {
+                VMSTOR_PROTOCOL_VERSION_WIN8,
+                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
+                0
+        },
+        {
+                VMSTOR_PROTOCOL_VERSION_WIN7,
+                PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
+                sizeof(struct vmscsi_win8_extension),
+        },
+        {
+                VMSTOR_PROTOCOL_VERSION_WIN6,
+                PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
+                sizeof(struct vmscsi_win8_extension),
+        }
+};
+
+/* static functions */
+static int storvsc_probe(device_t dev);
+static int storvsc_attach(device_t dev);
+static int storvsc_detach(device_t dev);
+static void storvsc_poll(struct cam_sim * sim);
+static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
+static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
+static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
+static enum hv_storage_type storvsc_get_storage_type(device_t dev);
+static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
+static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc);
+static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
+					struct vstor_packet *vstor_packet,
+					struct hv_storvsc_request *request);
+static int hv_storvsc_connect_vsp(struct storvsc_softc *);
+static void storvsc_io_done(struct hv_storvsc_request *reqp);
+static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
+				bus_dma_segment_t *orig_sgl,
+				unsigned int orig_sgl_count,
+				uint64_t seg_bits);
+void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
+				unsigned int dest_sgl_count,
+				struct sglist* src_sgl,
+				uint64_t seg_bits);
+
+static device_method_t storvsc_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		storvsc_probe),
+	DEVMETHOD(device_attach,	storvsc_attach),
+	DEVMETHOD(device_detach,	storvsc_detach),
+	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
+	DEVMETHOD_END
+};
+
+static driver_t storvsc_driver = {
+	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
+};
+
+static devclass_t storvsc_devclass;
+DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
+MODULE_VERSION(storvsc, 1);
+MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
+
+static void
+storvsc_subchan_attach(struct storvsc_softc *sc,
+    struct vmbus_channel *new_channel)
+{
+	struct vmstor_chan_props props;
+	int ret = 0;
+
+	memset(&props, 0, sizeof(props));
+
+	vmbus_chan_cpu_rr(new_channel);
+	ret = vmbus_chan_open(new_channel,
+	    sc->hs_drv_props->drv_ringbuffer_size,
+  	    sc->hs_drv_props->drv_ringbuffer_size,
+	    (void *)&props,
+	    sizeof(struct vmstor_chan_props),
+	    hv_storvsc_on_channel_callback, sc);
+}
+
+/**
+ * @brief Send multi-channel creation request to host
+ *
+ * @param sc  the storvsc softc
+ * @param max_subch  the maximum number of sub-channels offered by the host
+ */
+static void
+storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_subch)
+{
+	struct vmbus_channel **subchan;
+	struct hv_storvsc_request *request;
+	struct vstor_packet *vstor_packet;	
+	int request_subch;
+	int ret, i;
+
+	/* number of sub-channels to request from the host */
+	request_subch = MIN(max_subch, mp_ncpus - 1);
+
+	request = &sc->hs_init_req;
+
+	/* ask the host to create the sub-channels */
+	memset(request, 0, sizeof(struct hv_storvsc_request));
+	
+	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
+
+	vstor_packet = &request->vstor_packet;
+	
+	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
+	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
+	vstor_packet->u.multi_channels_cnt = request_subch;
+
+	ret = vmbus_chan_send(sc->hs_chan,
+	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
+	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
+
+	sema_wait(&request->synch_sema);
+
+	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
+	    vstor_packet->status != 0) {		
+		printf("Storvsc_error: create multi-channel invalid operation "
+		    "(%d) or statue (%u)\n",
+		    vstor_packet->operation, vstor_packet->status);
+		return;
+	}
+
+	/* Update channel count */
+	sc->hs_nchan = request_subch + 1;
+
+	/* Wait for sub-channels setup to complete. */
+	subchan = vmbus_subchan_get(sc->hs_chan, request_subch);
+
+	/* Attach the sub-channels. */
+	for (i = 0; i < request_subch; ++i)
+		storvsc_subchan_attach(sc, subchan[i]);
+
+	/* Release the sub-channels. */
+	vmbus_subchan_rel(subchan, request_subch);
+
+	if (bootverbose)
+		printf("Storvsc create multi-channel success!\n");
+}
+
+/**
+ * @brief initialize channel connection to parent partition
+ *
+ * @param sc  the storvsc softc
+ * @returns  0 on success, non-zero error on failure
+ */
+static int
+hv_storvsc_channel_init(struct storvsc_softc *sc)
+{
+	int ret = 0, i;
+	struct hv_storvsc_request *request;
+	struct vstor_packet *vstor_packet;
+	uint16_t max_subch;
+	boolean_t support_multichannel;
+	uint32_t version;
+
+	max_subch = 0;
+	support_multichannel = FALSE;
+
+	request = &sc->hs_init_req;
+	memset(request, 0, sizeof(struct hv_storvsc_request));
+	vstor_packet = &request->vstor_packet;
+	request->softc = sc;
+
+	/**
+	 * Initiate the vsc/vsp initialization protocol on the open channel
+	 */
+	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
+
+	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
+	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
+
+
+	ret = vmbus_chan_send(sc->hs_chan,
+	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
+	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
+
+	if (ret != 0)
+		goto cleanup;
+
+	sema_wait(&request->synch_sema);
+
+	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
+		vstor_packet->status != 0) {
+		goto cleanup;
+	}
+
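+	/*
+	 * Walk vmstor_proto_list, which is ordered from the newest protocol
+	 * version to the oldest, and settle on the first version the host
+	 * accepts.
+	 */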
+	for (i = 0; i < nitems(vmstor_proto_list); i++) {
+		/* reuse the request packet to probe each supported version */
+
+		memset(vstor_packet, 0, sizeof(struct vstor_packet));
+		vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
+		vstor_packet->flags = REQUEST_COMPLETION_FLAG;
+
+		vstor_packet->u.version.major_minor =
+			vmstor_proto_list[i].proto_version;
+
+		/* revision is only significant for Windows guests */
+		vstor_packet->u.version.revision = 0;
+
+		ret = vmbus_chan_send(sc->hs_chan,
+		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
+		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
+
+		if (ret != 0)
+			goto cleanup;
+
+		sema_wait(&request->synch_sema);
+
+		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
+			ret = EINVAL;
+			goto cleanup;	
+		}
+		if (vstor_packet->status == 0) {
+			vmstor_proto_version =
+				vmstor_proto_list[i].proto_version;
+			sense_buffer_size =
+				vmstor_proto_list[i].sense_buffer_size;
+			vmscsi_size_delta =
+				vmstor_proto_list[i].vmscsi_size_delta;
+			break;
+		}
+	}
+
+	if (vstor_packet->status != 0) {
+		ret = EINVAL;
+		goto cleanup;
+	}
+	/**
+	 * Query channel properties
+	 */
+	memset(vstor_packet, 0, sizeof(struct vstor_packet));
+	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
+	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
+
+	ret = vmbus_chan_send(sc->hs_chan,
+	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
+	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
+
+	if (ret != 0)
+		goto cleanup;
+
+	sema_wait(&request->synch_sema);
+
+	/* TODO: Check returned version */
+	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
+	    vstor_packet->status != 0) {
+		goto cleanup;
+	}
+
+	max_subch = vstor_packet->u.chan_props.max_channel_cnt;
+	if (hv_storvsc_chan_cnt > 0 && hv_storvsc_chan_cnt < (max_subch + 1))
+		max_subch = hv_storvsc_chan_cnt - 1;
+
+	/* the multi-channel feature is supported by Win8 and later hosts */
+	version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev);
+	if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 &&
+	    (vstor_packet->u.chan_props.flags &
+	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
+		support_multichannel = TRUE;
+	}
+	if (bootverbose) {
+		device_printf(sc->hs_dev, "max chans %d%s\n", max_subch + 1,
+		    support_multichannel ? ", multi-chan capable" : "");
+	}
+
+	memset(vstor_packet, 0, sizeof(struct vstor_packet));
+	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
+	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
+
+	ret = vmbus_chan_send(sc->hs_chan,
+	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
+	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
+
+	if (ret != 0) {
+		goto cleanup;
+	}
+
+	sema_wait(&request->synch_sema);
+
+	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
+	    vstor_packet->status != 0)
+		goto cleanup;
+
+	/*
+	 * If multi-channel is supported, send multichannel create
+	 * request to host.
+	 */
+	if (support_multichannel && max_subch > 0)
+		storvsc_send_multichannel_request(sc, max_subch);
+cleanup:
+	sema_destroy(&request->synch_sema);
+	return (ret);
+}
+
+/**
+ * @brief Open channel connection to parent partition StorVSP driver
+ *
+ * Open and initialize channel connection to parent partition StorVSP driver.
+ *
+ * @param sc  the storvsc softc
+ * @returns 0 on success, non-zero error on failure
+ */
+static int
+hv_storvsc_connect_vsp(struct storvsc_softc *sc)
+{	
+	int ret = 0;
+	struct vmstor_chan_props props;
+
+	memset(&props, 0, sizeof(struct vmstor_chan_props));
+
+	/*
+	 * Open the channel
+	 */
+	vmbus_chan_cpu_rr(sc->hs_chan);
+	ret = vmbus_chan_open(
+		sc->hs_chan,
+		sc->hs_drv_props->drv_ringbuffer_size,
+		sc->hs_drv_props->drv_ringbuffer_size,
+		(void *)&props,
+		sizeof(struct vmstor_chan_props),
+		hv_storvsc_on_channel_callback, sc);
+
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = hv_storvsc_channel_init(sc);
+	return (ret);
+}
+
+#if HVS_HOST_RESET
+static int
+hv_storvsc_host_reset(struct storvsc_softc *sc)
+{
+	int ret = 0;
+
+	struct hv_storvsc_request *request;
+	struct vstor_packet *vstor_packet;
+
+	request = &sc->hs_reset_req;
+	request->softc = sc;
+	vstor_packet = &request->vstor_packet;
+
+	sema_init(&request->synch_sema, 0, "stor synch sema");
+
+	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
+	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
+
+	ret = vmbus_chan_send(sc->hs_chan,
+	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
+	    vstor_packet, VSTOR_PKT_SIZE,
+	    (uint64_t)(uintptr_t)&sc->hs_reset_req);
+
+	if (ret != 0) {
+		goto cleanup;
+	}
+
+	sema_wait(&request->synch_sema);
+
+	/*
+	 * At this point, all outstanding requests in the adapter
+	 * should have been flushed out and returned to us.
+	 */
+
+cleanup:
+	sema_destroy(&request->synch_sema);
+	return (ret);
+}
+#endif /* HVS_HOST_RESET */
+
+/**
+ * @brief Function to initiate an I/O request
+ *
+ * @param sc the storvsc softc
+ * @param request pointer to a request structure
+ * @returns 0 on success, non-zero error on failure
+ */
+static int
+hv_storvsc_io_request(struct storvsc_softc *sc,
+					  struct hv_storvsc_request *request)
+{
+	struct vstor_packet *vstor_packet = &request->vstor_packet;
+	struct vmbus_channel* outgoing_channel = NULL;
+	int ret = 0, ch_sel;
+
+	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
+
+	vstor_packet->u.vm_srb.length =
+	    sizeof(struct vmscsi_req) - vmscsi_size_delta;
+	
+	vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
+
+	vstor_packet->u.vm_srb.transfer_len =
+	    request->prp_list.gpa_range.gpa_len;
+
+	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
+
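+	/*
+	 * Select the outgoing channel by hashing the LUN together with the
+	 * current CPU, so I/O from different LUNs and CPUs tends to spread
+	 * across the available VMBUS channels.
+	 */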
+	ch_sel = (vstor_packet->u.vm_srb.lun + curcpu) % sc->hs_nchan;
+	outgoing_channel = sc->hs_sel_chan[ch_sel];
+
+	mtx_unlock(&request->softc->hs_lock);
+	if (request->prp_list.gpa_range.gpa_len) {
+		ret = vmbus_chan_send_prplist(outgoing_channel,
+		    &request->prp_list.gpa_range, request->prp_cnt,
+		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
+	} else {
+		ret = vmbus_chan_send(outgoing_channel,
+		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
+		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
+	}
+	/* count successful request submissions on each channel */
+	if (!ret) {
+		sc->sysctl_data.chan_send_cnt[ch_sel]++;
+	}
+	mtx_lock(&request->softc->hs_lock);
+
+	if (ret != 0) {
+		printf("Unable to send packet %p ret %d", vstor_packet, ret);
+	} else {
+		atomic_add_int(&sc->hs_num_out_reqs, 1);
+	}
+
+	return (ret);
+}
+
+
+/**
+ * Process an IO_COMPLETION_OPERATION response and ready the
+ * result for upper-layer processing by the CAM layer.
+ */
+static void
+hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
+			   struct vstor_packet *vstor_packet,
+			   struct hv_storvsc_request *request)
+{
+	struct vmscsi_req *vm_srb;
+
+	vm_srb = &vstor_packet->u.vm_srb;
+
+	/*
+	 * Copy some fields of the host's response into the request structure,
+	 * because the fields will be used later in storvsc_io_done().
+	 */
+	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
+	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
+	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;
+
+	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
+			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
+		/* Autosense data available */
+
+		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
+				("vm_srb->sense_info_len <= "
+				 "request->sense_info_len"));
+
+		memcpy(request->sense_data, vm_srb->u.sense_data,
+			vm_srb->sense_info_len);
+
+		request->sense_info_len = vm_srb->sense_info_len;
+	}
+
+	/* Complete request by passing to the CAM layer */
+	storvsc_io_done(request);
+	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
+	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
+		sema_post(&sc->hs_drain_sema);
+	}
+}
+
+static void
+hv_storvsc_rescan_target(struct storvsc_softc *sc)
+{
+	path_id_t pathid;
+	target_id_t targetid;
+	union ccb *ccb;
+
+	pathid = cam_sim_path(sc->hs_sim);
+	targetid = CAM_TARGET_WILDCARD;
+
+	/*
+	 * Allocate a CCB and schedule a rescan.
+	 */
+	ccb = xpt_alloc_ccb_nowait();
+	if (ccb == NULL) {
+		printf("unable to alloc CCB for rescan\n");
+		return;
+	}
+
+	if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
+	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
+		printf("unable to create path for rescan, pathid: %u,"
+		    "targetid: %u\n", pathid, targetid);
+		xpt_free_ccb(ccb);
+		return;
+	}
+
+	if (targetid == CAM_TARGET_WILDCARD)
+		ccb->ccb_h.func_code = XPT_SCAN_BUS;
+	else
+		ccb->ccb_h.func_code = XPT_SCAN_TGT;
+
+	xpt_rescan(ccb);
+}
+
+static void
+hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc)
+{
+	int ret = 0;
+	struct storvsc_softc *sc = xsc;
+	uint32_t bytes_recvd;
+	uint64_t request_id;
+	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
+	struct hv_storvsc_request *request;
+	struct vstor_packet *vstor_packet;
+
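+	/*
+	 * VMBUS ring packets are 8-byte aligned, hence the roundup2()
+	 * sizing of the receive buffer.
+	 */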
+	bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
+	ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id);
+	KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough"));
+	/* XXX check bytes_recvd to make sure that it contains enough data */
+
+	while ((ret == 0) && (bytes_recvd > 0)) {
+		request = (struct hv_storvsc_request *)(uintptr_t)request_id;
+
+		if ((request == &sc->hs_init_req) ||
+			(request == &sc->hs_reset_req)) {
+			memcpy(&request->vstor_packet, packet,
+				   sizeof(struct vstor_packet));
+			sema_post(&request->synch_sema);
+		} else {
+			vstor_packet = (struct vstor_packet *)packet;
+			switch(vstor_packet->operation) {
+			case VSTOR_OPERATION_COMPLETEIO:
+				if (request == NULL)
+					panic("VMBUS: storvsc received a "
+					    "packet with NULL request id in "
+					    "COMPLETEIO operation.");
+
+				hv_storvsc_on_iocompletion(sc,
+							vstor_packet, request);
+				break;
+			case VSTOR_OPERATION_REMOVEDEVICE:
+				printf("VMBUS: storvsc operation %d not "
+				    "implemented.\n", vstor_packet->operation);
+				/* TODO: implement */
+				break;
+			case VSTOR_OPERATION_ENUMERATE_BUS:
+				hv_storvsc_rescan_target(sc);
+				break;
+			default:
+				break;
+			}			
+		}
+
+		bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
+		ret = vmbus_chan_recv(channel, packet, &bytes_recvd,
+		    &request_id);
+		KASSERT(ret != ENOBUFS,
+		    ("storvsc recvbuf is not large enough"));
+		/*
+		 * XXX check bytes_recvd to make sure that it contains
+		 * enough data
+		 */
+	}
+}
+
+/**
+ * @brief StorVSC probe function
+ *
+ * Device probe function.  Returns 0 if the input device is a StorVSC
+ * device.  Otherwise, ENXIO is returned.  If the input device is a
+ * BlkVSC (paravirtual IDE) device and this support is disabled in
+ * favor of the emulated ATA/IDE device, ENXIO is returned as well.
+ *
+ * @param dev a device
+ * @returns 0 on success, ENXIO if not a matching StorVSC device
+ */
+static int
+storvsc_probe(device_t dev)
+{
+	int ret	= ENXIO;
+	
+	switch (storvsc_get_storage_type(dev)) {
+	case DRIVER_BLKVSC:
+		if (bootverbose)
+			device_printf(dev,
+			    "Enlightened ATA/IDE detected\n");
+		device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
+		ret = BUS_PROBE_DEFAULT;
+		break;
+	case DRIVER_STORVSC:
+		if (bootverbose)
+			device_printf(dev, "Enlightened SCSI device detected\n");
+		device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
+		ret = BUS_PROBE_DEFAULT;
+		break;
+	default:
+		ret = ENXIO;
+	}
+	return (ret);
+}
+
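+/*
+ * Build the flat channel-selection table consulted by
+ * hv_storvsc_io_request(): slot 0 holds the primary channel and
+ * slots 1..n hold the sub-channels.
+ */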
+static void
+storvsc_create_chan_sel(struct storvsc_softc *sc)
+{
+	struct vmbus_channel **subch;
+	int i, nsubch;
+
+	sc->hs_sel_chan[0] = sc->hs_chan;
+	nsubch = sc->hs_nchan - 1;
+	if (nsubch == 0)
+		return;
+
+	subch = vmbus_subchan_get(sc->hs_chan, nsubch);
+	for (i = 0; i < nsubch; i++)
+		sc->hs_sel_chan[i + 1] = subch[i];
+	vmbus_subchan_rel(subch, nsubch);
+}
+
+static int
+storvsc_init_requests(device_t dev)
+{
+	struct storvsc_softc *sc = device_get_softc(dev);
+	struct hv_storvsc_request *reqp;
+	int error, i;
+
+	LIST_INIT(&sc->hs_free_list);
+
+	error = bus_dma_tag_create(
+		bus_get_dma_tag(dev),		/* parent */
+		1,				/* alignment */
+		PAGE_SIZE,			/* boundary */
+		BUS_SPACE_MAXADDR,		/* lowaddr */
+		BUS_SPACE_MAXADDR,		/* highaddr */
+		NULL, NULL,			/* filter, filterarg */
+		STORVSC_DATA_SIZE_MAX,		/* maxsize */
+		STORVSC_DATA_SEGCNT_MAX,	/* nsegments */
+		STORVSC_DATA_SEGSZ_MAX,		/* maxsegsize */
+		0,				/* flags */
+		NULL,				/* lockfunc */
+		NULL,				/* lockfuncarg */
+		&sc->storvsc_req_dtag);
+	if (error) {
+		device_printf(dev, "failed to create storvsc dma tag\n");
+		return (error);
+	}
+
+	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
+		reqp = malloc(sizeof(struct hv_storvsc_request),
+				 M_DEVBUF, M_WAITOK|M_ZERO);
+		reqp->softc = sc;
+		error = bus_dmamap_create(sc->storvsc_req_dtag, 0,
+				&reqp->data_dmap);
+		if (error) {
+			device_printf(dev, "failed to allocate storvsc "
+			    "data dmamap\n");
+			goto cleanup;
+		}
+		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
+	}
+	return (0);
+
+cleanup:
+	while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) {
+		LIST_REMOVE(reqp, link);
+		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
+		free(reqp, M_DEVBUF);
+	}
+	return (error);
+}
+
+static void
+storvsc_sysctl(device_t dev)
+{
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid *ch_tree, *chid_tree;
+	struct storvsc_softc *sc;
+	char name[16];
+	int i;
+
+	sc = device_get_softc(dev);
+	ctx = device_get_sysctl_ctx(dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt", CTLFLAG_RW,
+		&sc->sysctl_data.data_bio_cnt, "# of bio data block");
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt", CTLFLAG_RW,
+		&sc->sysctl_data.data_vaddr_cnt, "# of vaddr data block");
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt", CTLFLAG_RW,
+		&sc->sysctl_data.data_sg_cnt, "# of sg data block");
+
+	/* dev.storvsc.UNIT.channel */
+	ch_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "channel",
+		CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+	if (ch_tree == NULL)
+		return;
+
+	for (i = 0; i < sc->hs_nchan; i++) {
+		uint32_t ch_id;
+
+		ch_id = vmbus_chan_id(sc->hs_sel_chan[i]);
+		snprintf(name, sizeof(name), "%d", ch_id);
+		/* dev.storvsc.UNIT.channel.CHID */
+		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
+			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+		if (chid_tree == NULL)
+			return;
+		/* dev.storvsc.UNIT.channel.CHID.send_req */
+		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
+			"send_req", CTLFLAG_RD, &sc->sysctl_data.chan_send_cnt[i],
+			"# of request sending from this channel");
+	}
+}
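+
+/*
+ * A hypothetical sysctl(8) session reading the per-channel counter
+ * exposed above (unit and channel IDs are illustrative):
+ *
+ *	% sysctl dev.storvsc.0.channel.14.send_req
+ *	dev.storvsc.0.channel.14.send_req: 12345
+ */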
+
+/**
+ * @brief StorVSC attach function
+ *
+ * Function responsible for allocating per-device structures,
+ * setting up CAM interfaces and scanning for available LUNs to
+ * be used for SCSI device peripherals.
+ *
+ * @param dev a device
+ * @returns 0 on success or an error on failure
+ */
+static int
+storvsc_attach(device_t dev)
+{
+	enum hv_storage_type stor_type;
+	struct storvsc_softc *sc;
+	struct cam_devq *devq;
+	int ret, i, j;
+	struct hv_storvsc_request *reqp;
+	struct root_hold_token *root_mount_token = NULL;
+	struct hv_sgl_node *sgl_node = NULL;
+	void *tmp_buff = NULL;
+
+	/*
+	 * We need to serialize storvsc attach calls.
+	 */
+	root_mount_token = root_mount_hold("storvsc");
+
+	sc = device_get_softc(dev);
+	sc->hs_nchan = 1;
+	sc->hs_chan = vmbus_get_channel(dev);
+
+	stor_type = storvsc_get_storage_type(dev);
+
+	if (stor_type == DRIVER_UNKNOWN) {
+		ret = ENODEV;
+		goto cleanup;
+	}
+
+	/* fill in driver specific properties */
+	sc->hs_drv_props = &g_drv_props_table[stor_type];
+	sc->hs_drv_props->drv_ringbuffer_size = hv_storvsc_ringbuffer_size;
+	sc->hs_drv_props->drv_max_ios_per_target =
+		MIN(STORVSC_MAX_IO, hv_storvsc_max_io);
+	if (bootverbose) {
+		printf("storvsc ringbuffer size: %d, max_io: %d\n",
+			sc->hs_drv_props->drv_ringbuffer_size,
+			sc->hs_drv_props->drv_max_ios_per_target);
+	}
+	/* fill in device specific properties */
+	sc->hs_unit	= device_get_unit(dev);
+	sc->hs_dev	= dev;
+
+	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
+
+	ret = storvsc_init_requests(dev);
+	if (ret != 0)
+		goto cleanup;
+
+	/* create sg-list page pool */
+	if (FALSE == g_hv_sgl_page_pool.is_init) {
+		g_hv_sgl_page_pool.is_init = TRUE;
+		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
+		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
+
+		/*
+		 * Pre-create the SG lists: each SG list has
+		 * STORVSC_DATA_SEGCNT_MAX segments and each
+		 * segment is backed by a one-page buffer.
+		 */
+		for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; i++) {
+	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
+			    M_DEVBUF, M_WAITOK|M_ZERO);
+
+			sgl_node->sgl_data =
+			    sglist_alloc(STORVSC_DATA_SEGCNT_MAX,
+			    M_WAITOK|M_ZERO);
+
+			for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
+				tmp_buff = malloc(PAGE_SIZE,
+				    M_DEVBUF, M_WAITOK|M_ZERO);
+
+				sgl_node->sgl_data->sg_segs[j].ss_paddr =
+				    (vm_paddr_t)tmp_buff;
+			}
+
+			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
+			    sgl_node, link);
+		}
+	}
+
+	sc->hs_destroy = FALSE;
+	sc->hs_drain_notify = FALSE;
+	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
+
+	ret = hv_storvsc_connect_vsp(sc);
+	if (ret != 0) {
+		goto cleanup;
+	}
+
+	/* Construct cpu to channel mapping */
+	storvsc_create_chan_sel(sc);
+
+	/*
+	 * Create the device queue.
+	 * Hyper-V maps each target to one SCSI HBA
+	 */
+	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
+	if (devq == NULL) {
+		device_printf(dev, "Failed to alloc device queue\n");
+		ret = ENOMEM;
+		goto cleanup;
+	}
+
+	sc->hs_sim = cam_sim_alloc(storvsc_action,
+				storvsc_poll,
+				sc->hs_drv_props->drv_name,
+				sc,
+				sc->hs_unit,
+				&sc->hs_lock, 1,
+				sc->hs_drv_props->drv_max_ios_per_target,
+				devq);
+
+	if (sc->hs_sim == NULL) {
+		device_printf(dev, "Failed to alloc sim\n");
+		cam_simq_free(devq);
+		ret = ENOMEM;
+		goto cleanup;
+	}
+
+	mtx_lock(&sc->hs_lock);
+	/* bus_id is set to 0, need to get it from VMBUS channel query? */
+	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
+		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
+		mtx_unlock(&sc->hs_lock);
+		device_printf(dev, "Unable to register SCSI bus\n");
+		ret = ENXIO;
+		goto cleanup;
+	}
+
+	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
+		 cam_sim_path(sc->hs_sim),
+		CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
+		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
+		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
+		mtx_unlock(&sc->hs_lock);
+		device_printf(dev, "Unable to create path\n");
+		ret = ENXIO;
+		goto cleanup;
+	}
+
+	mtx_unlock(&sc->hs_lock);
+
+	storvsc_sysctl(dev);
+
+	root_mount_rel(root_mount_token);
+	return (0);
+
+
+cleanup:
+	root_mount_rel(root_mount_token);
+	while (!LIST_EMPTY(&sc->hs_free_list)) {
+		reqp = LIST_FIRST(&sc->hs_free_list);
+		LIST_REMOVE(reqp, link);
+		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
+		free(reqp, M_DEVBUF);
+	}
+
+	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
+		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+		LIST_REMOVE(sgl_node, link);
+		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
+			if (NULL !=
+			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
+				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
+			}
+		}
+		sglist_free(sgl_node->sgl_data);
+		free(sgl_node, M_DEVBUF);
+	}
+
+	return (ret);
+}
+
+/**
+ * @brief StorVSC device detach function
+ *
+ * This function is responsible for safely detaching a
+ * StorVSC device.  This includes waiting for inbound responses
+ * to complete and freeing associated per-device structures.
+ *
+ * @param dev a device
+ * returns 0 on success
+ */
+static int
+storvsc_detach(device_t dev)
+{
+	struct storvsc_softc *sc = device_get_softc(dev);
+	struct hv_storvsc_request *reqp = NULL;
+	struct hv_sgl_node *sgl_node = NULL;
+	int j = 0;
+
+	sc->hs_destroy = TRUE;
+
+	/*
+	 * At this point, all outbound traffic should be disabled. We
+	 * only allow inbound traffic (responses) to proceed so that
+	 * outstanding requests can be completed.
+	 */
+
+	sc->hs_drain_notify = TRUE;
+	sema_wait(&sc->hs_drain_sema);
+	sc->hs_drain_notify = FALSE;
+
+	/*
+	 * Since we have already drained, we don't need to busy wait.
+	 * The call to close the channel will reset the callback
+	 * under the protection of the incoming channel lock.
+	 */
+
+	vmbus_chan_close(sc->hs_chan);
+
+	mtx_lock(&sc->hs_lock);
+	while (!LIST_EMPTY(&sc->hs_free_list)) {
+		reqp = LIST_FIRST(&sc->hs_free_list);
+		LIST_REMOVE(reqp, link);
+		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
+		free(reqp, M_DEVBUF);
+	}
+	mtx_unlock(&sc->hs_lock);
+
+	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
+		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+		LIST_REMOVE(sgl_node, link);
+		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){
+			if (NULL !=
+			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
+				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
+			}
+		}
+		sglist_free(sgl_node->sgl_data);
+		free(sgl_node, M_DEVBUF);
+	}
+	
+	return (0);
+}
+
+#if HVS_TIMEOUT_TEST
+/**
+ * @brief unit test for timed out operations
+ *
+ * This function provides unit testing capability to simulate
+ * timed out operations.  Recompilation with HVS_TIMEOUT_TEST=1
+ * is required.
+ *
+ * @param reqp pointer to a request structure
+ * @param opcode SCSI operation being performed
+ * @param wait if 1, wait for I/O to complete
+ */
+static void
+storvsc_timeout_test(struct hv_storvsc_request *reqp,
+		uint8_t opcode, int wait)
+{
+	int ret;
+	union ccb *ccb = reqp->ccb;
+	struct storvsc_softc *sc = reqp->softc;
+
+	if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
+		return;
+	}
+
+	if (wait) {
+		mtx_lock(&reqp->event.mtx);
+	}
+	ret = hv_storvsc_io_request(sc, reqp);
+	if (ret != 0) {
+		if (wait) {
+			mtx_unlock(&reqp->event.mtx);
+		}
+		printf("%s: io_request failed with %d.\n",
+				__func__, ret);
+		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
+		mtx_lock(&sc->hs_lock);
+		storvsc_free_request(sc, reqp);
+		xpt_done(ccb);
+		mtx_unlock(&sc->hs_lock);
+		return;
+	}
+
+	if (wait) {
+		xpt_print(ccb->ccb_h.path,
+				"%u: %s: waiting for IO return.\n",
+				ticks, __func__);
+		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
+		mtx_unlock(&reqp->event.mtx);
+		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
+				ticks, __func__, (ret == 0)?
+				"IO return detected" :
+				"IO return not detected");
+		/*
+		 * Now both the timer handler and io done are running
+		 * simultaneously. We want to confirm the io done always
+		 * finishes after the timer handler exits, so the reqp used
+		 * by the timer handler is neither freed nor stale.  Busy-wait
+		 * for another 1/10 second to make sure io done really waits
+		 * for the timer handler to complete.
+		 */
+		DELAY(100*1000);
+		mtx_lock(&sc->hs_lock);
+		xpt_print(ccb->ccb_h.path,
+				"%u: %s: finishing, queue frozen %d, "
+				"ccb status 0x%x scsi_status 0x%x.\n",
+				ticks, __func__, sc->hs_frozen,
+				ccb->ccb_h.status,
+				ccb->csio.scsi_status);
+		mtx_unlock(&sc->hs_lock);
+	}
+}
+#endif /* HVS_TIMEOUT_TEST */
+
+#ifdef notyet
+/**
+ * @brief timeout handler for requests
+ *
+ * This function is called as a result of a callout expiring.
+ *
+ * @param arg pointer to a request
+ */
+static void
+storvsc_timeout(void *arg)
+{
+	struct hv_storvsc_request *reqp = arg;
+	struct storvsc_softc *sc = reqp->softc;
+	union ccb *ccb = reqp->ccb;
+
+	if (reqp->retries == 0) {
+		mtx_lock(&sc->hs_lock);
+		xpt_print(ccb->ccb_h.path,
+		    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
+		    ticks, reqp, ccb->ccb_h.timeout / 1000);
+		cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
+		mtx_unlock(&sc->hs_lock);
+
+		reqp->retries++;
+		callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
+		    0, storvsc_timeout, reqp, 0);
+#if HVS_TIMEOUT_TEST
+		storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
+#endif
+		return;
+	}
+
+	mtx_lock(&sc->hs_lock);
+	xpt_print(ccb->ccb_h.path,
+		"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
+		ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
+		(sc->hs_frozen == 0)?
+		"freezing the queue" : "the queue is already frozen");
+	if (sc->hs_frozen == 0) {
+		sc->hs_frozen = 1;
+		xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
+	}
+	mtx_unlock(&sc->hs_lock);
+	
+#if HVS_TIMEOUT_TEST
+	storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
+#endif
+}
+#endif
+
+/**
+ * @brief StorVSC device poll function
+ *
+ * This function is responsible for servicing requests when
+ * interrupts are disabled (i.e., when we are dumping core).
+ *
+ * @param sim a pointer to a CAM SCSI interface module
+ */
+static void
+storvsc_poll(struct cam_sim *sim)
+{
+	struct storvsc_softc *sc = cam_sim_softc(sim);
+
+	mtx_assert(&sc->hs_lock, MA_OWNED);
+	mtx_unlock(&sc->hs_lock);
+	hv_storvsc_on_channel_callback(sc->hs_chan, sc);
+	mtx_lock(&sc->hs_lock);
+}
+
+/**
+ * @brief StorVSC device action function
+ *
+ * This function is responsible for handling SCSI operations which
+ * are passed from the CAM layer.  The requests are in the form of
+ * CAM control blocks which indicate the action being performed.
+ * Not all actions require converting the request to a VSCSI protocol
+ * message - these actions can be responded to by this driver.
+ * Requests which are destined for a backend storage device are converted
+ * to a VSCSI protocol message and sent on the channel connection associated
+ * with this device.
+ *
+ * @param sim pointer to a CAM SCSI interface module
+ * @param ccb pointer to a CAM control block
+ */
+static void
+storvsc_action(struct cam_sim *sim, union ccb *ccb)
+{
+	struct storvsc_softc *sc = cam_sim_softc(sim);
+	int res;
+
+	mtx_assert(&sc->hs_lock, MA_OWNED);
+	switch (ccb->ccb_h.func_code) {
+	case XPT_PATH_INQ: {
+		struct ccb_pathinq *cpi = &ccb->cpi;
+
+		cpi->version_num = 1;
+		cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
+		cpi->target_sprt = 0;
+		cpi->hba_misc = PIM_NOBUSRESET;
+		if (hv_storvsc_use_pim_unmapped)
+			cpi->hba_misc |= PIM_UNMAPPED;
+		cpi->maxio = STORVSC_DATA_SIZE_MAX;
+		cpi->hba_eng_cnt = 0;
+		cpi->max_target = STORVSC_MAX_TARGETS;
+		cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
+		cpi->initiator_id = cpi->max_target;
+		cpi->bus_id = cam_sim_bus(sim);
+		cpi->base_transfer_speed = 300000;
+		cpi->transport = XPORT_SAS;
+		cpi->transport_version = 0;
+		cpi->protocol = PROTO_SCSI;
+		cpi->protocol_version = SCSI_REV_SPC2;
+		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
+		strlcpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
+		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
+		cpi->unit_number = cam_sim_unit(sim);
+
+		ccb->ccb_h.status = CAM_REQ_CMP;
+		xpt_done(ccb);
+		return;
+	}
+	case XPT_GET_TRAN_SETTINGS: {
+		struct  ccb_trans_settings *cts = &ccb->cts;
+
+		cts->transport = XPORT_SAS;
+		cts->transport_version = 0;
+		cts->protocol = PROTO_SCSI;
+		cts->protocol_version = SCSI_REV_SPC2;
+
+		/* enable tag queuing and disconnected mode */
+		cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
+		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
+		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
+		cts->xport_specific.valid = CTS_SPI_VALID_DISC;
+		cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
+			
+		ccb->ccb_h.status = CAM_REQ_CMP;
+		xpt_done(ccb);
+		return;
+	}
+	case XPT_SET_TRAN_SETTINGS:	{
+		ccb->ccb_h.status = CAM_REQ_CMP;
+		xpt_done(ccb);
+		return;
+	}
+	case XPT_CALC_GEOMETRY:{
+		cam_calc_geometry(&ccb->ccg, 1);
+		xpt_done(ccb);
+		return;
+	}
+	case  XPT_RESET_BUS:
+	case  XPT_RESET_DEV:{
+#if HVS_HOST_RESET
+		if ((res = hv_storvsc_host_reset(sc)) != 0) {
+			xpt_print(ccb->ccb_h.path,
+				"hv_storvsc_host_reset failed with %d\n", res);
+			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
+			xpt_done(ccb);
+			return;
+		}
+		ccb->ccb_h.status = CAM_REQ_CMP;
+		xpt_done(ccb);
+		return;
+#else
+		xpt_print(ccb->ccb_h.path,
+				  "%s reset not supported.\n",
+				  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
+				  "bus" : "dev");
+		ccb->ccb_h.status = CAM_REQ_INVALID;
+		xpt_done(ccb);
+		return;
+#endif	/* HVS_HOST_RESET */
+	}
+	case XPT_SCSI_IO:
+	case XPT_IMMED_NOTIFY: {
+		struct hv_storvsc_request *reqp = NULL;
+		bus_dmamap_t dmap_saved;
+
+		if (ccb->csio.cdb_len == 0) {
+			panic("cdl_len is 0\n");
+		}
+
+		if (LIST_EMPTY(&sc->hs_free_list)) {
+			ccb->ccb_h.status = CAM_REQUEUE_REQ;
+			if (sc->hs_frozen == 0) {
+				sc->hs_frozen = 1;
+				xpt_freeze_simq(sim, /* count*/1);
+			}
+			xpt_done(ccb);
+			return;
+		}
+
+		reqp = LIST_FIRST(&sc->hs_free_list);
+		LIST_REMOVE(reqp, link);
+
+		/* Save the data_dmap before reset request */
+		dmap_saved = reqp->data_dmap;
+
+		/* XXX this is ugly */
+		bzero(reqp, sizeof(struct hv_storvsc_request));
+
+		/* Restore necessary bits */
+		reqp->data_dmap = dmap_saved;
+		reqp->softc = sc;
+		
+		ccb->ccb_h.status |= CAM_SIM_QUEUED;
+		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
+			ccb->ccb_h.status = CAM_REQ_INVALID;
+			xpt_done(ccb);
+			return;
+		}
+
+#ifdef notyet
+		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
+			callout_init(&reqp->callout, 1);
+			callout_reset_sbt(&reqp->callout,
+			    SBT_1MS * ccb->ccb_h.timeout, 0,
+			    storvsc_timeout, reqp, 0);
+#if HVS_TIMEOUT_TEST
+			cv_init(&reqp->event.cv, "storvsc timeout cv");
+			mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
+					NULL, MTX_DEF);
+			switch (reqp->vstor_packet.vm_srb.cdb[0]) {
+				case MODE_SELECT_10:
+				case SEND_DIAGNOSTIC:
+					/* To have timer send the request. */
+					return;
+				default:
+					break;
+			}
+#endif /* HVS_TIMEOUT_TEST */
+		}
+#endif
+
+		if ((res = hv_storvsc_io_request(sc, reqp)) != 0) {
+			xpt_print(ccb->ccb_h.path,
+				"hv_storvsc_io_request failed with %d\n", res);
+			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
+			storvsc_free_request(sc, reqp);
+			xpt_done(ccb);
+			return;
+		}
+		return;
+	}
+
+	default:
+		ccb->ccb_h.status = CAM_REQ_INVALID;
+		xpt_done(ccb);
+		return;
+	}
+}
+
+/**
+ * @brief destroy bounce buffer
+ *
+ * Return a Scatter/Gather list created by
+ * storvsc_create_bounce_buffer() to the free pool.
+ *
+ * @param sgl - the Scatter/Gather list to be destroyed
+ */
+static void
+storvsc_destroy_bounce_buffer(struct sglist *sgl)
+{
+	struct hv_sgl_node *sgl_node = NULL;
+	if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
+		printf("storvsc error: not enough in use sgl\n");
+		return;
+	}
+	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
+	LIST_REMOVE(sgl_node, link);
+	sgl_node->sgl_data = sgl;
+	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
+}
+
+/**
+ * @brief create bounce buffer
+ *
+ * Take a Scatter/Gather list from the free pool; the list holds
+ * several page-aligned, page-sized buffers.
+ *
+ * @param seg_count - number of SG-list segments
+ * @param write - if WRITE_TYPE, set the used size of each SG page to 0,
+ * otherwise set the used size to the page size.
+ *
+ * @returns the SG list, or NULL on failure
+ */
+static struct sglist *
+storvsc_create_bounce_buffer(uint16_t seg_count, int write)
+{
+	int i = 0;
+	struct sglist *bounce_sgl = NULL;
+	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
+	struct hv_sgl_node *sgl_node = NULL;	
+
+	/* get struct sglist from free_sgl_list */
+	if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
+		printf("storvsc error: not enough free sgl\n");
+		return NULL;
+	}
+	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+	LIST_REMOVE(sgl_node, link);
+	bounce_sgl = sgl_node->sgl_data;
+	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
+
+	bounce_sgl->sg_maxseg = seg_count;
+
+	if (write == WRITE_TYPE)
+		bounce_sgl->sg_nseg = 0;
+	else
+		bounce_sgl->sg_nseg = seg_count;
+
+	for (i = 0; i < seg_count; i++)
+	        bounce_sgl->sg_segs[i].ss_len = buf_len;
+
+	return bounce_sgl;
+}
+
+/**
+ * @brief copy data from SG list to bounce buffer
+ *
+ * Copy data from the segments of one SG list into another SG list
+ * that is used as a bounce buffer.
+ *
+ * @param bounce_sgl - the destination SG list
+ * @param orig_sgl - the segments of the source SG list
+ * @param orig_sgl_count - the number of segments
+ * @param seg_bits - bitmask of the segments that need the bounce
+ *  buffer; a set bit means the segment is bounced.
+ */
+static void
+storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
+			       bus_dma_segment_t *orig_sgl,
+			       unsigned int orig_sgl_count,
+			       uint64_t seg_bits)
+{
+	int src_sgl_idx = 0;
+
+	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
+		if (seg_bits & (1 << src_sgl_idx)) {
+			memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
+			    (void*)orig_sgl[src_sgl_idx].ds_addr,
+			    orig_sgl[src_sgl_idx].ds_len);
+
+			bounce_sgl->sg_segs[src_sgl_idx].ss_len =
+			    orig_sgl[src_sgl_idx].ds_len;
+		}
+	}
+}
+
+/**
+ * @brief copy data from an SG list used as bounce buffer to another SG list
+ *
+ * Copy data from an SG list used as a bounce buffer back into the
+ * segments of another SG list.
+ *
+ * @param dest_sgl - the destination SG list's segments
+ * @param dest_sgl_count - the number of destination segments
+ * @param src_sgl - the source (bounce buffer) SG list
+ * @param seg_bits - bitmask of the src SG-list segments that used the
+ *  bounce buffer
+ */
+void
+storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
+				    unsigned int dest_sgl_count,
+				    struct sglist* src_sgl,
+				    uint64_t seg_bits)
+{
+	int sgl_idx = 0;
+	
+	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
+		if (seg_bits & (1 << sgl_idx)) {
+			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
+			    (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
+			    src_sgl->sg_segs[sgl_idx].ss_len);
+		}
+	}
+}
+
+/**
+ * @brief check whether an SG list needs a bounce buffer
+ *
+ * Check whether a bounce buffer is needed for the SG list.
+ *
+ * @param sgl - the SG list's segments
+ * @param sg_count - the number of SG-list segments
+ * @param bits - out parameter, bitmask of the segments that need the
+ *  bounce buffer
+ *
+ * @returns -1 if the SG list does not need a bounce buffer, 0 otherwise
+ */
+static int
+storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
+				unsigned int sg_count,
+				uint64_t *bits)
+{
+	int i = 0;
+	int offset = 0;
+	uint64_t phys_addr = 0;
+	uint64_t tmp_bits = 0;
+	boolean_t found_hole = FALSE;
+	boolean_t pre_aligned = TRUE;
+
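+	/*
+	 * A "hole" is any gap when the segments are viewed as one physically
+	 * contiguous buffer.  For example (addresses illustrative): seg0
+	 * covering [0x1800, 0x2000) followed by seg1 covering [0x3000,
+	 * 0x4000) leaves 0x2000-0x2fff unused, so a bounce buffer is
+	 * required.
+	 */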
+	if (sg_count < 2) {
+		return -1;
+	}
+
+	*bits = 0;
+	
+	phys_addr = vtophys(sgl[0].ds_addr);
+	offset =  phys_addr - trunc_page(phys_addr);
+
+	if (offset != 0) {
+		pre_aligned = FALSE;
+		tmp_bits |= 1;
+	}
+
+	for (i = 1; i < sg_count; i++) {
+		phys_addr = vtophys(sgl[i].ds_addr);
+		offset =  phys_addr - trunc_page(phys_addr);
+
+		if (offset == 0) {
+			if (FALSE == pre_aligned) {
+				/*
+				 * This segment is aligned but the previous
+				 * one is not: we found a hole.
+				 */
+				found_hole = TRUE;
+			}
+			pre_aligned = TRUE;
+		} else {
+			tmp_bits |= 1ULL << i;
+			if (!pre_aligned) {
+				if (phys_addr != vtophys(sgl[i-1].ds_addr +
+				    sgl[i-1].ds_len)) {
+					/*
+					 * This segment does not connect to
+					 * the previous one: we found a hole.
+					 */
+					found_hole = TRUE;
+				}
+			} else {
+				found_hole = TRUE;
+			}
+			pre_aligned = FALSE;
+		}
+	}
+
+	if (!found_hole) {
+		return (-1);
+	} else {
+		*bits = tmp_bits;
+		return 0;
+	}
+}
+
+/**
+ * Copy bus_dma segments into a multiple-page buffer; this requires all
+ * pages to be full and contiguous except for the 1st and the last one.
+ */
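+/*
+ * For example (values purely illustrative, assuming 4KB pages): a 16KB
+ * transfer whose first segment starts at physical address 0x10800 yields
+ * gpa_ofs = 0x800 and the five PRP pages 0x10, 0x11, 0x12, 0x13 and
+ * 0x14; only the first and the last page are partially used.
+ */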
+static void
+storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
+{
+	struct hv_storvsc_request *reqp = arg;
+	union ccb *ccb = reqp->ccb;
+	struct ccb_scsiio *csio = &ccb->csio;
+	struct storvsc_gpa_range *prplist;
+	int i;
+
+	prplist = &reqp->prp_list;
+	prplist->gpa_range.gpa_len = csio->dxfer_len;
+	prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK;
+
+	for (i = 0; i < nsegs; i++) {
+#ifdef INVARIANTS
+		if (nsegs > 1) {
+			if (i == 0) {
+				KASSERT((segs[i].ds_addr & PAGE_MASK) +
+				    segs[i].ds_len == PAGE_SIZE,
+				    ("invalid 1st page, ofs 0x%jx, len %zu",
+				     (uintmax_t)segs[i].ds_addr,
+				     segs[i].ds_len));
+			} else if (i == nsegs - 1) {
+				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0,
+				    ("invalid last page, ofs 0x%jx",
+				     (uintmax_t)segs[i].ds_addr));
+			} else {
+				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 &&
+				    segs[i].ds_len == PAGE_SIZE,
+				    ("not a full page, ofs 0x%jx, len %zu",
+				     (uintmax_t)segs[i].ds_addr,
+				     segs[i].ds_len));
+			}
+		}
+#endif
+		prplist->gpa_page[i] = atop(segs[i].ds_addr);
+	}
+	reqp->prp_cnt = nsegs;
+}
+
+/**
+ * @brief Fill in a request structure based on a CAM control block
+ *
+ * Fills in a request structure based on the contents of a CAM control
+ * block.  The request structure holds the payload information for
+ * VSCSI protocol request.
+ *
+ * @param ccb pointer to a CAM control block
+ * @param reqp pointer to a request structure
+ */
+static int
+create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
+{
+	struct ccb_scsiio *csio = &ccb->csio;
+	uint64_t phys_addr;
+	uint32_t pfn;
+	uint64_t not_aligned_seg_bits = 0;
+	int error;
+	
+	/* refer to struct vmscsi_req for meanings of these two fields */
+	reqp->vstor_packet.u.vm_srb.port =
+		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
+	reqp->vstor_packet.u.vm_srb.path_id =
+		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
+
+	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
+	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
+
+	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
+	if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
+		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
+			csio->cdb_len);
+	} else {
+		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
+			csio->cdb_len);
+	}
+
+	if (hv_storvsc_use_win8ext_flags) {
+		reqp->vstor_packet.u.vm_srb.win8_extension.time_out_value = 60;
+		reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
+			SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
+	}
+	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
+	case CAM_DIR_OUT:
+		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
+		if (hv_storvsc_use_win8ext_flags) {
+			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
+				SRB_FLAGS_DATA_OUT;
+		}
+		break;
+	case CAM_DIR_IN:
+		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
+		if (hv_storvsc_use_win8ext_flags) {
+			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
+				SRB_FLAGS_DATA_IN;
+		}
+		break;
+	case CAM_DIR_NONE:
+		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
+		if (hv_storvsc_use_win8ext_flags) {
+			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
+				SRB_FLAGS_NO_DATA_TRANSFER;
+		}
+		break;
+	default:
+		printf("Error: unexpected data direction: 0x%x\n",
+			ccb->ccb_h.flags & CAM_DIR_MASK);
+		return (EINVAL);
+	}
+
+	reqp->sense_data     = &csio->sense_data;
+	reqp->sense_info_len = csio->sense_len;
+
+	reqp->ccb = ccb;
+
+	if (0 == csio->dxfer_len) {
+		return (0);
+	}
+
+	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
+	case CAM_DATA_BIO:
+	case CAM_DATA_VADDR:
+		error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag,
+		    reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp,
+		    BUS_DMA_NOWAIT);
+		if (error) {
+			xpt_print(ccb->ccb_h.path,
+			    "bus_dmamap_load_ccb failed: %d\n", error);
+			return (error);
+		}
+		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
+			reqp->softc->sysctl_data.data_bio_cnt++;
+		else
+			reqp->softc->sysctl_data.data_vaddr_cnt++;
+		break;
+
+	case CAM_DATA_SG:
+	{
+		struct storvsc_gpa_range *prplist;
+		int i = 0;
+		int offset = 0;
+		int ret;
+
+		bus_dma_segment_t *storvsc_sglist =
+		    (bus_dma_segment_t *)ccb->csio.data_ptr;
+		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
+
+		prplist = &reqp->prp_list;
+		prplist->gpa_range.gpa_len = csio->dxfer_len;
+
+		printf("Storvsc: get SG I/O operation, %d\n",
+		    reqp->vstor_packet.u.vm_srb.data_in);
+
+		if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX) {
+			printf("Storvsc: %d segments are too many; "
+			    "only %d segments are supported\n",
+			    storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX);
+			return (EINVAL);
+		}
+
+		/*
+		 * We currently roll our own bounce buffer handling.  Ideally
+		 * we should use the BUS_DMA(9) framework, but the current
+		 * BUS_DMA code has no callback API to check the page
+		 * alignment of middle segments before busdma decides whether
+		 * a bounce buffer is needed for a particular segment.  There
+		 * is a callback, "bus_dma_filter_t *filter", but its
+		 * parameters are not sufficient for the storvsc driver.
+		 * TODO:
+		 *	Add page alignment check in BUS_DMA(9) callback. Once
+		 *	this is complete, switch the following code to use
+		 *	BUS_DMA(9) for storvsc bounce buffer support.
+		 */
+		/* check if we need to create bounce buffer */
+		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
+		    storvsc_sg_count, &not_aligned_seg_bits);
+		if (ret != -1) {
+			reqp->bounce_sgl =
+			    storvsc_create_bounce_buffer(storvsc_sg_count,
+			    reqp->vstor_packet.u.vm_srb.data_in);
+			if (NULL == reqp->bounce_sgl) {
+				printf("Storvsc_error: "
+				    "create bounce buffer failed.\n");
+				return (ENOMEM);
+			}
+
+			reqp->bounce_sgl_count = storvsc_sg_count;
+			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
+
+			/*
+			 * If it is a write, we need to copy the original
+			 * data to the bounce buffer.
+			 */
+			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
+				storvsc_copy_sgl_to_bounce_buf(
+				    reqp->bounce_sgl,
+				    storvsc_sglist,
+				    storvsc_sg_count,
+				    reqp->not_aligned_seg_bits);
+			}
+
+			/* Translate virtual addresses to physical frame numbers. */
+			if (reqp->not_aligned_seg_bits & 0x1) {
+				phys_addr =
+				    vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
+			} else {
+				phys_addr =
+				    vtophys(storvsc_sglist[0].ds_addr);
+			}
+			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
+
+			pfn = phys_addr >> PAGE_SHIFT;
+			prplist->gpa_page[0] = pfn;
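+			/*
+			 * For illustration, assuming 4 KiB pages
+			 * (PAGE_SHIFT == 12, PAGE_MASK == 0xfff): a
+			 * physical address of 0x12345678 yields
+			 * pfn 0x12345 and gpa_ofs 0x678.
+			 */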
+			
+			for (i = 1; i < storvsc_sg_count; i++) {
+				if (reqp->not_aligned_seg_bits & (1 << i)) {
+					phys_addr =
+					    vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
+				} else {
+					phys_addr =
+					    vtophys(storvsc_sglist[i].ds_addr);
+				}
+
+				pfn = phys_addr >> PAGE_SHIFT;
+				prplist->gpa_page[i] = pfn;
+			}
+			reqp->prp_cnt = i;
+		} else {
+			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
+
+			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
+
+			for (i = 0; i < storvsc_sg_count; i++) {
+				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
+				pfn = phys_addr >> PAGE_SHIFT;
+				prplist->gpa_page[i] = pfn;
+			}
+			reqp->prp_cnt = i;
+
+			/* Check whether the last segment crosses a page boundary. */
+			offset = phys_addr & PAGE_MASK;
+			if (offset) {
+				/* Add one more PRP entry */
+				phys_addr =
+				    vtophys(storvsc_sglist[i-1].ds_addr +
+				    PAGE_SIZE - offset);
+				pfn = phys_addr >> PAGE_SHIFT;
+				prplist->gpa_page[i] = pfn;
+				reqp->prp_cnt++;
+			}
+			
+			reqp->bounce_sgl_count = 0;
+		}
+		reqp->softc->sysctl_data.data_sg_cnt++;
+		break;
+	}
+	default:
+		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
+		return(EINVAL);
+	}
+
+	return (0);
+}
+
+static uint32_t
+is_scsi_valid(const struct scsi_inquiry_data *inq_data)
+{
+	u_int8_t type;
+
+	type = SID_TYPE(inq_data);
+	if (type == T_NODEVICE)
+		return (0);
+	if (SID_QUAL(inq_data) == SID_QUAL_BAD_LU)
+		return (0);
+	return (1);
+}
+
+/**
+ * @brief completion function before returning to CAM
+ *
+ * I/O process has been completed and the result needs
+ * to be passed to the CAM layer.
+ * Free resources related to this request.
+ *
+ * @param reqp pointer to a request structure
+ */
+static void
+storvsc_io_done(struct hv_storvsc_request *reqp)
+{
+	union ccb *ccb = reqp->ccb;
+	struct ccb_scsiio *csio = &ccb->csio;
+	struct storvsc_softc *sc = reqp->softc;
+	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
+	bus_dma_segment_t *ori_sglist = NULL;
+	int ori_sg_count = 0;
+	const struct scsi_generic *cmd;
+
+	/* destroy bounce buffer if it is used */
+	if (reqp->bounce_sgl_count) {
+		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
+		ori_sg_count = ccb->csio.sglist_cnt;
+
+		/*
+		 * If it is READ operation, we should copy back the data
+		 * to original SG list.
+		 */
+		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
+			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
+			    ori_sg_count,
+			    reqp->bounce_sgl,
+			    reqp->not_aligned_seg_bits);
+		}
+
+		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
+		reqp->bounce_sgl_count = 0;
+	}
+		
+	if (reqp->retries > 0) {
+		mtx_lock(&sc->hs_lock);
+#if HVS_TIMEOUT_TEST
+		xpt_print(ccb->ccb_h.path,
+			"%u: IO returned after timeout, "
+			"waking up timer handler if any.\n", ticks);
+		mtx_lock(&reqp->event.mtx);
+		cv_signal(&reqp->event.cv);
+		mtx_unlock(&reqp->event.mtx);
+#endif
+		reqp->retries = 0;
+		xpt_print(ccb->ccb_h.path,
+			"%u: IO returned after timeout, "
+			"stopping timer if any.\n", ticks);
+		mtx_unlock(&sc->hs_lock);
+	}
+
+#ifdef notyet
+	/*
+	 * callout_drain() will wait for the timer handler to finish
+	 * if it is running. So we don't need any lock to synchronize
+	 * between this routine and the timer handler.
+	 * Note that we need to make sure reqp is not freed when timer
+	 * handler is using or will use it.
+	 */
+	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
+		callout_drain(&reqp->callout);
+	}
+#endif
+	cmd = (const struct scsi_generic *)
+	    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
+	     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
+
+	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
+	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
+	int srb_status = SRB_STATUS(vm_srb->srb_status);
+	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
+		if (srb_status != SRB_STATUS_SUCCESS) {
+			/*
+			 * If there are errors, for example, invalid LUN,
+			 * host will inform VM through SRB status.
+			 */
+			if (bootverbose) {
+				if (srb_status == SRB_STATUS_INVALID_LUN) {
+					xpt_print(ccb->ccb_h.path,
+					    "invalid LUN %d for op: %s\n",
+					    vm_srb->lun,
+					    scsi_op_desc(cmd->opcode, NULL));
+				} else {
+					xpt_print(ccb->ccb_h.path,
+					    "Unknown SRB flag: %d for op: %s\n",
+					    srb_status,
+					    scsi_op_desc(cmd->opcode, NULL));
+				}
+			}
+			ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
+		} else {
+			ccb->ccb_h.status |= CAM_REQ_CMP;
+		}
+
+		if (cmd->opcode == INQUIRY &&
+		    srb_status == SRB_STATUS_SUCCESS) {
+			int resp_xfer_len, resp_buf_len, data_len;
+			uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
+			struct scsi_inquiry_data *inq_data =
+			    (struct scsi_inquiry_data *)csio->data_ptr;
+
+			/* Get the buffer length reported by host */
+			resp_xfer_len = vm_srb->transfer_len;
+
+			/* Get the available buffer length */
+			resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
+			data_len = (resp_buf_len < resp_xfer_len) ?
+			    resp_buf_len : resp_xfer_len;
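+			/*
+			 * Byte 4 of standard INQUIRY data is the ADDITIONAL
+			 * LENGTH field, so the complete response occupies
+			 * resp_buf[4] + 5 bytes; data_len is then clamped to
+			 * what the host actually transferred.
+			 */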
+			if (bootverbose && data_len >= 5) {
+				xpt_print(ccb->ccb_h.path, "storvsc inquiry "
+				    "(%d) [%x %x %x %x %x ... ]\n", data_len,
+				    resp_buf[0], resp_buf[1], resp_buf[2],
+				    resp_buf[3], resp_buf[4]);
+			}
+			/*
+			 * XXX: Hyper-V (since Win2012R2) responds to INQUIRY
+			 * with an unknown version (0) for the GEN-2 DVD
+			 * device.  Manually set the version number to SPC3
+			 * in order to ask CAM to continue probing with
+			 * "PROBE_REPORT_LUNS"; see probedone() in scsi_xpt.c.
+			 */
+			if (SID_TYPE(inq_data) == T_CDROM &&
+			    inq_data->version == 0 &&
+			    (vmstor_proto_version >= VMSTOR_PROTOCOL_VERSION_WIN8)) {
+				inq_data->version = SCSI_REV_SPC3;
+				if (bootverbose) {
+					xpt_print(ccb->ccb_h.path,
+					    "set version from 0 to %d\n",
+					    inq_data->version);
+				}
+			}
+			/*
+			 * XXX: Manually fix the wrong response returned from WS2012
+			 */
+			if (!is_scsi_valid(inq_data) &&
+			    (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
+			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8 ||
+			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN7)) {
+				if (data_len >= 4 &&
+				    (resp_buf[2] == 0 || resp_buf[3] == 0)) {
+					resp_buf[2] = SCSI_REV_SPC3;
+					resp_buf[3] = 2; /* resp fmt must be 2 */
+					if (bootverbose)
+						xpt_print(ccb->ccb_h.path,
+						    "fix version and resp fmt for 0x%x\n",
+						    vmstor_proto_version);
+				}
+			} else if (data_len >= SHORT_INQUIRY_LENGTH) {
+				char vendor[16];
+
+				cam_strvis(vendor, inq_data->vendor,
+				    sizeof(inq_data->vendor), sizeof(vendor));
+				/*
+				 * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
+				 * WIN2012 R2 in order to support UNMAP feature.
+				 */
+				if (!strncmp(vendor, "Msft", 4) &&
+				    SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
+				    (vmstor_proto_version ==
+				     VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
+				     vmstor_proto_version ==
+				     VMSTOR_PROTOCOL_VERSION_WIN8)) {
+					inq_data->version = SCSI_REV_SPC3;
+					if (bootverbose) {
+						xpt_print(ccb->ccb_h.path,
+						    "storvsc upgrades "
+						    "SPC2 to SPC3\n");
+					}
+				}
+			}
+		}
+	} else {
+		/**
+		 * On some Windows hosts the TEST_UNIT_READY command can
+		 * return SRB_STATUS_ERROR and sense data, for example,
+		 * asc=0x3a,1 ("Medium not present - tray closed").  This
+		 * error can be ignored since the command is sent to the
+		 * host periodically.
+		 */
+		boolean_t unit_not_ready =
+		    vm_srb->scsi_status == SCSI_STATUS_CHECK_COND &&
+		    cmd->opcode == TEST_UNIT_READY &&
+		    srb_status == SRB_STATUS_ERROR;
+		if (!unit_not_ready && bootverbose) {
+			mtx_lock(&sc->hs_lock);
+			xpt_print(ccb->ccb_h.path,
+				"storvsc scsi_status = %d, srb_status = %d\n",
+				vm_srb->scsi_status, srb_status);
+			mtx_unlock(&sc->hs_lock);
+		}
+		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
+	}
+
+	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
+	ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
+
+	if (reqp->sense_info_len != 0) {
+		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
+		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
+	}
+
+	mtx_lock(&sc->hs_lock);
+	if (reqp->softc->hs_frozen == 1) {
+		xpt_print(ccb->ccb_h.path,
+			"%u: storvsc unfreezing softc 0x%p.\n",
+			ticks, reqp->softc);
+		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
+		reqp->softc->hs_frozen = 0;
+	}
+	storvsc_free_request(sc, reqp);
+	mtx_unlock(&sc->hs_lock);
+
+	xpt_done_direct(ccb);
+}
+
+/**
+ * @brief Free a request structure
+ *
+ * Free a request structure by returning it to the free list
+ *
+ * @param sc pointer to a softc
+ * @param reqp pointer to a request structure
+ */	
+static void
+storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
+{
+
+	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
+}
+
+/**
+ * @brief Determine type of storage device from GUID
+ *
+ * Using the type GUID, determine if this is a StorVSC (paravirtual
+ * SCSI) or BlkVSC (paravirtual IDE) device.
+ *
+ * @param dev a device
+ * returns an enum
+ */
+static enum hv_storage_type
+storvsc_get_storage_type(device_t dev)
+{
+	device_t parent = device_get_parent(dev);
+
+	if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0)
+		return DRIVER_BLKVSC;
+	if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0)
+		return DRIVER_STORVSC;
+	return DRIVER_UNKNOWN;
+}
+
+#define	PCI_VENDOR_INTEL	0x8086
+#define	PCI_PRODUCT_PIIX4	0x7111
+
+static void
+storvsc_ada_probe_veto(void *arg __unused, struct cam_path *path,
+    struct ata_params *ident_buf __unused, int *veto)
+{
+
+	/*
+	 * The ATA disks are shared with the controllers managed
+	 * by this driver, so veto the ATA disks' attachment; the
+	 * ATA disks will be attached as SCSI disks once this driver
+	 * is attached.
+	 */
+	if (path->device->protocol == PROTO_ATA) {
+		struct ccb_pathinq cpi;
+
+		bzero(&cpi, sizeof(cpi));
+		xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE);
+		cpi.ccb_h.func_code = XPT_PATH_INQ;
+		xpt_action((union ccb *)&cpi);
+		if (cpi.ccb_h.status == CAM_REQ_CMP &&
+		    cpi.hba_vendor == PCI_VENDOR_INTEL &&
+		    cpi.hba_device == PCI_PRODUCT_PIIX4) {
+			(*veto)++;
+			if (bootverbose) {
+				xpt_print(path,
+				    "Disable ATA disks on "
+				    "simulated ATA controller (0x%04x%04x)\n",
+				    cpi.hba_device, cpi.hba_vendor);
+			}
+		}
+	}
+}
+
+static void
+storvsc_sysinit(void *arg __unused)
+{
+	if (vm_guest == VM_GUEST_HV) {
+		storvsc_handler_tag = EVENTHANDLER_REGISTER(ada_probe_veto,
+		    storvsc_ada_probe_veto, NULL, EVENTHANDLER_PRI_ANY);
+	}
+}
+SYSINIT(storvsc_sys_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysinit,
+    NULL);
+
+static void
+storvsc_sysuninit(void *arg __unused)
+{
+	if (storvsc_handler_tag != NULL)
+		EVENTHANDLER_DEREGISTER(ada_probe_veto, storvsc_handler_tag);
+}
+SYSUNINIT(storvsc_sys_uninit, SI_SUB_DRIVERS, SI_ORDER_SECOND,
+    storvsc_sysuninit, NULL);


Property changes on: trunk/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/storvsc/hv_vstorage.h
===================================================================
--- trunk/sys/dev/hyperv/storvsc/hv_vstorage.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/storvsc/hv_vstorage.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,287 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2017 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/storvsc/hv_vstorage.h 324461 2017-10-10 02:22:34Z sephe $
+ */
+
+#ifndef __HV_VSTORAGE_H__
+#define __HV_VSTORAGE_H__
+
+/*
+ * Major/minor macros.  Minor version is in LSB, meaning that earlier flat
+ * version numbers will be interpreted as "0.x" (i.e., 1 becomes 0.1).
+ */
+
+#define VMSTOR_PROTOCOL_MAJOR(VERSION_)         (((VERSION_) >> 8) & 0xff)
+#define VMSTOR_PROTOCOL_MINOR(VERSION_)         (((VERSION_)     ) & 0xff)
+#define VMSTOR_PROTOCOL_VERSION(MAJOR_, MINOR_) ((((MAJOR_) & 0xff) << 8) | \
+                                                 (((MINOR_) & 0xff)     ))
+
+#define VMSTOR_PROTOCOL_VERSION_WIN6       VMSTOR_PROTOCOL_VERSION(2, 0)
+#define VMSTOR_PROTOCOL_VERSION_WIN7       VMSTOR_PROTOCOL_VERSION(4, 2)
+#define VMSTOR_PROTOCOL_VERSION_WIN8       VMSTOR_PROTOCOL_VERSION(5, 1)
+#define VMSTOR_PROTOCOL_VERSION_WIN8_1     VMSTOR_PROTOCOL_VERSION(6, 0)
+#define VMSTOR_PROTOCOL_VERSION_WIN10      VMSTOR_PROTOCOL_VERSION(6, 2)
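+/*
+ * For example, VMSTOR_PROTOCOL_VERSION(5, 1) encodes to 0x0501, from which
+ * VMSTOR_PROTOCOL_MAJOR() recovers 5 and VMSTOR_PROTOCOL_MINOR() recovers 1.
+ */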
+/*
+ * Invalid version.
+ */
+#define VMSTOR_INVALID_PROTOCOL_VERSION  -1
+
+/*
+ * Version history:
+ * V1 Beta                    0.1
+ * V1 RC < 2008/1/31          1.0
+ * V1 RC > 2008/1/31          2.0
+ * Win7: 4.2
+ * Win8: 5.1
+ */
+
+#define VMSTOR_PROTOCOL_VERSION_CURRENT	VMSTOR_PROTOCOL_VERSION(5, 1)
+
+/**
+ *  Operation codes carried in virtual storage request packets.
+ */
+enum vstor_packet_ops {
+	VSTOR_OPERATION_COMPLETEIO            = 1,
+	VSTOR_OPERATION_REMOVEDEVICE          = 2,
+	VSTOR_OPERATION_EXECUTESRB            = 3,
+	VSTOR_OPERATION_RESETLUN              = 4,
+	VSTOR_OPERATION_RESETADAPTER          = 5,
+	VSTOR_OPERATION_RESETBUS              = 6,
+	VSTOR_OPERATION_BEGININITIALIZATION   = 7,
+	VSTOR_OPERATION_ENDINITIALIZATION     = 8,
+	VSTOR_OPERATION_QUERYPROTOCOLVERSION  = 9,
+	VSTOR_OPERATION_QUERYPROPERTIES       = 10,
+	VSTOR_OPERATION_ENUMERATE_BUS         = 11,
+	VSTOR_OPERATION_FCHBA_DATA            = 12,
+	VSTOR_OPERATION_CREATE_MULTI_CHANNELS = 13,
+	VSTOR_OPERATION_MAXIMUM               = 13
+};
+
+
+/*
+ *  Platform neutral description of a SCSI request -
+ *  this remains the same across the wire regardless of 32/64 bit.
+ *  Note: it's patterned off the Windows DDK SCSI_PASS_THROUGH structure.
+ */
+
+#define CDB16GENERIC_LENGTH			0x10
+#define SENSE_BUFFER_SIZE			0x14
+#define MAX_DATA_BUFFER_LENGTH_WITH_PADDING	0x14
+
+#define POST_WIN7_STORVSC_SENSE_BUFFER_SIZE	0x14
+#define PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE	0x12
+
+
+struct vmscsi_win8_extension {
+	/*
+	 * The following were added in Windows 8
+	 */
+	uint16_t reserve;
+	uint8_t  queue_tag;
+	uint8_t  queue_action;
+	uint32_t srb_flags;
+	uint32_t time_out_value;
+	uint32_t queue_sort_ey;
+} __packed;
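+/*
+ * The srb_flags member above carries the SRB_FLAGS_* values defined near
+ * the end of this file; the storvsc driver only fills it in when
+ * hv_storvsc_use_win8ext_flags is set (see hv_storvsc_drv_freebsd.c).
+ */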
+
+struct vmscsi_req {
+	uint16_t length;
+	uint8_t  srb_status;
+	uint8_t  scsi_status;
+
+	/* HBA number, set to the order number detected by initiator. */
+	uint8_t  port;
+	/* SCSI bus number or bus_id, different from CAM's path_id. */
+	uint8_t  path_id;
+
+	uint8_t  target_id;
+	uint8_t  lun;
+
+	uint8_t  cdb_len;
+	uint8_t  sense_info_len;
+	uint8_t  data_in;
+	uint8_t  reserved;
+
+	uint32_t transfer_len;
+
+	union {
+	    uint8_t cdb[CDB16GENERIC_LENGTH];
+
+	    uint8_t sense_data[SENSE_BUFFER_SIZE];
+
+	    uint8_t reserved_array[MAX_DATA_BUFFER_LENGTH_WITH_PADDING];
+	} u;
+
+	/*
+	 * The following was added in win8.
+	 */
+	struct vmscsi_win8_extension win8_extension;
+
+} __packed;
+
+/**
+ *  This structure is sent during the initialization phase to get the different
+ *  properties of the channel.
+ */
+
+struct vmstor_chan_props {
+	uint16_t proto_ver;
+	uint8_t  path_id;
+	uint8_t  target_id;
+
+	uint16_t max_channel_cnt;
+
+	/**
+	 * Note: port number is only really known on the client side
+	 */
+	uint16_t port;
+	uint32_t flags;
+	uint32_t max_transfer_bytes;
+
+	/**
+	 *  This id is unique for each channel and will correspond with
+	 *  vendor specific data in the inquiry data
+	 */
+	uint64_t unique_id;
+
+} __packed;
+
+/**
+ *  This structure is sent during the storage protocol negotiations.
+ */
+
+struct vmstor_proto_ver
+{
+	/**
+	 * Major (MSW) and minor (LSW) version numbers.
+	 */
+	uint16_t major_minor;
+
+	uint16_t revision;			/* always zero */
+} __packed;
+
+/**
+ * Channel Property Flags
+ */
+
+#define STORAGE_CHANNEL_REMOVABLE_FLAG                  0x1
+#define STORAGE_CHANNEL_EMULATED_IDE_FLAG               0x2
+
+
+struct vstor_packet {
+	/**
+	 * Requested operation type
+	 */
+	enum vstor_packet_ops operation;
+
+	/*
+	 * Flags - see below for values
+	 */
+	uint32_t flags;
+
+	/**
+	 * Status of the request returned from the server side.
+	 */
+	uint32_t status;
+
+	union
+	{
+	    /**
+	     * Structure used to forward SCSI commands from the client to
+	     * the server.
+	     */
+	    struct vmscsi_req vm_srb;
+
+	    /**
+	     * Structure used to query channel properties.
+	     */
+	    struct vmstor_chan_props chan_props;
+
+	    /**
+	     * Used during version negotiations.
+	     */
+	    struct vmstor_proto_ver version;
+
+	    /**
+	     * Number of multichannels to create
+	     */
+	    uint16_t multi_channels_cnt;
+	} u;
+
+} __packed;
+
+
+/**
+ * SRB (SCSI Request Block) Status Codes
+ */
+#define SRB_STATUS_PENDING		0x00
+#define SRB_STATUS_SUCCESS		0x01
+#define SRB_STATUS_ABORTED		0x02
+#define SRB_STATUS_ERROR 		0x04
+#define SRB_STATUS_INVALID_LUN          0x20
+/**
+ * SRB Status Masks (can be combined with above status codes)
+ */
+#define SRB_STATUS_QUEUE_FROZEN         0x40
+#define SRB_STATUS_AUTOSENSE_VALID      0x80
+
+#define SRB_STATUS(status)	\
+	((status) & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))
+/*
+ * SRB Flag Bits
+ */
+
+#define SRB_FLAGS_QUEUE_ACTION_ENABLE           0x00000002
+#define SRB_FLAGS_DISABLE_DISCONNECT            0x00000004
+#define SRB_FLAGS_DISABLE_SYNCH_TRANSFER        0x00000008
+#define SRB_FLAGS_BYPASS_FROZEN_QUEUE           0x00000010
+#define SRB_FLAGS_DISABLE_AUTOSENSE             0x00000020
+#define SRB_FLAGS_DATA_IN                       0x00000040
+#define SRB_FLAGS_DATA_OUT                      0x00000080
+#define SRB_FLAGS_NO_DATA_TRANSFER              0x00000000
+#define SRB_FLAGS_UNSPECIFIED_DIRECTION (SRB_FLAGS_DATA_IN | SRB_FLAGS_DATA_OUT)
+#define SRB_FLAGS_NO_QUEUE_FREEZE               0x00000100
+#define SRB_FLAGS_ADAPTER_CACHE_ENABLE          0x00000200
+#define SRB_FLAGS_FREE_SENSE_BUFFER             0x00000400
+/**
+ *  Packet flags
+ */
+
+/**
+ *  This flag indicates that the server should send back a completion for this
+ *  packet.
+ */
+#define REQUEST_COMPLETION_FLAG	0x1
+
+/**
+ *  This is the set of flags that the vsc can set in any packets it sends
+ */
+#define VSC_LEGAL_FLAGS (REQUEST_COMPLETION_FLAG)
+
+#endif /* __HV_VSTORAGE_H__ */


Property changes on: trunk/sys/dev/hyperv/storvsc/hv_vstorage.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/utilities/hv_kvp.c
===================================================================
--- trunk/sys/dev/hyperv/utilities/hv_kvp.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/utilities/hv_kvp.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,921 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014,2016-2017 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ *	Author:	Sainath Varanasi.
+ *	Date:	4/2012
+ *	Email:	bsdic at microsoft.com
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/utilities/hv_kvp.c 324461 2017-10-10 02:22:34Z sephe $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/reboot.h>
+#include <sys/lock.h>
+#include <sys/taskqueue.h>
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/kthread.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysproto.h>
+#include <sys/un.h>
+#include <sys/endian.h>
+#include <sys/_null.h>
+#include <sys/sema.h>
+#include <sys/signal.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+#include <sys/mutex.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/vmbus.h>
+#include <dev/hyperv/utilities/hv_utilreg.h>
+#include <dev/hyperv/utilities/vmbus_icreg.h>
+#include <dev/hyperv/utilities/vmbus_icvar.h>
+
+#include "unicode.h"
+#include "hv_kvp.h"
+#include "vmbus_if.h"
+
+/* hv_kvp defines */
+#define BUFFERSIZE	sizeof(struct hv_kvp_msg)
+#define kvp_hdr		hdr.kvp_hdr
+
+#define KVP_FWVER_MAJOR		3
+#define KVP_FWVER		VMBUS_IC_VERSION(KVP_FWVER_MAJOR, 0)
+
+#define KVP_MSGVER_MAJOR	4
+#define KVP_MSGVER		VMBUS_IC_VERSION(KVP_MSGVER_MAJOR, 0)
+
+/* hv_kvp debug control */
+static int hv_kvp_log = 0;
+
+#define	hv_kvp_log_error(...)	do {				\
+	if (hv_kvp_log > 0)				\
+		log(LOG_ERR, "hv_kvp: " __VA_ARGS__);	\
+} while (0)
+
+#define	hv_kvp_log_info(...) do {				\
+	if (hv_kvp_log > 1)				\
+		log(LOG_INFO, "hv_kvp: " __VA_ARGS__);		\
+} while (0)
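+/*
+ * The log level above is exposed through the "hv_kvp_log" sysctl created in
+ * hv_kvp_attach() below: 0 is silent, 1 adds error messages, 2 and above
+ * adds info messages.  On a typical system this shows up as something like
+ * dev.hvkvp.0.hv_kvp_log (the exact node depends on the device unit).
+ */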
+
+static const struct vmbus_ic_desc vmbus_kvp_descs[] = {
+	{
+		.ic_guid = { .hv_guid = {
+		    0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d,
+		    0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3,  0xe6 } },
+		.ic_desc = "Hyper-V KVP"
+	},
+	VMBUS_IC_DESC_END
+};
+
+/* character device prototypes */
+static d_open_t		hv_kvp_dev_open;
+static d_close_t	hv_kvp_dev_close;
+static d_read_t		hv_kvp_dev_daemon_read;
+static d_write_t	hv_kvp_dev_daemon_write;
+static d_poll_t		hv_kvp_dev_daemon_poll;
+
+/* hv_kvp character device structure */
+static struct cdevsw hv_kvp_cdevsw =
+{
+	.d_version	= D_VERSION,
+	.d_open		= hv_kvp_dev_open,
+	.d_close	= hv_kvp_dev_close,
+	.d_read		= hv_kvp_dev_daemon_read,
+	.d_write	= hv_kvp_dev_daemon_write,
+	.d_poll		= hv_kvp_dev_daemon_poll,
+	.d_name		= "hv_kvp_dev",
+};
+
+
+/*
+ * Global state to track and synchronize multiple
+ * KVP transaction requests from the host.
+ */
+typedef struct hv_kvp_sc {
+	struct vmbus_ic_softc	util_sc;
+	device_t		dev;
+
+	/* Unless otherwise specified, the pending mutex should be
+	 * used to alter the values of the following parameters:
+	 * 1. req_in_progress
+	 * 2. req_timed_out
+	 */
+	struct mtx		pending_mutex;
+
+	struct task		task;
+
+	/* To track if transaction is active or not */
+	boolean_t		req_in_progress;
+	/* Tracks if daemon did not reply back in time */
+	boolean_t		req_timed_out;
+	/* Tracks if daemon is serving a request currently */
+	boolean_t		daemon_busy;
+
+	/* Length of host message */
+	uint32_t		host_msg_len;
+
+	/* Host message id */
+	uint64_t		host_msg_id;
+
+	/* Current kvp message from the host */
+	struct hv_kvp_msg	*host_kvp_msg;
+
+	 /* Current kvp message for daemon */
+	struct hv_kvp_msg	daemon_kvp_msg;
+
+	/* Receive buffer for communicating with the host */
+	uint8_t			*rcv_buf;
+
+	/* Device semaphore to control communication */
+	struct sema		dev_sema;
+
+	/* Indicates if daemon registered with driver */
+	boolean_t		register_done;
+
+	/* Character device status */
+	boolean_t		dev_accessed;
+
+	struct cdev *hv_kvp_dev;
+
+	struct proc *daemon_task;
+
+	struct selinfo hv_kvp_selinfo;
+} hv_kvp_sc;
+
+/* hv_kvp prototypes */
+static int	hv_kvp_req_in_progress(hv_kvp_sc *sc);
+static void	hv_kvp_transaction_init(hv_kvp_sc *sc, uint32_t, uint64_t, uint8_t *);
+static void	hv_kvp_send_msg_to_daemon(hv_kvp_sc *sc);
+static void	hv_kvp_process_request(void *context, int pending);
+
+/*
+ * hv_kvp low level functions
+ */
+
+/*
+ * Check if a kvp transaction is in progress
+ */
+static int
+hv_kvp_req_in_progress(hv_kvp_sc *sc)
+{
+
+	return (sc->req_in_progress);
+}
+
+
+/*
+ * This routine is called whenever a message is received from the host
+ */
+static void
+hv_kvp_transaction_init(hv_kvp_sc *sc, uint32_t rcv_len,
+			uint64_t request_id, uint8_t *rcv_buf)
+{
+
+	/* Store all the relevant message details in the global structure */
+	/* Do not need to use mutex for req_in_progress here */
+	sc->req_in_progress = true;
+	sc->host_msg_len = rcv_len;
+	sc->host_msg_id = request_id;
+	sc->rcv_buf = rcv_buf;
+	sc->host_kvp_msg = (struct hv_kvp_msg *)&rcv_buf[
+	    sizeof(struct hv_vmbus_pipe_hdr) +
+	    sizeof(struct hv_vmbus_icmsg_hdr)];
+}
+
+/*
+ * Convert ip related info in umsg from utf8 to utf16 and store in hmsg
+ */
+static int
+hv_kvp_convert_utf8_ipinfo_to_utf16(struct hv_kvp_msg *umsg,
+				    struct hv_kvp_ip_msg *host_ip_msg)
+{
+	int err_ip, err_subnet, err_gway, err_dns, err_adap;
+	int UNUSED_FLAG = 1;
+
+	utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.ip_addr,
+	    MAX_IP_ADDR_SIZE,
+	    (char *)umsg->body.kvp_ip_val.ip_addr,
+	    strlen((char *)umsg->body.kvp_ip_val.ip_addr),
+	    UNUSED_FLAG,
+	    &err_ip);
+	utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.sub_net,
+	    MAX_IP_ADDR_SIZE,
+	    (char *)umsg->body.kvp_ip_val.sub_net,
+	    strlen((char *)umsg->body.kvp_ip_val.sub_net),
+	    UNUSED_FLAG,
+	    &err_subnet);
+	utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.gate_way,
+	    MAX_GATEWAY_SIZE,
+	    (char *)umsg->body.kvp_ip_val.gate_way,
+	    strlen((char *)umsg->body.kvp_ip_val.gate_way),
+	    UNUSED_FLAG,
+	    &err_gway);
+	utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.dns_addr,
+	    MAX_IP_ADDR_SIZE,
+	    (char *)umsg->body.kvp_ip_val.dns_addr,
+	    strlen((char *)umsg->body.kvp_ip_val.dns_addr),
+	    UNUSED_FLAG,
+	    &err_dns);
+	utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.adapter_id,
+	    MAX_ADAPTER_ID_SIZE,
+	    (char *)umsg->body.kvp_ip_val.adapter_id,
+	    strlen((char *)umsg->body.kvp_ip_val.adapter_id),
+	    UNUSED_FLAG,
+	    &err_adap);
+
+	host_ip_msg->kvp_ip_val.dhcp_enabled = umsg->body.kvp_ip_val.dhcp_enabled;
+	host_ip_msg->kvp_ip_val.addr_family = umsg->body.kvp_ip_val.addr_family;
+
+	return (err_ip | err_subnet | err_gway | err_dns | err_adap);
+}
+
+
+/*
+ * Convert ip related info in hmsg from utf16 to utf8 and store in umsg
+ */
+static int
+hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg,
+				    struct hv_kvp_msg *umsg)
+{
+	int err_ip, err_subnet, err_gway, err_dns, err_adap;
+	int UNUSED_FLAG = 1;
+	device_t *devs;
+	int devcnt;
+
+	/* IP Address */
+	utf16_to_utf8((char *)umsg->body.kvp_ip_val.ip_addr,
+	    MAX_IP_ADDR_SIZE,
+	    (uint16_t *)host_ip_msg->kvp_ip_val.ip_addr,
+	    MAX_IP_ADDR_SIZE,
+	    UNUSED_FLAG,
+	    &err_ip);
+
+	/* Adapter ID : GUID */
+	utf16_to_utf8((char *)umsg->body.kvp_ip_val.adapter_id,
+	    MAX_ADAPTER_ID_SIZE,
+	    (uint16_t *)host_ip_msg->kvp_ip_val.adapter_id,
+	    MAX_ADAPTER_ID_SIZE,
+	    UNUSED_FLAG,
+	    &err_adap);
+
+	if (devclass_get_devices(devclass_find("hn"), &devs, &devcnt) == 0) {
+		for (devcnt = devcnt - 1; devcnt >= 0; devcnt--) {
+			device_t dev = devs[devcnt];
+			struct vmbus_channel *chan;
+			char buf[HYPERV_GUID_STRLEN];
+			int n;
+
+			chan = vmbus_get_channel(dev);
+			n = hyperv_guid2str(vmbus_chan_guid_inst(chan), buf,
+			    sizeof(buf));
+
+			/*
+			 * The string in the 'kvp_ip_val.adapter_id' has
+			 * braces around the GUID; skip the leading brace
+			 * in 'kvp_ip_val.adapter_id'.
+			 */
+			if (strncmp(buf,
+			    ((char *)&umsg->body.kvp_ip_val.adapter_id) + 1,
+			    n) == 0) {
+				strlcpy((char *)umsg->body.kvp_ip_val.adapter_id,
+				    device_get_nameunit(dev), MAX_ADAPTER_ID_SIZE);
+				break;
+			}
+		}
+		free(devs, M_TEMP);
+	}
+
+	/* Address Family , DHCP , SUBNET, Gateway, DNS */
+	umsg->kvp_hdr.operation = host_ip_msg->operation;
+	umsg->body.kvp_ip_val.addr_family = host_ip_msg->kvp_ip_val.addr_family;
+	umsg->body.kvp_ip_val.dhcp_enabled = host_ip_msg->kvp_ip_val.dhcp_enabled;
+	utf16_to_utf8((char *)umsg->body.kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE,
+	    (uint16_t *)host_ip_msg->kvp_ip_val.sub_net,
+	    MAX_IP_ADDR_SIZE,
+	    UNUSED_FLAG,
+	    &err_subnet);
+
+	utf16_to_utf8((char *)umsg->body.kvp_ip_val.gate_way, MAX_GATEWAY_SIZE,
+	    (uint16_t *)host_ip_msg->kvp_ip_val.gate_way,
+	    MAX_GATEWAY_SIZE,
+	    UNUSED_FLAG,
+	    &err_gway);
+
+	utf16_to_utf8((char *)umsg->body.kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE,
+	    (uint16_t *)host_ip_msg->kvp_ip_val.dns_addr,
+	    MAX_IP_ADDR_SIZE,
+	    UNUSED_FLAG,
+	    &err_dns);
+
+	return (err_ip | err_subnet | err_gway | err_dns | err_adap);
+}
+
+
+/*
+ * Prepare a user kvp msg based on host kvp msg (utf16 to utf8)
+ * Ensure utf16_to_utf8 takes care of the additional string terminating char.
+ */
+static void
+hv_kvp_convert_hostmsg_to_usermsg(struct hv_kvp_msg *hmsg, struct hv_kvp_msg *umsg)
+{
+	int utf_err = 0;
+	uint32_t value_type;
+	struct hv_kvp_ip_msg *host_ip_msg;
+
+	host_ip_msg = (struct hv_kvp_ip_msg*)hmsg;
+	memset(umsg, 0, sizeof(struct hv_kvp_msg));
+
+	umsg->kvp_hdr.operation = hmsg->kvp_hdr.operation;
+	umsg->kvp_hdr.pool = hmsg->kvp_hdr.pool;
+
+	switch (umsg->kvp_hdr.operation) {
+	case HV_KVP_OP_SET_IP_INFO:
+		hv_kvp_convert_utf16_ipinfo_to_utf8(host_ip_msg, umsg);
+		break;
+
+	case HV_KVP_OP_GET_IP_INFO:
+		utf16_to_utf8((char *)umsg->body.kvp_ip_val.adapter_id,
+		    MAX_ADAPTER_ID_SIZE,
+		    (uint16_t *)host_ip_msg->kvp_ip_val.adapter_id,
+		    MAX_ADAPTER_ID_SIZE, 1, &utf_err);
+
+		umsg->body.kvp_ip_val.addr_family =
+		    host_ip_msg->kvp_ip_val.addr_family;
+		break;
+
+	case HV_KVP_OP_SET:
+		value_type = hmsg->body.kvp_set.data.value_type;
+
+		switch (value_type) {
+		case HV_REG_SZ:
+			umsg->body.kvp_set.data.value_size =
+			    utf16_to_utf8(
+				(char *)umsg->body.kvp_set.data.msg_value.value,
+				HV_KVP_EXCHANGE_MAX_VALUE_SIZE - 1,
+				(uint16_t *)hmsg->body.kvp_set.data.msg_value.value,
+				hmsg->body.kvp_set.data.value_size,
+				1, &utf_err);
+			/* utf8 encoding */
+			umsg->body.kvp_set.data.value_size =
+			    umsg->body.kvp_set.data.value_size / 2;
+			break;
+
+		case HV_REG_U32:
+			umsg->body.kvp_set.data.value_size =
+			    sprintf(umsg->body.kvp_set.data.msg_value.value, "%d",
+				hmsg->body.kvp_set.data.msg_value.value_u32) + 1;
+			break;
+
+		case HV_REG_U64:
+			umsg->body.kvp_set.data.value_size =
+			    sprintf(umsg->body.kvp_set.data.msg_value.value, "%llu",
+				(unsigned long long)
+				hmsg->body.kvp_set.data.msg_value.value_u64) + 1;
+			break;
+		}
+
+		umsg->body.kvp_set.data.key_size =
+		    utf16_to_utf8(
+			umsg->body.kvp_set.data.key,
+			HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1,
+			(uint16_t *)hmsg->body.kvp_set.data.key,
+			hmsg->body.kvp_set.data.key_size,
+			1, &utf_err);
+
+		/* utf8 encoding */
+		umsg->body.kvp_set.data.key_size =
+		    umsg->body.kvp_set.data.key_size / 2;
+		break;
+
+	case HV_KVP_OP_GET:
+		umsg->body.kvp_get.data.key_size =
+		    utf16_to_utf8(umsg->body.kvp_get.data.key,
+			HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1,
+			(uint16_t *)hmsg->body.kvp_get.data.key,
+			hmsg->body.kvp_get.data.key_size,
+			1, &utf_err);
+		/* utf8 encoding */
+		umsg->body.kvp_get.data.key_size =
+		    umsg->body.kvp_get.data.key_size / 2;
+		break;
+
+	case HV_KVP_OP_DELETE:
+		umsg->body.kvp_delete.key_size =
+		    utf16_to_utf8(umsg->body.kvp_delete.key,
+			HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1,
+			(uint16_t *)hmsg->body.kvp_delete.key,
+			hmsg->body.kvp_delete.key_size,
+			1, &utf_err);
+		/* utf8 encoding */
+		umsg->body.kvp_delete.key_size =
+		    umsg->body.kvp_delete.key_size / 2;
+		break;
+
+	case HV_KVP_OP_ENUMERATE:
+		umsg->body.kvp_enum_data.index =
+		    hmsg->body.kvp_enum_data.index;
+		break;
+
+	default:
+		hv_kvp_log_info("%s: daemon_kvp_msg: Invalid operation : %d\n",
+		    __func__, umsg->kvp_hdr.operation);
+	}
+}
+
+
+/*
+ * Prepare a host kvp msg based on user kvp msg (utf8 to utf16)
+ */
+static int
+hv_kvp_convert_usermsg_to_hostmsg(struct hv_kvp_msg *umsg, struct hv_kvp_msg *hmsg)
+{
+	int hkey_len = 0, hvalue_len = 0, utf_err = 0;
+	struct hv_kvp_exchg_msg_value *host_exchg_data;
+	char *key_name, *value;
+
+	struct hv_kvp_ip_msg *host_ip_msg = (struct hv_kvp_ip_msg *)hmsg;
+
+	switch (hmsg->kvp_hdr.operation) {
+	case HV_KVP_OP_GET_IP_INFO:
+		return (hv_kvp_convert_utf8_ipinfo_to_utf16(umsg, host_ip_msg));
+
+	case HV_KVP_OP_SET_IP_INFO:
+	case HV_KVP_OP_SET:
+	case HV_KVP_OP_DELETE:
+		return (0);
+
+	case HV_KVP_OP_ENUMERATE:
+		host_exchg_data = &hmsg->body.kvp_enum_data.data;
+		key_name = umsg->body.kvp_enum_data.data.key;
+		hkey_len = utf8_to_utf16((uint16_t *)host_exchg_data->key,
+				((HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2) - 2),
+				key_name, strlen(key_name),
+				1, &utf_err);
+		/* utf16 encoding */
+		host_exchg_data->key_size = 2 * (hkey_len + 1);
+		value = umsg->body.kvp_enum_data.data.msg_value.value;
+		hvalue_len = utf8_to_utf16(
+				(uint16_t *)host_exchg_data->msg_value.value,
+				((HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2) - 2),
+				value, strlen(value),
+				1, &utf_err);
+		host_exchg_data->value_size = 2 * (hvalue_len + 1);
+		host_exchg_data->value_type = HV_REG_SZ;
+
+		if ((hkey_len < 0) || (hvalue_len < 0))
+			return (EINVAL);
+
+		return (0);
+
+	case HV_KVP_OP_GET:
+		host_exchg_data = &hmsg->body.kvp_get.data;
+		value = umsg->body.kvp_get.data.msg_value.value;
+		hvalue_len = utf8_to_utf16(
+				(uint16_t *)host_exchg_data->msg_value.value,
+				((HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2) - 2),
+				value, strlen(value),
+				1, &utf_err);
+		/* Convert value size to utf16 */
+		host_exchg_data->value_size = 2 * (hvalue_len + 1);
+		/* Values are passed as strings */
+		host_exchg_data->value_type = HV_REG_SZ;
+
+		if (hvalue_len < 0)
+			return (EINVAL);
+
+		return (0);
+
+	default:
+		return (EINVAL);
+	}
+}
+
+
+/*
+ * Send the response back to the host.
+ */
+static void
+hv_kvp_respond_host(hv_kvp_sc *sc, uint32_t error)
+{
+	struct hv_vmbus_icmsg_hdr *hv_icmsg_hdrp;
+
+	hv_icmsg_hdrp = (struct hv_vmbus_icmsg_hdr *)
+	    &sc->rcv_buf[sizeof(struct hv_vmbus_pipe_hdr)];
+
+	hv_icmsg_hdrp->status = error;
+	hv_icmsg_hdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION |
+	    HV_ICMSGHDRFLAG_RESPONSE;
+
+	error = vmbus_chan_send(vmbus_get_channel(sc->dev),
+	    VMBUS_CHANPKT_TYPE_INBAND, 0, sc->rcv_buf, sc->host_msg_len,
+	    sc->host_msg_id);
+	if (error)
+		hv_kvp_log_info("%s: hv_kvp_respond_host: sendpacket error:%d\n",
+			__func__, error);
+}
+
+
+/*
+ * This is the main kvp kernel process that interacts with both user daemon
+ * and the host
+ */
+static void
+hv_kvp_send_msg_to_daemon(hv_kvp_sc *sc)
+{
+	struct hv_kvp_msg *hmsg = sc->host_kvp_msg;
+	struct hv_kvp_msg *umsg = &sc->daemon_kvp_msg;
+
+	/* Prepare kvp_msg to be sent to user */
+	hv_kvp_convert_hostmsg_to_usermsg(hmsg, umsg);
+
+	/* Send the msg to the user via daemon_read by posting the semaphore */
+	sema_post(&sc->dev_sema);
+
+	/* We should wake up the daemon, in case it's doing poll() */
+	selwakeup(&sc->hv_kvp_selinfo);
+}
+
+
+/*
+ * Function to read the kvp request buffer from host
+ * and interact with daemon
+ */
+static void
+hv_kvp_process_request(void *context, int pending)
+{
+	uint8_t *kvp_buf;
+	struct vmbus_channel *channel;
+	uint32_t recvlen = 0;
+	uint64_t requestid;
+	struct hv_vmbus_icmsg_hdr *icmsghdrp;
+	int ret = 0, error;
+	hv_kvp_sc *sc;
+
+	hv_kvp_log_info("%s: entering hv_kvp_process_request\n", __func__);
+
+	sc = (hv_kvp_sc*)context;
+	kvp_buf = sc->util_sc.ic_buf;
+	channel = vmbus_get_channel(sc->dev);
+
+	recvlen = sc->util_sc.ic_buflen;
+	ret = vmbus_chan_recv(channel, kvp_buf, &recvlen, &requestid);
+	KASSERT(ret != ENOBUFS, ("hvkvp recvbuf is not large enough"));
+	/* XXX check recvlen to make sure that it contains enough data */
+
+	while ((ret == 0) && (recvlen > 0)) {
+		icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
+		    &kvp_buf[sizeof(struct hv_vmbus_pipe_hdr)];
+
+		hv_kvp_transaction_init(sc, recvlen, requestid, kvp_buf);
+		if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
+			error = vmbus_ic_negomsg(&sc->util_sc,
+			    kvp_buf, &recvlen, KVP_FWVER, KVP_MSGVER);
+			/* XXX handle vmbus_ic_negomsg failure. */
+			if (!error)
+				hv_kvp_respond_host(sc, HV_S_OK);
+			else
+				hv_kvp_respond_host(sc, HV_E_FAIL);
+			/*
+			 * It is ok to not acquire the mutex before setting
+			 * req_in_progress here because negotiation is the
+			 * first thing that happens and hence there is no
+			 * chance of a race condition.
+			 */
+
+			sc->req_in_progress = false;
+			hv_kvp_log_info("%s :version negotiated\n", __func__);
+
+		} else {
+			if (!sc->daemon_busy) {
+
+				hv_kvp_log_info("%s: issuing qury to daemon\n", __func__);
+				mtx_lock(&sc->pending_mutex);
+				sc->req_timed_out = false;
+				sc->daemon_busy = true;
+				mtx_unlock(&sc->pending_mutex);
+
+				hv_kvp_send_msg_to_daemon(sc);
+				hv_kvp_log_info("%s: waiting for daemon\n", __func__);
+			}
+
+			/* Wait 5 seconds for daemon to respond back */
+			tsleep(sc, 0, "kvpworkitem", 5 * hz);
+			hv_kvp_log_info("%s: came out of wait\n", __func__);
+		}
+
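+		/*
+		 * Either the daemon's write handler called wakeup(sc) with a
+		 * response in hand, or the 5 second tsleep() above timed
+		 * out; the bookkeeping below handles both cases.
+		 */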
+		mtx_lock(&sc->pending_mutex);
+
+		/* Notice that once req_timed_out is set to true
+		 * it will remain true until the next request is
+		 * sent to the daemon.  The response from the daemon
+		 * is forwarded to the host only when this flag is
+		 * false.
+		 */
+		sc->req_timed_out = true;
+
+		/*
+		 * Cancel request if so need be.
+		 */
+		if (hv_kvp_req_in_progress(sc)) {
+			hv_kvp_log_info("%s: request was still active after wait so failing\n", __func__);
+			hv_kvp_respond_host(sc, HV_E_FAIL);
+			sc->req_in_progress = false;
+		}
+
+		mtx_unlock(&sc->pending_mutex);
+
+		/*
+		 * Try reading next buffer
+		 */
+		recvlen = sc->util_sc.ic_buflen;
+		ret = vmbus_chan_recv(channel, kvp_buf, &recvlen, &requestid);
+		KASSERT(ret != ENOBUFS, ("hvkvp recvbuf is not large enough"));
+		/* XXX check recvlen to make sure that it contains enough data */
+
+		hv_kvp_log_info("%s: read: context %p, ret =%d, recvlen=%d\n",
+			__func__, context, ret, recvlen);
+	}
+}
+
+
+/*
+ * Callback routine that gets called whenever there is a message from host
+ */
+static void
+hv_kvp_callback(struct vmbus_channel *chan __unused, void *context)
+{
+	hv_kvp_sc *sc = (hv_kvp_sc*)context;
+	/*
+	 * The first request from the host will not be handled until the
+	 * daemon is registered.  When the callback is triggered without a
+	 * registered daemon, it just returns.  When a new daemon registers,
+	 * this callback is triggered from the _write operation.
+	 */
+	if (sc->register_done) {
+		hv_kvp_log_info("%s: Queuing work item\n", __func__);
+		taskqueue_enqueue(taskqueue_thread, &sc->task);
+	}
+}
+
+static int
+hv_kvp_dev_open(struct cdev *dev, int oflags, int devtype,
+				struct thread *td)
+{
+	hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1;
+
+	hv_kvp_log_info("%s: Opened device \"hv_kvp_device\" successfully.\n", __func__);
+	if (sc->dev_accessed)
+		return (EBUSY);
+
+	sc->daemon_task = curproc;
+	sc->dev_accessed = true;
+	sc->daemon_busy = false;
+	return (0);
+}
+
+
+static int
+hv_kvp_dev_close(struct cdev *dev __unused, int fflag __unused, int devtype __unused,
+				 struct thread *td __unused)
+{
+	hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1;
+
+	hv_kvp_log_info("%s: Closing device \"hv_kvp_device\".\n", __func__);
+	sc->dev_accessed = false;
+	sc->register_done = false;
+	return (0);
+}
+
+
+/*
+ * The hv_kvp daemon's read invokes this function;
+ * it acts as a send to the daemon.
+ */
+static int
+hv_kvp_dev_daemon_read(struct cdev *dev, struct uio *uio, int ioflag __unused)
+{
+	size_t amt;
+	int error = 0;
+	struct hv_kvp_msg *hv_kvp_dev_buf;
+	hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1;
+
+	/* Read is not allowed until registration is done. */
+	if (!sc->register_done)
+		return (EPERM);
+
+	sema_wait(&sc->dev_sema);
+
+	hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_TEMP, M_WAITOK);
+	memcpy(hv_kvp_dev_buf, &sc->daemon_kvp_msg, sizeof(struct hv_kvp_msg));
+
+	amt = MIN(uio->uio_resid, uio->uio_offset >= BUFFERSIZE + 1 ? 0 :
+		BUFFERSIZE + 1 - uio->uio_offset);
+
+	if ((error = uiomove(hv_kvp_dev_buf, amt, uio)) != 0)
+		hv_kvp_log_info("%s: hv_kvp uiomove read failed!\n", __func__);
+
+	free(hv_kvp_dev_buf, M_TEMP);
+	return (error);
+}
+
+
+/*
+ * The hv_kvp daemon's write invokes this function;
+ * it acts as a receive from the daemon.
+ */
+static int
+hv_kvp_dev_daemon_write(struct cdev *dev, struct uio *uio, int ioflag __unused)
+{
+	size_t amt;
+	int error = 0;
+	struct hv_kvp_msg *hv_kvp_dev_buf;
+	hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1;
+
+	uio->uio_offset = 0;
+	hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_TEMP, M_WAITOK);
+
+	amt = MIN(uio->uio_resid, BUFFERSIZE);
+	error = uiomove(hv_kvp_dev_buf, amt, uio);
+
+	if (error != 0) {
+		free(hv_kvp_dev_buf, M_TEMP);
+		return (error);
+	}
+	memcpy(&sc->daemon_kvp_msg, hv_kvp_dev_buf, sizeof(struct hv_kvp_msg));
+
+	free(hv_kvp_dev_buf, M_TEMP);
+	if (sc->register_done == false) {
+		if (sc->daemon_kvp_msg.kvp_hdr.operation == HV_KVP_OP_REGISTER) {
+			sc->register_done = true;
+			hv_kvp_callback(vmbus_get_channel(sc->dev), dev->si_drv1);
+		} else {
+			hv_kvp_log_info("%s, KVP Registration Failed\n", __func__);
+			return (EINVAL);
+		}
+	} else {
+
+		mtx_lock(&sc->pending_mutex);
+
+		if (!sc->req_timed_out) {
+			struct hv_kvp_msg *hmsg = sc->host_kvp_msg;
+			struct hv_kvp_msg *umsg = &sc->daemon_kvp_msg;
+
+			error = hv_kvp_convert_usermsg_to_hostmsg(umsg, hmsg);
+			hv_kvp_respond_host(sc, umsg->hdr.error);
+			wakeup(sc);
+			sc->req_in_progress = false;
+			if (umsg->hdr.error != HV_S_OK)
+				hv_kvp_log_info("%s, Error 0x%x from daemon\n",
+				    __func__, umsg->hdr.error);
+			if (error)
+				hv_kvp_log_info("%s, Error from convert\n", __func__);
+		}
+
+		sc->daemon_busy = false;
+		mtx_unlock(&sc->pending_mutex);
+	}
+
+	return (error);
+}
+
+
+/*
+ * hv_kvp_daemon poll invokes this function to check if data is available
+ * for daemon to read.
+ */
+static int
+hv_kvp_dev_daemon_poll(struct cdev *dev, int events, struct thread *td)
+{
+	int revents = 0;
+	hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1;
+
+	mtx_lock(&sc->pending_mutex);
+	/*
+	 * We check the global flag daemon_busy to see whether data is
+	 * available for userland to read.  daemon_busy is set to true
+	 * when the driver has data for the daemon to read, and set to
+	 * false after the daemon sends the response back to the driver.
+	 */
+	if (sc->daemon_busy == true)
+		revents = POLLIN;
+	else
+		selrecord(td, &sc->hv_kvp_selinfo);
+
+	mtx_unlock(&sc->pending_mutex);
+
+	return (revents);
+}
+
+static int
+hv_kvp_probe(device_t dev)
+{
+
+	return (vmbus_ic_probe(dev, vmbus_kvp_descs));
+}
+
+static int
+hv_kvp_attach(device_t dev)
+{
+	int error;
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
+
+	hv_kvp_sc *sc = (hv_kvp_sc*)device_get_softc(dev);
+
+	sc->dev = dev;
+	sema_init(&sc->dev_sema, 0, "hv_kvp device semaphore");
+	mtx_init(&sc->pending_mutex, "hv-kvp pending mutex",
+		NULL, MTX_DEF);
+
+	ctx = device_get_sysctl_ctx(dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_kvp_log",
+	    CTLFLAG_RWTUN, &hv_kvp_log, 0, "Hyperv KVP service log level");
+
+	TASK_INIT(&sc->task, 0, hv_kvp_process_request, sc);
+
+	/* create character device */
+	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
+			&sc->hv_kvp_dev,
+			&hv_kvp_cdevsw,
+			0,
+			UID_ROOT,
+			GID_WHEEL,
+			0640,
+			"hv_kvp_dev");
+
+	if (error != 0)
+		return (error);
+	sc->hv_kvp_dev->si_drv1 = sc;
+
+	return (vmbus_ic_attach(dev, hv_kvp_callback));
+}
+
+static int
+hv_kvp_detach(device_t dev)
+{
+	hv_kvp_sc *sc = (hv_kvp_sc*)device_get_softc(dev);
+
+	if (sc->daemon_task != NULL) {
+		PROC_LOCK(sc->daemon_task);
+		kern_psignal(sc->daemon_task, SIGKILL);
+		PROC_UNLOCK(sc->daemon_task);
+	}
+
+	destroy_dev(sc->hv_kvp_dev);
+	return (vmbus_ic_detach(dev));
+}
+
+static device_method_t kvp_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe, hv_kvp_probe),
+	DEVMETHOD(device_attach, hv_kvp_attach),
+	DEVMETHOD(device_detach, hv_kvp_detach),
+	{ 0, 0 }
+};
+
+static driver_t kvp_driver = { "hvkvp", kvp_methods, sizeof(hv_kvp_sc)};
+
+static devclass_t kvp_devclass;
+
+DRIVER_MODULE(hv_kvp, vmbus, kvp_driver, kvp_devclass, NULL, NULL);
+MODULE_VERSION(hv_kvp, 1);
+MODULE_DEPEND(hv_kvp, vmbus, 1, 1, 1);


Property changes on: trunk/sys/dev/hyperv/utilities/hv_kvp.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/utilities/hv_kvp.h
===================================================================
--- trunk/sys/dev/hyperv/utilities/hv_kvp.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/utilities/hv_kvp.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,228 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014,2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _KVP_H
+#define _KVP_H
+/*
+ * An implementation of HyperV key value pair (KVP) functionality for FreeBSD
+ *
+ */
+
+/*
+ * Maximum value size - used for both key names and value data, and includes
+ * any applicable NULL terminators.
+ *
+ * Note:  This limit is somewhat arbitrary, but falls easily within what is
+ * supported for all native guests (back to Win 2000) and what is reasonable
+ * for the IC KVP exchange functionality.  Note that Windows Me/98/95 are
+ * limited to 255 character key names.
+ *
+ * MSDN recommends not storing data values larger than 2048 bytes in the
+ * registry.
+ *
+ * Note:  This value is used in defining the KVP exchange message - this value
+ * cannot be modified without affecting the message size and compatibility.
+ */
+
+/*
+ * bytes, including any null terminators
+ */
+#define HV_KVP_EXCHANGE_MAX_VALUE_SIZE    (2048)
+
+
+/*
+ * Maximum key size - the registry limit for the length of an entry name
+ * is 256 characters, including the null terminator
+ */
+#define HV_KVP_EXCHANGE_MAX_KEY_SIZE    (512)
+
+
+/*
+ * In FreeBSD, we implement the KVP functionality in two components:
+ * 1) The kernel component which is packaged as part of the hv_utils driver
+ * is responsible for communicating with the host and responsible for
+ * implementing the host/guest protocol. 2) A user level daemon that is
+ * responsible for data gathering.
+ *
+ * Host/Guest Protocol: The host iterates over an index and expects the guest
+ * to assign a key name to the index and also return the value corresponding to
+ * the key. The host will have at most one KVP transaction outstanding at any
+ * given point in time. The host side iteration stops when the guest returns
+ * an error. Microsoft has specified the following mapping of key names to
+ * host specified index:
+ *
+ *  Index		Key Name
+ *	0		FullyQualifiedDomainName
+ *	1		IntegrationServicesVersion
+ *	2		NetworkAddressIPv4
+ *	3		NetworkAddressIPv6
+ *	4		OSBuildNumber
+ *	5		OSName
+ *	6		OSMajorVersion
+ *	7		OSMinorVersion
+ *	8		OSVersion
+ *	9		ProcessorArchitecture
+ *
+ * The Windows host expects the Key Name and Key Value to be encoded in utf16.
+ *
+ * Guest Kernel/KVP Daemon Protocol: As noted earlier, we implement all of the
+ * data gathering functionality in a user mode daemon. The user level daemon
+ * is also responsible for binding the key name to the index as well. The
+ * kernel and user-level daemon communicate using a connector channel.
+ *
+ * The user mode component first registers with the
+ * kernel component. Subsequently, the kernel component requests data
+ * for the specified keys. In response to this message the user mode component
+ * fills in the value corresponding to the specified key. We overload the
+ * sequence field in the cn_msg header to define our KVP message types.
+ *
+ *
+ * The kernel component simply acts as a conduit for communication between the
+ * Windows host and the user-level daemon. The kernel component passes up the
+ * index received from the Host to the user-level daemon. If the index is
+ * valid (supported), the corresponding key as well as its
+ * value (both are strings) is returned. If the index is invalid
+ * (not supported), a NULL key string is returned.
+ */
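+
+/*
+ * A minimal sketch of the daemon side of this protocol, for illustration
+ * only (error handling omitted; the device node name matches the one
+ * created by the driver in hv_kvp_attach()):
+ *
+ *	struct hv_kvp_msg msg;
+ *	int fd = open("/dev/hv_kvp_dev", O_RDWR);
+ *
+ *	memset(&msg, 0, sizeof(msg));
+ *	msg.hdr.kvp_hdr.operation = HV_KVP_OP_REGISTER;
+ *	write(fd, &msg, sizeof(msg));		// register with the driver
+ *	for (;;) {
+ *		struct pollfd pfd = { .fd = fd, .events = POLLIN };
+ *		poll(&pfd, 1, INFTIM);		// wait for a host request
+ *		read(fd, &msg, sizeof(msg));	// fetch the request
+ *		// ... fill in msg.body for the requested operation ...
+ *		write(fd, &msg, sizeof(msg));	// send the response back
+ *	}
+ */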
+
+ 
+/*
+ * Registry value types.
+ */
+#define HV_REG_SZ     1
+#define HV_REG_U32    4
+#define HV_REG_U64    8
+
+
+/*
+ * Daemon code supporting IP injection.
+ */
+#define HV_KVP_OP_REGISTER    4
+
+
+enum hv_kvp_exchg_op {
+	HV_KVP_OP_GET = 0,
+	HV_KVP_OP_SET,
+	HV_KVP_OP_DELETE,
+	HV_KVP_OP_ENUMERATE,
+	HV_KVP_OP_GET_IP_INFO,
+	HV_KVP_OP_SET_IP_INFO,
+	HV_KVP_OP_COUNT /* Number of operations, must be last. */
+};
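+/*
+ * Note: HV_KVP_OP_REGISTER (4, above) numerically overlaps
+ * HV_KVP_OP_GET_IP_INFO; registration is only exchanged between the daemon
+ * and the kernel driver (see hv_kvp_dev_daemon_write()), never with the
+ * host.
+ */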
+
+enum hv_kvp_exchg_pool {
+	HV_KVP_POOL_EXTERNAL = 0,
+	HV_KVP_POOL_GUEST,
+	HV_KVP_POOL_AUTO,
+	HV_KVP_POOL_AUTO_EXTERNAL,
+	HV_KVP_POOL_AUTO_INTERNAL,
+	HV_KVP_POOL_COUNT /* Number of pools, must be last. */
+};
+
+#define ADDR_FAMILY_NONE                 0x00
+#define ADDR_FAMILY_IPV4                 0x01
+#define ADDR_FAMILY_IPV6                 0x02
+
+#define MAX_ADAPTER_ID_SIZE              128
+#define MAX_IP_ADDR_SIZE                 1024
+#define MAX_GATEWAY_SIZE                 512
+
+
+struct hv_kvp_ipaddr_value {
+	uint16_t adapter_id[MAX_ADAPTER_ID_SIZE];
+	uint8_t  addr_family;
+	uint8_t  dhcp_enabled;
+	uint16_t ip_addr[MAX_IP_ADDR_SIZE];
+	uint16_t sub_net[MAX_IP_ADDR_SIZE];
+	uint16_t gate_way[MAX_GATEWAY_SIZE];
+	uint16_t dns_addr[MAX_IP_ADDR_SIZE];
+}__attribute__((packed));
+
+struct hv_kvp_hdr {
+	uint8_t                 operation;
+	uint8_t                 pool;
+	uint16_t                pad;
+} __attribute__((packed));
+
+struct hv_kvp_exchg_msg_value {
+	uint32_t value_type;
+	uint32_t key_size;
+	uint32_t value_size;
+	uint8_t  key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
+	union {
+		uint8_t  value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
+		uint32_t value_u32;
+		uint64_t value_u64;
+	} msg_value;
+} __attribute__((packed));
+
+struct hv_kvp_msg_enumerate {
+	uint32_t index;
+	struct hv_kvp_exchg_msg_value data;
+} __attribute__((packed));
+
+struct hv_kvp_msg_get {
+	struct hv_kvp_exchg_msg_value data;
+} __attribute__((packed));
+
+struct hv_kvp_msg_set {
+	struct hv_kvp_exchg_msg_value data;
+} __attribute__((packed));
+
+struct hv_kvp_msg_delete {
+	uint32_t key_size;
+	uint8_t key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
+} __attribute__((packed));
+
+struct hv_kvp_register {
+	uint8_t version[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
+} __attribute__((packed));
+
+struct hv_kvp_msg {
+	union {
+		struct hv_kvp_hdr kvp_hdr;
+		uint32_t error;
+	} hdr;
+	union {
+		struct hv_kvp_msg_get		kvp_get;
+		struct hv_kvp_msg_set		kvp_set;
+		struct hv_kvp_msg_delete	kvp_delete;
+		struct hv_kvp_msg_enumerate	kvp_enum_data;
+		struct hv_kvp_ipaddr_value	kvp_ip_val;
+		struct hv_kvp_register		kvp_register;
+	} body;
+} __attribute__((packed));
+
+struct hv_kvp_ip_msg {
+	uint8_t operation;
+	uint8_t pool;
+	struct hv_kvp_ipaddr_value      kvp_ip_val;
+} __attribute__((packed));
+
+#endif /* _KVP_H */


Property changes on: trunk/sys/dev/hyperv/utilities/hv_kvp.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/utilities/hv_snapshot.c
===================================================================
--- trunk/sys/dev/hyperv/utilities/hv_snapshot.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/utilities/hv_snapshot.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,1062 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/utilities/hv_snapshot.c 311230 2017-01-04 02:39:00Z sephe $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/lock.h>
+#include <sys/taskqueue.h>
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/kthread.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysproto.h>
+#include <sys/un.h>
+#include <sys/endian.h>
+#include <sys/sema.h>
+#include <sys/signal.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+#include <sys/mutex.h>
+#include <sys/callout.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/utilities/hv_utilreg.h>
+#include <dev/hyperv/utilities/vmbus_icreg.h>
+#include <dev/hyperv/utilities/vmbus_icvar.h>
+
+#include "hv_snapshot.h"
+#include "vmbus_if.h"
+
+#define VSS_MAJOR		5
+#define VSS_MINOR		0
+#define VSS_MSGVER		VMBUS_IC_VERSION(VSS_MAJOR, VSS_MINOR)
+
+#define VSS_FWVER_MAJOR		3
+#define VSS_FWVER		VMBUS_IC_VERSION(VSS_FWVER_MAJOR, 0)
+
+#define TIMEOUT_LIMIT		(15)	/* seconds */
+enum hv_vss_op {
+	VSS_OP_CREATE = 0,
+	VSS_OP_DELETE,
+	VSS_OP_HOT_BACKUP,
+	VSS_OP_GET_DM_INFO,
+	VSS_OP_BU_COMPLETE,
+	/*
+	 * Following operations are only supported with IC version >= 5.0
+	 */
+	VSS_OP_FREEZE, /* Freeze the file systems in the VM */
+	VSS_OP_THAW, /* Unfreeze the file systems */
+	VSS_OP_AUTO_RECOVER,
+	VSS_OP_COUNT /* Number of operations, must be last */
+};
+
+/*
+ * Header for all VSS messages.
+ */
+struct hv_vss_hdr {
+	struct vmbus_icmsg_hdr	ic_hdr;
+	uint8_t			operation;
+	uint8_t			reserved[7];
+} __packed;
+
+
+/*
+ * Flag values for hv_vss_check_feature. Only one value is
+ * supported at present.
+ */
+#define VSS_HBU_NO_AUTO_RECOVERY		0x00000005
+
+struct hv_vss_check_feature {
+	uint32_t flags;
+} __packed;
+
+struct hv_vss_check_dm_info {
+	uint32_t flags;
+} __packed;
+
+struct hv_vss_msg {
+	union {
+		struct hv_vss_hdr vss_hdr;
+	} hdr;
+	union {
+		struct hv_vss_check_feature vss_cf;
+		struct hv_vss_check_dm_info dm_info;
+	} body;
+} __packed;
+
+struct hv_vss_req {
+	struct hv_vss_opt_msg	opt_msg;	/* used to communicate with daemon */
+	struct hv_vss_msg	msg;		/* used to communicate with host */
+} __packed;
+
+/* hv_vss debug control */
+static int hv_vss_log = 0;
+
+#define	hv_vss_log_error(...)	do {				\
+	if (hv_vss_log > 0)					\
+		log(LOG_ERR, "hv_vss: " __VA_ARGS__);		\
+} while (0)
+
+#define	hv_vss_log_info(...) do {				\
+	if (hv_vss_log > 1)					\
+		log(LOG_INFO, "hv_vss: " __VA_ARGS__);		\
+} while (0)
+
+static const struct vmbus_ic_desc vmbus_vss_descs[] = {
+	{
+		.ic_guid = { .hv_guid = {
+		    0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42,
+		    0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4,  0x40} },
+		.ic_desc = "Hyper-V VSS"
+	},
+	VMBUS_IC_DESC_END
+};
+
+static const char * vss_opt_name[] = {"None", "VSSCheck", "Freeze", "Thaw"};
+
+/* character device prototypes */
+static d_open_t		hv_vss_dev_open;
+static d_close_t	hv_vss_dev_close;
+static d_poll_t		hv_vss_dev_daemon_poll;
+static d_ioctl_t	hv_vss_dev_daemon_ioctl;
+
+static d_open_t		hv_appvss_dev_open;
+static d_close_t	hv_appvss_dev_close;
+static d_poll_t		hv_appvss_dev_poll;
+static d_ioctl_t	hv_appvss_dev_ioctl;
+
+/* hv_vss character device structure */
+static struct cdevsw hv_vss_cdevsw =
+{
+	.d_version	= D_VERSION,
+	.d_open		= hv_vss_dev_open,
+	.d_close	= hv_vss_dev_close,
+	.d_poll		= hv_vss_dev_daemon_poll,
+	.d_ioctl	= hv_vss_dev_daemon_ioctl,
+	.d_name		= FS_VSS_DEV_NAME,
+};
+
+static struct cdevsw hv_appvss_cdevsw =
+{
+	.d_version	= D_VERSION,
+	.d_open		= hv_appvss_dev_open,
+	.d_close	= hv_appvss_dev_close,
+	.d_poll		= hv_appvss_dev_poll,
+	.d_ioctl	= hv_appvss_dev_ioctl,
+	.d_name		= APP_VSS_DEV_NAME,
+};
+
+struct hv_vss_sc;
+/*
+ * Global state to track cdev
+ */
+struct hv_vss_dev_sc {
+	/*
+	 * A message from the host moves through the notify queue and
+	 * then the ack queue; finally it is recycled to the free list.
+	 */
+	STAILQ_HEAD(, hv_vss_req_internal) 	to_notify_queue;
+	STAILQ_HEAD(, hv_vss_req_internal) 	to_ack_queue;
+	struct hv_vss_sc			*sc;
+	struct proc				*proc_task;
+	struct selinfo				hv_vss_selinfo;
+};
+/*
+ * Global state to track and synchronize the transaction requests from the host.
+ * VSS allows a user application to register its own freeze/thaw handlers.
+ * The VSS kernel driver notifies both the VSS daemon and, if registered,
+ * the user application.
+ * The implementation's state transitions are illustrated at:
+ * https://clovertrail.github.io/assets/vssdot.png
+ */
+typedef struct hv_vss_sc {
+	struct vmbus_ic_softc			util_sc;
+	device_t				dev;
+
+	struct task				task;
+
+	/*
+	 * This mutex protects access to the lists/queues; the
+	 * callout in each request also uses it.
+	 */
+	struct mtx				pending_mutex;
+	/*
+	 * req_free_list contains all free items
+	 */
+	LIST_HEAD(, hv_vss_req_internal)	req_free_list;
+
+	/* Indicates if daemon registered with driver */
+	boolean_t				register_done;
+
+	boolean_t				app_register_done;
+
+	/* cdev for file system freeze/thaw */
+	struct cdev				*hv_vss_dev;
+	/* cdev for application freeze/thaw */
+	struct cdev				*hv_appvss_dev;
+
+	/* sc for app */
+	struct hv_vss_dev_sc			app_sc;
+	/* sc for daemon */
+	struct hv_vss_dev_sc			daemon_sc;
+} hv_vss_sc;
+
+typedef struct hv_vss_req_internal {
+	LIST_ENTRY(hv_vss_req_internal)		link;
+	STAILQ_ENTRY(hv_vss_req_internal)	slink;
+	struct hv_vss_req			vss_req;
+
+	/* Receive buffer for communicating with the host */
+	uint8_t					*rcv_buf;
+	/* Length of host message */
+	uint32_t				host_msg_len;
+	/* Host message id */
+	uint64_t				host_msg_id;
+
+	hv_vss_sc				*sc;
+
+	struct callout				callout;
+} hv_vss_req_internal;
+
+#define SEARCH_REMOVE_REQ_LOCKED(reqp, queue, link, tmp, id)		\
+	do {								\
+		STAILQ_FOREACH_SAFE(reqp, queue, link, tmp) {		\
+			if (reqp->vss_req.opt_msg.msgid == id) {	\
+				STAILQ_REMOVE(queue,			\
+				    reqp, hv_vss_req_internal, link);	\
+				break;					\
+			}						\
+		}							\
+	} while (0)
+
+static bool
+hv_vss_is_daemon_killed_after_launch(hv_vss_sc *sc)
+{
+	return (!sc->register_done && sc->daemon_sc.proc_task);
+}
+
+/*
+ * Callback routine that gets called whenever there is a message from host
+ */
+static void
+hv_vss_callback(struct vmbus_channel *chan __unused, void *context)
+{
+	hv_vss_sc *sc = (hv_vss_sc*)context;
+	if (hv_vss_is_daemon_killed_after_launch(sc))
+		hv_vss_log_info("%s: daemon was killed!\n", __func__);
+	if (sc->register_done || sc->daemon_sc.proc_task) {
+		hv_vss_log_info("%s: Queuing work item\n", __func__);
+		if (hv_vss_is_daemon_killed_after_launch(sc))
+			hv_vss_log_info("%s: daemon was killed!\n", __func__);
+		taskqueue_enqueue(taskqueue_thread, &sc->task);
+	} else {
+		hv_vss_log_info("%s: daemon has never been registered\n", __func__);
+	}
+	hv_vss_log_info("%s: received msg from host\n", __func__);
+}
+/*
+ * Send the response back to the host.
+ */
+static void
+hv_vss_respond_host(uint8_t *rcv_buf, struct vmbus_channel *ch,
+    uint32_t recvlen, uint64_t requestid, uint32_t error)
+{
+	struct vmbus_icmsg_hdr *hv_icmsg_hdrp;
+
+	hv_icmsg_hdrp = (struct vmbus_icmsg_hdr *)rcv_buf;
+
+	hv_icmsg_hdrp->ic_status = error;
+	hv_icmsg_hdrp->ic_flags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE;
+
+	error = vmbus_chan_send(ch, VMBUS_CHANPKT_TYPE_INBAND, 0,
+	    rcv_buf, recvlen, requestid);
+	if (error)
+		hv_vss_log_info("%s: hv_vss_respond_host: sendpacket error:%d\n",
+		    __func__, error);
+}
+
+static void
+hv_vss_notify_host_result_locked(struct hv_vss_req_internal *reqp, uint32_t status)
+{
+	struct hv_vss_msg* msg = (struct hv_vss_msg *)reqp->rcv_buf;
+	hv_vss_sc *sc = reqp->sc;
+	if (reqp->vss_req.opt_msg.opt == HV_VSS_CHECK) {
+		msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY;
+	}
+	hv_vss_log_info("%s, %s response %s to host\n", __func__,
+	    vss_opt_name[reqp->vss_req.opt_msg.opt],
+	    status == HV_S_OK ? "Success" : "Fail");
+	hv_vss_respond_host(reqp->rcv_buf, vmbus_get_channel(reqp->sc->dev),
+	    reqp->host_msg_len, reqp->host_msg_id, status);
+	/* recycle the request */
+	LIST_INSERT_HEAD(&sc->req_free_list, reqp, link);
+}
+
+static void
+hv_vss_notify_host_result(struct hv_vss_req_internal *reqp, uint32_t status)
+{
+	mtx_lock(&reqp->sc->pending_mutex);
+	hv_vss_notify_host_result_locked(reqp, status);
+	mtx_unlock(&reqp->sc->pending_mutex);
+}
+
+static void
+hv_vss_cp_vssreq_to_user(struct hv_vss_req_internal *reqp,
+    struct hv_vss_opt_msg *userdata)
+{
+	struct hv_vss_req *hv_vss_dev_buf;
+	hv_vss_dev_buf = &reqp->vss_req;
+	hv_vss_dev_buf->opt_msg.opt = HV_VSS_NONE;
+	switch (reqp->vss_req.msg.hdr.vss_hdr.operation) {
+	case VSS_OP_FREEZE:
+		hv_vss_dev_buf->opt_msg.opt = HV_VSS_FREEZE;
+		break;
+	case VSS_OP_THAW:
+		hv_vss_dev_buf->opt_msg.opt = HV_VSS_THAW;
+		break;
+	case VSS_OP_HOT_BACKUP:
+		hv_vss_dev_buf->opt_msg.opt = HV_VSS_CHECK;
+		break;
+	}
+	*userdata = hv_vss_dev_buf->opt_msg;
+	hv_vss_log_info("%s, read data from user for "
+	    "%s (%ju) \n", __func__, vss_opt_name[userdata->opt],
+	    (uintmax_t)userdata->msgid);
+}
+
+/**
+ * Remove the request id from the notify or ack queue,
+ * and recycle the request by inserting it into the free list.
+ *
+ * When the app has been notified but has not yet sent an ack, the
+ * request sits in either the notify queue or the ack queue.
+ */
+static struct hv_vss_req_internal*
+hv_vss_drain_req_queue_locked(hv_vss_sc *sc, uint64_t req_id)
+{
+	struct hv_vss_req_internal *reqp, *tmp;
+	SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_notify_queue,
+	    slink, tmp, req_id);
+	if (reqp == NULL)
+		SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_ack_queue,
+		    slink, tmp, req_id);
+	if (reqp == NULL)
+		SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_notify_queue,
+		    slink, tmp, req_id);
+	if (reqp == NULL)
+		SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_ack_queue, slink,
+		    tmp, req_id);
+	return (reqp);
+}
+/**
+ * Actions for daemon who has been notified.
+ */
+static void
+hv_vss_notified(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata)
+{
+	struct hv_vss_req_internal *reqp;
+	mtx_lock(&dev_sc->sc->pending_mutex);
+	if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) {
+		reqp = STAILQ_FIRST(&dev_sc->to_notify_queue);
+		hv_vss_cp_vssreq_to_user(reqp, userdata);
+		STAILQ_REMOVE_HEAD(&dev_sc->to_notify_queue, slink);
+		/* insert the msg to queue for write */
+		STAILQ_INSERT_TAIL(&dev_sc->to_ack_queue, reqp, slink);
+		userdata->status = VSS_SUCCESS;
+	} else {
+		/* A timeout occurred, so the request was removed from the queue. */
+		hv_vss_log_info("%s: notify queue is empty!\n", __func__);
+		userdata->status = VSS_FAIL;
+	}
+	mtx_unlock(&dev_sc->sc->pending_mutex);
+}
+
+static void
+hv_vss_notify(struct hv_vss_dev_sc *dev_sc, struct hv_vss_req_internal *reqp)
+{
+	uint32_t opt = reqp->vss_req.opt_msg.opt;
+	mtx_lock(&dev_sc->sc->pending_mutex);
+	STAILQ_INSERT_TAIL(&dev_sc->to_notify_queue, reqp, slink);
+	hv_vss_log_info("%s: issuing query %s (%ju) to %s\n", __func__,
+	    vss_opt_name[opt], (uintmax_t)reqp->vss_req.opt_msg.msgid,
+	    &dev_sc->sc->app_sc == dev_sc ? "app" : "daemon");
+	mtx_unlock(&dev_sc->sc->pending_mutex);
+	selwakeup(&dev_sc->hv_vss_selinfo);
+}
+
+/**
+ * Actions for daemon who has acknowledged.
+ */
+static void
+hv_vss_daemon_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata)
+{
+	struct hv_vss_req_internal	*reqp, *tmp;
+	uint64_t			req_id;
+	int				opt;
+	uint32_t			status;
+
+	opt = userdata->opt;
+	req_id = userdata->msgid;
+	status = userdata->status;
+	/* make sure the reserved fields are all zeros. */
+	memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) -
+	    __offsetof(struct hv_vss_opt_msg, reserved));
+	mtx_lock(&dev_sc->sc->pending_mutex);
+	SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id);
+	mtx_unlock(&dev_sc->sc->pending_mutex);
+	if (reqp == NULL) {
+		hv_vss_log_info("%s Timeout: fail to find daemon ack request\n",
+		    __func__);
+		userdata->status = VSS_FAIL;
+		return;
+	}
+	KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!"));
+	hv_vss_log_info("%s, get response %d from daemon for %s (%ju) \n", __func__,
+	    status, vss_opt_name[opt], (uintmax_t)req_id);
+	switch (opt) {
+	case HV_VSS_CHECK:
+	case HV_VSS_FREEZE:
+		callout_drain(&reqp->callout);
+		hv_vss_notify_host_result(reqp,
+		    status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL);
+		break;
+	case HV_VSS_THAW:
+		if (dev_sc->sc->app_register_done) {
+			if (status == VSS_SUCCESS) {
+				hv_vss_notify(&dev_sc->sc->app_sc, reqp);
+			} else {
+				/* handle error */
+				callout_drain(&reqp->callout);
+				hv_vss_notify_host_result(reqp, HV_E_FAIL);
+			}
+		} else {
+			callout_drain(&reqp->callout);
+			hv_vss_notify_host_result(reqp,
+			    status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL);
+		}
+		break;
+	}
+}
+
+/**
+ * Actions for app who has acknowledged.
+ */
+static void
+hv_vss_app_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata)
+{
+	struct hv_vss_req_internal	*reqp, *tmp;
+	uint64_t			req_id;
+	int				opt;
+	uint8_t				status;
+
+	opt = userdata->opt;
+	req_id = userdata->msgid;
+	status = userdata->status;
+	/* make sure the reserved fields are all zeros. */
+	memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) -
+	    __offsetof(struct hv_vss_opt_msg, reserved));
+	mtx_lock(&dev_sc->sc->pending_mutex);
+	SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id);
+	mtx_unlock(&dev_sc->sc->pending_mutex);
+	if (reqp == NULL) {
+		hv_vss_log_info("%s Timeout: fail to find app ack request\n",
+		    __func__);
+		userdata->status = VSS_FAIL;
+		return;
+	}
+	KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!"));
+	hv_vss_log_info("%s, get response %d from app for %s (%ju) \n",
+	    __func__, status, vss_opt_name[opt], (uintmax_t)req_id);
+	if (dev_sc->sc->register_done) {
+		switch (opt) {
+		case HV_VSS_CHECK:
+		case HV_VSS_FREEZE:
+			if (status == VSS_SUCCESS) {
+				hv_vss_notify(&dev_sc->sc->daemon_sc, reqp);
+			} else {
+				/* handle error */
+				callout_drain(&reqp->callout);
+				hv_vss_notify_host_result(reqp, HV_E_FAIL);
+			}
+			break;
+		case HV_VSS_THAW:
+			callout_drain(&reqp->callout);
+			hv_vss_notify_host_result(reqp,
+			    status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL);
+			break;
+		}
+	} else {
+		hv_vss_log_info("%s, Fatal: vss daemon was killed\n", __func__);
+	}
+}
+
+static int
+hv_vss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+	struct proc     *td_proc;
+	td_proc = td->td_proc;
+
+	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
+	hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n",
+	    __func__, td_proc->p_comm, FS_VSS_DEV_NAME);
+
+	if (dev_sc->sc->register_done)
+		return (EBUSY);
+
+	dev_sc->sc->register_done = true;
+	hv_vss_callback(vmbus_get_channel(dev_sc->sc->dev), dev_sc->sc);
+
+	dev_sc->proc_task = curproc;
+	return (0);
+}
+
+static int
+hv_vss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused,
+				 struct thread *td)
+{
+	struct proc     *td_proc;
+	td_proc = td->td_proc;
+
+	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
+
+	hv_vss_log_info("%s: %s closes device \"%s\"\n",
+	    __func__, td_proc->p_comm, FS_VSS_DEV_NAME);
+	dev_sc->sc->register_done = false;
+	return (0);
+}
+
+static int
+hv_vss_dev_daemon_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
+    struct thread *td)
+{
+	struct proc			*td_proc;
+	struct hv_vss_dev_sc		*sc;
+
+	td_proc = td->td_proc;
+	sc = (struct hv_vss_dev_sc*)dev->si_drv1;
+
+	hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm);
+
+	struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data;
+	switch(cmd) {
+	case IOCHVVSSREAD:
+		hv_vss_notified(sc, userdata);
+		break;
+	case IOCHVVSSWRITE:
+		hv_vss_daemon_acked(sc, userdata);
+		break;
+	}
+	return (0);
+}
+
+/*
+ * The daemon's poll invokes this function to check whether data is
+ * available for the daemon to read.
+ */
+static int
+hv_vss_dev_daemon_poll(struct cdev *dev, int events, struct thread *td)
+{
+	int revent = 0;
+	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
+
+	mtx_lock(&dev_sc->sc->pending_mutex);
+	/**
+	 * if there is data ready, inform daemon's poll
+	 */
+	if (!STAILQ_EMPTY(&dev_sc->to_notify_queue))
+		revent = POLLIN;
+	if (revent == 0)
+		selrecord(td, &dev_sc->hv_vss_selinfo);
+	hv_vss_log_info("%s return 0x%x\n", __func__, revent);
+	mtx_unlock(&dev_sc->sc->pending_mutex);
+	return (revent);
+}
+
+static int
+hv_appvss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+	struct proc     *td_proc;
+	td_proc = td->td_proc;
+
+	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
+	hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n",
+	    __func__, td_proc->p_comm, APP_VSS_DEV_NAME);
+
+	if (dev_sc->sc->app_register_done)
+		return (EBUSY);
+
+	dev_sc->sc->app_register_done = true;
+	dev_sc->proc_task = curproc;
+	return (0);
+}
+
+static int
+hv_appvss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused,
+				 struct thread *td)
+{
+	struct proc     *td_proc;
+	td_proc = td->td_proc;
+
+	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
+
+	hv_vss_log_info("%s: %s closes device \"%s\".\n",
+	    __func__, td_proc->p_comm, APP_VSS_DEV_NAME);
+	dev_sc->sc->app_register_done = false;
+	return (0);
+}
+
+static int
+hv_appvss_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
+    struct thread *td)
+{
+	struct proc			*td_proc;
+	struct hv_vss_dev_sc		*dev_sc;
+
+	td_proc = td->td_proc;
+	dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
+
+	hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm);
+
+	struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data;
+	switch(cmd) {
+	case IOCHVVSSREAD:
+		hv_vss_notified(dev_sc, userdata);
+		break;
+	case IOCHVVSSWRITE:
+		hv_vss_app_acked(dev_sc, userdata);
+		break;
+	}
+	return (0);
+}
+
+/*
+ * The application's poll invokes this function to check whether data is
+ * available for the application to read.
+ */
+static int
+hv_appvss_dev_poll(struct cdev *dev, int events, struct thread *td)
+{
+	int revent = 0;
+	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
+
+	mtx_lock(&dev_sc->sc->pending_mutex);
+	/**
+	 * if there is data ready, inform daemon's poll
+	 */
+	if (!STAILQ_EMPTY(&dev_sc->to_notify_queue))
+		revent = POLLIN;
+	if (revent == 0)
+		selrecord(td, &dev_sc->hv_vss_selinfo);
+	hv_vss_log_info("%s return 0x%x\n", __func__, revent);
+	mtx_unlock(&dev_sc->sc->pending_mutex);
+	return (revent);
+}
+
+static void
+hv_vss_timeout(void *arg)
+{
+	hv_vss_req_internal *reqp = arg;
+	hv_vss_req_internal *request;
+	hv_vss_sc* sc = reqp->sc;
+	uint64_t req_id = reqp->vss_req.opt_msg.msgid;
+	/* pending_mutex is held here: the callout was initialized with it. */
+	KASSERT(mtx_owned(&sc->pending_mutex), ("mutex lock is not owned!"));
+	request = hv_vss_drain_req_queue_locked(sc, req_id);
+	KASSERT(request != NULL, ("timeout but fail to find request"));
+	hv_vss_notify_host_result_locked(reqp, HV_E_FAIL);
+}
+
+/*
+ * This routine is called whenever a message is received from the host
+ */
+static void
+hv_vss_init_req(hv_vss_req_internal *reqp,
+    uint32_t recvlen, uint64_t requestid, uint8_t *vss_buf, hv_vss_sc *sc)
+{
+	struct timespec vm_ts;
+	struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf;
+
+	memset(reqp, 0, __offsetof(hv_vss_req_internal, callout));
+	reqp->host_msg_len = recvlen;
+	reqp->host_msg_id = requestid;
+	reqp->rcv_buf = vss_buf;
+	reqp->sc = sc;
+	memcpy(&reqp->vss_req.msg,
+	    (struct hv_vss_msg *)vss_buf, sizeof(struct hv_vss_msg));
+	/* set the opt for users */
+	switch (msg->hdr.vss_hdr.operation) {
+	case VSS_OP_FREEZE:
+		reqp->vss_req.opt_msg.opt = HV_VSS_FREEZE;
+		break;
+	case VSS_OP_THAW:
+		reqp->vss_req.opt_msg.opt = HV_VSS_THAW;
+		break;
+	case VSS_OP_HOT_BACKUP:
+		reqp->vss_req.opt_msg.opt = HV_VSS_CHECK;
+		break;
+	}
+	/* Use a timestamp as msg request ID */
+	nanotime(&vm_ts);
+	reqp->vss_req.opt_msg.msgid = (vm_ts.tv_sec * NANOSEC) + vm_ts.tv_nsec;
+}
+
+static hv_vss_req_internal*
+hv_vss_get_new_req_locked(hv_vss_sc *sc)
+{
+	hv_vss_req_internal *reqp;
+	if (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue) ||
+	    !STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue) ||
+	    !STAILQ_EMPTY(&sc->app_sc.to_notify_queue) ||
+	    !STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) {
+		/*
+		 * A new request arrived from the host before the
+		 * previous requests finished.
+		 */
+		hv_vss_log_info("%s: Warning: new request arrived "
+		    "before previous requests finished\n", __func__);
+		return (NULL);
+	}
+	if (LIST_EMPTY(&sc->req_free_list)) {
+		/* TODO Error: no buffer */
+		hv_vss_log_info("Error: No buffer\n");
+		return (NULL);
+	}
+	reqp = LIST_FIRST(&sc->req_free_list);
+	LIST_REMOVE(reqp, link);
+	return (reqp);
+}
+
+static void
+hv_vss_start_notify(hv_vss_req_internal *reqp, uint32_t opt)
+{
+	hv_vss_sc *sc = reqp->sc;
+	/*
+	 * Freeze/Check notification sequence: kernel -> app -> daemon(fs)
+	 * Thaw notification sequence:         kernel -> daemon(fs) -> app
+	 *
+	 * We should wake up the daemon, in case it's doing poll().
+	 * The response must arrive within TIMEOUT_LIMIT seconds; otherwise
+	 * the timeout callout fires.
+	 */
+	switch (opt) {
+	case VSS_OP_FREEZE:
+	case VSS_OP_HOT_BACKUP:
+		if (sc->app_register_done)
+			hv_vss_notify(&sc->app_sc, reqp);
+		else
+			hv_vss_notify(&sc->daemon_sc, reqp);
+		callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz,
+		    hv_vss_timeout, reqp);
+		break;
+	case VSS_OP_THAW:
+		hv_vss_notify(&sc->daemon_sc, reqp);
+		callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz,
+		    hv_vss_timeout, reqp);
+		break;
+	}
+}
+
+/*
+ * Function to read the vss request buffer from host
+ * and interact with daemon
+ */
+static void
+hv_vss_process_request(void *context, int pending __unused)
+{
+	uint8_t *vss_buf;
+	struct vmbus_channel *channel;
+	uint32_t recvlen = 0;
+	uint64_t requestid;
+	struct vmbus_icmsg_hdr *icmsghdrp;
+	int ret = 0;
+	hv_vss_sc *sc;
+	hv_vss_req_internal *reqp;
+
+	hv_vss_log_info("%s: entering hv_vss_process_request\n", __func__);
+
+	sc = (hv_vss_sc*)context;
+	vss_buf = sc->util_sc.ic_buf;
+	channel = vmbus_get_channel(sc->dev);
+
+	recvlen = sc->util_sc.ic_buflen;
+	ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid);
+	KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough"));
+	/* XXX check recvlen to make sure that it contains enough data */
+
+	while ((ret == 0) && (recvlen > 0)) {
+		icmsghdrp = (struct vmbus_icmsg_hdr *)vss_buf;
+
+		if (icmsghdrp->ic_type == HV_ICMSGTYPE_NEGOTIATE) {
+			ret = vmbus_ic_negomsg(&sc->util_sc, vss_buf,
+			    &recvlen, VSS_FWVER, VSS_MSGVER);
+			hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev),
+			    recvlen, requestid, ret);
+			hv_vss_log_info("%s: version negotiated\n", __func__);
+		} else if (!hv_vss_is_daemon_killed_after_launch(sc)) {
+			struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf;
+			switch(msg->hdr.vss_hdr.operation) {
+			case VSS_OP_FREEZE:
+			case VSS_OP_THAW:
+			case VSS_OP_HOT_BACKUP:
+				mtx_lock(&sc->pending_mutex);
+				reqp = hv_vss_get_new_req_locked(sc);
+				mtx_unlock(&sc->pending_mutex);
+				if (reqp == NULL) {
+					/* ignore this request from host */
+					break;
+				}
+				hv_vss_init_req(reqp, recvlen, requestid, vss_buf, sc);
+				hv_vss_log_info("%s: receive %s (%ju) from host\n",
+				    __func__,
+				    vss_opt_name[reqp->vss_req.opt_msg.opt],
+				    (uintmax_t)reqp->vss_req.opt_msg.msgid);
+				hv_vss_start_notify(reqp, msg->hdr.vss_hdr.operation);
+				break;
+			case VSS_OP_GET_DM_INFO:
+				hv_vss_log_info("%s: receive GET_DM_INFO from host\n",
+				    __func__);
+				msg->body.dm_info.flags = 0;
+				hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev),
+				    recvlen, requestid, HV_S_OK);
+				break;
+			default:
+				device_printf(sc->dev, "Unknown opt from host: %d\n",
+				    msg->hdr.vss_hdr.operation);
+				break;
+			}
+		} else {
+			/* daemon was killed for some reason after it was launched */
+			struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf;
+			switch(msg->hdr.vss_hdr.operation) {
+			case VSS_OP_FREEZE:
+				hv_vss_log_info("%s: response fail for FREEZE\n",
+				    __func__);
+				break;
+			case VSS_OP_THAW:
+				hv_vss_log_info("%s: response fail for THAW\n",
+				    __func__);
+				break;
+			case VSS_OP_HOT_BACKUP:
+				hv_vss_log_info("%s: response fail for HOT_BACKUP\n",
+				    __func__);
+				msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY;
+				break;
+			case VSS_OP_GET_DM_INFO:
+				hv_vss_log_info("%s: response fail for GET_DM_INFO\n",
+				    __func__);
+				msg->body.dm_info.flags = 0;
+				break;
+			default:
+				device_printf(sc->dev, "Unknown opt from host: %d\n",
+				    msg->hdr.vss_hdr.operation);
+				break;
+			}
+			hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev),
+			    recvlen, requestid, HV_E_FAIL);
+		}
+		/*
+		 * Try reading next buffer
+		 */
+		recvlen = sc->util_sc.ic_buflen;
+		ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid);
+		KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough"));
+		/* XXX check recvlen to make sure that it contains enough data */
+
+		hv_vss_log_info("%s: read: context %p, ret =%d, recvlen=%d\n",
+		    __func__, context, ret, recvlen);
+	}
+}
+
+static int
+hv_vss_probe(device_t dev)
+{
+	return (vmbus_ic_probe(dev, vmbus_vss_descs));
+}
+
+static int
+hv_vss_init_send_receive_queue(device_t dev)
+{
+	hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
+	int i;
+	const int max_list = 4; /* big enough for the request free list */
+	struct hv_vss_req_internal* reqp;
+
+	LIST_INIT(&sc->req_free_list);
+	STAILQ_INIT(&sc->daemon_sc.to_notify_queue);
+	STAILQ_INIT(&sc->daemon_sc.to_ack_queue);
+	STAILQ_INIT(&sc->app_sc.to_notify_queue);
+	STAILQ_INIT(&sc->app_sc.to_ack_queue);
+
+	for (i = 0; i < max_list; i++) {
+		reqp = malloc(sizeof(struct hv_vss_req_internal),
+		    M_DEVBUF, M_WAITOK|M_ZERO);
+		LIST_INSERT_HEAD(&sc->req_free_list, reqp, link);
+		callout_init_mtx(&reqp->callout, &sc->pending_mutex, 0);
+	}
+	return (0);
+}
+
+static int
+hv_vss_destroy_send_receive_queue(device_t dev)
+{
+	hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
+	hv_vss_req_internal* reqp;
+
+	while (!LIST_EMPTY(&sc->req_free_list)) {
+		reqp = LIST_FIRST(&sc->req_free_list);
+		LIST_REMOVE(reqp, link);
+		free(reqp, M_DEVBUF);
+	}
+
+	while (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue)) {
+		reqp = STAILQ_FIRST(&sc->daemon_sc.to_notify_queue);
+		STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_notify_queue, slink);
+		free(reqp, M_DEVBUF);
+	}
+
+	while (!STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue)) {
+		reqp = STAILQ_FIRST(&sc->daemon_sc.to_ack_queue);
+		STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_ack_queue, slink);
+		free(reqp, M_DEVBUF);
+	}
+
+	while (!STAILQ_EMPTY(&sc->app_sc.to_notify_queue)) {
+		reqp = STAILQ_FIRST(&sc->app_sc.to_notify_queue);
+		STAILQ_REMOVE_HEAD(&sc->app_sc.to_notify_queue, slink);
+		free(reqp, M_DEVBUF);
+	}
+
+	while (!STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) {
+		reqp = STAILQ_FIRST(&sc->app_sc.to_ack_queue);
+		STAILQ_REMOVE_HEAD(&sc->app_sc.to_ack_queue, slink);
+		free(reqp, M_DEVBUF);
+	}
+	return (0);
+}
+
+static int
+hv_vss_attach(device_t dev)
+{
+	int error;
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
+
+	hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
+
+	sc->dev = dev;
+	mtx_init(&sc->pending_mutex, "hv_vss pending mutex", NULL, MTX_DEF);
+
+	ctx = device_get_sysctl_ctx(dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_vss_log",
+	    CTLFLAG_RWTUN, &hv_vss_log, 0, "Hyperv VSS service log level");
+
+	TASK_INIT(&sc->task, 0, hv_vss_process_request, sc);
+	hv_vss_init_send_receive_queue(dev);
+	/* create character device for file system freeze/thaw */
+	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
+		    &sc->hv_vss_dev,
+		    &hv_vss_cdevsw,
+		    0,
+		    UID_ROOT,
+		    GID_WHEEL,
+		    0640,
+		    FS_VSS_DEV_NAME);
+
+	if (error != 0) {
+		hv_vss_log_info("Fail to create '%s': %d\n", FS_VSS_DEV_NAME, error);
+		return (error);
+	}
+	sc->hv_vss_dev->si_drv1 = &sc->daemon_sc;
+	sc->daemon_sc.sc = sc;
+	/* create character device for application freeze/thaw */
+	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
+		    &sc->hv_appvss_dev,
+		    &hv_appvss_cdevsw,
+		    0,
+		    UID_ROOT,
+		    GID_WHEEL,
+		    0640,
+		    APP_VSS_DEV_NAME);
+
+	if (error != 0) {
+		hv_vss_log_info("Fail to create '%s': %d\n", APP_VSS_DEV_NAME, error);
+		return (error);
+	}
+	sc->hv_appvss_dev->si_drv1 = &sc->app_sc;
+	sc->app_sc.sc = sc;
+
+	return (vmbus_ic_attach(dev, hv_vss_callback));
+}
+
+static int
+hv_vss_detach(device_t dev)
+{
+	hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
+	mtx_destroy(&sc->pending_mutex);
+	if (sc->daemon_sc.proc_task != NULL) {
+		PROC_LOCK(sc->daemon_sc.proc_task);
+		kern_psignal(sc->daemon_sc.proc_task, SIGKILL);
+		PROC_UNLOCK(sc->daemon_sc.proc_task);
+	}
+	if (sc->app_sc.proc_task != NULL) {
+		PROC_LOCK(sc->app_sc.proc_task);
+		kern_psignal(sc->app_sc.proc_task, SIGKILL);
+		PROC_UNLOCK(sc->app_sc.proc_task);
+	}
+	hv_vss_destroy_send_receive_queue(dev);
+	destroy_dev(sc->hv_vss_dev);
+	destroy_dev(sc->hv_appvss_dev);
+	return (vmbus_ic_detach(dev));
+}
+
+static device_method_t vss_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe, hv_vss_probe),
+	DEVMETHOD(device_attach, hv_vss_attach),
+	DEVMETHOD(device_detach, hv_vss_detach),
+	{ 0, 0 }
+};
+
+static driver_t vss_driver = { "hvvss", vss_methods, sizeof(hv_vss_sc)};
+
+static devclass_t vss_devclass;
+
+DRIVER_MODULE(hv_vss, vmbus, vss_driver, vss_devclass, NULL, NULL);
+MODULE_VERSION(hv_vss, 1);
+MODULE_DEPEND(hv_vss, vmbus, 1, 1, 1);


Property changes on: trunk/sys/dev/hyperv/utilities/hv_snapshot.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/utilities/hv_snapshot.h
===================================================================
--- trunk/sys/dev/hyperv/utilities/hv_snapshot.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/utilities/hv_snapshot.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,57 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/utilities/hv_snapshot.h 310735 2016-12-29 05:32:34Z sephe $
+ */
+
+#ifndef _VSS_H
+#define _VSS_H
+#include <sys/ioccom.h>
+#define FS_VSS_DEV_NAME		"hv_fsvss_dev"
+#define APP_VSS_DEV_NAME	"hv_appvss_dev"
+
+#define VSS_DEV(VSS)		"/dev/"VSS
+
+#define VSS_SUCCESS		0x00000000
+#define VSS_FAIL		0x00000001
+
+enum hv_vss_op_t {
+	HV_VSS_NONE = 0,
+	HV_VSS_CHECK,
+	HV_VSS_FREEZE,
+	HV_VSS_THAW,
+	HV_VSS_COUNT
+};
+
+struct hv_vss_opt_msg {
+	uint32_t	opt;		/* operation */
+	uint32_t	status;		/* 0 for success, 1 for error */
+	uint64_t	msgid;		/* an ID used to identify the transaction */
+	uint8_t		reserved[48];	/* reserved values are all zeroes */
+};
+#define IOCHVVSSREAD		_IOR('v', 2, struct hv_vss_opt_msg)
+#define IOCHVVSSWRITE		_IOW('v', 3, struct hv_vss_opt_msg)
+#endif


Property changes on: trunk/sys/dev/hyperv/utilities/hv_snapshot.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
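The header above is the whole contract between the kernel driver and the
userland daemons: poll(2) for POLLIN, fetch the pending request with
IOCHVVSSREAD, do the freeze/thaw/check work, then acknowledge with
IOCHVVSSWRITE, preserving opt and msgid so the driver can match the ack
against its queues. A minimal, hypothetical daemon loop under those
assumptions (the actual freeze/thaw work, error handling, and the include
path for hv_snapshot.h are elided or assumed):

#include <sys/ioctl.h>
#include <fcntl.h>
#include <poll.h>
#include <stdint.h>
#include <string.h>

#include "hv_snapshot.h"	/* assumed to be available to userland */

int
main(void)
{
	struct hv_vss_opt_msg msg;
	struct pollfd pfd;
	int fd;

	/* Opening the device registers this process as the VSS daemon. */
	fd = open(VSS_DEV(FS_VSS_DEV_NAME), O_RDWR);
	if (fd < 0)
		return (1);
	pfd.fd = fd;
	pfd.events = POLLIN;
	for (;;) {
		if (poll(&pfd, 1, -1) <= 0)
			continue;
		memset(&msg, 0, sizeof(msg));
		if (ioctl(fd, IOCHVVSSREAD, &msg) < 0 ||
		    msg.status != VSS_SUCCESS)
			continue;
		/* msg.opt and msg.msgid must be echoed back unchanged. */
		switch (msg.opt) {
		case HV_VSS_FREEZE:	/* freeze file systems here */
		case HV_VSS_THAW:	/* ...or thaw them */
		case HV_VSS_CHECK:
			msg.status = VSS_SUCCESS;
			break;
		default:
			msg.status = VSS_FAIL;
			break;
		}
		(void)ioctl(fd, IOCHVVSSWRITE, &msg);
	}
}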
Added: trunk/sys/dev/hyperv/utilities/hv_utilreg.h
===================================================================
--- trunk/sys/dev/hyperv/utilities/hv_utilreg.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/utilities/hv_utilreg.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,87 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/utilities/hv_utilreg.h 308516 2016-11-11 08:04:24Z sephe $
+ */
+
+#ifndef _HV_UTILREG_H_
+#define _HV_UTILREG_H_
+
+/*
+ * Some Hyper-V status codes.
+ */
+#define HV_S_OK				0x00000000
+#define HV_E_FAIL			0x80004005
+#define HV_S_CONT			0x80070103
+#define HV_ERROR_NOT_SUPPORTED		0x80070032
+#define HV_ERROR_MACHINE_LOCKED		0x800704F7
+#define HV_ERROR_DEVICE_NOT_CONNECTED	0x8007048F
+#define HV_INVALIDARG			0x80070057
+#define HV_GUID_NOTFOUND		0x80041002
+
+/*
+ * Common defines for Hyper-V ICs
+ */
+#define HV_ICMSGTYPE_NEGOTIATE		0
+#define HV_ICMSGTYPE_HEARTBEAT		1
+#define HV_ICMSGTYPE_KVPEXCHANGE	2
+#define HV_ICMSGTYPE_SHUTDOWN		3
+#define HV_ICMSGTYPE_TIMESYNC		4
+#define HV_ICMSGTYPE_VSS		5
+
+#define HV_ICMSGHDRFLAG_TRANSACTION	1
+#define HV_ICMSGHDRFLAG_REQUEST		2
+#define HV_ICMSGHDRFLAG_RESPONSE	4
+
+typedef struct hv_vmbus_pipe_hdr {
+	uint32_t flags;
+	uint32_t msgsize;
+} __packed hv_vmbus_pipe_hdr;
+
+typedef struct hv_vmbus_ic_version {
+	uint16_t major;
+	uint16_t minor;
+} __packed hv_vmbus_ic_version;
+
+typedef struct hv_vmbus_icmsg_hdr {
+	hv_vmbus_ic_version	icverframe;
+	uint16_t		icmsgtype;
+	hv_vmbus_ic_version	icvermsg;
+	uint16_t		icmsgsize;
+	uint32_t		status;
+	uint8_t			ictransaction_id;
+	uint8_t			icflags;
+	uint8_t			reserved[2];
+} __packed hv_vmbus_icmsg_hdr;
+
+typedef struct hv_vmbus_icmsg_negotiate {
+	uint16_t		icframe_vercnt;
+	uint16_t		icmsg_vercnt;
+	uint32_t		reserved;
+	hv_vmbus_ic_version	icversion_data[1]; /* any size array */
+} __packed hv_vmbus_icmsg_negotiate;
+
+#endif	/* !_HV_UTILREG_H_ */


Property changes on: trunk/sys/dev/hyperv/utilities/hv_utilreg.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/utilities/unicode.h
===================================================================
--- trunk/sys/dev/hyperv/utilities/unicode.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/utilities/unicode.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,202 @@
+/* $MidnightBSD$ */
+/* $NetBSD: unicode.h,v 1.1.1.1 2007/03/06 00:10:39 dillo Exp $ */
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Dieter Baron.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/utilities/unicode.h 272322 2014-09-30 17:54:57Z delphij $
+ */
+
+#include <sys/types.h>
+
+#define UNICODE_DECOMPOSE		0x01
+#define UNICODE_PRECOMPOSE		0x02
+#define UNICODE_UTF8_LATIN1_FALLBACK	0x03
+
+size_t utf8_to_utf16(uint16_t *, size_t, const char *, size_t, int, int *);
+size_t utf16_to_utf8(char *, size_t, const uint16_t *, size_t, int, int *);
+
+size_t
+utf8_to_utf16(uint16_t *dst, size_t dst_len,
+	      const char *src, size_t src_len,
+	      int flags, int *errp)
+{
+    const unsigned char *s;
+    size_t spos, dpos;
+    int error;
+    uint16_t c;
+
+#define IS_CONT(c)	(((c)&0xc0) == 0x80)
+
+    error = 0;
+    s = (const unsigned char *)src;
+    spos = dpos = 0;
+    while (spos<src_len) {
+	if (s[spos] < 0x80)
+	    c = s[spos++];
+	else if ((flags & UNICODE_UTF8_LATIN1_FALLBACK)
+		 && (spos >= src_len || !IS_CONT(s[spos+1]))
+		 && s[spos]>=0xa0) {
+	    /* not valid UTF-8, assume ISO 8859-1 */
+	    c = s[spos++];
+	}
+	else if (s[spos] < 0xc0 || s[spos] >= 0xf5) {
+	    /* continuation byte without lead byte
+	       or lead byte for codepoint above 0x10ffff */
+	    error++;
+	    spos++;
+	    continue;
+	}
+	else if (s[spos] < 0xe0) {
+	    if (spos >= src_len-1 || !IS_CONT(s[spos+1])) {
+		spos++;
+		error++;
+		continue;
+	    }
+	    c = ((s[spos] & 0x3f) << 6) | (s[spos+1] & 0x3f);
+	    spos += 2;
+	    if (c < 0x80) {
+		/* overlong encoding */
+		error++;
+		continue;
+	    }
+	}
+	else if (s[spos] < 0xf0) {
+	    if (spos >= src_len-2
+		|| !IS_CONT(s[spos+1]) || !IS_CONT(s[spos+2])) {
+		spos++;
+		error++;
+		continue;
+	    }
+	    c = ((s[spos] & 0x0f) << 12) | ((s[spos+1] & 0x3f) << 6)
+		| (s[spos+2] & 0x3f);
+	    spos += 3;
+	    if (c < 0x800 || (c & 0xdf00) == 0xd800 ) {
+		/* overlong encoding or encoded surrogate */
+		error++;
+		continue;
+	    }
+	}
+	else {
+	    uint32_t cc;
+	    /* UTF-16 surrogate pair */
+
+	    if (spos >= src_len-3 || !IS_CONT(s[spos+1])
+		|| !IS_CONT(s[spos+2]) || !IS_CONT(s[spos+3])) {
+		spos++;
+		error++;
+		continue;
+	    }
+	    cc = ((s[spos] & 0x03) << 18) | ((s[spos+1] & 0x3f) << 12)
+		 | ((s[spos+2] & 0x3f) << 6) | (s[spos+3] & 0x3f);
+	    spos += 4;
+	    if (cc < 0x10000) {
+		/* overlong encoding */
+		error++;
+		continue;
+	    }
+	    if (dst && dpos < dst_len)
+		dst[dpos] = (0xd800 | ((cc-0x10000)>>10));
+	    dpos++;
+	    c = 0xdc00 | ((cc-0x10000) & 0x3ffff);
+	}
+
+	if (dst && dpos < dst_len)
+	    dst[dpos] = c;
+	dpos++;
+    }
+    
+    if (errp)
+	*errp = error;
+
+    return dpos;
+
+#undef IS_CONT
+}
+
+
+size_t
+utf16_to_utf8(char *dst, size_t dst_len,
+	      const uint16_t *src, size_t src_len,
+	      int flags, int *errp)
+{
+    uint16_t spos, dpos;
+    int error;
+
+#define CHECK_LENGTH(l)	(dpos > dst_len-(l) ? dst=NULL : NULL)
+#define ADD_BYTE(b)	(dst ? dst[dpos] = (b) : 0, dpos++)
+
+    error = 0;
+    dpos = 0;
+    for (spos=0; spos<src_len; spos++) {
+	if (src[spos] < 0x80) {
+	    CHECK_LENGTH(1);
+	    ADD_BYTE(src[spos]);
+	}
+	else if (src[spos] < 0x800) {
+	    CHECK_LENGTH(2);
+	    ADD_BYTE(0xc0 | (src[spos]>>6));
+	    ADD_BYTE(0x80 | (src[spos] & 0x3f));
+	}
+	else if ((src[spos] & 0xdc00) == 0xd800) {
+	    uint32_t c;
+	    /* first surrogate */
+	    if (spos == src_len - 1 || (src[spos] & 0xdc00) != 0xdc00) {
+		/* no second surrogate present */
+		error++;
+		continue;
+	    }
+	    spos++;
+	    CHECK_LENGTH(4);
+	    c = (((src[spos]&0x3ff) << 10) | (src[spos+1]&0x3ff)) + 0x10000;
+	    ADD_BYTE(0xf0 | (c>>18));
+	    ADD_BYTE(0x80 | ((c>>12) & 0x3f));
+	    ADD_BYTE(0x80 | ((c>>6) & 0x3f));
+	    ADD_BYTE(0x80 | (c & 0x3f));
+	}
+	else if ((src[spos] & 0xdc00) == 0xdc00) {
+	    /* second surrogate without preceding first surrogate */
+	    error++;
+	}
+	else {
+	    CHECK_LENGTH(3);
+	    ADD_BYTE(0xe0 | src[spos]>>12);
+	    ADD_BYTE(0x80 | ((src[spos]>>6) & 0x3f));
+	    ADD_BYTE(0x80 | (src[spos] & 0x3f));
+	}
+    }
+
+    if (errp)
+	*errp = error;
+
+    return dpos;
+
+#undef ADD_BYTE
+#undef CHECK_LENGTH
+}


Property changes on: trunk/sys/dev/hyperv/utilities/unicode.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
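Both converters above keep counting output units even after the destination
is exhausted (utf8_to_utf16 simply skips the store when dst is NULL or full;
utf16_to_utf8's CHECK_LENGTH drops dst), so the return value supports a
sizing pass before the real conversion. A hedged sketch of that idiom in a
kernel context; the helper name and the M_DEVBUF allocation are illustrative
only:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>

#include <dev/hyperv/utilities/unicode.h>

static uint16_t *
utf16_dup_sketch(const char *src, size_t *u16cnt)
{
	uint16_t *buf;
	size_t need;
	int err;

	/* Pass 1: a NULL destination only measures the required length. */
	need = utf8_to_utf16(NULL, 0, src, strlen(src), 0, &err);
	buf = malloc(need * sizeof(uint16_t), M_DEVBUF, M_WAITOK);
	/* Pass 2: the real conversion; err counts invalid input sequences. */
	*u16cnt = utf8_to_utf16(buf, need, src, strlen(src), 0, &err);
	return (buf);
}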
Added: trunk/sys/dev/hyperv/utilities/vmbus_heartbeat.c
===================================================================
--- trunk/sys/dev/hyperv/utilities/vmbus_heartbeat.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/utilities/vmbus_heartbeat.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,153 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014,2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/utilities/vmbus_heartbeat.c 311254 2017-01-04 05:24:16Z sephe $");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/systm.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/vmbus.h>
+#include <dev/hyperv/utilities/vmbus_icreg.h>
+#include <dev/hyperv/utilities/vmbus_icvar.h>
+
+#define VMBUS_HEARTBEAT_FWVER_MAJOR	3
+#define VMBUS_HEARTBEAT_FWVER		\
+	VMBUS_IC_VERSION(VMBUS_HEARTBEAT_FWVER_MAJOR, 0)
+
+#define VMBUS_HEARTBEAT_MSGVER_MAJOR	3
+#define VMBUS_HEARTBEAT_MSGVER		\
+	VMBUS_IC_VERSION(VMBUS_HEARTBEAT_MSGVER_MAJOR, 0)
+
+static int			vmbus_heartbeat_probe(device_t);
+static int			vmbus_heartbeat_attach(device_t);
+
+static const struct vmbus_ic_desc vmbus_heartbeat_descs[] = {
+	{
+		.ic_guid = { .hv_guid = {
+		    0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e,
+		    0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d} },
+		.ic_desc = "Hyper-V Heartbeat"
+	},
+	VMBUS_IC_DESC_END
+};
+
+static device_method_t vmbus_heartbeat_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		vmbus_heartbeat_probe),
+	DEVMETHOD(device_attach,	vmbus_heartbeat_attach),
+	DEVMETHOD(device_detach,	vmbus_ic_detach),
+	DEVMETHOD_END
+};
+
+static driver_t vmbus_heartbeat_driver = {
+	"hvheartbeat",
+	vmbus_heartbeat_methods,
+	sizeof(struct vmbus_ic_softc)
+};
+
+static devclass_t vmbus_heartbeat_devclass;
+
+DRIVER_MODULE(hv_heartbeat, vmbus, vmbus_heartbeat_driver,
+    vmbus_heartbeat_devclass, NULL, NULL);
+MODULE_VERSION(hv_heartbeat, 1);
+MODULE_DEPEND(hv_heartbeat, vmbus, 1, 1, 1);
+
+static void
+vmbus_heartbeat_cb(struct vmbus_channel *chan, void *xsc)
+{
+	struct vmbus_ic_softc *sc = xsc;
+	struct vmbus_icmsg_hdr *hdr;
+	int dlen, error;
+	uint64_t xactid;
+	void *data;
+
+	/*
+	 * Receive request.
+	 */
+	data = sc->ic_buf;
+	dlen = sc->ic_buflen;
+	error = vmbus_chan_recv(chan, data, &dlen, &xactid);
+	KASSERT(error != ENOBUFS, ("icbuf is not large enough"));
+	if (error)
+		return;
+
+	if (dlen < sizeof(*hdr)) {
+		device_printf(sc->ic_dev, "invalid data len %d\n", dlen);
+		return;
+	}
+	hdr = data;
+
+	/*
+	 * Update request, which will be echoed back as response.
+	 */
+	switch (hdr->ic_type) {
+	case VMBUS_ICMSG_TYPE_NEGOTIATE:
+		error = vmbus_ic_negomsg(sc, data, &dlen,
+		    VMBUS_HEARTBEAT_FWVER, VMBUS_HEARTBEAT_MSGVER);
+		if (error)
+			return;
+		break;
+
+	case VMBUS_ICMSG_TYPE_HEARTBEAT:
+		/* Only ic_seq is a must */
+		if (dlen < VMBUS_ICMSG_HEARTBEAT_SIZE_MIN) {
+			device_printf(sc->ic_dev, "invalid heartbeat len %d\n",
+			    dlen);
+			return;
+		}
+		((struct vmbus_icmsg_heartbeat *)data)->ic_seq++;
+		break;
+
+	default:
+		device_printf(sc->ic_dev, "got 0x%08x icmsg\n", hdr->ic_type);
+		break;
+	}
+
+	/*
+	 * Send response by echoing the request back.
+	 */
+	vmbus_ic_sendresp(sc, chan, data, dlen, xactid);
+}
+
+static int
+vmbus_heartbeat_probe(device_t dev)
+{
+
+	return (vmbus_ic_probe(dev, vmbus_heartbeat_descs));
+}
+
+static int
+vmbus_heartbeat_attach(device_t dev)
+{
+
+	return (vmbus_ic_attach(dev, vmbus_heartbeat_cb));
+}


Property changes on: trunk/sys/dev/hyperv/utilities/vmbus_heartbeat.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/utilities/vmbus_ic.c
===================================================================
--- trunk/sys/dev/hyperv/utilities/vmbus_ic.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/utilities/vmbus_ic.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,300 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014,2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/vmbus.h>
+#include <dev/hyperv/utilities/vmbus_icreg.h>
+#include <dev/hyperv/utilities/vmbus_icvar.h>
+
+#include "vmbus_if.h"
+
+#define VMBUS_IC_BRSIZE		(4 * PAGE_SIZE)
+
+#define VMBUS_IC_VERCNT		2
+#define VMBUS_IC_NEGOSZ		\
+	__offsetof(struct vmbus_icmsg_negotiate, ic_ver[VMBUS_IC_VERCNT])
+CTASSERT(VMBUS_IC_NEGOSZ < VMBUS_IC_BRSIZE);
+
+static int	vmbus_ic_fwver_sysctl(SYSCTL_HANDLER_ARGS);
+static int	vmbus_ic_msgver_sysctl(SYSCTL_HANDLER_ARGS);
+
+int
+vmbus_ic_negomsg(struct vmbus_ic_softc *sc, void *data, int *dlen0,
+    uint32_t fw_ver, uint32_t msg_ver)
+{
+	struct vmbus_icmsg_negotiate *nego;
+	int i, cnt, dlen = *dlen0, error;
+	uint32_t sel_fw_ver, sel_msg_ver;
+	bool has_fw_ver, has_msg_ver;
+
+	/*
+	 * Preliminary message verification.
+	 */
+	if (dlen < sizeof(*nego)) {
+		device_printf(sc->ic_dev, "truncated ic negotiate, len %d\n",
+		    dlen);
+		return (EINVAL);
+	}
+	nego = data;
+
+	if (nego->ic_fwver_cnt == 0) {
+		device_printf(sc->ic_dev, "ic negotiate does not contain "
+		    "framework version %u\n", nego->ic_fwver_cnt);
+		return (EINVAL);
+	}
+	if (nego->ic_msgver_cnt == 0) {
+		device_printf(sc->ic_dev, "ic negotiate does not contain "
+		    "message version %u\n", nego->ic_msgver_cnt);
+		return (EINVAL);
+	}
+
+	cnt = nego->ic_fwver_cnt + nego->ic_msgver_cnt;
+	if (dlen < __offsetof(struct vmbus_icmsg_negotiate, ic_ver[cnt])) {
+		device_printf(sc->ic_dev, "ic negotiate does not contain "
+		    "versions %d\n", dlen);
+		return (EINVAL);
+	}
+
+	error = EOPNOTSUPP;
+
+	/*
+	 * Find the best match framework version.
+	 */
+	has_fw_ver = false;
+	for (i = 0; i < nego->ic_fwver_cnt; ++i) {
+		if (VMBUS_ICVER_LE(nego->ic_ver[i], fw_ver)) {
+			if (!has_fw_ver) {
+				sel_fw_ver = nego->ic_ver[i];
+				has_fw_ver = true;
+			} else if (VMBUS_ICVER_GT(nego->ic_ver[i],
+			    sel_fw_ver)) {
+				sel_fw_ver = nego->ic_ver[i];
+			}
+		}
+	}
+	if (!has_fw_ver) {
+		device_printf(sc->ic_dev, "failed to select framework "
+		    "version\n");
+		goto done;
+	}
+
+	/*
+	 * Find the best matching message version.
+	 */
+	has_msg_ver = false;
+	for (i = nego->ic_fwver_cnt;
+	    i < nego->ic_fwver_cnt + nego->ic_msgver_cnt; ++i) {
+		if (VMBUS_ICVER_LE(nego->ic_ver[i], msg_ver)) {
+			if (!has_msg_ver) {
+				sel_msg_ver = nego->ic_ver[i];
+				has_msg_ver = true;
+			} else if (VMBUS_ICVER_GT(nego->ic_ver[i],
+			    sel_msg_ver)) {
+				sel_msg_ver = nego->ic_ver[i];
+			}
+		}
+	}
+	if (!has_msg_ver) {
+		device_printf(sc->ic_dev, "failed to select message "
+		    "version\n");
+		goto done;
+	}
+
+	error = 0;
+done:
+	if (bootverbose || !has_fw_ver || !has_msg_ver) {
+		if (has_fw_ver) {
+			device_printf(sc->ic_dev, "sel framework version: "
+			    "%u.%u\n",
+			    VMBUS_ICVER_MAJOR(sel_fw_ver),
+			    VMBUS_ICVER_MINOR(sel_fw_ver));
+		}
+		for (i = 0; i < nego->ic_fwver_cnt; i++) {
+			device_printf(sc->ic_dev, "supp framework version: "
+			    "%u.%u\n",
+			    VMBUS_ICVER_MAJOR(nego->ic_ver[i]),
+			    VMBUS_ICVER_MINOR(nego->ic_ver[i]));
+		}
+
+		if (has_msg_ver) {
+			device_printf(sc->ic_dev, "sel message version: "
+			    "%u.%u\n",
+			    VMBUS_ICVER_MAJOR(sel_msg_ver),
+			    VMBUS_ICVER_MINOR(sel_msg_ver));
+		}
+		for (i = nego->ic_fwver_cnt;
+		    i < nego->ic_fwver_cnt + nego->ic_msgver_cnt; i++) {
+			device_printf(sc->ic_dev, "supp message version: "
+			    "%u.%u\n",
+			    VMBUS_ICVER_MAJOR(nego->ic_ver[i]),
+			    VMBUS_ICVER_MINOR(nego->ic_ver[i]));
+		}
+	}
+	if (error)
+		return (error);
+
+	/* Record the selected versions. */
+	sc->ic_fwver = sel_fw_ver;
+	sc->ic_msgver = sel_msg_ver;
+
+	/* One framework version. */
+	nego->ic_fwver_cnt = 1;
+	nego->ic_ver[0] = sel_fw_ver;
+
+	/* One message version. */
+	nego->ic_msgver_cnt = 1;
+	nego->ic_ver[1] = sel_msg_ver;
+
+	/* Update data size. */
+	nego->ic_hdr.ic_dsize = VMBUS_IC_NEGOSZ -
+	    sizeof(struct vmbus_icmsg_hdr);
+
+	/* Update total size, if necessary. */
+	if (dlen < VMBUS_IC_NEGOSZ)
+		*dlen0 = VMBUS_IC_NEGOSZ;
+
+	return (0);
+}
+
+int
+vmbus_ic_probe(device_t dev, const struct vmbus_ic_desc descs[])
+{
+	device_t bus = device_get_parent(dev);
+	const struct vmbus_ic_desc *d;
+
+	if (resource_disabled(device_get_name(dev), 0))
+		return (ENXIO);
+
+	for (d = descs; d->ic_desc != NULL; ++d) {
+		if (VMBUS_PROBE_GUID(bus, dev, &d->ic_guid) == 0) {
+			device_set_desc(dev, d->ic_desc);
+			return (BUS_PROBE_DEFAULT);
+		}
+	}
+	return (ENXIO);
+}
+
+int
+vmbus_ic_attach(device_t dev, vmbus_chan_callback_t cb)
+{
+	struct vmbus_ic_softc *sc = device_get_softc(dev);
+	struct vmbus_channel *chan = vmbus_get_channel(dev);
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
+	int error;
+
+	sc->ic_dev = dev;
+	sc->ic_buflen = VMBUS_IC_BRSIZE;
+	sc->ic_buf = malloc(VMBUS_IC_BRSIZE, M_DEVBUF, M_WAITOK | M_ZERO);
+
+	/*
+	 * These services are not performance critical and do not need
+	 * batched reading. Furthermore, some services such as KVP can
+	 * only handle one message from the host at a time.
+	 * Turn off batched reading for all util drivers before we open the
+	 * channel.
+	 */
+	vmbus_chan_set_readbatch(chan, false);
+
+	error = vmbus_chan_open(chan, VMBUS_IC_BRSIZE, VMBUS_IC_BRSIZE, NULL, 0,
+	    cb, sc);
+	if (error) {
+		free(sc->ic_buf, M_DEVBUF);
+		return (error);
+	}
+
+	ctx = device_get_sysctl_ctx(dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "fw_version",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    vmbus_ic_fwver_sysctl, "A", "framework version");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "msg_version",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    vmbus_ic_msgver_sysctl, "A", "message version");
+
+	return (0);
+}
+
+static int
+vmbus_ic_fwver_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct vmbus_ic_softc *sc = arg1;
+	char verstr[16];
+
+	snprintf(verstr, sizeof(verstr), "%u.%u",
+	    VMBUS_ICVER_MAJOR(sc->ic_fwver), VMBUS_ICVER_MINOR(sc->ic_fwver));
+	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
+}
+
+static int
+vmbus_ic_msgver_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct vmbus_ic_softc *sc = arg1;
+	char verstr[16];
+
+	snprintf(verstr, sizeof(verstr), "%u.%u",
+	    VMBUS_ICVER_MAJOR(sc->ic_msgver), VMBUS_ICVER_MINOR(sc->ic_msgver));
+	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
+}
+
+int
+vmbus_ic_detach(device_t dev)
+{
+	struct vmbus_ic_softc *sc = device_get_softc(dev);
+
+	vmbus_chan_close(vmbus_get_channel(dev));
+	free(sc->ic_buf, M_DEVBUF);
+
+	return (0);
+}
+
+int
+vmbus_ic_sendresp(struct vmbus_ic_softc *sc, struct vmbus_channel *chan,
+    void *data, int dlen, uint64_t xactid)
+{
+	struct vmbus_icmsg_hdr *hdr;
+	int error;
+
+	KASSERT(dlen >= sizeof(*hdr), ("invalid data length %d", dlen));
+	hdr = data;
+
+	hdr->ic_flags = VMBUS_ICMSG_FLAG_XACT | VMBUS_ICMSG_FLAG_RESP;
+	error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
+	    data, dlen, xactid);
+	if (error)
+		device_printf(sc->ic_dev, "resp send failed: %d\n", error);
+	return (error);
+}


Property changes on: trunk/sys/dev/hyperv/utilities/vmbus_ic.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
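
Once vmbus_ic_attach() has run, the negotiated versions surface in the
per-device sysctl tree. A hedged illustration, assuming the usual
dev.<driver>.<unit> sysctl naming and the "hvshutdown" driver added later
in this commit:

    # sysctl dev.hvshutdown.0.fw_version dev.hvshutdown.0.msg_version
    dev.hvshutdown.0.fw_version: 3.0
    dev.hvshutdown.0.msg_version: 3.0
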
Added: trunk/sys/dev/hyperv/utilities/vmbus_icreg.h
===================================================================
--- trunk/sys/dev/hyperv/utilities/vmbus_icreg.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/utilities/vmbus_icreg.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,136 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/utilities/vmbus_icreg.h 311256 2017-01-04 05:36:38Z sephe $
+ */
+
+#ifndef _VMBUS_ICREG_H_
+#define _VMBUS_ICREG_H_
+
+#define VMBUS_ICMSG_TYPE_NEGOTIATE	0
+#define VMBUS_ICMSG_TYPE_HEARTBEAT	1
+#define VMBUS_ICMSG_TYPE_KVP		2
+#define VMBUS_ICMSG_TYPE_SHUTDOWN	3
+#define VMBUS_ICMSG_TYPE_TIMESYNC	4
+#define VMBUS_ICMSG_TYPE_VSS		5
+
+#define VMBUS_ICMSG_STATUS_OK		0x00000000
+#define VMBUS_ICMSG_STATUS_FAIL		0x80004005
+
+#define VMBUS_IC_VERSION(major, minor)	((major) | (((uint32_t)(minor)) << 16))
+#define VMBUS_ICVER_MAJOR(ver)		((ver) & 0xffff)
+#define VMBUS_ICVER_MINOR(ver)		(((ver) & 0xffff0000) >> 16)
+#define VMBUS_ICVER_SWAP(ver)		\
+	((VMBUS_ICVER_MAJOR((ver)) << 16) | VMBUS_ICVER_MINOR((ver)))
+#define VMBUS_ICVER_LE(v1, v2)		\
+	(VMBUS_ICVER_SWAP((v1)) <= VMBUS_ICVER_SWAP((v2)))
+#define VMBUS_ICVER_GT(v1, v2)		\
+	(VMBUS_ICVER_SWAP((v1)) > VMBUS_ICVER_SWAP((v2)))
+
+struct vmbus_pipe_hdr {
+	uint32_t		ph_flags;
+	uint32_t		ph_msgsz;
+} __packed;
+
+struct vmbus_icmsg_hdr {
+	struct vmbus_pipe_hdr	ic_pipe;
+	uint32_t		ic_fwver;	/* framework version */
+	uint16_t		ic_type;
+	uint32_t		ic_msgver;	/* message version */
+	uint16_t		ic_dsize;	/* data size */
+	uint32_t		ic_status;	/* VMBUS_ICMSG_STATUS_ */
+	uint8_t			ic_xactid;
+	uint8_t			ic_flags;	/* VMBUS_ICMSG_FLAG_ */
+	uint8_t			ic_rsvd[2];
+} __packed;
+
+#define VMBUS_ICMSG_FLAG_XACT		0x0001
+#define VMBUS_ICMSG_FLAG_REQ		0x0002
+#define VMBUS_ICMSG_FLAG_RESP		0x0004
+
+/* VMBUS_ICMSG_TYPE_NEGOTIATE */
+struct vmbus_icmsg_negotiate {
+	struct vmbus_icmsg_hdr	ic_hdr;
+	uint16_t		ic_fwver_cnt;
+	uint16_t		ic_msgver_cnt;
+	uint32_t		ic_rsvd;
+	/*
+	 * This version array contains two sets of supported
+	 * versions:
+	 * - The first set consists of #ic_fwver_cnt supported framework
+	 *   versions.
+	 * - The second set consists of #ic_msgver_cnt supported message
+	 *   versions.
+	 */
+	uint32_t		ic_ver[];
+} __packed;
+
+/* VMBUS_ICMSG_TYPE_HEARTBEAT */
+struct vmbus_icmsg_heartbeat {
+	struct vmbus_icmsg_hdr	ic_hdr;
+	uint64_t		ic_seq;
+	uint32_t		ic_rsvd[8];
+} __packed;
+
+#define VMBUS_ICMSG_HEARTBEAT_SIZE_MIN	\
+	__offsetof(struct vmbus_icmsg_heartbeat, ic_rsvd[0])
+
+/* VMBUS_ICMSG_TYPE_SHUTDOWN */
+struct vmbus_icmsg_shutdown {
+	struct vmbus_icmsg_hdr	ic_hdr;
+	uint32_t		ic_code;
+	uint32_t		ic_timeo;
+	uint32_t 		ic_haltflags;
+	uint8_t			ic_msg[2048];
+} __packed;
+
+#define VMBUS_ICMSG_SHUTDOWN_SIZE_MIN	\
+	__offsetof(struct vmbus_icmsg_shutdown, ic_msg[0])
+
+/* VMBUS_ICMSG_TYPE_TIMESYNC */
+struct vmbus_icmsg_timesync {
+	struct vmbus_icmsg_hdr	ic_hdr;
+	uint64_t		ic_hvtime;
+	uint64_t		ic_vmtime;
+	uint64_t		ic_rtt;
+	uint8_t			ic_tsflags;	/* VMBUS_ICMSG_TS_FLAG_ */
+} __packed;
+
+/* VMBUS_ICMSG_TYPE_TIMESYNC, MSGVER4 */
+struct vmbus_icmsg_timesync4 {
+	struct vmbus_icmsg_hdr	ic_hdr;
+	uint64_t		ic_hvtime;
+	uint64_t		ic_sent_tc;
+	uint8_t			ic_tsflags;	/* VMBUS_ICMSG_TS_FLAG_ */
+	uint8_t			ic_rsvd[5];
+} __packed;
+
+#define VMBUS_ICMSG_TS_FLAG_SYNC	0x01
+#define VMBUS_ICMSG_TS_FLAG_SAMPLE	0x02
+
+#define VMBUS_ICMSG_TS_BASE		116444736000000000ULL
+
+#endif	/* !_VMBUS_ICREG_H_ */


Property changes on: trunk/sys/dev/hyperv/utilities/vmbus_icreg.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
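
The version macros above keep the major number in the low 16 bits and the
minor number in the high 16 bits, so raw values do not sort in version
order; VMBUS_ICVER_SWAP() exchanges the halves so plain integer comparison
works. A standalone sketch (the macros are copied here only so the
demonstration compiles outside the kernel):

    #include <stdint.h>
    #include <stdio.h>

    #define VMBUS_IC_VERSION(major, minor) \
            ((major) | (((uint32_t)(minor)) << 16))
    #define VMBUS_ICVER_MAJOR(ver)  ((ver) & 0xffff)
    #define VMBUS_ICVER_MINOR(ver)  (((ver) & 0xffff0000) >> 16)
    #define VMBUS_ICVER_SWAP(ver)   \
            ((VMBUS_ICVER_MAJOR((ver)) << 16) | VMBUS_ICVER_MINOR((ver)))

    int
    main(void)
    {
            uint32_t v31 = VMBUS_IC_VERSION(3, 1);  /* 0x00010003 */
            uint32_t v40 = VMBUS_IC_VERSION(4, 0);  /* 0x00000004 */

            /* Raw comparison gets it wrong: claims 3.1 > 4.0. */
            printf("raw:  %d\n", v31 > v40);                /* prints 1 */
            /* Swapped comparison is right: 0x00030001 < 0x00040000. */
            printf("swap: %d\n",
                VMBUS_ICVER_SWAP(v31) < VMBUS_ICVER_SWAP(v40)); /* 1 */
            printf("%u.%u\n",
                VMBUS_ICVER_MAJOR(v31), VMBUS_ICVER_MINOR(v31)); /* 3.1 */
            return (0);
    }
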
Added: trunk/sys/dev/hyperv/utilities/vmbus_icvar.h
===================================================================
--- trunk/sys/dev/hyperv/utilities/vmbus_icvar.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/utilities/vmbus_icvar.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,62 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/utilities/vmbus_icvar.h 311230 2017-01-04 02:39:00Z sephe $
+ */
+
+#ifndef _VMBUS_ICVAR_H_
+#define _VMBUS_ICVAR_H_
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/vmbus.h>
+
+struct vmbus_ic_softc {
+	device_t		ic_dev;
+	uint8_t			*ic_buf;
+	int			ic_buflen;
+	uint32_t		ic_fwver;	/* framework version */
+	uint32_t		ic_msgver;	/* message version */
+};
+
+struct vmbus_ic_desc {
+	const struct hyperv_guid	ic_guid;
+	const char			*ic_desc;
+};
+
+#define VMBUS_IC_DESC_END	{ .ic_desc = NULL }
+
+int		vmbus_ic_attach(device_t dev, vmbus_chan_callback_t cb);
+int		vmbus_ic_detach(device_t dev);
+int		vmbus_ic_probe(device_t dev, const struct vmbus_ic_desc descs[]);
+int		vmbus_ic_negomsg(struct vmbus_ic_softc *sc, void *data,
+		    int *dlen, uint32_t fw_ver, uint32_t msg_ver);
+int		vmbus_ic_sendresp(struct vmbus_ic_softc *sc,
+		    struct vmbus_channel *chan, void *data, int dlen,
+		    uint64_t xactid);
+
+#endif	/* !_VMBUS_ICVAR_H_ */


Property changes on: trunk/sys/dev/hyperv/utilities/vmbus_icvar.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/utilities/vmbus_shutdown.c
===================================================================
--- trunk/sys/dev/hyperv/utilities/vmbus_shutdown.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/utilities/vmbus_shutdown.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,168 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014,2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/utilities/vmbus_shutdown.c 311254 2017-01-04 05:24:16Z sephe $");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/reboot.h>
+#include <sys/systm.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/vmbus.h>
+#include <dev/hyperv/utilities/vmbus_icreg.h>
+#include <dev/hyperv/utilities/vmbus_icvar.h>
+
+#define VMBUS_SHUTDOWN_FWVER_MAJOR	3
+#define VMBUS_SHUTDOWN_FWVER		\
+	VMBUS_IC_VERSION(VMBUS_SHUTDOWN_FWVER_MAJOR, 0)
+
+#define VMBUS_SHUTDOWN_MSGVER_MAJOR	3
+#define VMBUS_SHUTDOWN_MSGVER		\
+	VMBUS_IC_VERSION(VMBUS_SHUTDOWN_MSGVER_MAJOR, 0)
+
+static int			vmbus_shutdown_probe(device_t);
+static int			vmbus_shutdown_attach(device_t);
+
+static const struct vmbus_ic_desc vmbus_shutdown_descs[] = {
+	{
+		.ic_guid = { .hv_guid = {
+		    0x31, 0x60, 0x0b, 0x0e, 0x13, 0x52, 0x34, 0x49,
+		    0x81, 0x8b, 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb } },
+		.ic_desc = "Hyper-V Shutdown"
+	},
+	VMBUS_IC_DESC_END
+};
+
+static device_method_t vmbus_shutdown_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		vmbus_shutdown_probe),
+	DEVMETHOD(device_attach,	vmbus_shutdown_attach),
+	DEVMETHOD(device_detach,	vmbus_ic_detach),
+	DEVMETHOD_END
+};
+
+static driver_t vmbus_shutdown_driver = {
+	"hvshutdown",
+	vmbus_shutdown_methods,
+	sizeof(struct vmbus_ic_softc)
+};
+
+static devclass_t vmbus_shutdown_devclass;
+
+DRIVER_MODULE(hv_shutdown, vmbus, vmbus_shutdown_driver,
+    vmbus_shutdown_devclass, NULL, NULL);
+MODULE_VERSION(hv_shutdown, 1);
+MODULE_DEPEND(hv_shutdown, vmbus, 1, 1, 1);
+
+static void
+vmbus_shutdown_cb(struct vmbus_channel *chan, void *xsc)
+{
+	struct vmbus_ic_softc *sc = xsc;
+	struct vmbus_icmsg_hdr *hdr;
+	struct vmbus_icmsg_shutdown *msg;
+	int dlen, error, do_shutdown = 0;
+	uint64_t xactid;
+	void *data;
+
+	/*
+	 * Receive request.
+	 */
+	data = sc->ic_buf;
+	dlen = sc->ic_buflen;
+	error = vmbus_chan_recv(chan, data, &dlen, &xactid);
+	KASSERT(error != ENOBUFS, ("icbuf is not large enough"));
+	if (error)
+		return;
+
+	if (dlen < sizeof(*hdr)) {
+		device_printf(sc->ic_dev, "invalid data len %d\n", dlen);
+		return;
+	}
+	hdr = data;
+
+	/*
+	 * Update request, which will be echoed back as response.
+	 */
+	switch (hdr->ic_type) {
+	case VMBUS_ICMSG_TYPE_NEGOTIATE:
+		error = vmbus_ic_negomsg(sc, data, &dlen,
+		    VMBUS_SHUTDOWN_FWVER, VMBUS_SHUTDOWN_MSGVER);
+		if (error)
+			return;
+		break;
+
+	case VMBUS_ICMSG_TYPE_SHUTDOWN:
+		if (dlen < VMBUS_ICMSG_SHUTDOWN_SIZE_MIN) {
+			device_printf(sc->ic_dev, "invalid shutdown len %d\n",
+			    dlen);
+			return;
+		}
+		msg = data;
+
+		/* XXX ic_flags definition? */
+		if (msg->ic_haltflags == 0 || msg->ic_haltflags == 1) {
+			device_printf(sc->ic_dev, "shutdown requested\n");
+			hdr->ic_status = VMBUS_ICMSG_STATUS_OK;
+			do_shutdown = 1;
+		} else {
+			device_printf(sc->ic_dev, "unknown shutdown flags "
+			    "0x%08x\n", msg->ic_haltflags);
+			hdr->ic_status = VMBUS_ICMSG_STATUS_FAIL;
+		}
+		break;
+
+	default:
+		device_printf(sc->ic_dev, "got 0x%08x icmsg\n", hdr->ic_type);
+		break;
+	}
+
+	/*
+	 * Send response by echoing the request back.
+	 */
+	vmbus_ic_sendresp(sc, chan, data, dlen, xactid);
+
+	if (do_shutdown)
+		shutdown_nice(RB_POWEROFF);
+}
+
+static int
+vmbus_shutdown_probe(device_t dev)
+{
+
+	return (vmbus_ic_probe(dev, vmbus_shutdown_descs));
+}
+
+static int
+vmbus_shutdown_attach(device_t dev)
+{
+
+	return (vmbus_ic_attach(dev, vmbus_shutdown_cb));
+}


Property changes on: trunk/sys/dev/hyperv/utilities/vmbus_shutdown.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
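
The raw bytes in vmbus_shutdown_descs above are stored in the mixed-endian
layout expected by hyperv_guid2str() (added in hyperv.c later in this
commit), which byte-swaps the first three groups when printing. A hedged
sketch of the round trip, assuming HYPERV_GUID_STRLEN from hyperv.h:

    char str[HYPERV_GUID_STRLEN];

    hyperv_guid2str(&vmbus_shutdown_descs[0].ic_guid, str, sizeof(str));
    /*
     * str is now "0e0b6031-5213-4934-818b-38d90ced39db": the leading
     * bytes 0x31 0x60 0x0b 0x0e print reversed as "0e0b6031", and
     * likewise for the next two groups.
     */
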
Added: trunk/sys/dev/hyperv/utilities/vmbus_timesync.c
===================================================================
--- trunk/sys/dev/hyperv/utilities/vmbus_timesync.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/utilities/vmbus_timesync.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,261 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014,2016-2017 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/utilities/vmbus_timesync.c 324461 2017-10-10 02:22:34Z sephe $");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/vmbus.h>
+#include <dev/hyperv/utilities/vmbus_icreg.h>
+#include <dev/hyperv/utilities/vmbus_icvar.h>
+
+#define VMBUS_TIMESYNC_FWVER_MAJOR	3
+#define VMBUS_TIMESYNC_FWVER		\
+	VMBUS_IC_VERSION(VMBUS_TIMESYNC_FWVER_MAJOR, 0)
+
+#define VMBUS_TIMESYNC_MSGVER_MAJOR	4
+#define VMBUS_TIMESYNC_MSGVER		\
+	VMBUS_IC_VERSION(VMBUS_TIMESYNC_MSGVER_MAJOR, 0)
+
+#define VMBUS_TIMESYNC_MSGVER4(sc)	\
+	VMBUS_ICVER_LE(VMBUS_IC_VERSION(4, 0), (sc)->ic_msgver)
+
+#define VMBUS_TIMESYNC_DORTT(sc)	\
+	(VMBUS_TIMESYNC_MSGVER4((sc)) && hyperv_tc64 != NULL)
+
+static int			vmbus_timesync_probe(device_t);
+static int			vmbus_timesync_attach(device_t);
+
+static const struct vmbus_ic_desc vmbus_timesync_descs[] = {
+	{
+		.ic_guid = { .hv_guid = {
+		    0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49,
+		    0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf } },
+		.ic_desc = "Hyper-V Timesync"
+	},
+	VMBUS_IC_DESC_END
+};
+
+static device_method_t vmbus_timesync_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		vmbus_timesync_probe),
+	DEVMETHOD(device_attach,	vmbus_timesync_attach),
+	DEVMETHOD(device_detach,	vmbus_ic_detach),
+	DEVMETHOD_END
+};
+
+static driver_t vmbus_timesync_driver = {
+	"hvtimesync",
+	vmbus_timesync_methods,
+	sizeof(struct vmbus_ic_softc)
+};
+
+static devclass_t vmbus_timesync_devclass;
+
+DRIVER_MODULE(hv_timesync, vmbus, vmbus_timesync_driver,
+    vmbus_timesync_devclass, NULL, NULL);
+MODULE_VERSION(hv_timesync, 1);
+MODULE_DEPEND(hv_timesync, vmbus, 1, 1, 1);
+
+SYSCTL_NODE(_hw, OID_AUTO, hvtimesync, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
+    "Hyper-V timesync interface");
+
+static int vmbus_ts_ignore_sync = 0;
+SYSCTL_INT(_hw_hvtimesync, OID_AUTO, ignore_sync, CTLFLAG_RWTUN,
+    &vmbus_ts_ignore_sync, 0, "Ignore the sync request.");
+
+/*
+ * Trigger a sample sync when the drift exceeds this threshold (ms).
+ * Set it to a negative value to ignore sample requests entirely.
+ */
+static int vmbus_ts_sample_thresh = 100;
+SYSCTL_INT(_hw_hvtimesync, OID_AUTO, sample_thresh, CTLFLAG_RWTUN,
+    &vmbus_ts_sample_thresh, 0,
+    "Threshold that makes sample request trigger the sync (unit: ms).");
+
+static int vmbus_ts_sample_verbose = 0;
+SYSCTL_INT(_hw_hvtimesync, OID_AUTO, sample_verbose, CTLFLAG_RWTUN,
+    &vmbus_ts_sample_verbose, 0, "Increase sample request verbosity.");
+
+static void
+vmbus_timesync(struct vmbus_ic_softc *sc, uint64_t hvtime, uint64_t sent_tc,
+    uint8_t tsflags)
+{
+	struct timespec vm_ts;
+	uint64_t hv_ns, vm_ns, rtt = 0;
+
+	if (VMBUS_TIMESYNC_DORTT(sc))
+		rtt = hyperv_tc64() - sent_tc;
+
+	hv_ns = (hvtime - VMBUS_ICMSG_TS_BASE + rtt) * HYPERV_TIMER_NS_FACTOR;
+	nanotime(&vm_ts);
+	vm_ns = (vm_ts.tv_sec * NANOSEC) + vm_ts.tv_nsec;
+
+	if ((tsflags & VMBUS_ICMSG_TS_FLAG_SYNC) && !vmbus_ts_ignore_sync) {
+		struct timespec hv_ts;
+
+		if (bootverbose) {
+			device_printf(sc->ic_dev, "apply sync request, "
+			    "hv: %ju, vm: %ju\n",
+			    (uintmax_t)hv_ns, (uintmax_t)vm_ns);
+		}
+		hv_ts.tv_sec = hv_ns / NANOSEC;
+		hv_ts.tv_nsec = hv_ns % NANOSEC;
+		kern_clock_settime(curthread, CLOCK_REALTIME, &hv_ts);
+		/* Done! */
+		return;
+	}
+
+	if ((tsflags & VMBUS_ICMSG_TS_FLAG_SAMPLE) &&
+	    vmbus_ts_sample_thresh >= 0) {
+		int64_t diff;
+
+		if (vmbus_ts_sample_verbose) {
+			device_printf(sc->ic_dev, "sample request, "
+			    "hv: %ju, vm: %ju\n",
+			    (uintmax_t)hv_ns, (uintmax_t)vm_ns);
+		}
+
+		if (hv_ns > vm_ns)
+			diff = hv_ns - vm_ns;
+		else
+			diff = vm_ns - hv_ns;
+		/* nanosec -> millisec */
+		diff /= 1000000;
+
+		if (diff > vmbus_ts_sample_thresh) {
+			struct timespec hv_ts;
+
+			if (bootverbose) {
+				device_printf(sc->ic_dev,
+				    "apply sample request, hv: %ju, vm: %ju\n",
+				    (uintmax_t)hv_ns, (uintmax_t)vm_ns);
+			}
+			hv_ts.tv_sec = hv_ns / NANOSEC;
+			hv_ts.tv_nsec = hv_ns % NANOSEC;
+			kern_clock_settime(curthread, CLOCK_REALTIME, &hv_ts);
+		}
+		/* Done */
+		return;
+	}
+}
+
+static void
+vmbus_timesync_cb(struct vmbus_channel *chan, void *xsc)
+{
+	struct vmbus_ic_softc *sc = xsc;
+	struct vmbus_icmsg_hdr *hdr;
+	int dlen, error;
+	uint64_t xactid;
+	void *data;
+
+	/*
+	 * Receive request.
+	 */
+	data = sc->ic_buf;
+	dlen = sc->ic_buflen;
+	error = vmbus_chan_recv(chan, data, &dlen, &xactid);
+	KASSERT(error != ENOBUFS, ("icbuf is not large enough"));
+	if (error)
+		return;
+
+	if (dlen < sizeof(*hdr)) {
+		device_printf(sc->ic_dev, "invalid data len %d\n", dlen);
+		return;
+	}
+	hdr = data;
+
+	/*
+	 * Update request, which will be echoed back as response.
+	 */
+	switch (hdr->ic_type) {
+	case VMBUS_ICMSG_TYPE_NEGOTIATE:
+		error = vmbus_ic_negomsg(sc, data, &dlen,
+		    VMBUS_TIMESYNC_FWVER, VMBUS_TIMESYNC_MSGVER);
+		if (error)
+			return;
+		if (VMBUS_TIMESYNC_DORTT(sc))
+			device_printf(sc->ic_dev, "RTT\n");
+		break;
+
+	case VMBUS_ICMSG_TYPE_TIMESYNC:
+		if (VMBUS_TIMESYNC_MSGVER4(sc)) {
+			const struct vmbus_icmsg_timesync4 *msg4;
+
+			if (dlen < sizeof(*msg4)) {
+				device_printf(sc->ic_dev, "invalid timesync4 "
+				    "len %d\n", dlen);
+				return;
+			}
+			msg4 = data;
+			vmbus_timesync(sc, msg4->ic_hvtime, msg4->ic_sent_tc,
+			    msg4->ic_tsflags);
+		} else {
+			const struct vmbus_icmsg_timesync *msg;
+
+			if (dlen < sizeof(*msg)) {
+				device_printf(sc->ic_dev, "invalid timesync "
+				    "len %d\n", dlen);
+				return;
+			}
+			msg = data;
+			vmbus_timesync(sc, msg->ic_hvtime, 0, msg->ic_tsflags);
+		}
+		break;
+
+	default:
+		device_printf(sc->ic_dev, "got 0x%08x icmsg\n", hdr->ic_type);
+		break;
+	}
+
+	/*
+	 * Send response by echoing the request back.
+	 */
+	vmbus_ic_sendresp(sc, chan, data, dlen, xactid);
+}
+
+static int
+vmbus_timesync_probe(device_t dev)
+{
+
+	return (vmbus_ic_probe(dev, vmbus_timesync_descs));
+}
+
+static int
+vmbus_timesync_attach(device_t dev)
+{
+
+	return (vmbus_ic_attach(dev, vmbus_timesync_cb));
+}


Property changes on: trunk/sys/dev/hyperv/utilities/vmbus_timesync.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
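
For reference, the host timestamps handled above are in 100 ns units since
January 1, 1601 (the Windows epoch). VMBUS_ICMSG_TS_BASE is exactly the
1601-to-1970 offset in those units: 116444736000000000 ticks * 100 ns =
11644473600 s. A minimal sketch of the conversion done in vmbus_timesync(),
assuming HYPERV_TIMER_NS_FACTOR is 100 (per hyperv.h) and a hypothetical
host timestamp:

    uint64_t hvtime = 131000000000000000ULL;    /* hypothetical sample */
    uint64_t unix_ns = (hvtime - 116444736000000000ULL) * 100;
    /* unix_ns / 1000000000 == 1455526400, i.e. mid-February 2016. */
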
Added: trunk/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c
===================================================================
--- trunk/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,222 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016-2017 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c 324461 2017-10-10 02:22:34Z sephe $");
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/timetc.h>
+
+#include <machine/cpufunc.h>
+#include <machine/cputypes.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+
+#include <vm/vm.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/hyperv_busdma.h>
+#include <dev/hyperv/vmbus/hyperv_machdep.h>
+#include <dev/hyperv/vmbus/hyperv_reg.h>
+#include <dev/hyperv/vmbus/hyperv_var.h>
+
+struct hyperv_reftsc_ctx {
+	struct hyperv_reftsc	*tsc_ref;
+	struct hyperv_dma	tsc_ref_dma;
+};
+
+static d_open_t			hyperv_tsc_open;
+static d_mmap_t			hyperv_tsc_mmap;
+
+static struct timecounter	hyperv_tsc_timecounter = {
+	.tc_get_timecount	= NULL,	/* based on CPU vendor. */
+	.tc_poll_pps		= NULL,
+	.tc_counter_mask	= 0xffffffff,
+	.tc_frequency		= HYPERV_TIMER_FREQ,
+	.tc_name		= "Hyper-V-TSC",
+	.tc_quality		= 3000,
+	.tc_flags		= 0,
+	.tc_priv		= NULL
+};
+
+static struct cdevsw		hyperv_tsc_cdevsw = {
+	.d_version		= D_VERSION,
+	.d_open			= hyperv_tsc_open,
+	.d_mmap			= hyperv_tsc_mmap,
+	.d_name			= HYPERV_REFTSC_DEVNAME
+};
+
+static struct hyperv_reftsc_ctx	hyperv_ref_tsc;
+
+uint64_t
+hypercall_md(volatile void *hc_addr, uint64_t in_val,
+    uint64_t in_paddr, uint64_t out_paddr)
+{
+	uint64_t status;
+
+	__asm__ __volatile__ ("mov %0, %%r8" : : "r" (out_paddr): "r8");
+	__asm__ __volatile__ ("call *%3" : "=a" (status) :
+	    "c" (in_val), "d" (in_paddr), "m" (hc_addr));
+	return (status);
+}
+
+static int
+hyperv_tsc_open(struct cdev *dev __unused, int oflags, int devtype __unused,
+    struct thread *td __unused)
+{
+
+	if (oflags & FWRITE)
+		return (EPERM);
+	return (0);
+}
+
+static int
+hyperv_tsc_mmap(struct cdev *dev __unused, vm_ooffset_t offset,
+    vm_paddr_t *paddr, int nprot __unused, vm_memattr_t *memattr __unused)
+{
+
+	KASSERT(hyperv_ref_tsc.tsc_ref != NULL, ("reftsc has not been setup"));
+
+	/*
+	 * NOTE:
+	 * 'nprot' carries no information of interest to us;
+	 * write-open is already blocked by d_open.
+	 */
+
+	if (offset != 0)
+		return (EOPNOTSUPP);
+
+	*paddr = hyperv_ref_tsc.tsc_ref_dma.hv_paddr;
+	return (0);
+}
+
+#define HYPERV_TSC_TIMECOUNT(fence)					\
+static uint64_t								\
+hyperv_tc64_tsc_##fence(void)						\
+{									\
+	struct hyperv_reftsc *tsc_ref = hyperv_ref_tsc.tsc_ref;		\
+	uint32_t seq;							\
+									\
+	while ((seq = tsc_ref->tsc_seq) != 0) {				\
+		uint64_t disc, ret, tsc, scale;				\
+		int64_t ofs;						\
+									\
+		__compiler_membar();					\
+		scale = tsc_ref->tsc_scale;				\
+		ofs = tsc_ref->tsc_ofs;					\
+									\
+		fence();						\
+		tsc = rdtsc();						\
+									\
+		/* ret = ((tsc * scale) >> 64) + ofs */			\
+		__asm__ __volatile__ ("mulq %3" :			\
+		    "=d" (ret), "=a" (disc) :				\
+		    "a" (tsc), "r" (scale));				\
+		ret += ofs;						\
+									\
+		__compiler_membar();					\
+		if (tsc_ref->tsc_seq == seq)				\
+			return (ret);					\
+									\
+		/* Sequence changed; re-sync. */			\
+	}								\
+	/* Fallback to the generic timecounter, i.e. rdmsr. */		\
+	return (rdmsr(MSR_HV_TIME_REF_COUNT));				\
+}									\
+									\
+static u_int								\
+hyperv_tsc_timecount_##fence(struct timecounter *tc __unused)		\
+{									\
+									\
+	return (hyperv_tc64_tsc_##fence());				\
+}									\
+struct __hack
+
+HYPERV_TSC_TIMECOUNT(lfence);
+HYPERV_TSC_TIMECOUNT(mfence);
+
+static void
+hyperv_tsc_tcinit(void *dummy __unused)
+{
+	hyperv_tc64_t tc64 = NULL;
+	uint64_t val, orig;
+
+	if ((hyperv_features &
+	     (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC)) !=
+	    (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC) ||
+	    (cpu_feature & CPUID_SSE2) == 0)	/* SSE2 for mfence/lfence */
+		return;
+
+	switch (cpu_vendor_id) {
+	case CPU_VENDOR_AMD:
+		hyperv_tsc_timecounter.tc_get_timecount =
+		    hyperv_tsc_timecount_mfence;
+		tc64 = hyperv_tc64_tsc_mfence;
+		break;
+
+	case CPU_VENDOR_INTEL:
+		hyperv_tsc_timecounter.tc_get_timecount =
+		    hyperv_tsc_timecount_lfence;
+		tc64 = hyperv_tc64_tsc_lfence;
+		break;
+
+	default:
+		/* Unsupported CPU vendor. */
+		return;
+	}
+
+	hyperv_ref_tsc.tsc_ref = hyperv_dmamem_alloc(NULL, PAGE_SIZE, 0,
+	    sizeof(struct hyperv_reftsc), &hyperv_ref_tsc.tsc_ref_dma,
+	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
+	if (hyperv_ref_tsc.tsc_ref == NULL) {
+		printf("hyperv: reftsc page allocation failed\n");
+		return;
+	}
+
+	orig = rdmsr(MSR_HV_REFERENCE_TSC);
+	val = MSR_HV_REFTSC_ENABLE | (orig & MSR_HV_REFTSC_RSVD_MASK) |
+	    ((hyperv_ref_tsc.tsc_ref_dma.hv_paddr >> PAGE_SHIFT) <<
+	     MSR_HV_REFTSC_PGSHIFT);
+	wrmsr(MSR_HV_REFERENCE_TSC, val);
+
+	/* Register "enlightened" timecounter. */
+	tc_init(&hyperv_tsc_timecounter);
+
+	/* Install 64-bit timecounter method for other modules to use. */
+	KASSERT(tc64 != NULL, ("tc64 is not set"));
+	hyperv_tc64 = tc64;
+
+	/* Add device for mmap(2). */
+	make_dev(&hyperv_tsc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0444,
+	    HYPERV_REFTSC_DEVNAME);
+}
+SYSINIT(hyperv_tsc_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, hyperv_tsc_tcinit,
+    NULL);


Property changes on: trunk/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
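
The inline mulq above computes the high 64 bits of the 64x64-bit product
tsc * scale. A compiler-portable sketch of the same scaling step, assuming
a toolchain with GCC-style __uint128_t support (the driver itself keeps
the asm form):

    static inline uint64_t
    reftsc_scale(uint64_t tsc, uint64_t scale, int64_t ofs)
    {
            /* ret = ((tsc * scale) >> 64) + ofs, as in the asm. */
            return ((uint64_t)(((__uint128_t)tsc * scale) >> 64) + ofs);
    }
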
Added: trunk/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
===================================================================
--- trunk/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,47 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S 302167 2016-06-24 02:06:13Z sephe $
+ */
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+/*
+ * This is the Hyper-V vmbus channel direct callback interrupt.
+ * Only used when it is running on Hyper-V.
+ */
+	.text
+	SUPERALIGN_TEXT
+IDTVEC(vmbus_isr)
+	PUSH_FRAME
+	FAKE_MCOUNT(TF_RIP(%rsp))
+	movq	%rsp, %rdi
+	call	vmbus_handle_intr
+	MEXITCOUNT
+	jmp	doreti


Property changes on: trunk/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/hyperv.c
===================================================================
--- trunk/sys/dev/hyperv/vmbus/hyperv.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/hyperv.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,338 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Implements low-level interactions with Hyper-V/Azure
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/hyperv.c 332067 2018-04-05 12:59:49Z emaste $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/timetc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/pmap.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/hyperv_busdma.h>
+#include <dev/hyperv/vmbus/hyperv_machdep.h>
+#include <dev/hyperv/vmbus/hyperv_reg.h>
+#include <dev/hyperv/vmbus/hyperv_var.h>
+
+#define HYPERV_FREEBSD_BUILD		0ULL
+#define HYPERV_FREEBSD_VERSION		((uint64_t)__FreeBSD_version)
+#define HYPERV_FREEBSD_OSID		0ULL
+
+#define MSR_HV_GUESTID_BUILD_FREEBSD	\
+	(HYPERV_FREEBSD_BUILD & MSR_HV_GUESTID_BUILD_MASK)
+#define MSR_HV_GUESTID_VERSION_FREEBSD	\
+	((HYPERV_FREEBSD_VERSION << MSR_HV_GUESTID_VERSION_SHIFT) & \
+	 MSR_HV_GUESTID_VERSION_MASK)
+#define MSR_HV_GUESTID_OSID_FREEBSD	\
+	((HYPERV_FREEBSD_OSID << MSR_HV_GUESTID_OSID_SHIFT) & \
+	 MSR_HV_GUESTID_OSID_MASK)
+
+#define MSR_HV_GUESTID_FREEBSD		\
+	(MSR_HV_GUESTID_BUILD_FREEBSD |	\
+	 MSR_HV_GUESTID_VERSION_FREEBSD | \
+	 MSR_HV_GUESTID_OSID_FREEBSD |	\
+	 MSR_HV_GUESTID_OSTYPE_FREEBSD)
+
+struct hypercall_ctx {
+	void			*hc_addr;
+	vm_paddr_t		hc_paddr;
+};
+
+static u_int			hyperv_get_timecount(struct timecounter *);
+static bool			hyperv_identify(void);
+static void			hypercall_memfree(void);
+
+u_int				hyperv_ver_major;
+
+u_int				hyperv_features;
+u_int				hyperv_recommends;
+
+static u_int			hyperv_pm_features;
+static u_int			hyperv_features3;
+
+hyperv_tc64_t			hyperv_tc64;
+
+static struct timecounter	hyperv_timecounter = {
+	.tc_get_timecount	= hyperv_get_timecount,
+	.tc_poll_pps		= NULL,
+	.tc_counter_mask	= 0xffffffff,
+	.tc_frequency		= HYPERV_TIMER_FREQ,
+	.tc_name		= "Hyper-V",
+	.tc_quality		= 2000,
+	.tc_flags		= 0,
+	.tc_priv		= NULL
+};
+
+static struct hypercall_ctx	hypercall_context;
+
+static u_int
+hyperv_get_timecount(struct timecounter *tc __unused)
+{
+	return rdmsr(MSR_HV_TIME_REF_COUNT);
+}
+
+static uint64_t
+hyperv_tc64_rdmsr(void)
+{
+
+	return (rdmsr(MSR_HV_TIME_REF_COUNT));
+}
+
+uint64_t
+hypercall_post_message(bus_addr_t msg_paddr)
+{
+	return hypercall_md(hypercall_context.hc_addr,
+	    HYPERCALL_POST_MESSAGE, msg_paddr, 0);
+}
+
+uint64_t
+hypercall_signal_event(bus_addr_t monprm_paddr)
+{
+	return hypercall_md(hypercall_context.hc_addr,
+	    HYPERCALL_SIGNAL_EVENT, monprm_paddr, 0);
+}
+
+int
+hyperv_guid2str(const struct hyperv_guid *guid, char *buf, size_t sz)
+{
+	const uint8_t *d = guid->hv_guid;
+
+	return snprintf(buf, sz, "%02x%02x%02x%02x-"
+	    "%02x%02x-%02x%02x-%02x%02x-"
+	    "%02x%02x%02x%02x%02x%02x",
+	    d[3], d[2], d[1], d[0],
+	    d[5], d[4], d[7], d[6], d[8], d[9],
+	    d[10], d[11], d[12], d[13], d[14], d[15]);
+}
+
+static bool
+hyperv_identify(void)
+{
+	u_int regs[4];
+	unsigned int maxleaf;
+
+	if (vm_guest != VM_GUEST_HV)
+		return (false);
+
+	do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
+	maxleaf = regs[0];
+	if (maxleaf < CPUID_LEAF_HV_LIMITS)
+		return (false);
+
+	do_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
+	if (regs[0] != CPUID_HV_IFACE_HYPERV)
+		return (false);
+
+	do_cpuid(CPUID_LEAF_HV_FEATURES, regs);
+	if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0) {
+		/*
+		 * Hyper-V w/o Hypercall is impossible; someone
+		 * is faking Hyper-V.
+		 */
+		return (false);
+	}
+	hyperv_features = regs[0];
+	hyperv_pm_features = regs[2];
+	hyperv_features3 = regs[3];
+
+	do_cpuid(CPUID_LEAF_HV_IDENTITY, regs);
+	hyperv_ver_major = regs[1] >> 16;
+	printf("Hyper-V Version: %d.%d.%d [SP%d]\n",
+	    hyperv_ver_major, regs[1] & 0xffff, regs[0], regs[2]);
+
+	printf("  Features=0x%b\n", hyperv_features,
+	    "\020"
+	    "\001VPRUNTIME"	/* MSR_HV_VP_RUNTIME */
+	    "\002TMREFCNT"	/* MSR_HV_TIME_REF_COUNT */
+	    "\003SYNIC"		/* MSRs for SynIC */
+	    "\004SYNTM"		/* MSRs for SynTimer */
+	    "\005APIC"		/* MSR_HV_{EOI,ICR,TPR} */
+	    "\006HYPERCALL"	/* MSR_HV_{GUEST_OS_ID,HYPERCALL} */
+	    "\007VPINDEX"	/* MSR_HV_VP_INDEX */
+	    "\010RESET"		/* MSR_HV_RESET */
+	    "\011STATS"		/* MSR_HV_STATS_ */
+	    "\012REFTSC"	/* MSR_HV_REFERENCE_TSC */
+	    "\013IDLE"		/* MSR_HV_GUEST_IDLE */
+	    "\014TMFREQ"	/* MSR_HV_{TSC,APIC}_FREQUENCY */
+	    "\015DEBUG");	/* MSR_HV_SYNTH_DEBUG_ */
+	printf("  PM Features=0x%b [C%u]\n",
+	    (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK),
+	    "\020"
+	    "\005C3HPET",	/* HPET is required for C3 state */
+	    CPUPM_HV_CSTATE(hyperv_pm_features));
+	printf("  Features3=0x%b\n", hyperv_features3,
+	    "\020"
+	    "\001MWAIT"		/* MWAIT */
+	    "\002DEBUG"		/* guest debug support */
+	    "\003PERFMON"	/* performance monitor */
+	    "\004PCPUDPE"	/* physical CPU dynamic partition event */
+	    "\005XMMHC"		/* hypercall input through XMM regs */
+	    "\006IDLE"		/* guest idle support */
+	    "\007SLEEP"		/* hypervisor sleep support */
+	    "\010NUMA"		/* NUMA distance query support */
+	    "\011TMFREQ"	/* timer frequency query (TSC, LAPIC) */
+	    "\012SYNCMC"	/* inject synthetic machine checks */
+	    "\013CRASH"		/* MSRs for guest crash */
+	    "\014DEBUGMSR"	/* MSRs for guest debug */
+	    "\015NPIEP"		/* NPIEP */
+	    "\016HVDIS");	/* disabling hypervisor */
+
+	do_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs);
+	hyperv_recommends = regs[0];
+	if (bootverbose)
+		printf("  Recommends: %08x %08x\n", regs[0], regs[1]);
+
+	do_cpuid(CPUID_LEAF_HV_LIMITS, regs);
+	if (bootverbose) {
+		printf("  Limits: Vcpu:%d Lcpu:%d Int:%d\n",
+		    regs[0], regs[1], regs[2]);
+	}
+
+	if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) {
+		do_cpuid(CPUID_LEAF_HV_HWFEATURES, regs);
+		if (bootverbose) {
+			printf("  HW Features: %08x, AMD: %08x\n",
+			    regs[0], regs[3]);
+		}
+	}
+
+	return (true);
+}
+
+static void
+hyperv_init(void *dummy __unused)
+{
+	if (!hyperv_identify()) {
+		/* Not Hyper-V; reset guest id to the generic one. */
+		if (vm_guest == VM_GUEST_HV)
+			vm_guest = VM_GUEST_VM;
+		return;
+	}
+
+	/* Set guest id */
+	wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_FREEBSD);
+
+	if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) {
+		/* Register Hyper-V timecounter */
+		tc_init(&hyperv_timecounter);
+
+		/*
+		 * Install 64-bit timecounter method for other modules
+		 * to use.
+		 */
+		hyperv_tc64 = hyperv_tc64_rdmsr;
+	}
+}
+SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init,
+    NULL);
+
+static void
+hypercall_memfree(void)
+{
+	kmem_free(kernel_arena, (vm_offset_t)hypercall_context.hc_addr,
+	    PAGE_SIZE);
+	hypercall_context.hc_addr = NULL;
+}
+
+static void
+hypercall_create(void *arg __unused)
+{
+	uint64_t hc, hc_orig;
+
+	if (vm_guest != VM_GUEST_HV)
+		return;
+
+	/*
+	 * NOTE:
+	 * - busdma(9), i.e. hyperv_dmamem APIs, can _not_ be used due to
+	 *   the NX bit.
+	 * - Assume kmem_malloc() returns properly aligned memory.
+	 */
+	hypercall_context.hc_addr = (void *)kmem_malloc(kernel_arena, PAGE_SIZE,
+	    M_WAITOK);
+	hypercall_context.hc_paddr = vtophys(hypercall_context.hc_addr);
+
+	/* Get the 'reserved' bits, which must be preserved. */
+	hc_orig = rdmsr(MSR_HV_HYPERCALL);
+
+	/*
+	 * Setup the Hypercall page.
+	 *
+	 * NOTE: 'reserved' bits MUST be preserved.
+	 */
+	hc = ((hypercall_context.hc_paddr >> PAGE_SHIFT) <<
+	    MSR_HV_HYPERCALL_PGSHIFT) |
+	    (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) |
+	    MSR_HV_HYPERCALL_ENABLE;
+	wrmsr(MSR_HV_HYPERCALL, hc);
+
+	/*
+	 * Confirm that Hypercall page did get setup.
+	 */
+	hc = rdmsr(MSR_HV_HYPERCALL);
+	if ((hc & MSR_HV_HYPERCALL_ENABLE) == 0) {
+		printf("hyperv: Hypercall setup failed\n");
+		hypercall_memfree();
+		/* Can't perform any Hyper-V-specific actions. */
+		vm_guest = VM_GUEST_VM;
+		return;
+	}
+	if (bootverbose)
+		printf("hyperv: Hypercall created\n");
+}
+SYSINIT(hypercall_ctor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_create, NULL);
+
+static void
+hypercall_destroy(void *arg __unused)
+{
+	uint64_t hc;
+
+	if (hypercall_context.hc_addr == NULL)
+		return;
+
+	/* Disable Hypercall */
+	hc = rdmsr(MSR_HV_HYPERCALL);
+	wrmsr(MSR_HV_HYPERCALL, (hc & MSR_HV_HYPERCALL_RSVD_MASK));
+	hypercall_memfree();
+
+	if (bootverbose)
+		printf("hyperv: Hypercall destroyed\n");
+}
+SYSUNINIT(hypercall_dtor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_destroy,
+    NULL);


Property changes on: trunk/sys/dev/hyperv/vmbus/hyperv.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
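
The MSR_HV_GUESTID_FREEBSD value that hyperv_init() writes to
MSR_HV_GUEST_OS_ID packs build, version, OS ID and OS type into one 64-bit
word. A worked example, using a hypothetical stable/10-era
__FreeBSD_version of 1003000 (0xf4df8):

    /*
     * OSTYPE_FREEBSD = (0x02 << 56) | MSR_HV_GUESTID_OPENSRC
     *                = 0x8200000000000000
     * VERSION        = 1003000 << 16 = 0x0000000f4df80000
     * BUILD, OSID    = 0
     * guest OS id    = 0x8200000f4df80000
     */
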
Added: trunk/sys/dev/hyperv/vmbus/hyperv_busdma.c
===================================================================
--- trunk/sys/dev/hyperv/vmbus/hyperv_busdma.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/hyperv_busdma.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,99 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/hyperv_busdma.c 302116 2016-06-23 05:35:08Z sephe $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+
+#include <machine/bus.h>
+
+#include <dev/hyperv/include/hyperv_busdma.h>
+
+#define HYPERV_DMA_MASK	(BUS_DMA_WAITOK | BUS_DMA_NOWAIT | BUS_DMA_ZERO)
+
+void
+hyperv_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
+{
+	bus_addr_t *paddr = arg;
+
+	if (error)
+		return;
+
+	KASSERT(nseg == 1, ("too many segments %d!", nseg));
+	*paddr = segs->ds_addr;
+}
+
+void *
+hyperv_dmamem_alloc(bus_dma_tag_t parent_dtag, bus_size_t alignment,
+    bus_addr_t boundary, bus_size_t size, struct hyperv_dma *dma, int flags)
+{
+	void *ret;
+	int error;
+
+	error = bus_dma_tag_create(parent_dtag, /* parent */
+	    alignment,		/* alignment */
+	    boundary,		/* boundary */
+	    BUS_SPACE_MAXADDR,	/* lowaddr */
+	    BUS_SPACE_MAXADDR,	/* highaddr */
+	    NULL, NULL,		/* filter, filterarg */
+	    size,		/* maxsize */
+	    1,			/* nsegments */
+	    size,		/* maxsegsize */
+	    0,			/* flags */
+	    NULL,		/* lockfunc */
+	    NULL,		/* lockfuncarg */
+	    &dma->hv_dtag);
+	if (error)
+		return NULL;
+
+	error = bus_dmamem_alloc(dma->hv_dtag, &ret,
+	    (flags & HYPERV_DMA_MASK) | BUS_DMA_COHERENT, &dma->hv_dmap);
+	if (error) {
+		bus_dma_tag_destroy(dma->hv_dtag);
+		return NULL;
+	}
+
+	error = bus_dmamap_load(dma->hv_dtag, dma->hv_dmap, ret, size,
+	    hyperv_dma_map_paddr, &dma->hv_paddr, BUS_DMA_NOWAIT);
+	if (error) {
+		bus_dmamem_free(dma->hv_dtag, ret, dma->hv_dmap);
+		bus_dma_tag_destroy(dma->hv_dtag);
+		return NULL;
+	}
+	return ret;
+}
+
+void
+hyperv_dmamem_free(struct hyperv_dma *dma, void *ptr)
+{
+	bus_dmamap_unload(dma->hv_dtag, dma->hv_dmap);
+	bus_dmamem_free(dma->hv_dtag, ptr, dma->hv_dmap);
+	bus_dma_tag_destroy(dma->hv_dtag);
+}


Property changes on: trunk/sys/dev/hyperv/vmbus/hyperv_busdma.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
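
A hedged usage sketch of the pair above: allocate one zeroed, page-aligned
DMA page, use its kernel virtual address and bus address, then release it:

    struct hyperv_dma dma;
    void *buf;

    buf = hyperv_dmamem_alloc(NULL, PAGE_SIZE, 0, PAGE_SIZE, &dma,
        BUS_DMA_WAITOK | BUS_DMA_ZERO);
    if (buf != NULL) {
            /* buf is the KVA; dma.hv_paddr is the bus address. */
            hyperv_dmamem_free(&dma, buf);
    }
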
Added: trunk/sys/dev/hyperv/vmbus/hyperv_machdep.h
===================================================================
--- trunk/sys/dev/hyperv/vmbus/hyperv_machdep.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/hyperv_machdep.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,38 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/vmbus/hyperv_machdep.h 302170 2016-06-24 02:30:14Z sephe $
+ */
+
+#ifndef _HYPERV_MACHDEP_H_
+#define _HYPERV_MACHDEP_H_
+
+#include <sys/param.h>
+
+uint64_t	hypercall_md(volatile void *hc_addr, uint64_t in_val,
+		    uint64_t in_paddr, uint64_t out_paddr);
+
+#endif	/* !_HYPERV_MACHDEP_H_ */


Property changes on: trunk/sys/dev/hyperv/vmbus/hyperv_machdep.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/hyperv_reg.h
===================================================================
--- trunk/sys/dev/hyperv/vmbus/hyperv_reg.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/hyperv_reg.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,194 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/vmbus/hyperv_reg.h 311223 2017-01-04 01:58:38Z sephe $
+ */
+
+#ifndef _HYPERV_REG_H_
+#define _HYPERV_REG_H_
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+/*
+ * Hyper-V Synthetic MSRs
+ */
+
+#define MSR_HV_GUEST_OS_ID		0x40000000
+#define MSR_HV_GUESTID_BUILD_MASK	0xffffULL
+#define MSR_HV_GUESTID_VERSION_MASK	0x0000ffffffff0000ULL
+#define MSR_HV_GUESTID_VERSION_SHIFT	16
+#define MSR_HV_GUESTID_OSID_MASK	0x00ff000000000000ULL
+#define MSR_HV_GUESTID_OSID_SHIFT	48
+#define MSR_HV_GUESTID_OSTYPE_MASK	0x7f00000000000000ULL
+#define MSR_HV_GUESTID_OSTYPE_SHIFT	56
+#define MSR_HV_GUESTID_OPENSRC		0x8000000000000000ULL
+#define MSR_HV_GUESTID_OSTYPE_LINUX	\
+	((0x01ULL << MSR_HV_GUESTID_OSTYPE_SHIFT) | MSR_HV_GUESTID_OPENSRC)
+#define MSR_HV_GUESTID_OSTYPE_FREEBSD	\
+	((0x02ULL << MSR_HV_GUESTID_OSTYPE_SHIFT) | MSR_HV_GUESTID_OPENSRC)
+
+#define MSR_HV_HYPERCALL		0x40000001
+#define MSR_HV_HYPERCALL_ENABLE		0x0001ULL
+#define MSR_HV_HYPERCALL_RSVD_MASK	0x0ffeULL
+#define MSR_HV_HYPERCALL_PGSHIFT	12
+
+#define MSR_HV_VP_INDEX			0x40000002
+
+#define MSR_HV_REFERENCE_TSC		0x40000021
+#define MSR_HV_REFTSC_ENABLE		0x0001ULL
+#define MSR_HV_REFTSC_RSVD_MASK		0x0ffeULL
+#define MSR_HV_REFTSC_PGSHIFT		12
+
+#define MSR_HV_SCONTROL			0x40000080
+#define MSR_HV_SCTRL_ENABLE		0x0001ULL
+#define MSR_HV_SCTRL_RSVD_MASK		0xfffffffffffffffeULL
+
+#define MSR_HV_SIEFP			0x40000082
+#define MSR_HV_SIEFP_ENABLE		0x0001ULL
+#define MSR_HV_SIEFP_RSVD_MASK		0x0ffeULL
+#define MSR_HV_SIEFP_PGSHIFT		12
+
+#define MSR_HV_SIMP			0x40000083
+#define MSR_HV_SIMP_ENABLE		0x0001ULL
+#define MSR_HV_SIMP_RSVD_MASK		0x0ffeULL
+#define MSR_HV_SIMP_PGSHIFT		12
+
+#define MSR_HV_EOM			0x40000084
+
+#define MSR_HV_SINT0			0x40000090
+#define MSR_HV_SINT_VECTOR_MASK		0x00ffULL
+#define MSR_HV_SINT_RSVD1_MASK		0xff00ULL
+#define MSR_HV_SINT_MASKED		0x00010000ULL
+#define MSR_HV_SINT_AUTOEOI		0x00020000ULL
+#define MSR_HV_SINT_RSVD2_MASK		0xfffffffffffc0000ULL
+#define MSR_HV_SINT_RSVD_MASK		(MSR_HV_SINT_RSVD1_MASK |	\
+					 MSR_HV_SINT_RSVD2_MASK)
+
+#define MSR_HV_STIMER0_CONFIG		0x400000b0
+#define MSR_HV_STIMER_CFG_ENABLE	0x0001ULL
+#define MSR_HV_STIMER_CFG_PERIODIC	0x0002ULL
+#define MSR_HV_STIMER_CFG_LAZY		0x0004ULL
+#define MSR_HV_STIMER_CFG_AUTOEN	0x0008ULL
+#define MSR_HV_STIMER_CFG_SINT_MASK	0x000f0000ULL
+#define MSR_HV_STIMER_CFG_SINT_SHIFT	16
+
+#define MSR_HV_STIMER0_COUNT		0x400000b1
+
+/*
+ * CPUID leaves
+ */
+
+#define CPUID_LEAF_HV_MAXLEAF		0x40000000
+
+#define CPUID_LEAF_HV_INTERFACE		0x40000001
+#define CPUID_HV_IFACE_HYPERV		0x31237648	/* HV#1 */
+
+#define CPUID_LEAF_HV_IDENTITY		0x40000002
+
+#define CPUID_LEAF_HV_FEATURES		0x40000003
+/* EAX: features; see the CPUID_HV_MSR_ definitions in include/hyperv.h */
+/* ECX: power management features */
+#define CPUPM_HV_CSTATE_MASK		0x000f	/* deepest C-state */
+#define CPUPM_HV_C3_HPET		0x0010	/* C3 requires HPET */
+#define CPUPM_HV_CSTATE(f)		((f) & CPUPM_HV_CSTATE_MASK)
+/* EDX: features3 */
+#define CPUID3_HV_MWAIT			0x0001	/* MWAIT */
+#define CPUID3_HV_XMM_HYPERCALL		0x0010	/* Hypercall input through
+						 * XMM regs */
+#define CPUID3_HV_GUEST_IDLE		0x0020	/* guest idle */
+#define CPUID3_HV_NUMA			0x0080	/* NUMA distance query */
+#define CPUID3_HV_TIME_FREQ		0x0100	/* timer frequency query
+						 * (TSC, LAPIC) */
+#define CPUID3_HV_MSR_CRASH		0x0400	/* MSRs for guest crash */
+
+#define CPUID_LEAF_HV_RECOMMENDS	0x40000004
+#define CPUID_LEAF_HV_LIMITS		0x40000005
+#define CPUID_LEAF_HV_HWFEATURES	0x40000006
+
+/*
+ * Hyper-V Monitor Notification Facility
+ */
+struct hyperv_mon_param {
+	uint32_t	mp_connid;
+	uint16_t	mp_evtflag_ofs;
+	uint16_t	mp_rsvd;
+} __packed;
+
+/*
+ * Hyper-V message types
+ */
+#define HYPERV_MSGTYPE_NONE		0
+#define HYPERV_MSGTYPE_CHANNEL		1
+#define HYPERV_MSGTYPE_TIMER_EXPIRED	0x80000010
+
+/*
+ * Hypercall status codes
+ */
+#define HYPERCALL_STATUS_SUCCESS	0x0000
+
+/*
+ * Hypercall input values
+ */
+#define HYPERCALL_POST_MESSAGE		0x005c
+#define HYPERCALL_SIGNAL_EVENT		0x005d
+
+/*
+ * Hypercall input parameters
+ */
+#define HYPERCALL_PARAM_ALIGN		8
+#if 0
+/*
+ * XXX
+ * <<Hypervisor Top Level Functional Specification 4.0b>> requires
+ * input parameters size to be multiple of 8, however, many post
+ * message input parameters do _not_ meet this requirement.
+ */
+#define HYPERCALL_PARAM_SIZE_ALIGN	8
+#endif
+
+/*
+ * HYPERCALL_POST_MESSAGE
+ */
+#define HYPERCALL_POSTMSGIN_DSIZE_MAX	240
+#define HYPERCALL_POSTMSGIN_SIZE	256
+
+struct hypercall_postmsg_in {
+	uint32_t	hc_connid;
+	uint32_t	hc_rsvd;
+	uint32_t	hc_msgtype;	/* HYPERV_MSGTYPE_ */
+	uint32_t	hc_dsize;
+	uint8_t		hc_data[HYPERCALL_POSTMSGIN_DSIZE_MAX];
+} __packed;
+CTASSERT(sizeof(struct hypercall_postmsg_in) == HYPERCALL_POSTMSGIN_SIZE);
+
+/*
+ * HYPERCALL_SIGNAL_EVENT
+ *
+ * struct hyperv_mon_param.
+ */
+
+#endif	/* !_HYPERV_REG_H_ */


Property changes on: trunk/sys/dev/hyperv/vmbus/hyperv_reg.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/hyperv_var.h
===================================================================
--- trunk/sys/dev/hyperv/vmbus/hyperv_var.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/hyperv_var.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,40 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/vmbus/hyperv_var.h 310801 2016-12-30 02:13:21Z sephe $
+ */
+
+#ifndef _HYPERV_VAR_H_
+#define _HYPERV_VAR_H_
+
+#include <machine/bus.h>
+
+extern u_int	hyperv_recommends;
+
+uint64_t	hypercall_post_message(bus_addr_t msg_paddr);
+uint64_t	hypercall_signal_event(bus_addr_t monprm_paddr);
+
+#endif	/* !_HYPERV_VAR_H_ */


Property changes on: trunk/sys/dev/hyperv/vmbus/hyperv_var.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/i386/hyperv_machdep.c
===================================================================
--- trunk/sys/dev/hyperv/vmbus/i386/hyperv_machdep.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/i386/hyperv_machdep.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,52 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/i386/hyperv_machdep.c 302170 2016-06-24 02:30:14Z sephe $");
+
+#include <sys/param.h>
+#include <dev/hyperv/vmbus/hyperv_machdep.h>
+
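+/*
+ * 32-bit hypercall calling convention, as encoded in the asm below:
+ * the 64-bit input value, input paddr and output paddr are split
+ * into hi/lo halves and passed in EDX:EAX, EBX:ECX and EDI:ESI
+ * respectively; the 64-bit status is returned in EDX:EAX.
+ */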
+uint64_t
+hypercall_md(volatile void *hc_addr, uint64_t in_val,
+    uint64_t in_paddr, uint64_t out_paddr)
+{
+	uint32_t in_val_hi = in_val >> 32;
+	uint32_t in_val_lo = in_val & 0xFFFFFFFF;
+	uint32_t status_hi, status_lo;
+	uint32_t in_paddr_hi = in_paddr >> 32;
+	uint32_t in_paddr_lo = in_paddr & 0xFFFFFFFF;
+	uint32_t out_paddr_hi = out_paddr >> 32;
+	uint32_t out_paddr_lo = out_paddr & 0xFFFFFFFF;
+
+	__asm__ __volatile__ ("call *%8" : "=d"(status_hi), "=a"(status_lo) :
+	    "d" (in_val_hi), "a" (in_val_lo),
+	    "b" (in_paddr_hi), "c" (in_paddr_lo),
+	    "D"(out_paddr_hi), "S"(out_paddr_lo),
+	    "m" (hc_addr));
+	return (status_lo | ((uint64_t)status_hi << 32));
+}


Property changes on: trunk/sys/dev/hyperv/vmbus/i386/hyperv_machdep.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/i386/vmbus_vector.S
===================================================================
--- trunk/sys/dev/hyperv/vmbus/i386/vmbus_vector.S	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/i386/vmbus_vector.S	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,50 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/vmbus/i386/vmbus_vector.S 302167 2016-06-24 02:06:13Z sephe $
+ */
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+/*
+ * This is the Hyper-V vmbus channel direct callback interrupt.
+ * Only used when it is running on Hyper-V.
+ */
+	.text
+	SUPERALIGN_TEXT
+IDTVEC(vmbus_isr)
+	PUSH_FRAME
+	SET_KERNEL_SREGS
+	cld
+	FAKE_MCOUNT(TF_EIP(%esp))
+	pushl	%esp
+	call	vmbus_handle_intr
+	add	$4, %esp
+	MEXITCOUNT
+	jmp	doreti


Property changes on: trunk/sys/dev/hyperv/vmbus/i386/vmbus_vector.S
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/vmbus.c
===================================================================
--- trunk/sys/dev/hyperv/vmbus/vmbus.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/vmbus.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,1598 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * VM Bus Driver Implementation
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus.c 324572 2017-10-13 02:01:03Z sephe $");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+
+#include <machine/bus.h>
+#include <machine/intr_machdep.h>
+#include <machine/resource.h>
+#include <machine/apicvar.h>
+#include <machine/md_var.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+#include <dev/acpica/acpivar.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/vmbus_xact.h>
+#include <dev/hyperv/vmbus/hyperv_reg.h>
+#include <dev/hyperv/vmbus/hyperv_var.h>
+#include <dev/hyperv/vmbus/vmbus_reg.h>
+#include <dev/hyperv/vmbus/vmbus_var.h>
+#include <dev/hyperv/vmbus/vmbus_chanvar.h>
+
+#include "acpi_if.h"
+#include "pcib_if.h"
+#include "vmbus_if.h"
+
+#define VMBUS_GPADL_START		0xe1e10
+
+struct vmbus_msghc {
+	struct vmbus_xact		*mh_xact;
+	struct hypercall_postmsg_in	mh_inprm_save;
+};
+
+static void			vmbus_identify(driver_t *, device_t);
+static int			vmbus_probe(device_t);
+static int			vmbus_attach(device_t);
+static int			vmbus_detach(device_t);
+static int			vmbus_read_ivar(device_t, device_t, int,
+				    uintptr_t *);
+static int			vmbus_child_pnpinfo_str(device_t, device_t,
+				    char *, size_t);
+static struct resource		*vmbus_alloc_resource(device_t dev,
+				    device_t child, int type, int *rid,
+				    rman_res_t start, rman_res_t end,
+				    rman_res_t count, u_int flags);
+static int			vmbus_alloc_msi(device_t bus, device_t dev,
+				    int count, int maxcount, int *irqs);
+static int			vmbus_release_msi(device_t bus, device_t dev,
+				    int count, int *irqs);
+static int			vmbus_alloc_msix(device_t bus, device_t dev,
+				    int *irq);
+static int			vmbus_release_msix(device_t bus, device_t dev,
+				    int irq);
+static int			vmbus_map_msi(device_t bus, device_t dev,
+				    int irq, uint64_t *addr, uint32_t *data);
+static uint32_t			vmbus_get_version_method(device_t, device_t);
+static int			vmbus_probe_guid_method(device_t, device_t,
+				    const struct hyperv_guid *);
+static uint32_t			vmbus_get_vcpu_id_method(device_t bus,
+				    device_t dev, int cpu);
+static struct taskqueue		*vmbus_get_eventtq_method(device_t, device_t,
+				    int);
+
+static int			vmbus_init(struct vmbus_softc *);
+static int			vmbus_connect(struct vmbus_softc *, uint32_t);
+static int			vmbus_req_channels(struct vmbus_softc *sc);
+static void			vmbus_disconnect(struct vmbus_softc *);
+static int			vmbus_scan(struct vmbus_softc *);
+static void			vmbus_scan_teardown(struct vmbus_softc *);
+static void			vmbus_scan_done(struct vmbus_softc *,
+				    const struct vmbus_message *);
+static void			vmbus_chanmsg_handle(struct vmbus_softc *,
+				    const struct vmbus_message *);
+static void			vmbus_msg_task(void *, int);
+static void			vmbus_synic_setup(void *);
+static void			vmbus_synic_teardown(void *);
+static int			vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
+static int			vmbus_dma_alloc(struct vmbus_softc *);
+static void			vmbus_dma_free(struct vmbus_softc *);
+static int			vmbus_intr_setup(struct vmbus_softc *);
+static void			vmbus_intr_teardown(struct vmbus_softc *);
+static int			vmbus_doattach(struct vmbus_softc *);
+static void			vmbus_event_proc_dummy(struct vmbus_softc *,
+				    int);
+
+static struct vmbus_softc	*vmbus_sc;
+
+extern inthand_t IDTVEC(rsvd), IDTVEC(vmbus_isr);
+
+SYSCTL_NODE(_hw, OID_AUTO, vmbus, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
+    "Hyper-V vmbus");
+
+static int			vmbus_pin_evttask = 1;
+SYSCTL_INT(_hw_vmbus, OID_AUTO, pin_evttask, CTLFLAG_RDTUN,
+    &vmbus_pin_evttask, 0, "Pin event tasks to their respective CPU");
+
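+/*
+ * Protocol versions to negotiate, newest first; vmbus_init() walks
+ * this table until the host accepts a version.
+ */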
+static const uint32_t		vmbus_version[] = {
+	VMBUS_VERSION_WIN8_1,
+	VMBUS_VERSION_WIN8,
+	VMBUS_VERSION_WIN7,
+	VMBUS_VERSION_WS2008
+};
+
+static const vmbus_chanmsg_proc_t
+vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
+	VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
+	VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
+};
+
+static device_method_t vmbus_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_identify,		vmbus_identify),
+	DEVMETHOD(device_probe,			vmbus_probe),
+	DEVMETHOD(device_attach,		vmbus_attach),
+	DEVMETHOD(device_detach,		vmbus_detach),
+	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
+	DEVMETHOD(device_suspend,		bus_generic_suspend),
+	DEVMETHOD(device_resume,		bus_generic_resume),
+
+	/* Bus interface */
+	DEVMETHOD(bus_add_child,		bus_generic_add_child),
+	DEVMETHOD(bus_print_child,		bus_generic_print_child),
+	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
+	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),
+	DEVMETHOD(bus_alloc_resource,		vmbus_alloc_resource),
+	DEVMETHOD(bus_release_resource,		bus_generic_release_resource),
+	DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
+	DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
+	DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
+	DEVMETHOD(bus_teardown_intr,		bus_generic_teardown_intr),
+#if __FreeBSD_version >= 1100000
+	DEVMETHOD(bus_get_cpus,			bus_generic_get_cpus),
+#endif
+
+	/* pcib interface */
+	DEVMETHOD(pcib_alloc_msi,		vmbus_alloc_msi),
+	DEVMETHOD(pcib_release_msi,		vmbus_release_msi),
+	DEVMETHOD(pcib_alloc_msix,		vmbus_alloc_msix),
+	DEVMETHOD(pcib_release_msix,		vmbus_release_msix),
+	DEVMETHOD(pcib_map_msi,			vmbus_map_msi),
+
+	/* Vmbus interface */
+	DEVMETHOD(vmbus_get_version,		vmbus_get_version_method),
+	DEVMETHOD(vmbus_probe_guid,		vmbus_probe_guid_method),
+	DEVMETHOD(vmbus_get_vcpu_id,		vmbus_get_vcpu_id_method),
+	DEVMETHOD(vmbus_get_event_taskq,	vmbus_get_eventtq_method),
+
+	DEVMETHOD_END
+};
+
+static driver_t vmbus_driver = {
+	"vmbus",
+	vmbus_methods,
+	sizeof(struct vmbus_softc)
+};
+
+static devclass_t vmbus_devclass;
+
+DRIVER_MODULE(vmbus, pcib, vmbus_driver, vmbus_devclass, NULL, NULL);
+DRIVER_MODULE(vmbus, acpi_syscontainer, vmbus_driver, vmbus_devclass,
+    NULL, NULL);
+
+MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
+MODULE_DEPEND(vmbus, pci, 1, 1, 1);
+MODULE_VERSION(vmbus, 1);
+
+static __inline struct vmbus_softc *
+vmbus_get_softc(void)
+{
+	return vmbus_sc;
+}
+
+void
+vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
+{
+	struct hypercall_postmsg_in *inprm;
+
+	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
+		panic("invalid data size %zu", dsize);
+
+	inprm = vmbus_xact_req_data(mh->mh_xact);
+	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
+	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
+	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
+	inprm->hc_dsize = dsize;
+}
+
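+/*
+ * Typical message hypercall sequence (cf. vmbus_connect() below):
+ *
+ *	mh = vmbus_msghc_get(sc, sizeof(*req));
+ *	req = vmbus_msghc_dataptr(mh);
+ *	... fill in the request ...
+ *	error = vmbus_msghc_exec(sc, mh);
+ *	msg = vmbus_msghc_wait_result(sc, mh);
+ *	vmbus_msghc_put(sc, mh);
+ */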
+struct vmbus_msghc *
+vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
+{
+	struct vmbus_msghc *mh;
+	struct vmbus_xact *xact;
+
+	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
+		panic("invalid data size %zu", dsize);
+
+	xact = vmbus_xact_get(sc->vmbus_xc,
+	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
+	if (xact == NULL)
+		return (NULL);
+
+	mh = vmbus_xact_priv(xact, sizeof(*mh));
+	mh->mh_xact = xact;
+
+	vmbus_msghc_reset(mh, dsize);
+	return (mh);
+}
+
+void
+vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
+{
+
+	vmbus_xact_put(mh->mh_xact);
+}
+
+void *
+vmbus_msghc_dataptr(struct vmbus_msghc *mh)
+{
+	struct hypercall_postmsg_in *inprm;
+
+	inprm = vmbus_xact_req_data(mh->mh_xact);
+	return (inprm->hc_data);
+}
+
+int
+vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
+{
+	sbintime_t time = SBT_1MS;
+	struct hypercall_postmsg_in *inprm;
+	bus_addr_t inprm_paddr;
+	int i;
+
+	inprm = vmbus_xact_req_data(mh->mh_xact);
+	inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
+
+	/*
+	 * Save the input parameter so that we can restore it if the
+	 * Hypercall fails.
+	 *
+	 * XXX
+	 * Is this really necessary?  i.e. will the Hypercall ever
+	 * overwrite the input parameter?
+	 */
+	memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
+
+	/*
+	 * In order to cope with transient failures, e.g. insufficient
+	 * resources on the host side, we retry the post message Hypercall
+	 * several times, doubling the pause between attempts (from 1ms
+	 * up to ~2s).  20 retries seem sufficient.
+	 */
+#define HC_RETRY_MAX	20
+
+	for (i = 0; i < HC_RETRY_MAX; ++i) {
+		uint64_t status;
+
+		status = hypercall_post_message(inprm_paddr);
+		if (status == HYPERCALL_STATUS_SUCCESS)
+			return 0;
+
+		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
+		if (time < SBT_1S * 2)
+			time *= 2;
+
+		/* Restore input parameter and try again */
+		memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
+	}
+
+#undef HC_RETRY_MAX
+
+	return EIO;
+}
+
+int
+vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
+{
+	int error;
+
+	vmbus_xact_activate(mh->mh_xact);
+	error = vmbus_msghc_exec_noresult(mh);
+	if (error)
+		vmbus_xact_deactivate(mh->mh_xact);
+	return error;
+}
+
+void
+vmbus_msghc_exec_cancel(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
+{
+
+	vmbus_xact_deactivate(mh->mh_xact);
+}
+
+const struct vmbus_message *
+vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
+{
+	size_t resp_len;
+
+	return (vmbus_xact_wait(mh->mh_xact, &resp_len));
+}
+
+const struct vmbus_message *
+vmbus_msghc_poll_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
+{
+	size_t resp_len;
+
+	return (vmbus_xact_poll(mh->mh_xact, &resp_len));
+}
+
+void
+vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
+{
+
+	vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
+}
+
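+/*
+ * Allocate a GPADL handle.  The 32-bit counter may eventually wrap
+ * around; 0 is skipped, i.e. never returned as a valid handle.
+ */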
+uint32_t
+vmbus_gpadl_alloc(struct vmbus_softc *sc)
+{
+	uint32_t gpadl;
+
+again:
+	gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
+	if (gpadl == 0)
+		goto again;
+	return (gpadl);
+}
+
+static int
+vmbus_connect(struct vmbus_softc *sc, uint32_t version)
+{
+	struct vmbus_chanmsg_connect *req;
+	const struct vmbus_message *msg;
+	struct vmbus_msghc *mh;
+	int error, done = 0;
+
+	mh = vmbus_msghc_get(sc, sizeof(*req));
+	if (mh == NULL)
+		return ENXIO;
+
+	req = vmbus_msghc_dataptr(mh);
+	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
+	req->chm_ver = version;
+	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
+	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
+	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
+
+	error = vmbus_msghc_exec(sc, mh);
+	if (error) {
+		vmbus_msghc_put(sc, mh);
+		return error;
+	}
+
+	msg = vmbus_msghc_wait_result(sc, mh);
+	done = ((const struct vmbus_chanmsg_connect_resp *)
+	    msg->msg_data)->chm_done;
+
+	vmbus_msghc_put(sc, mh);
+
+	return (done ? 0 : EOPNOTSUPP);
+}
+
+static int
+vmbus_init(struct vmbus_softc *sc)
+{
+	int i;
+
+	for (i = 0; i < nitems(vmbus_version); ++i) {
+		int error;
+
+		error = vmbus_connect(sc, vmbus_version[i]);
+		if (!error) {
+			sc->vmbus_version = vmbus_version[i];
+			device_printf(sc->vmbus_dev, "version %u.%u\n",
+			    VMBUS_VERSION_MAJOR(sc->vmbus_version),
+			    VMBUS_VERSION_MINOR(sc->vmbus_version));
+			return 0;
+		}
+	}
+	return ENXIO;
+}
+
+static void
+vmbus_disconnect(struct vmbus_softc *sc)
+{
+	struct vmbus_chanmsg_disconnect *req;
+	struct vmbus_msghc *mh;
+	int error;
+
+	mh = vmbus_msghc_get(sc, sizeof(*req));
+	if (mh == NULL) {
+		device_printf(sc->vmbus_dev,
+		    "can not get msg hypercall for disconnect\n");
+		return;
+	}
+
+	req = vmbus_msghc_dataptr(mh);
+	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
+
+	error = vmbus_msghc_exec_noresult(mh);
+	vmbus_msghc_put(sc, mh);
+
+	if (error) {
+		device_printf(sc->vmbus_dev,
+		    "disconnect msg hypercall failed\n");
+	}
+}
+
+static int
+vmbus_req_channels(struct vmbus_softc *sc)
+{
+	struct vmbus_chanmsg_chrequest *req;
+	struct vmbus_msghc *mh;
+	int error;
+
+	mh = vmbus_msghc_get(sc, sizeof(*req));
+	if (mh == NULL)
+		return ENXIO;
+
+	req = vmbus_msghc_dataptr(mh);
+	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
+
+	error = vmbus_msghc_exec_noresult(mh);
+	vmbus_msghc_put(sc, mh);
+
+	return error;
+}
+
+static void
+vmbus_scan_done_task(void *xsc, int pending __unused)
+{
+	struct vmbus_softc *sc = xsc;
+
+	mtx_lock(&Giant);
+	sc->vmbus_scandone = true;
+	mtx_unlock(&Giant);
+	wakeup(&sc->vmbus_scandone);
+}
+
+static void
+vmbus_scan_done(struct vmbus_softc *sc,
+    const struct vmbus_message *msg __unused)
+{
+
+	taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task);
+}
+
+static int
+vmbus_scan(struct vmbus_softc *sc)
+{
+	int error;
+
+	/*
+	 * Identify, probe and attach non-channel devices.
+	 */
+	bus_generic_probe(sc->vmbus_dev);
+	bus_generic_attach(sc->vmbus_dev);
+
+	/*
+	 * This taskqueue serializes vmbus devices' attach and detach
+	 * for channel offer and rescind messages.
+	 */
+	sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK,
+	    taskqueue_thread_enqueue, &sc->vmbus_devtq);
+	taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev");
+	TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc);
+
+	/*
+	 * This taskqueue handles sub-channel detach, so that vmbus
+	 * device's detach running in vmbus_devtq can drain its sub-
+	 * channels.
+	 */
+	sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK,
+	    taskqueue_thread_enqueue, &sc->vmbus_subchtq);
+	taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch");
+
+	/*
+	 * Start vmbus scanning.
+	 */
+	error = vmbus_req_channels(sc);
+	if (error) {
+		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
+		    error);
+		return (error);
+	}
+
+	/*
+	 * Wait for all vmbus devices from the initial channel offers to be
+	 * attached.
+	 */
+	GIANT_REQUIRED;
+	while (!sc->vmbus_scandone)
+		mtx_sleep(&sc->vmbus_scandone, &Giant, 0, "vmbusdev", 0);
+
+	if (bootverbose) {
+		device_printf(sc->vmbus_dev, "device scan, probe and attach "
+		    "done\n");
+	}
+	return (0);
+}
+
+static void
+vmbus_scan_teardown(struct vmbus_softc *sc)
+{
+
+	GIANT_REQUIRED;
+	if (sc->vmbus_devtq != NULL) {
+		mtx_unlock(&Giant);
+		taskqueue_free(sc->vmbus_devtq);
+		mtx_lock(&Giant);
+		sc->vmbus_devtq = NULL;
+	}
+	if (sc->vmbus_subchtq != NULL) {
+		mtx_unlock(&Giant);
+		taskqueue_free(sc->vmbus_subchtq);
+		mtx_lock(&Giant);
+		sc->vmbus_subchtq = NULL;
+	}
+}
+
+static void
+vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
+{
+	vmbus_chanmsg_proc_t msg_proc;
+	uint32_t msg_type;
+
+	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
+	if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
+		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
+		    msg_type);
+		return;
+	}
+
+	msg_proc = vmbus_chanmsg_handlers[msg_type];
+	if (msg_proc != NULL)
+		msg_proc(sc, msg);
+
+	/* Channel specific processing */
+	vmbus_chan_msgproc(sc, msg);
+}
+
+static void
+vmbus_msg_task(void *xsc, int pending __unused)
+{
+	struct vmbus_softc *sc = xsc;
+	volatile struct vmbus_message *msg;
+
+	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
+	for (;;) {
+		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
+			/* No message */
+			break;
+		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
+			/* Channel message */
+			vmbus_chanmsg_handle(sc,
+			    __DEVOLATILE(const struct vmbus_message *, msg));
+		}
+
+		msg->msg_type = HYPERV_MSGTYPE_NONE;
+		/*
+		 * Make sure the write to msg_type (i.e. setting it to
+		 * HYPERV_MSGTYPE_NONE) happens before we read
+		 * msg_flags and write MSR_HV_EOM.  Otherwise, the EOM
+		 * will not deliver any more messages, since there is
+		 * no empty slot.
+		 *
+		 * NOTE:
+		 * mb() is used here, since atomic_thread_fence_seq_cst()
+		 * will become a compiler fence on UP kernels.
+		 */
+		mb();
+		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
+			/*
+			 * This will cause message queue rescan to possibly
+			 * deliver another msg from the hypervisor
+			 */
+			wrmsr(MSR_HV_EOM, 0);
+		}
+	}
+}
+
+static __inline int
+vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
+{
+	volatile struct vmbus_message *msg;
+	struct vmbus_message *msg_base;
+
+	msg_base = VMBUS_PCPU_GET(sc, message, cpu);
+
+	/*
+	 * Check event timer.
+	 *
+	 * TODO: move this to independent IDT vector.
+	 */
+	msg = msg_base + VMBUS_SINT_TIMER;
+	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
+		msg->msg_type = HYPERV_MSGTYPE_NONE;
+
+		vmbus_et_intr(frame);
+
+		/*
+		 * Make sure the write to msg_type (i.e. setting it to
+		 * HYPERV_MSGTYPE_NONE) happens before we read
+		 * msg_flags and write MSR_HV_EOM.  Otherwise, the EOM
+		 * will not deliver any more messages, since there is
+		 * no empty slot.
+		 *
+		 * NOTE:
+		 * mb() is used here, since atomic_thread_fence_seq_cst()
+		 * will become a compiler fence on UP kernels.
+		 */
+		mb();
+		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
+			/*
+			 * This will cause message queue rescan to possibly
+			 * deliver another msg from the hypervisor
+			 */
+			wrmsr(MSR_HV_EOM, 0);
+		}
+	}
+
+	/*
+	 * Check events.  Hot path for network and storage I/O data; high rate.
+	 *
+	 * NOTE:
+	 * As recommended by the Windows guest fellows, we check events before
+	 * checking messages.
+	 */
+	sc->vmbus_event_proc(sc, cpu);
+
+	/*
+	 * Check messages.  Mainly management stuff; ultra-low rate.
+	 */
+	msg = msg_base + VMBUS_SINT_MESSAGE;
+	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
+		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
+		    VMBUS_PCPU_PTR(sc, message_task, cpu));
+	}
+
+	return (FILTER_HANDLED);
+}
+
+void
+vmbus_handle_intr(struct trapframe *trap_frame)
+{
+	struct vmbus_softc *sc = vmbus_get_softc();
+	int cpu = curcpu;
+
+	/*
+	 * Disable preemption.
+	 */
+	critical_enter();
+
+	/*
+	 * Do a little interrupt counting.
+	 */
+	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
+
+	vmbus_handle_intr1(sc, trap_frame, cpu);
+
+	/*
+	 * Enable preemption.
+	 */
+	critical_exit();
+}
+
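+/*
+ * Per-cpu SynIC setup; run on each CPU via smp_rendezvous() from
+ * vmbus_doattach().  This programs SIMP/SIEFP with the CPU's message
+ * and event-flags pages, unmasks the message and timer SINTs, and
+ * finally enables the SynIC through MSR_HV_SCONTROL.
+ */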
+static void
+vmbus_synic_setup(void *xsc)
+{
+	struct vmbus_softc *sc = xsc;
+	int cpu = curcpu;
+	uint64_t val, orig;
+	uint32_t sint;
+
+	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
+		/* Save virtual processor id. */
+		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
+	} else {
+		/* Set virtual processor id to 0 for compatibility. */
+		VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
+	}
+
+	/*
+	 * Setup the SynIC message.
+	 */
+	orig = rdmsr(MSR_HV_SIMP);
+	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
+	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
+	     MSR_HV_SIMP_PGSHIFT);
+	wrmsr(MSR_HV_SIMP, val);
+
+	/*
+	 * Setup the SynIC event flags.
+	 */
+	orig = rdmsr(MSR_HV_SIEFP);
+	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
+	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
+	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
+	wrmsr(MSR_HV_SIEFP, val);
+
+	/*
+	 * Configure and unmask SINT for message and event flags.
+	 */
+	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
+	orig = rdmsr(sint);
+	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
+	    (orig & MSR_HV_SINT_RSVD_MASK);
+	wrmsr(sint, val);
+
+	/*
+	 * Configure and unmask SINT for timer.
+	 */
+	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
+	orig = rdmsr(sint);
+	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
+	    (orig & MSR_HV_SINT_RSVD_MASK);
+	wrmsr(sint, val);
+
+	/*
+	 * All done; enable SynIC.
+	 */
+	orig = rdmsr(MSR_HV_SCONTROL);
+	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
+	wrmsr(MSR_HV_SCONTROL, val);
+}
+
+static void
+vmbus_synic_teardown(void *arg)
+{
+	uint64_t orig;
+	uint32_t sint;
+
+	/*
+	 * Disable SynIC.
+	 */
+	orig = rdmsr(MSR_HV_SCONTROL);
+	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
+
+	/*
+	 * Mask message and event flags SINT.
+	 */
+	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
+	orig = rdmsr(sint);
+	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
+
+	/*
+	 * Mask timer SINT.
+	 */
+	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
+	orig = rdmsr(sint);
+	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
+
+	/*
+	 * Teardown SynIC message.
+	 */
+	orig = rdmsr(MSR_HV_SIMP);
+	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
+
+	/*
+	 * Teardown SynIC event flags.
+	 */
+	orig = rdmsr(MSR_HV_SIEFP);
+	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
+}
+
+static int
+vmbus_dma_alloc(struct vmbus_softc *sc)
+{
+	bus_dma_tag_t parent_dtag;
+	uint8_t *evtflags;
+	int cpu;
+
+	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
+	CPU_FOREACH(cpu) {
+		void *ptr;
+
+		/*
+		 * Per-cpu messages and event flags.
+		 */
+		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
+		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
+		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
+		if (ptr == NULL)
+			return ENOMEM;
+		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
+
+		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
+		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
+		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
+		if (ptr == NULL)
+			return ENOMEM;
+		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
+	}
+
+	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
+	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
+	if (evtflags == NULL)
+		return ENOMEM;
+	sc->vmbus_rx_evtflags = (u_long *)evtflags;
+	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
+	sc->vmbus_evtflags = evtflags;
+
+	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
+	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
+	if (sc->vmbus_mnf1 == NULL)
+		return ENOMEM;
+
+	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
+	    sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma,
+	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
+	if (sc->vmbus_mnf2 == NULL)
+		return ENOMEM;
+
+	return 0;
+}
+
+static void
+vmbus_dma_free(struct vmbus_softc *sc)
+{
+	int cpu;
+
+	if (sc->vmbus_evtflags != NULL) {
+		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
+		sc->vmbus_evtflags = NULL;
+		sc->vmbus_rx_evtflags = NULL;
+		sc->vmbus_tx_evtflags = NULL;
+	}
+	if (sc->vmbus_mnf1 != NULL) {
+		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
+		sc->vmbus_mnf1 = NULL;
+	}
+	if (sc->vmbus_mnf2 != NULL) {
+		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
+		sc->vmbus_mnf2 = NULL;
+	}
+
+	CPU_FOREACH(cpu) {
+		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
+			hyperv_dmamem_free(
+			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
+			    VMBUS_PCPU_GET(sc, message, cpu));
+			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
+		}
+		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
+			hyperv_dmamem_free(
+			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
+			    VMBUS_PCPU_GET(sc, event_flags, cpu));
+			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
+		}
+	}
+}
+
+/**
+ * @brief Find a free IDT slot and setup the interrupt handler.
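+ *
+ * Returns the allocated vector, or 0 if no free IDT slot was found.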
+ */
+static int
+vmbus_vector_alloc(void)
+{
+	int vector;
+	uintptr_t func;
+	struct gate_descriptor *ip;
+
+	/*
+	 * Search backwards from the highest IDT vector available for use
+	 * as the vmbus channel callback vector.  We install the
+	 * 'vmbus_isr' handler at that vector and use it to interrupt
+	 * vcpus.
+	 */
+	vector = APIC_SPURIOUS_INT;
+	while (--vector >= APIC_IPI_INTS) {
+		ip = &idt[vector];
+		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
+		if (func == (uintptr_t)&IDTVEC(rsvd)) {
+#ifdef __i386__
+			setidt(vector, IDTVEC(vmbus_isr), SDT_SYS386IGT,
+			    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#else
+			setidt(vector, IDTVEC(vmbus_isr), SDT_SYSIGT,
+			    SEL_KPL, 0);
+#endif
+
+			return (vector);
+		}
+	}
+	return (0);
+}
+
+/**
+ * @brief Restore the IDT slot to rsvd.
+ */
+static void
+vmbus_vector_free(int vector)
+{
+	uintptr_t func;
+	struct gate_descriptor *ip;
+
+	if (vector == 0)
+		return;
+
+	KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
+	    ("invalid vector %d", vector));
+
+	ip = &idt[vector];
+	func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
+	KASSERT(func == (uintptr_t)&IDTVEC(vmbus_isr),
+	    ("invalid vector %d", vector));
+
+	setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
+}
+
+static void
+vmbus_cpuset_setthread_task(void *xmask, int pending __unused)
+{
+	cpuset_t *mask = xmask;
+	int error;
+
+	error = cpuset_setthread(curthread->td_tid, mask);
+	if (error) {
+		panic("curthread=%ju: can't pin; error=%d",
+		    (uintmax_t)curthread->td_tid, error);
+	}
+}
+
+static int
+vmbus_intr_setup(struct vmbus_softc *sc)
+{
+	int cpu;
+
+	CPU_FOREACH(cpu) {
+		struct task cpuset_task;
+		char buf[MAXCOMLEN + 1];
+		cpuset_t cpu_mask;
+
+		/* Allocate an interrupt counter for Hyper-V interrupt */
+		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
+		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
+
+		/*
+		 * Setup taskqueue to handle events.  Task will be per-
+		 * channel.
+		 */
+		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
+		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
+		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
+		taskqueue_start_threads(VMBUS_PCPU_PTR(sc, event_tq, cpu),
+		    1, PI_NET, "hvevent%d", cpu);
+
+		if (vmbus_pin_evttask) {
+			CPU_SETOF(cpu, &cpu_mask);
+			TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task,
+			    &cpu_mask);
+			taskqueue_enqueue(VMBUS_PCPU_GET(sc, event_tq, cpu),
+			    &cpuset_task);
+			taskqueue_drain(VMBUS_PCPU_GET(sc, event_tq, cpu),
+			    &cpuset_task);
+		}
+
+		/*
+		 * Setup tasks and taskqueues to handle messages.
+		 */
+		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
+		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
+		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
+		taskqueue_start_threads(VMBUS_PCPU_PTR(sc, message_tq, cpu), 1,
+		    PI_NET, "hvmsg%d", cpu);
+		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
+		    vmbus_msg_task, sc);
+
+		CPU_SETOF(cpu, &cpu_mask);
+		TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task,
+		    &cpu_mask);
+		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
+		    &cpuset_task);
+		taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
+		    &cpuset_task);
+	}
+
+	/*
+	 * All resources required by the Hyper-V ISR are now set up; find
+	 * a free IDT vector for the Hyper-V ISR and install it.
+	 */
+	sc->vmbus_idtvec = vmbus_vector_alloc();
+	if (sc->vmbus_idtvec == 0) {
+		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
+		return ENXIO;
+	}
+	if (bootverbose) {
+		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
+		    sc->vmbus_idtvec);
+	}
+	return 0;
+}
+
+static void
+vmbus_intr_teardown(struct vmbus_softc *sc)
+{
+	int cpu;
+
+	vmbus_vector_free(sc->vmbus_idtvec);
+
+	CPU_FOREACH(cpu) {
+		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
+			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
+			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
+		}
+		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
+			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
+			    VMBUS_PCPU_PTR(sc, message_task, cpu));
+			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
+			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
+		}
+	}
+}
+
+static int
+vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
+{
+	return (ENOENT);
+}
+
+static int
+vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
+{
+	const struct vmbus_channel *chan;
+	char guidbuf[HYPERV_GUID_STRLEN];
+
+	chan = vmbus_get_channel(child);
+	if (chan == NULL) {
+		/* Event timer device, which does not belong to a channel */
+		return (0);
+	}
+
+	strlcat(buf, "classid=", buflen);
+	hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
+	strlcat(buf, guidbuf, buflen);
+
+	strlcat(buf, " deviceid=", buflen);
+	hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
+	strlcat(buf, guidbuf, buflen);
+
+	return (0);
+}
+
+int
+vmbus_add_child(struct vmbus_channel *chan)
+{
+	struct vmbus_softc *sc = chan->ch_vmbus;
+	device_t parent = sc->vmbus_dev;
+
+	mtx_lock(&Giant);
+
+	chan->ch_dev = device_add_child(parent, NULL, -1);
+	if (chan->ch_dev == NULL) {
+		mtx_unlock(&Giant);
+		device_printf(parent, "device_add_child for chan%u failed\n",
+		    chan->ch_id);
+		return (ENXIO);
+	}
+	device_set_ivars(chan->ch_dev, chan);
+	device_probe_and_attach(chan->ch_dev);
+
+	mtx_unlock(&Giant);
+	return (0);
+}
+
+int
+vmbus_delete_child(struct vmbus_channel *chan)
+{
+	int error = 0;
+
+	mtx_lock(&Giant);
+	if (chan->ch_dev != NULL) {
+		error = device_delete_child(chan->ch_vmbus->vmbus_dev,
+		    chan->ch_dev);
+		chan->ch_dev = NULL;
+	}
+	mtx_unlock(&Giant);
+	return (error);
+}
+
+static int
+vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
+{
+	struct vmbus_softc *sc = arg1;
+	char verstr[16];
+
+	snprintf(verstr, sizeof(verstr), "%u.%u",
+	    VMBUS_VERSION_MAJOR(sc->vmbus_version),
+	    VMBUS_VERSION_MINOR(sc->vmbus_version));
+	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
+}
+
+/*
+ * We need this function to make sure the MMIO resource is allocated from the
+ * ranges found in _CRS.
+ *
+ * For the release function, we can use bus_generic_release_resource().
+ */
+static struct resource *
+vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid,
+    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
+{
+	device_t parent = device_get_parent(dev);
+	struct resource *res;
+
+#ifdef NEW_PCIB
+	if (type == SYS_RES_MEMORY) {
+		struct vmbus_softc *sc = device_get_softc(dev);
+
+		res = pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type,
+		    rid, start, end, count, flags);
+	} else
+#endif
+	{
+		res = BUS_ALLOC_RESOURCE(parent, child, type, rid, start,
+		    end, count, flags);
+	}
+
+	return (res);
+}
+
+static int
+vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs)
+{
+
+	return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount,
+	    irqs));
+}
+
+static int
+vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs)
+{
+
+	return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs));
+}
+
+static int
+vmbus_alloc_msix(device_t bus, device_t dev, int *irq)
+{
+
+	return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq));
+}
+
+static int
+vmbus_release_msix(device_t bus, device_t dev, int irq)
+{
+
+	return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq));
+}
+
+static int
+vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr,
+	uint32_t *data)
+{
+
+	return (PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data));
+}
+
+static uint32_t
+vmbus_get_version_method(device_t bus, device_t dev)
+{
+	struct vmbus_softc *sc = device_get_softc(bus);
+
+	return sc->vmbus_version;
+}
+
+static int
+vmbus_probe_guid_method(device_t bus, device_t dev,
+    const struct hyperv_guid *guid)
+{
+	const struct vmbus_channel *chan = vmbus_get_channel(dev);
+
+	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
+		return 0;
+	return ENXIO;
+}
+
+static uint32_t
+vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu)
+{
+	const struct vmbus_softc *sc = device_get_softc(bus);
+
+	return (VMBUS_PCPU_GET(sc, vcpuid, cpu));
+}
+
+static struct taskqueue *
+vmbus_get_eventtq_method(device_t bus, device_t dev __unused, int cpu)
+{
+	const struct vmbus_softc *sc = device_get_softc(bus);
+
+	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu%d", cpu));
+	return (VMBUS_PCPU_GET(sc, event_tq, cpu));
+}
+
+#ifdef NEW_PCIB
+#define VTPM_BASE_ADDR 0xfed40000
+#define FOUR_GB (1ULL << 32)
+
+enum parse_pass { parse_64, parse_32 };
+
+struct parse_context {
+	device_t vmbus_dev;
+	enum parse_pass pass;
+};
+
+static ACPI_STATUS
+parse_crs(ACPI_RESOURCE *res, void *ctx)
+{
+	const struct parse_context *pc = ctx;
+	device_t vmbus_dev = pc->vmbus_dev;
+
+	struct vmbus_softc *sc = device_get_softc(vmbus_dev);
+	UINT64 start, end;
+
+	switch (res->Type) {
+	case ACPI_RESOURCE_TYPE_ADDRESS32:
+		start = res->Data.Address32.Address.Minimum;
+		end = res->Data.Address32.Address.Maximum;
+		break;
+
+	case ACPI_RESOURCE_TYPE_ADDRESS64:
+		start = res->Data.Address64.Address.Minimum;
+		end = res->Data.Address64.Address.Maximum;
+		break;
+
+	default:
+		/* Unused types. */
+		return (AE_OK);
+	}
+
+	/*
+	 * We don't use <1MB addresses.
+	 */
+	if (end < 0x100000)
+		return (AE_OK);
+
+	/* Don't conflict with vTPM. */
+	if (end >= VTPM_BASE_ADDR && start < VTPM_BASE_ADDR)
+		end = VTPM_BASE_ADDR - 1;
+
+	if ((pc->pass == parse_32 && start < FOUR_GB) ||
+	    (pc->pass == parse_64 && start >= FOUR_GB))
+		pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY,
+		    start, end, 0);
+
+	return (AE_OK);
+}
+
+static void
+vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass)
+{
+	struct parse_context pc;
+	ACPI_STATUS status;
+
+	if (bootverbose)
+		device_printf(dev, "walking _CRS, pass=%d\n", pass);
+
+	pc.vmbus_dev = vmbus_dev;
+	pc.pass = pass;
+	status = AcpiWalkResources(acpi_get_handle(dev), "_CRS",
+			parse_crs, &pc);
+
+	if (bootverbose && ACPI_FAILURE(status))
+		device_printf(dev, "_CRS: not found, pass=%d\n", pass);
+}
+
+static void
+vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass)
+{
+	device_t acpi0, parent;
+
+	parent = device_get_parent(dev);
+
+	acpi0 = device_get_parent(parent);
+	if (strcmp("acpi0", device_get_nameunit(acpi0)) == 0) {
+		device_t *children;
+		int count;
+
+		/*
+		 * Try to locate VMBUS resources and find _CRS on them.
+		 */
+		if (device_get_children(acpi0, &children, &count) == 0) {
+			int i;
+
+			for (i = 0; i < count; ++i) {
+				if (!device_is_attached(children[i]))
+					continue;
+
+				if (strcmp("vmbus_res",
+				    device_get_name(children[i])) == 0)
+					vmbus_get_crs(children[i], dev, pass);
+			}
+			free(children, M_TEMP);
+		}
+
+		/*
+		 * Try to find _CRS on acpi.
+		 */
+		vmbus_get_crs(acpi0, dev, pass);
+	} else {
+		device_printf(dev, "not grandchild of acpi\n");
+	}
+
+	/*
+	 * Try to find _CRS on parent.
+	 */
+	vmbus_get_crs(parent, dev, pass);
+}
+
+static void
+vmbus_get_mmio_res(device_t dev)
+{
+	struct vmbus_softc *sc = device_get_softc(dev);
+	/*
+	 * We walk the resources twice to make sure that, in the resource
+	 * list, the 32-bit resources appear after the 64-bit resources.
+	 * NB: resource_list_add() uses INSERT_TAIL. This way, when we
+	 * iterate through the list to find a range for a 64-bit BAR in
+	 * vmbus_alloc_resource(), we can make sure we try to use >4GB
+	 * ranges first.
+	 */
+	pcib_host_res_init(dev, &sc->vmbus_mmio_res);
+
+	vmbus_get_mmio_res_pass(dev, parse_64);
+	vmbus_get_mmio_res_pass(dev, parse_32);
+}
+
+static void
+vmbus_free_mmio_res(device_t dev)
+{
+	struct vmbus_softc *sc = device_get_softc(dev);
+
+	pcib_host_res_free(dev, &sc->vmbus_mmio_res);
+}
+#endif	/* NEW_PCIB */
+
+static void
+vmbus_identify(driver_t *driver, device_t parent)
+{
+
+	if (device_get_unit(parent) != 0 || vm_guest != VM_GUEST_HV ||
+	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
+		return;
+	device_add_child(parent, "vmbus", -1);
+}
+
+static int
+vmbus_probe(device_t dev)
+{
+
+	if (device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
+	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
+		return (ENXIO);
+
+	device_set_desc(dev, "Hyper-V Vmbus");
+	return (BUS_PROBE_DEFAULT);
+}
+
+/**
+ * @brief Main vmbus driver initialization routine.
+ *
+ * Here, we
+ * - initialize the vmbus driver context
+ * - setup various driver entry points
+ * - invoke the vmbus hv main init routine
+ * - get the irq resource
+ * - invoke the vmbus to add the vmbus root device
+ * - setup the vmbus root device
+ * - retrieve the channel offers
+ */
+static int
+vmbus_doattach(struct vmbus_softc *sc)
+{
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
+	int ret;
+
+	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
+		return (0);
+
+#ifdef NEW_PCIB
+	vmbus_get_mmio_res(sc->vmbus_dev);
+#endif
+
+	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
+
+	sc->vmbus_gpadl = VMBUS_GPADL_START;
+	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
+	TAILQ_INIT(&sc->vmbus_prichans);
+	mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF);
+	TAILQ_INIT(&sc->vmbus_chans);
+	sc->vmbus_chmap = malloc(
+	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
+	    M_WAITOK | M_ZERO);
+
+	/*
+	 * Create context for "post message" Hypercalls
+	 */
+	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
+	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
+	    sizeof(struct vmbus_msghc));
+	if (sc->vmbus_xc == NULL) {
+		ret = ENXIO;
+		goto cleanup;
+	}
+
+	/*
+	 * Allocate DMA buffers.
+	 */
+	ret = vmbus_dma_alloc(sc);
+	if (ret != 0)
+		goto cleanup;
+
+	/*
+	 * Setup interrupt.
+	 */
+	ret = vmbus_intr_setup(sc);
+	if (ret != 0)
+		goto cleanup;
+
+	/*
+	 * Setup SynIC.
+	 */
+	if (bootverbose)
+		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
+	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
+	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
+
+	/*
+	 * Initialize vmbus, e.g. connect to Hypervisor.
+	 */
+	ret = vmbus_init(sc);
+	if (ret != 0)
+		goto cleanup;
+
+	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
+	    sc->vmbus_version == VMBUS_VERSION_WIN7)
+		sc->vmbus_event_proc = vmbus_event_proc_compat;
+	else
+		sc->vmbus_event_proc = vmbus_event_proc;
+
+	ret = vmbus_scan(sc);
+	if (ret != 0)
+		goto cleanup;
+
+	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    vmbus_sysctl_version, "A", "vmbus version");
+
+	return (ret);
+
+cleanup:
+	vmbus_scan_teardown(sc);
+	vmbus_intr_teardown(sc);
+	vmbus_dma_free(sc);
+	if (sc->vmbus_xc != NULL) {
+		vmbus_xact_ctx_destroy(sc->vmbus_xc);
+		sc->vmbus_xc = NULL;
+	}
+	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
+	mtx_destroy(&sc->vmbus_prichan_lock);
+	mtx_destroy(&sc->vmbus_chan_lock);
+
+	return (ret);
+}
+
+static void
+vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
+{
+}
+
+static int
+vmbus_attach(device_t dev)
+{
+	vmbus_sc = device_get_softc(dev);
+	vmbus_sc->vmbus_dev = dev;
+
+	/*
+	 * Event processing logic will be configured:
+	 * - After the vmbus protocol version negotiation.
+	 * - Before we request channel offers.
+	 */
+	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
+
+	/* 
+	 * If the system has already booted and thread
+	 * scheduling is possible, as indicated by the global
+	 * cold set to zero, we just call the driver
+	 * initialization directly.
+	 */
+	if (!cold)
+		vmbus_doattach(vmbus_sc);
+
+	return (0);
+}
+
+static int
+vmbus_detach(device_t dev)
+{
+	struct vmbus_softc *sc = device_get_softc(dev);
+
+	bus_generic_detach(dev);
+	vmbus_chan_destroy_all(sc);
+
+	vmbus_scan_teardown(sc);
+
+	vmbus_disconnect(sc);
+
+	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
+		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
+		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
+	}
+
+	vmbus_intr_teardown(sc);
+	vmbus_dma_free(sc);
+
+	if (sc->vmbus_xc != NULL) {
+		vmbus_xact_ctx_destroy(sc->vmbus_xc);
+		sc->vmbus_xc = NULL;
+	}
+
+	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
+	mtx_destroy(&sc->vmbus_prichan_lock);
+	mtx_destroy(&sc->vmbus_chan_lock);
+
+#ifdef NEW_PCIB
+	vmbus_free_mmio_res(dev);
+#endif
+
+	return (0);
+}
+
+static void
+vmbus_sysinit(void *arg __unused)
+{
+	struct vmbus_softc *sc = vmbus_get_softc();
+
+	if (vm_guest != VM_GUEST_HV || sc == NULL)
+		return;
+
+	/* 
+	 * If the system has already booted and thread
+	 * scheduling is possible, as indicated by the
+	 * global cold set to zero, we just call the driver
+	 * initialization directly.
+	 */
+	if (!cold) 
+		vmbus_doattach(sc);
+}
+/*
+ * NOTE:
+ * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
+ * initialized.
+ */
+SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);


Property changes on: trunk/sys/dev/hyperv/vmbus/vmbus.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/vmbus_br.c
===================================================================
--- trunk/sys/dev/hyperv/vmbus/vmbus_br.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/vmbus_br.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,408 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+
+#include <dev/hyperv/vmbus/vmbus_reg.h>
+#include <dev/hyperv/vmbus/vmbus_brvar.h>
+
+/* Amount of space available for write */
+#define	VMBUS_BR_WAVAIL(r, w, z)	\
+	(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))
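+
+/*
+ * For example, with a 4096 byte data area (z), rindex (r) 512 and
+ * windex (w) 1536: write space = 4096 - (1536 - 512) = 3072 bytes,
+ * and read space is the complement, 4096 - 3072 = 1024 bytes.
+ */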
+
+/* Advance bufring index */
+#define VMBUS_BR_IDXINC(idx, inc, sz)	(((idx) + (inc)) % (sz))
+
+static int			vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS);
+static int			vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS);
+static void			vmbus_br_setup(struct vmbus_br *, void *, int);
+
+static int
+vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS)
+{
+	const struct vmbus_br *br = arg1;
+	uint32_t rindex, windex, imask, ravail, wavail;
+	char state[256];
+
+	rindex = br->vbr_rindex;
+	windex = br->vbr_windex;
+	imask = br->vbr_imask;
+	wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize);
+	ravail = br->vbr_dsize - wavail;
+
+	snprintf(state, sizeof(state),
+	    "rindex:%u windex:%u imask:%u ravail:%u wavail:%u",
+	    rindex, windex, imask, ravail, wavail);
+	return sysctl_handle_string(oidp, state, sizeof(state), req);
+}
+
+/*
+ * Binary bufring states.
+ */
+static int
+vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS)
+{
+#define BR_STATE_RIDX	0
+#define BR_STATE_WIDX	1
+#define BR_STATE_IMSK	2
+#define BR_STATE_RSPC	3
+#define BR_STATE_WSPC	4
+#define BR_STATE_MAX	5
+
+	const struct vmbus_br *br = arg1;
+	uint32_t rindex, windex, wavail, state[BR_STATE_MAX];
+
+	rindex = br->vbr_rindex;
+	windex = br->vbr_windex;
+	wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize);
+
+	state[BR_STATE_RIDX] = rindex;
+	state[BR_STATE_WIDX] = windex;
+	state[BR_STATE_IMSK] = br->vbr_imask;
+	state[BR_STATE_WSPC] = wavail;
+	state[BR_STATE_RSPC] = br->vbr_dsize - wavail;
+
+	return sysctl_handle_opaque(oidp, state, sizeof(state), req);
+}
+
+void
+vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx, struct sysctl_oid *br_tree,
+    struct vmbus_br *br, const char *name)
+{
+	struct sysctl_oid *tree;
+	char desc[64];
+
+	tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(br_tree), OID_AUTO,
+	    name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+	if (tree == NULL)
+		return;
+
+	snprintf(desc, sizeof(desc), "%s state", name);
+	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	    br, 0, vmbus_br_sysctl_state, "A", desc);
+
+	snprintf(desc, sizeof(desc), "%s binary state", name);
+	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state_bin",
+	    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	    br, 0, vmbus_br_sysctl_state_bin, "IU", desc);
+}
+
+void
+vmbus_rxbr_intr_mask(struct vmbus_rxbr *rbr)
+{
+	rbr->rxbr_imask = 1;
+	mb();
+}
+
+static __inline uint32_t
+vmbus_rxbr_avail(const struct vmbus_rxbr *rbr)
+{
+	uint32_t rindex, windex;
+
+	/* Get snapshot */
+	rindex = rbr->rxbr_rindex;
+	windex = rbr->rxbr_windex;
+
+	return (rbr->rxbr_dsize -
+	    VMBUS_BR_WAVAIL(rindex, windex, rbr->rxbr_dsize));
+}
+
+uint32_t
+vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr)
+{
+	rbr->rxbr_imask = 0;
+	mb();
+
+	/*
+	 * Now check to see if the ring buffer is still empty.
+	 * If it is not, we raced and we need to process new
+	 * incoming channel packets.
+	 */
+	return vmbus_rxbr_avail(rbr);
+}
+
+static void
+vmbus_br_setup(struct vmbus_br *br, void *buf, int blen)
+{
+	br->vbr = buf;
+	br->vbr_dsize = blen - sizeof(struct vmbus_bufring);
+}
+
+void
+vmbus_rxbr_init(struct vmbus_rxbr *rbr)
+{
+	mtx_init(&rbr->rxbr_lock, "vmbus_rxbr", NULL, MTX_SPIN);
+}
+
+void
+vmbus_rxbr_deinit(struct vmbus_rxbr *rbr)
+{
+	mtx_destroy(&rbr->rxbr_lock);
+}
+
+void
+vmbus_rxbr_setup(struct vmbus_rxbr *rbr, void *buf, int blen)
+{
+	vmbus_br_setup(&rbr->rxbr, buf, blen);
+}
+
+void
+vmbus_txbr_init(struct vmbus_txbr *tbr)
+{
+	mtx_init(&tbr->txbr_lock, "vmbus_txbr", NULL, MTX_SPIN);
+}
+
+void
+vmbus_txbr_deinit(struct vmbus_txbr *tbr)
+{
+	mtx_destroy(&tbr->txbr_lock);
+}
+
+void
+vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen)
+{
+	vmbus_br_setup(&tbr->txbr, buf, blen);
+}
+
+/*
+ * When we write to the ring buffer, check if the host needs to be
+ * signaled.
+ *
+ * The contract:
+ * - The host guarantees that while it is draining the TX bufring,
+ *   it will set the br_imask to indicate it does not need to be
+ *   interrupted when new data are added.
+ * - The host guarantees that it will completely drain the TX bufring
+ *   before exiting the read loop.  Further, once the TX bufring is
+ *   empty, it will clear the br_imask and re-check to see if new
+ *   data have arrived.
+ */
+static __inline boolean_t
+vmbus_txbr_need_signal(const struct vmbus_txbr *tbr, uint32_t old_windex)
+{
+	mb();
+	if (tbr->txbr_imask)
+		return (FALSE);
+
+	__compiler_membar();
+
+	/*
+	 * We need to signal the host only when the ring
+	 * transitions from being empty to non-empty.
+	 */
+	if (old_windex == tbr->txbr_rindex)
+		return (TRUE);
+
+	return (FALSE);
+}
+
+static __inline uint32_t
+vmbus_txbr_avail(const struct vmbus_txbr *tbr)
+{
+	uint32_t rindex, windex;
+
+	/* Get snapshot */
+	rindex = tbr->txbr_rindex;
+	windex = tbr->txbr_windex;
+
+	return VMBUS_BR_WAVAIL(rindex, windex, tbr->txbr_dsize);
+}
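+
+/*
+ * Worked example: assuming VMBUS_BR_WAVAIL(r, w, z) evaluates to
+ * (w >= r) ? z - (w - r) : r - w, a ring with dsize 4096, rindex 100
+ * and windex 1100 holds 1000 bytes of data, so the write-available
+ * space is 4096 - 1000 = 3096 bytes.  An empty ring (windex == rindex)
+ * reports wavail == dsize, which is why vmbus_txbr_write() below
+ * requires strictly more available space than the write size.
+ */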
+
+static __inline uint32_t
+vmbus_txbr_copyto(const struct vmbus_txbr *tbr, uint32_t windex,
+    const void *src0, uint32_t cplen)
+{
+	const uint8_t *src = src0;
+	uint8_t *br_data = tbr->txbr_data;
+	uint32_t br_dsize = tbr->txbr_dsize;
+
+	if (cplen > br_dsize - windex) {
+		uint32_t fraglen = br_dsize - windex;
+
+		/* Wrap-around detected */
+		memcpy(br_data + windex, src, fraglen);
+		memcpy(br_data, src + fraglen, cplen - fraglen);
+	} else {
+		memcpy(br_data + windex, src, cplen);
+	}
+	return VMBUS_BR_IDXINC(windex, cplen, br_dsize);
+}
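+
+/*
+ * Worked example of the wrap-around path above: with br_dsize 4096,
+ * windex 4000 and cplen 200, fraglen is 96, so 96 bytes are copied to
+ * the tail of the ring and the remaining 104 bytes to its head.
+ * Assuming VMBUS_BR_IDXINC(i, n, z) is (i + n) % z, the returned
+ * windex is (4000 + 200) % 4096 = 104.
+ */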
+
+/*
+ * Write scattered channel packet to TX bufring.
+ *
+ * The offset of this channel packet is written as a 64-bit value
+ * immediately after this channel packet.
+ */
+int
+vmbus_txbr_write(struct vmbus_txbr *tbr, const struct iovec iov[], int iovlen,
+    boolean_t *need_sig)
+{
+	uint32_t old_windex, windex, total;
+	uint64_t save_windex;
+	int i;
+
+	total = 0;
+	for (i = 0; i < iovlen; i++)
+		total += iov[i].iov_len;
+	total += sizeof(save_windex);
+
+	mtx_lock_spin(&tbr->txbr_lock);
+
+	/*
+	 * NOTE:
+	 * If this write would make br_windex equal to br_rindex,
+	 * i.e. the available write space equals the write size,
+	 * the write must be refused, since br_windex == br_rindex
+	 * means that the bufring is empty.
+	 */
+	if (vmbus_txbr_avail(tbr) <= total) {
+		mtx_unlock_spin(&tbr->txbr_lock);
+		return (EAGAIN);
+	}
+
+	/* Save br_windex for later use */
+	old_windex = tbr->txbr_windex;
+
+	/*
+	 * Copy the scattered channel packet to the TX bufring.
+	 */
+	windex = old_windex;
+	for (i = 0; i < iovlen; i++) {
+		windex = vmbus_txbr_copyto(tbr, windex,
+		    iov[i].iov_base, iov[i].iov_len);
+	}
+
+	/*
+	 * Set the offset of the current channel packet.
+	 */
+	save_windex = ((uint64_t)old_windex) << 32;
+	windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
+	    sizeof(save_windex));
+
+	/*
+	 * Update the write index _after_ the channel packet
+	 * is copied.
+	 */
+	__compiler_membar();
+	tbr->txbr_windex = windex;
+
+	mtx_unlock_spin(&tbr->txbr_lock);
+
+	*need_sig = vmbus_txbr_need_signal(tbr, old_windex);
+
+	return (0);
+}
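+
+/*
+ * Ring layout produced by one vmbus_txbr_write() call, sketched:
+ *
+ *	old_windex                               windex
+ *	v                                        v
+ *	| iov[0] | iov[1] | ... | old_windex<<32 |
+ *	                         \--- 8 bytes --/
+ *
+ * The trailing 64-bit value carries the packet's start offset in its
+ * high 32 bits, presumably for the reader's benefit; the guest-side
+ * RX path simply discards it (see vmbus_rxbr_read()).
+ */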
+
+static __inline uint32_t
+vmbus_rxbr_copyfrom(const struct vmbus_rxbr *rbr, uint32_t rindex,
+    void *dst0, int cplen)
+{
+	uint8_t *dst = dst0;
+	const uint8_t *br_data = rbr->rxbr_data;
+	uint32_t br_dsize = rbr->rxbr_dsize;
+
+	if (cplen > br_dsize - rindex) {
+		uint32_t fraglen = br_dsize - rindex;
+
+		/* Wrap-around detected. */
+		memcpy(dst, br_data + rindex, fraglen);
+		memcpy(dst + fraglen, br_data, cplen - fraglen);
+	} else {
+		memcpy(dst, br_data + rindex, cplen);
+	}
+	return VMBUS_BR_IDXINC(rindex, cplen, br_dsize);
+}
+
+int
+vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen)
+{
+	mtx_lock_spin(&rbr->rxbr_lock);
+
+	/*
+	 * At least the requested data and the 64-bit channel
+	 * packet offset must be available.
+	 */
+	if (vmbus_rxbr_avail(rbr) < dlen + sizeof(uint64_t)) {
+		mtx_unlock_spin(&rbr->rxbr_lock);
+		return (EAGAIN);
+	}
+	vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen);
+
+	mtx_unlock_spin(&rbr->rxbr_lock);
+
+	return (0);
+}
+
+/*
+ * NOTE:
+ * We assume (dlen + skip) == sizeof(channel packet).
+ */
+int
+vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen, uint32_t skip)
+{
+	uint32_t rindex, br_dsize = rbr->rxbr_dsize;
+
+	KASSERT(dlen + skip > 0, ("invalid dlen %d, offset %u", dlen, skip));
+
+	mtx_lock_spin(&rbr->rxbr_lock);
+
+	if (vmbus_rxbr_avail(rbr) < dlen + skip + sizeof(uint64_t)) {
+		mtx_unlock_spin(&rbr->rxbr_lock);
+		return (EAGAIN);
+	}
+
+	/*
+	 * Copy channel packet from RX bufring.
+	 */
+	rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex, skip, br_dsize);
+	rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);
+
+	/*
+	 * Discard this channel packet's 64-bit offset, which is
+	 * useless to us.
+	 */
+	rindex = VMBUS_BR_IDXINC(rindex, sizeof(uint64_t), br_dsize);
+
+	/*
+	 * Update the read index _after_ the channel packet is fetched.
+	 */
+	__compiler_membar();
+	rbr->rxbr_rindex = rindex;
+
+	mtx_unlock_spin(&rbr->rxbr_lock);
+
+	return (0);
+}


Property changes on: trunk/sys/dev/hyperv/vmbus/vmbus_br.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/vmbus_brvar.h
===================================================================
--- trunk/sys/dev/hyperv/vmbus/vmbus_brvar.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/vmbus_brvar.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,131 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus_brvar.h 307249 2016-10-14 02:42:08Z sephe $
+ */
+
+#ifndef _VMBUS_BRVAR_H_
+#define _VMBUS_BRVAR_H_
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/_iovec.h>
+
+struct vmbus_br {
+	struct vmbus_bufring	*vbr;
+	uint32_t		vbr_dsize;	/* total data size */
+};
+
+#define vbr_windex		vbr->br_windex
+#define vbr_rindex		vbr->br_rindex
+#define vbr_imask		vbr->br_imask
+#define vbr_data		vbr->br_data
+
+struct vmbus_rxbr {
+	struct mtx		rxbr_lock;
+	struct vmbus_br		rxbr;
+};
+
+#define rxbr_windex		rxbr.vbr_windex
+#define rxbr_rindex		rxbr.vbr_rindex
+#define rxbr_imask		rxbr.vbr_imask
+#define rxbr_data		rxbr.vbr_data
+#define rxbr_dsize		rxbr.vbr_dsize
+
+struct vmbus_txbr {
+	struct mtx		txbr_lock;
+	struct vmbus_br		txbr;
+};
+
+#define txbr_windex		txbr.vbr_windex
+#define txbr_rindex		txbr.vbr_rindex
+#define txbr_imask		txbr.vbr_imask
+#define txbr_data		txbr.vbr_data
+#define txbr_dsize		txbr.vbr_dsize
+
+struct sysctl_ctx_list;
+struct sysctl_oid;
+
+static __inline int
+vmbus_txbr_maxpktsz(const struct vmbus_txbr *tbr)
+{
+
+	/*
+	 * - 64 bits for the trailing start index (- sizeof(uint64_t)).
+	 * - The rindex and windex can't be the same (- 1).  See
+	 *   the comment near vmbus_bufring.br_{r,w}index.
+	 */
+	return (tbr->txbr_dsize - sizeof(uint64_t) - 1);
+}
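+
+/*
+ * E.g. with a txbr_dsize of 4096 bytes, the largest channel packet,
+ * padding included, is 4096 - 8 - 1 = 4087 bytes.
+ */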
+
+static __inline bool
+vmbus_txbr_empty(const struct vmbus_txbr *tbr)
+{
+
+	return (tbr->txbr_windex == tbr->txbr_rindex);
+}
+
+static __inline bool
+vmbus_rxbr_empty(const struct vmbus_rxbr *rbr)
+{
+
+	return (rbr->rxbr_windex == rbr->rxbr_rindex);
+}
+
+static __inline int
+vmbus_br_nelem(int br_size, int elem_size)
+{
+
+	/* Strip bufring header */
+	br_size -= sizeof(struct vmbus_bufring);
+	/* Add per-element trailing index */
+	elem_size += sizeof(uint64_t);
+	return (br_size / elem_size);
+}
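+
+/*
+ * Worked example: for a 16 KB ring (br_size 16384) and 256-byte
+ * elements, and assuming sizeof(struct vmbus_bufring) is one 4 KB
+ * page, the usable data area is 12288 bytes and each element costs
+ * 256 + 8 = 264 bytes, giving 12288 / 264 = 46 elements.
+ */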
+
+void		vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx,
+		    struct sysctl_oid *br_tree, struct vmbus_br *br,
+		    const char *name);
+
+void		vmbus_rxbr_init(struct vmbus_rxbr *rbr);
+void		vmbus_rxbr_deinit(struct vmbus_rxbr *rbr);
+void		vmbus_rxbr_setup(struct vmbus_rxbr *rbr, void *buf, int blen);
+int		vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen);
+int		vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen,
+		    uint32_t skip);
+void		vmbus_rxbr_intr_mask(struct vmbus_rxbr *rbr);
+uint32_t	vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr);
+
+void		vmbus_txbr_init(struct vmbus_txbr *tbr);
+void		vmbus_txbr_deinit(struct vmbus_txbr *tbr);
+void		vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen);
+int		vmbus_txbr_write(struct vmbus_txbr *tbr,
+		    const struct iovec iov[], int iovlen, boolean_t *need_sig);
+
+#endif  /* _VMBUS_BRVAR_H_ */


Property changes on: trunk/sys/dev/hyperv/vmbus/vmbus_brvar.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/vmbus_chan.c
===================================================================
--- trunk/sys/dev/hyperv/vmbus/vmbus_chan.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/vmbus_chan.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,2207 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus_chan.c 311257 2017-01-04 05:41:47Z sephe $");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <machine/atomic.h>
+#include <machine/stdarg.h>
+
+#include <dev/hyperv/include/hyperv_busdma.h>
+#include <dev/hyperv/include/vmbus_xact.h>
+#include <dev/hyperv/vmbus/hyperv_var.h>
+#include <dev/hyperv/vmbus/vmbus_reg.h>
+#include <dev/hyperv/vmbus/vmbus_var.h>
+#include <dev/hyperv/vmbus/vmbus_brvar.h>
+#include <dev/hyperv/vmbus/vmbus_chanvar.h>
+
+struct vmbus_chan_pollarg {
+	struct vmbus_channel	*poll_chan;
+	u_int			poll_hz;
+};
+
+static void			vmbus_chan_update_evtflagcnt(
+				    struct vmbus_softc *,
+				    const struct vmbus_channel *);
+static int			vmbus_chan_close_internal(
+				    struct vmbus_channel *);
+static int			vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS);
+static void			vmbus_chan_sysctl_create(
+				    struct vmbus_channel *);
+static struct vmbus_channel	*vmbus_chan_alloc(struct vmbus_softc *);
+static void			vmbus_chan_free(struct vmbus_channel *);
+static int			vmbus_chan_add(struct vmbus_channel *);
+static void			vmbus_chan_cpu_default(struct vmbus_channel *);
+static int			vmbus_chan_release(struct vmbus_channel *);
+static void			vmbus_chan_set_chmap(struct vmbus_channel *);
+static void			vmbus_chan_clear_chmap(struct vmbus_channel *);
+static void			vmbus_chan_detach(struct vmbus_channel *);
+static bool			vmbus_chan_wait_revoke(
+				    const struct vmbus_channel *, bool);
+static void			vmbus_chan_poll_timeout(void *);
+static bool			vmbus_chan_poll_cancel_intq(
+				    struct vmbus_channel *);
+static void			vmbus_chan_poll_cancel(struct vmbus_channel *);
+
+static void			vmbus_chan_ins_prilist(struct vmbus_softc *,
+				    struct vmbus_channel *);
+static void			vmbus_chan_rem_prilist(struct vmbus_softc *,
+				    struct vmbus_channel *);
+static void			vmbus_chan_ins_list(struct vmbus_softc *,
+				    struct vmbus_channel *);
+static void			vmbus_chan_rem_list(struct vmbus_softc *,
+				    struct vmbus_channel *);
+static void			vmbus_chan_ins_sublist(struct vmbus_channel *,
+				    struct vmbus_channel *);
+static void			vmbus_chan_rem_sublist(struct vmbus_channel *,
+				    struct vmbus_channel *);
+
+static void			vmbus_chan_task(void *, int);
+static void			vmbus_chan_task_nobatch(void *, int);
+static void			vmbus_chan_poll_task(void *, int);
+static void			vmbus_chan_clrchmap_task(void *, int);
+static void			vmbus_chan_pollcfg_task(void *, int);
+static void			vmbus_chan_polldis_task(void *, int);
+static void			vmbus_chan_poll_cancel_task(void *, int);
+static void			vmbus_prichan_attach_task(void *, int);
+static void			vmbus_subchan_attach_task(void *, int);
+static void			vmbus_prichan_detach_task(void *, int);
+static void			vmbus_subchan_detach_task(void *, int);
+
+static void			vmbus_chan_msgproc_choffer(struct vmbus_softc *,
+				    const struct vmbus_message *);
+static void			vmbus_chan_msgproc_chrescind(
+				    struct vmbus_softc *,
+				    const struct vmbus_message *);
+
+static int			vmbus_chan_printf(const struct vmbus_channel *,
+				    const char *, ...) __printflike(2, 3);
+
+/*
+ * Vmbus channel message processing.
+ */
+static const vmbus_chanmsg_proc_t
+vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
+	VMBUS_CHANMSG_PROC(CHOFFER,	vmbus_chan_msgproc_choffer),
+	VMBUS_CHANMSG_PROC(CHRESCIND,	vmbus_chan_msgproc_chrescind),
+
+	VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP),
+	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP),
+	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP)
+};
+
+/*
+ * Notify the host that data are pending on our TX bufring.
+ */
+static __inline void
+vmbus_chan_signal_tx(const struct vmbus_channel *chan)
+{
+	atomic_set_long(chan->ch_evtflag, chan->ch_evtflag_mask);
+	if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF)
+		atomic_set_int(chan->ch_montrig, chan->ch_montrig_mask);
+	else
+		hypercall_signal_event(chan->ch_monprm_dma.hv_paddr);
+}
+
+static void
+vmbus_chan_ins_prilist(struct vmbus_softc *sc, struct vmbus_channel *chan)
+{
+
+	mtx_assert(&sc->vmbus_prichan_lock, MA_OWNED);
+	if (atomic_testandset_int(&chan->ch_stflags,
+	    VMBUS_CHAN_ST_ONPRIL_SHIFT))
+		panic("channel is already on the prilist");
+	TAILQ_INSERT_TAIL(&sc->vmbus_prichans, chan, ch_prilink);
+}
+
+static void
+vmbus_chan_rem_prilist(struct vmbus_softc *sc, struct vmbus_channel *chan)
+{
+
+	mtx_assert(&sc->vmbus_prichan_lock, MA_OWNED);
+	if (atomic_testandclear_int(&chan->ch_stflags,
+	    VMBUS_CHAN_ST_ONPRIL_SHIFT) == 0)
+		panic("channel is not on the prilist");
+	TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink);
+}
+
+static void
+vmbus_chan_ins_sublist(struct vmbus_channel *prichan,
+    struct vmbus_channel *chan)
+{
+
+	mtx_assert(&prichan->ch_subchan_lock, MA_OWNED);
+
+	if (atomic_testandset_int(&chan->ch_stflags,
+	    VMBUS_CHAN_ST_ONSUBL_SHIFT))
+		panic("channel is already on the sublist");
+	TAILQ_INSERT_TAIL(&prichan->ch_subchans, chan, ch_sublink);
+
+	/* Bump sub-channel count. */
+	prichan->ch_subchan_cnt++;
+}
+
+static void
+vmbus_chan_rem_sublist(struct vmbus_channel *prichan,
+    struct vmbus_channel *chan)
+{
+
+	mtx_assert(&prichan->ch_subchan_lock, MA_OWNED);
+
+	KASSERT(prichan->ch_subchan_cnt > 0,
+	    ("invalid subchan_cnt %d", prichan->ch_subchan_cnt));
+	prichan->ch_subchan_cnt--;
+
+	if (atomic_testandclear_int(&chan->ch_stflags,
+	    VMBUS_CHAN_ST_ONSUBL_SHIFT) == 0)
+		panic("channel is not on the sublist");
+	TAILQ_REMOVE(&prichan->ch_subchans, chan, ch_sublink);
+}
+
+static void
+vmbus_chan_ins_list(struct vmbus_softc *sc, struct vmbus_channel *chan)
+{
+
+	mtx_assert(&sc->vmbus_chan_lock, MA_OWNED);
+	if (atomic_testandset_int(&chan->ch_stflags,
+	    VMBUS_CHAN_ST_ONLIST_SHIFT))
+		panic("channel is already on the list");
+	TAILQ_INSERT_TAIL(&sc->vmbus_chans, chan, ch_link);
+}
+
+static void
+vmbus_chan_rem_list(struct vmbus_softc *sc, struct vmbus_channel *chan)
+{
+
+	mtx_assert(&sc->vmbus_chan_lock, MA_OWNED);
+	if (atomic_testandclear_int(&chan->ch_stflags,
+	    VMBUS_CHAN_ST_ONLIST_SHIFT) == 0)
+		panic("channel is not on the list");
+	TAILQ_REMOVE(&sc->vmbus_chans, chan, ch_link);
+}
+
+static int
+vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS)
+{
+	struct vmbus_channel *chan = arg1;
+	int mnf = 0;
+
+	if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF)
+		mnf = 1;
+	return sysctl_handle_int(oidp, &mnf, 0, req);
+}
+
+static void
+vmbus_chan_sysctl_create(struct vmbus_channel *chan)
+{
+	struct sysctl_oid *ch_tree, *chid_tree, *br_tree;
+	struct sysctl_ctx_list *ctx;
+	uint32_t ch_id;
+	char name[16];
+
+	/*
+	 * Add sysctl nodes related to this channel to this
+	 * channel's sysctl ctx, so that they can be destroyed
+	 * independently upon close of this channel, which can
+	 * happen even if the device is not detached.
+	 */
+	ctx = &chan->ch_sysctl_ctx;
+	sysctl_ctx_init(ctx);
+
+	/*
+	 * Create dev.NAME.UNIT.channel tree.
+	 */
+	ch_tree = SYSCTL_ADD_NODE(ctx,
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(chan->ch_dev)),
+	    OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+	if (ch_tree == NULL)
+		return;
+
+	/*
+	 * Create dev.NAME.UNIT.channel.CHANID tree.
+	 */
+	if (VMBUS_CHAN_ISPRIMARY(chan))
+		ch_id = chan->ch_id;
+	else
+		ch_id = chan->ch_prichan->ch_id;
+	snprintf(name, sizeof(name), "%d", ch_id);
+	chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
+	    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+	if (chid_tree == NULL)
+		return;
+
+	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
+		/*
+		 * Create dev.NAME.UNIT.channel.CHANID.sub tree.
+		 */
+		ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree),
+		    OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+		if (ch_tree == NULL)
+			return;
+
+		/*
+		 * Create dev.NAME.UNIT.channel.CHANID.sub.SUBIDX tree.
+		 *
+		 * NOTE:
+		 * chid_tree is changed to this new sysctl tree.
+		 */
+		snprintf(name, sizeof(name), "%d", chan->ch_subidx);
+		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
+		    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+		if (chid_tree == NULL)
+			return;
+
+		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
+		    "chanid", CTLFLAG_RD, &chan->ch_id, 0, "channel id");
+	}
+
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
+	    "cpu", CTLFLAG_RD, &chan->ch_cpuid, 0, "owner CPU id");
+	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
+	    "mnf", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	    chan, 0, vmbus_chan_sysctl_mnf, "I",
+	    "has monitor notification facilities");
+
+	br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
+	    "br", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+	if (br_tree != NULL) {
+		/*
+		 * Create sysctl tree for RX bufring.
+		 */
+		vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_rxbr.rxbr, "rx");
+		/*
+		 * Create sysctl tree for TX bufring.
+		 */
+		vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_txbr.txbr, "tx");
+	}
+}
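+
+/*
+ * Resulting sysctl tree, sketched for a hypothetical hn(4) unit 0
+ * with primary chan2 and a sub-channel chan3 at sub-index 1:
+ *
+ *	dev.hn.0.channel.2.{cpu,mnf}
+ *	dev.hn.0.channel.2.br.{rx,tx}.{state,state_bin}
+ *	dev.hn.0.channel.2.sub.1.chanid		(value: 3)
+ *	dev.hn.0.channel.2.sub.1.{cpu,mnf,br...}
+ */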
+
+int
+vmbus_chan_open(struct vmbus_channel *chan, int txbr_size, int rxbr_size,
+    const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg)
+{
+	struct vmbus_chan_br cbr;
+	int error;
+
+	/*
+	 * Allocate the TX+RX bufrings.
+	 */
+	KASSERT(chan->ch_bufring == NULL, ("bufrings are allocated"));
+	chan->ch_bufring = hyperv_dmamem_alloc(bus_get_dma_tag(chan->ch_dev),
+	    PAGE_SIZE, 0, txbr_size + rxbr_size, &chan->ch_bufring_dma,
+	    BUS_DMA_WAITOK);
+	if (chan->ch_bufring == NULL) {
+		vmbus_chan_printf(chan, "bufring allocation failed\n");
+		return (ENOMEM);
+	}
+
+	cbr.cbr = chan->ch_bufring;
+	cbr.cbr_paddr = chan->ch_bufring_dma.hv_paddr;
+	cbr.cbr_txsz = txbr_size;
+	cbr.cbr_rxsz = rxbr_size;
+
+	error = vmbus_chan_open_br(chan, &cbr, udata, udlen, cb, cbarg);
+	if (error) {
+		if (error == EISCONN) {
+			/*
+			 * XXX
+			 * The bufring GPADL is still connected; abandon
+			 * this bufring, instead of having mysterious
+			 * crash or trashed data later on.
+			 */
+			vmbus_chan_printf(chan, "chan%u bufring GPADL "
+			    "is still connected upon channel open error; "
+			    "leak %d bytes memory\n", chan->ch_id,
+			    txbr_size + rxbr_size);
+		} else {
+			hyperv_dmamem_free(&chan->ch_bufring_dma,
+			    chan->ch_bufring);
+		}
+		chan->ch_bufring = NULL;
+	}
+	return (error);
+}
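+
+/*
+ * Caller sketch (hypothetical driver, names illustrative): both ring
+ * sizes must be multiples of PAGE_SIZE, and the callback will run on
+ * the channel's per-CPU event taskqueue:
+ *
+ *	static void
+ *	mydrv_chan_cb(struct vmbus_channel *chan, void *xsc)
+ *	{
+ *		(drain packets with vmbus_chan_recv() here)
+ *	}
+ *
+ *	error = vmbus_chan_open(chan, 16 * PAGE_SIZE, 16 * PAGE_SIZE,
+ *	    NULL, 0, mydrv_chan_cb, sc);
+ */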
+
+int
+vmbus_chan_open_br(struct vmbus_channel *chan, const struct vmbus_chan_br *cbr,
+    const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg)
+{
+	struct vmbus_softc *sc = chan->ch_vmbus;
+	const struct vmbus_message *msg;
+	struct vmbus_chanmsg_chopen *req;
+	struct vmbus_msghc *mh;
+	uint32_t status;
+	int error, txbr_size, rxbr_size;
+	task_fn_t *task_fn;
+	uint8_t *br;
+
+	if (udlen > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) {
+		vmbus_chan_printf(chan,
+		    "invalid udata len %d for chan%u\n", udlen, chan->ch_id);
+		return (EINVAL);
+	}
+
+	br = cbr->cbr;
+	txbr_size = cbr->cbr_txsz;
+	rxbr_size = cbr->cbr_rxsz;
+	KASSERT((txbr_size & PAGE_MASK) == 0,
+	    ("send bufring size is not a multiple of the page size"));
+	KASSERT((rxbr_size & PAGE_MASK) == 0,
+	    ("recv bufring size is not a multiple of the page size"));
+	KASSERT((cbr->cbr_paddr & PAGE_MASK) == 0,
+	    ("bufring is not page aligned"));
+
+	/*
+	 * Zero out the TX/RX bufrings, in case they were used before.
+	 */
+	memset(br, 0, txbr_size + rxbr_size);
+
+	if (atomic_testandset_int(&chan->ch_stflags,
+	    VMBUS_CHAN_ST_OPENED_SHIFT))
+		panic("double-open chan%u", chan->ch_id);
+
+	chan->ch_cb = cb;
+	chan->ch_cbarg = cbarg;
+
+	vmbus_chan_update_evtflagcnt(sc, chan);
+
+	chan->ch_tq = VMBUS_PCPU_GET(chan->ch_vmbus, event_tq, chan->ch_cpuid);
+	if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
+		task_fn = vmbus_chan_task;
+	else
+		task_fn = vmbus_chan_task_nobatch;
+	TASK_INIT(&chan->ch_task, 0, task_fn, chan);
+
+	/* TX bufring comes first */
+	vmbus_txbr_setup(&chan->ch_txbr, br, txbr_size);
+	/* RX bufring immediately follows TX bufring */
+	vmbus_rxbr_setup(&chan->ch_rxbr, br + txbr_size, rxbr_size);
+
+	/* Create sysctl tree for this channel */
+	vmbus_chan_sysctl_create(chan);
+
+	/*
+	 * Connect the bufrings, both RX and TX, to this channel.
+	 */
+	error = vmbus_chan_gpadl_connect(chan, cbr->cbr_paddr,
+	    txbr_size + rxbr_size, &chan->ch_bufring_gpadl);
+	if (error) {
+		vmbus_chan_printf(chan,
+		    "failed to connect bufring GPADL to chan%u\n", chan->ch_id);
+		goto failed;
+	}
+
+	/*
+	 * Install this channel, before it is opened, but after everything
+	 * else has been set up.
+	 */
+	vmbus_chan_set_chmap(chan);
+
+	/*
+	 * Open the channel with the bufring GPADL on the target CPU.
+	 */
+	mh = vmbus_msghc_get(sc, sizeof(*req));
+	if (mh == NULL) {
+		vmbus_chan_printf(chan,
+		    "can not get msg hypercall for chopen(chan%u)\n",
+		    chan->ch_id);
+		error = ENXIO;
+		goto failed;
+	}
+
+	req = vmbus_msghc_dataptr(mh);
+	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN;
+	req->chm_chanid = chan->ch_id;
+	req->chm_openid = chan->ch_id;
+	req->chm_gpadl = chan->ch_bufring_gpadl;
+	req->chm_vcpuid = chan->ch_vcpuid;
+	req->chm_txbr_pgcnt = txbr_size >> PAGE_SHIFT;
+	if (udlen > 0)
+		memcpy(req->chm_udata, udata, udlen);
+
+	error = vmbus_msghc_exec(sc, mh);
+	if (error) {
+		vmbus_chan_printf(chan,
+		    "chopen(chan%u) msg hypercall exec failed: %d\n",
+		    chan->ch_id, error);
+		vmbus_msghc_put(sc, mh);
+		goto failed;
+	}
+
+	for (;;) {
+		msg = vmbus_msghc_poll_result(sc, mh);
+		if (msg != NULL)
+			break;
+		if (vmbus_chan_is_revoked(chan)) {
+			int i;
+
+			/*
+			 * NOTE:
+			 * The hypervisor does _not_ send a CHOPEN
+			 * response for a revoked channel.
+			 */
+			vmbus_chan_printf(chan,
+			    "chan%u is revoked, when it is being opened\n",
+			    chan->ch_id);
+
+			/*
+			 * XXX
+			 * Add extra delay before canceling the hypercall
+			 * execution, mainly to close any possible
+			 * CHRESCIND and CHOPEN_RESP races on the
+			 * hypervisor side.
+			 */
+#define REVOKE_LINGER	100
+			for (i = 0; i < REVOKE_LINGER; ++i) {
+				msg = vmbus_msghc_poll_result(sc, mh);
+				if (msg != NULL)
+					break;
+				pause("rchopen", 1);
+			}
+#undef REVOKE_LINGER
+			if (msg == NULL)
+				vmbus_msghc_exec_cancel(sc, mh);
+			break;
+		}
+		pause("chopen", 1);
+	}
+	if (msg != NULL) {
+		status = ((const struct vmbus_chanmsg_chopen_resp *)
+		    msg->msg_data)->chm_status;
+	} else {
+		/* XXX any non-0 value is ok here. */
+		status = 0xff;
+	}
+
+	vmbus_msghc_put(sc, mh);
+
+	if (status == 0) {
+		if (bootverbose)
+			vmbus_chan_printf(chan, "chan%u opened\n", chan->ch_id);
+		return (0);
+	}
+
+	vmbus_chan_printf(chan, "failed to open chan%u\n", chan->ch_id);
+	error = ENXIO;
+
+failed:
+	sysctl_ctx_free(&chan->ch_sysctl_ctx);
+	vmbus_chan_clear_chmap(chan);
+	if (chan->ch_bufring_gpadl != 0) {
+		int error1;
+
+		error1 = vmbus_chan_gpadl_disconnect(chan,
+		    chan->ch_bufring_gpadl);
+		if (error1) {
+			/*
+			 * Give caller a hint that the bufring GPADL is still
+			 * connected.
+			 */
+			error = EISCONN;
+		}
+		chan->ch_bufring_gpadl = 0;
+	}
+	atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED);
+	return (error);
+}
+
+int
+vmbus_chan_gpadl_connect(struct vmbus_channel *chan, bus_addr_t paddr,
+    int size, uint32_t *gpadl0)
+{
+	struct vmbus_softc *sc = chan->ch_vmbus;
+	struct vmbus_msghc *mh;
+	struct vmbus_chanmsg_gpadl_conn *req;
+	const struct vmbus_message *msg;
+	size_t reqsz;
+	uint32_t gpadl, status;
+	int page_count, range_len, i, cnt, error;
+	uint64_t page_id;
+
+	KASSERT(*gpadl0 == 0, ("GPADL is not zero"));
+
+	/*
+	 * Preliminary checks.
+	 */
+
+	KASSERT((size & PAGE_MASK) == 0,
+	    ("invalid GPA size %d, not multiple page size", size));
+	page_count = size >> PAGE_SHIFT;
+
+	KASSERT((paddr & PAGE_MASK) == 0,
+	    ("GPA is not page aligned %jx", (uintmax_t)paddr));
+	page_id = paddr >> PAGE_SHIFT;
+
+	range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]);
+	/*
+	 * We don't support multiple GPA ranges.
+	 */
+	if (range_len > UINT16_MAX) {
+		vmbus_chan_printf(chan, "GPA too large, %d pages\n",
+		    page_count);
+		return EOPNOTSUPP;
+	}
+
+	/*
+	 * Allocate GPADL id.
+	 */
+	gpadl = vmbus_gpadl_alloc(sc);
+
+	/*
+	 * Connect this GPADL to the target channel.
+	 *
+	 * NOTE:
+	 * Since each message can only hold a small set of page
+	 * addresses, several messages may be required to
+	 * complete the connection.
+	 */
+	if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX)
+		cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX;
+	else
+		cnt = page_count;
+	page_count -= cnt;
+
+	reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn,
+	    chm_range.gpa_page[cnt]);
+	mh = vmbus_msghc_get(sc, reqsz);
+	if (mh == NULL) {
+		vmbus_chan_printf(chan,
+		    "can not get msg hypercall for gpadl_conn(chan%u)\n",
+		    chan->ch_id);
+		return EIO;
+	}
+
+	req = vmbus_msghc_dataptr(mh);
+	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN;
+	req->chm_chanid = chan->ch_id;
+	req->chm_gpadl = gpadl;
+	req->chm_range_len = range_len;
+	req->chm_range_cnt = 1;
+	req->chm_range.gpa_len = size;
+	req->chm_range.gpa_ofs = 0;
+	for (i = 0; i < cnt; ++i)
+		req->chm_range.gpa_page[i] = page_id++;
+
+	error = vmbus_msghc_exec(sc, mh);
+	if (error) {
+		vmbus_chan_printf(chan,
+		    "gpadl_conn(chan%u) msg hypercall exec failed: %d\n",
+		    chan->ch_id, error);
+		vmbus_msghc_put(sc, mh);
+		return error;
+	}
+
+	while (page_count > 0) {
+		struct vmbus_chanmsg_gpadl_subconn *subreq;
+
+		if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX)
+			cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX;
+		else
+			cnt = page_count;
+		page_count -= cnt;
+
+		reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn,
+		    chm_gpa_page[cnt]);
+		vmbus_msghc_reset(mh, reqsz);
+
+		subreq = vmbus_msghc_dataptr(mh);
+		subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN;
+		subreq->chm_gpadl = gpadl;
+		for (i = 0; i < cnt; ++i)
+			subreq->chm_gpa_page[i] = page_id++;
+
+		vmbus_msghc_exec_noresult(mh);
+	}
+	KASSERT(page_count == 0, ("invalid page count %d", page_count));
+
+	msg = vmbus_msghc_wait_result(sc, mh);
+	status = ((const struct vmbus_chanmsg_gpadl_connresp *)
+	    msg->msg_data)->chm_status;
+
+	vmbus_msghc_put(sc, mh);
+
+	if (status != 0) {
+		vmbus_chan_printf(chan, "gpadl_conn(chan%u) failed: %u\n",
+		    chan->ch_id, status);
+		return EIO;
+	}
+
+	/* Done; commit the GPADL id. */
+	*gpadl0 = gpadl;
+	if (bootverbose) {
+		vmbus_chan_printf(chan, "gpadl_conn(chan%u) succeeded\n",
+		    chan->ch_id);
+	}
+	return 0;
+}
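+
+/*
+ * Worked example of the message splitting above: a 1 MB bufring is
+ * 256 pages.  Assuming the limits defined in vmbus_reg.h are, say,
+ * GPADL_CONN_PGMAX = 26 and GPADL_SUBCONN_PGMAX = 28, the initial
+ * GPADL_CONN message carries 26 page numbers and the remaining 230
+ * go out in 9 GPADL_SUBCONN messages (8 full ones plus a final one
+ * with 6 pages); only the initial message's response is awaited.
+ */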
+
+static bool
+vmbus_chan_wait_revoke(const struct vmbus_channel *chan, bool can_sleep)
+{
+#define WAIT_COUNT	200	/* 200ms */
+
+	int i;
+
+	for (i = 0; i < WAIT_COUNT; ++i) {
+		if (vmbus_chan_is_revoked(chan))
+			return (true);
+		if (can_sleep)
+			pause("wchrev", 1);
+		else
+			DELAY(1000);
+	}
+	return (false);
+
+#undef WAIT_COUNT
+}
+
+/*
+ * Disconnect the GPADL from the target channel.
+ */
+int
+vmbus_chan_gpadl_disconnect(struct vmbus_channel *chan, uint32_t gpadl)
+{
+	struct vmbus_softc *sc = chan->ch_vmbus;
+	struct vmbus_msghc *mh;
+	struct vmbus_chanmsg_gpadl_disconn *req;
+	int error;
+
+	KASSERT(gpadl != 0, ("GPADL is zero"));
+
+	mh = vmbus_msghc_get(sc, sizeof(*req));
+	if (mh == NULL) {
+		vmbus_chan_printf(chan,
+		    "can not get msg hypercall for gpadl_disconn(chan%u)\n",
+		    chan->ch_id);
+		return (EBUSY);
+	}
+
+	req = vmbus_msghc_dataptr(mh);
+	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN;
+	req->chm_chanid = chan->ch_id;
+	req->chm_gpadl = gpadl;
+
+	error = vmbus_msghc_exec(sc, mh);
+	if (error) {
+		vmbus_msghc_put(sc, mh);
+
+		if (vmbus_chan_wait_revoke(chan, true)) {
+			/*
+			 * Error is benign; this channel is revoked,
+			 * so this GPADL will not be touched anymore.
+			 */
+			vmbus_chan_printf(chan,
+			    "gpadl_disconn(revoked chan%u) msg hypercall "
+			    "exec failed: %d\n", chan->ch_id, error);
+			return (0);
+		}
+		vmbus_chan_printf(chan,
+		    "gpadl_disconn(chan%u) msg hypercall exec failed: %d\n",
+		    chan->ch_id, error);
+		return (error);
+	}
+
+	vmbus_msghc_wait_result(sc, mh);
+	/* Discard result; no useful information */
+	vmbus_msghc_put(sc, mh);
+
+	return (0);
+}
+
+static void
+vmbus_chan_detach(struct vmbus_channel *chan)
+{
+	int refs;
+
+	KASSERT(chan->ch_refs > 0, ("chan%u: invalid refcnt %d",
+	    chan->ch_id, chan->ch_refs));
+	refs = atomic_fetchadd_int(&chan->ch_refs, -1);
+#ifdef INVARIANTS
+	if (VMBUS_CHAN_ISPRIMARY(chan)) {
+		KASSERT(refs == 1, ("chan%u: invalid refcnt %d for prichan",
+		    chan->ch_id, refs + 1));
+	}
+#endif
+	if (refs == 1) {
+		/*
+		 * Detach the target channel.
+		 */
+		if (bootverbose) {
+			vmbus_chan_printf(chan, "chan%u detached\n",
+			    chan->ch_id);
+		}
+		taskqueue_enqueue(chan->ch_mgmt_tq, &chan->ch_detach_task);
+	}
+}
+
+static void
+vmbus_chan_clrchmap_task(void *xchan, int pending __unused)
+{
+	struct vmbus_channel *chan = xchan;
+
+	chan->ch_vmbus->vmbus_chmap[chan->ch_id] = NULL;
+}
+
+static void
+vmbus_chan_clear_chmap(struct vmbus_channel *chan)
+{
+	struct task chmap_task;
+
+	TASK_INIT(&chmap_task, 0, vmbus_chan_clrchmap_task, chan);
+	vmbus_chan_run_task(chan, &chmap_task);
+}
+
+static void
+vmbus_chan_set_chmap(struct vmbus_channel *chan)
+{
+	__compiler_membar();
+	chan->ch_vmbus->vmbus_chmap[chan->ch_id] = chan;
+}
+
+static void
+vmbus_chan_poll_cancel_task(void *xchan, int pending __unused)
+{
+
+	vmbus_chan_poll_cancel_intq(xchan);
+}
+
+static void
+vmbus_chan_poll_cancel(struct vmbus_channel *chan)
+{
+	struct task poll_cancel;
+
+	TASK_INIT(&poll_cancel, 0, vmbus_chan_poll_cancel_task, chan);
+	vmbus_chan_run_task(chan, &poll_cancel);
+}
+
+static int
+vmbus_chan_close_internal(struct vmbus_channel *chan)
+{
+	struct vmbus_softc *sc = chan->ch_vmbus;
+	struct vmbus_msghc *mh;
+	struct vmbus_chanmsg_chclose *req;
+	uint32_t old_stflags;
+	int error;
+
+	/*
+	 * NOTE:
+	 * Sub-channels are closed upon their primary channel closing,
+	 * so they can be closed even before they are opened.
+	 */
+	for (;;) {
+		old_stflags = chan->ch_stflags;
+		if (atomic_cmpset_int(&chan->ch_stflags, old_stflags,
+		    old_stflags & ~VMBUS_CHAN_ST_OPENED))
+			break;
+	}
+	if ((old_stflags & VMBUS_CHAN_ST_OPENED) == 0) {
+		/* Not opened yet; done */
+		if (bootverbose) {
+			vmbus_chan_printf(chan, "chan%u not opened\n",
+			    chan->ch_id);
+		}
+		return (0);
+	}
+
+	/*
+	 * Free this channel's sysctl tree attached to its device's
+	 * sysctl tree.
+	 */
+	sysctl_ctx_free(&chan->ch_sysctl_ctx);
+
+	/*
+	 * Cancel polling, if it is enabled.
+	 */
+	vmbus_chan_poll_cancel(chan);
+
+	/*
+	 * NOTE:
+	 * Order is critical.  This channel _must_ be uninstalled first,
+	 * else the channel task may be enqueued by the IDT after it has
+	 * been drained.
+	 */
+	vmbus_chan_clear_chmap(chan);
+	taskqueue_drain(chan->ch_tq, &chan->ch_task);
+	chan->ch_tq = NULL;
+
+	/*
+	 * Close this channel.
+	 */
+	mh = vmbus_msghc_get(sc, sizeof(*req));
+	if (mh == NULL) {
+		vmbus_chan_printf(chan,
+		    "can not get msg hypercall for chclose(chan%u)\n",
+		    chan->ch_id);
+		error = ENXIO;
+		goto disconnect;
+	}
+
+	req = vmbus_msghc_dataptr(mh);
+	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE;
+	req->chm_chanid = chan->ch_id;
+
+	error = vmbus_msghc_exec_noresult(mh);
+	vmbus_msghc_put(sc, mh);
+
+	if (error) {
+		vmbus_chan_printf(chan,
+		    "chclose(chan%u) msg hypercall exec failed: %d\n",
+		    chan->ch_id, error);
+		goto disconnect;
+	}
+
+	if (bootverbose)
+		vmbus_chan_printf(chan, "chan%u closed\n", chan->ch_id);
+
+disconnect:
+	/*
+	 * Disconnect the TX+RX bufrings from this channel.
+	 */
+	if (chan->ch_bufring_gpadl != 0) {
+		int error1;
+
+		error1 = vmbus_chan_gpadl_disconnect(chan,
+		    chan->ch_bufring_gpadl);
+		if (error1) {
+			/*
+			 * XXX
+			 * The bufring GPADL is still connected; abandon
+			 * this bufring, instead of risking a mysterious
+			 * crash or trashed data later on.
+			 */
+			vmbus_chan_printf(chan, "chan%u bufring GPADL "
+			    "is still connected after close\n", chan->ch_id);
+			chan->ch_bufring = NULL;
+			/*
+			 * Give caller a hint that the bufring GPADL is
+			 * still connected.
+			 */
+			error = EISCONN;
+		}
+		chan->ch_bufring_gpadl = 0;
+	}
+
+	/*
+	 * Destroy the TX+RX bufrings.
+	 */
+	if (chan->ch_bufring != NULL) {
+		hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring);
+		chan->ch_bufring = NULL;
+	}
+	return (error);
+}
+
+int
+vmbus_chan_close_direct(struct vmbus_channel *chan)
+{
+	int error;
+
+#ifdef INVARIANTS
+	if (VMBUS_CHAN_ISPRIMARY(chan)) {
+		struct vmbus_channel *subchan;
+
+		/*
+		 * All sub-channels _must_ have been closed, or must
+		 * never have been opened at all.
+		 */
+		mtx_lock(&chan->ch_subchan_lock);
+		TAILQ_FOREACH(subchan, &chan->ch_subchans, ch_sublink) {
+			KASSERT(
+			   (subchan->ch_stflags & VMBUS_CHAN_ST_OPENED) == 0,
+			   ("chan%u: subchan%u is still opened",
+			    chan->ch_id, subchan->ch_subidx));
+		}
+		mtx_unlock(&chan->ch_subchan_lock);
+	}
+#endif
+
+	error = vmbus_chan_close_internal(chan);
+	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
+		/*
+		 * This sub-channel is referenced, when it is linked to
+		 * the primary channel; drop that reference now.
+		 */
+		vmbus_chan_detach(chan);
+	}
+	return (error);
+}
+
+/*
+ * The caller should make sure that all sub-channels have
+ * been added to 'chan' and that none of the to-be-closed
+ * channels are being opened.
+ */
+void
+vmbus_chan_close(struct vmbus_channel *chan)
+{
+	int subchan_cnt;
+
+	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
+		/*
+		 * Sub-channel is closed when its primary channel
+		 * is closed; done.
+		 */
+		return;
+	}
+
+	/*
+	 * Close all sub-channels, if any.
+	 */
+	subchan_cnt = chan->ch_subchan_cnt;
+	if (subchan_cnt > 0) {
+		struct vmbus_channel **subchan;
+		int i;
+
+		subchan = vmbus_subchan_get(chan, subchan_cnt);
+		for (i = 0; i < subchan_cnt; ++i) {
+			vmbus_chan_close_internal(subchan[i]);
+			/*
+			 * This sub-channel is referenced, when it is
+			 * linked to the primary channel; drop that
+			 * reference now.
+			 */
+			vmbus_chan_detach(subchan[i]);
+		}
+		vmbus_subchan_rel(subchan, subchan_cnt);
+	}
+
+	/* Then close the primary channel. */
+	vmbus_chan_close_internal(chan);
+}
+
+void
+vmbus_chan_intr_drain(struct vmbus_channel *chan)
+{
+
+	taskqueue_drain(chan->ch_tq, &chan->ch_task);
+}
+
+int
+vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags,
+    void *data, int dlen, uint64_t xactid)
+{
+	struct vmbus_chanpkt pkt;
+	int pktlen, pad_pktlen, hlen, error;
+	uint64_t pad = 0;
+	struct iovec iov[3];
+	boolean_t send_evt;
+
+	hlen = sizeof(pkt);
+	pktlen = hlen + dlen;
+	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);
+	KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr),
+	    ("invalid packet size %d", pad_pktlen));
+
+	pkt.cp_hdr.cph_type = type;
+	pkt.cp_hdr.cph_flags = flags;
+	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
+	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
+	pkt.cp_hdr.cph_xactid = xactid;
+
+	iov[0].iov_base = &pkt;
+	iov[0].iov_len = hlen;
+	iov[1].iov_base = data;
+	iov[1].iov_len = dlen;
+	iov[2].iov_base = &pad;
+	iov[2].iov_len = pad_pktlen - pktlen;
+
+	error = vmbus_txbr_write(&chan->ch_txbr, iov, 3, &send_evt);
+	if (!error && send_evt)
+		vmbus_chan_signal_tx(chan);
+	return error;
+}
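+
+/*
+ * Padding arithmetic, worked through: assuming the fixed header is
+ * 16 bytes and VMBUS_CHANPKT_TOTLEN() rounds up to an 8-byte
+ * boundary, sending dlen = 100 gives pktlen = 116 and
+ * pad_pktlen = 120, so iov[2] contributes 4 bytes of zero padding;
+ * the ring then consumes 120 + 8 (trailing offset) = 128 bytes.
+ */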
+
+int
+vmbus_chan_send_sglist(struct vmbus_channel *chan,
+    struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid)
+{
+	struct vmbus_chanpkt_sglist pkt;
+	int pktlen, pad_pktlen, hlen, error;
+	struct iovec iov[4];
+	boolean_t send_evt;
+	uint64_t pad = 0;
+
+	hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]);
+	pktlen = hlen + dlen;
+	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);
+	KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr),
+	    ("invalid packet size %d", pad_pktlen));
+
+	pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
+	pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
+	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
+	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
+	pkt.cp_hdr.cph_xactid = xactid;
+	pkt.cp_rsvd = 0;
+	pkt.cp_gpa_cnt = sglen;
+
+	iov[0].iov_base = &pkt;
+	iov[0].iov_len = sizeof(pkt);
+	iov[1].iov_base = sg;
+	iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
+	iov[2].iov_base = data;
+	iov[2].iov_len = dlen;
+	iov[3].iov_base = &pad;
+	iov[3].iov_len = pad_pktlen - pktlen;
+
+	error = vmbus_txbr_write(&chan->ch_txbr, iov, 4, &send_evt);
+	if (!error && send_evt)
+		vmbus_chan_signal_tx(chan);
+	return error;
+}
+
+int
+vmbus_chan_send_prplist(struct vmbus_channel *chan,
+    struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen,
+    uint64_t xactid)
+{
+	struct vmbus_chanpkt_prplist pkt;
+	int pktlen, pad_pktlen, hlen, error;
+	struct iovec iov[4];
+	boolean_t send_evt;
+	uint64_t pad = 0;
+
+	hlen = __offsetof(struct vmbus_chanpkt_prplist,
+	    cp_range[0].gpa_page[prp_cnt]);
+	pktlen = hlen + dlen;
+	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);
+	KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr),
+	    ("invalid packet size %d", pad_pktlen));
+
+	pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
+	pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
+	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
+	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
+	pkt.cp_hdr.cph_xactid = xactid;
+	pkt.cp_rsvd = 0;
+	pkt.cp_range_cnt = 1;
+
+	iov[0].iov_base = &pkt;
+	iov[0].iov_len = sizeof(pkt);
+	iov[1].iov_base = prp;
+	iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]);
+	iov[2].iov_base = data;
+	iov[2].iov_len = dlen;
+	iov[3].iov_base = &pad;
+	iov[3].iov_len = pad_pktlen - pktlen;
+
+	error = vmbus_txbr_write(&chan->ch_txbr, iov, 4, &send_evt);
+	if (!error && send_evt)
+		vmbus_chan_signal_tx(chan);
+	return error;
+}
+
+int
+vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen0,
+    uint64_t *xactid)
+{
+	struct vmbus_chanpkt_hdr pkt;
+	int error, dlen, hlen;
+
+	error = vmbus_rxbr_peek(&chan->ch_rxbr, &pkt, sizeof(pkt));
+	if (error)
+		return (error);
+
+	if (__predict_false(pkt.cph_hlen < VMBUS_CHANPKT_HLEN_MIN)) {
+		vmbus_chan_printf(chan, "invalid hlen %u\n", pkt.cph_hlen);
+		/* XXX this channel is actually dead. */
+		return (EIO);
+	}
+	if (__predict_false(pkt.cph_hlen > pkt.cph_tlen)) {
+		vmbus_chan_printf(chan, "invalid hlen %u and tlen %u\n",
+		    pkt.cph_hlen, pkt.cph_tlen);
+		/* XXX this channel is actually dead. */
+		return (EIO);
+	}
+
+	hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen);
+	dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen;
+
+	if (*dlen0 < dlen) {
+		/* Return the size of this packet's data. */
+		*dlen0 = dlen;
+		return (ENOBUFS);
+	}
+
+	*xactid = pkt.cph_xactid;
+	*dlen0 = dlen;
+
+	/* Skip packet header */
+	error = vmbus_rxbr_read(&chan->ch_rxbr, data, dlen, hlen);
+	KASSERT(!error, ("vmbus_rxbr_read failed"));
+
+	return (0);
+}
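+
+/*
+ * Caller sketch (illustrative): ENOBUFS reports the required data
+ * size through *dlen0, so callers can grow their buffer and retry;
+ * EAGAIN simply means the RX bufring is empty:
+ *
+ *	int dlen = buflen;
+ *	uint64_t xactid;
+ *
+ *	error = vmbus_chan_recv(chan, buf, &dlen, &xactid);
+ *	if (error == ENOBUFS) {
+ *		(grow buf to dlen bytes, then retry)
+ *	} else if (error == EAGAIN) {
+ *		(no packet pending)
+ *	}
+ */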
+
+int
+vmbus_chan_recv_pkt(struct vmbus_channel *chan,
+    struct vmbus_chanpkt_hdr *pkt, int *pktlen0)
+{
+	int error, pktlen, pkt_hlen;
+
+	pkt_hlen = sizeof(*pkt);
+	error = vmbus_rxbr_peek(&chan->ch_rxbr, pkt, pkt_hlen);
+	if (error)
+		return (error);
+
+	if (__predict_false(pkt->cph_hlen < VMBUS_CHANPKT_HLEN_MIN)) {
+		vmbus_chan_printf(chan, "invalid hlen %u\n", pkt->cph_hlen);
+		/* XXX this channel is actually dead. */
+		return (EIO);
+	}
+	if (__predict_false(pkt->cph_hlen > pkt->cph_tlen)) {
+		vmbus_chan_printf(chan, "invalid hlen %u and tlen %u\n",
+		    pkt->cph_hlen, pkt->cph_tlen);
+		/* XXX this channel is actually dead. */
+		return (EIO);
+	}
+
+	pktlen = VMBUS_CHANPKT_GETLEN(pkt->cph_tlen);
+	if (*pktlen0 < pktlen) {
+		/* Return the size of this packet. */
+		*pktlen0 = pktlen;
+		return (ENOBUFS);
+	}
+	*pktlen0 = pktlen;
+
+	/*
+	 * Skip the fixed-size packet header, which has been filled
+	 * by the above vmbus_rxbr_peek().
+	 */
+	error = vmbus_rxbr_read(&chan->ch_rxbr, pkt + 1,
+	    pktlen - pkt_hlen, pkt_hlen);
+	KASSERT(!error, ("vmbus_rxbr_read failed"));
+
+	return (0);
+}
+
+static void
+vmbus_chan_task(void *xchan, int pending __unused)
+{
+	struct vmbus_channel *chan = xchan;
+	vmbus_chan_callback_t cb = chan->ch_cb;
+	void *cbarg = chan->ch_cbarg;
+
+	KASSERT(chan->ch_poll_intvl == 0,
+	    ("chan%u: interrupted in polling mode", chan->ch_id));
+
+	/*
+	 * Optimize host-to-guest signaling by ensuring:
+	 * 1. While reading the channel, interrupts from the host
+	 *    are disabled.
+	 * 2. All posted messages from the host are processed before
+	 *    returning from this callback.
+	 * 3. Once we return, signaling from the host is re-enabled.
+	 *    We then check whether additional packets have become
+	 *    available to read; if so, the process repeats.
+	 *
+	 * NOTE: Interrupt has been disabled in the ISR.
+	 */
+	for (;;) {
+		uint32_t left;
+
+		cb(chan, cbarg);
+
+		left = vmbus_rxbr_intr_unmask(&chan->ch_rxbr);
+		if (left == 0) {
+			/* No more data in RX bufring; done */
+			break;
+		}
+		vmbus_rxbr_intr_mask(&chan->ch_rxbr);
+	}
+}
+
+static void
+vmbus_chan_task_nobatch(void *xchan, int pending __unused)
+{
+	struct vmbus_channel *chan = xchan;
+
+	KASSERT(chan->ch_poll_intvl == 0,
+	    ("chan%u: interrupted in polling mode", chan->ch_id));
+	chan->ch_cb(chan, chan->ch_cbarg);
+}
+
+static void
+vmbus_chan_poll_timeout(void *xchan)
+{
+	struct vmbus_channel *chan = xchan;
+
+	KASSERT(chan->ch_poll_intvl != 0,
+	    ("chan%u: polling timeout in interrupt mode", chan->ch_id));
+	taskqueue_enqueue(chan->ch_tq, &chan->ch_poll_task);
+}
+
+static void
+vmbus_chan_poll_task(void *xchan, int pending __unused)
+{
+	struct vmbus_channel *chan = xchan;
+
+	KASSERT(chan->ch_poll_intvl != 0,
+	    ("chan%u: polling in interrupt mode", chan->ch_id));
+	callout_reset_sbt_curcpu(&chan->ch_poll_timeo, chan->ch_poll_intvl, 0,
+	    vmbus_chan_poll_timeout, chan, chan->ch_poll_flags);
+	chan->ch_cb(chan, chan->ch_cbarg);
+}
+
+static void
+vmbus_chan_pollcfg_task(void *xarg, int pending __unused)
+{
+	const struct vmbus_chan_pollarg *arg = xarg;
+	struct vmbus_channel *chan = arg->poll_chan;
+	sbintime_t intvl;
+	int poll_flags;
+
+	/*
+	 * Save polling interval.
+	 */
+	intvl = SBT_1S / arg->poll_hz;
+	if (intvl == 0)
+		intvl = 1;
+	if (intvl == chan->ch_poll_intvl) {
+		/* Nothing changes; done */
+		return;
+	}
+	chan->ch_poll_intvl = intvl;
+
+	/* Adjust callout flags. */
+	poll_flags = C_DIRECT_EXEC;
+	if (arg->poll_hz <= hz)
+		poll_flags |= C_HARDCLOCK;
+	chan->ch_poll_flags = poll_flags;
+
+	/*
+	 * Disconnect this channel from the channel map to make sure that
+	 * the RX bufring interrupt enabling bit cannot be touched, and
+	 * the ISR cannot enqueue this channel task anymore.  Only then
+	 * disable interrupts from the RX bufring (the TX bufring does
+	 * not generate interrupts to the VM).
+	 *
+	 * NOTE: order is critical.
+	 */
+	chan->ch_vmbus->vmbus_chmap[chan->ch_id] = NULL;
+	__compiler_membar();
+	vmbus_rxbr_intr_mask(&chan->ch_rxbr);
+
+	/*
+	 * NOTE:
+	 * At this point, this channel task will not be enqueued by
+	 * the ISR anymore; time to cancel the pending one.
+	 */
+	taskqueue_cancel(chan->ch_tq, &chan->ch_task, NULL);
+
+	/* Kick start! */
+	taskqueue_enqueue(chan->ch_tq, &chan->ch_poll_task);
+}
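+
+/*
+ * E.g. polling at poll_hz = 500 on a kernel with hz = 1000 yields
+ * intvl = SBT_1S / 500 (2ms); since 500 <= hz, C_HARDCLOCK is set
+ * and the callout can be aggregated with hardclock processing.
+ * Polling at 10000 Hz exceeds hz, so only C_DIRECT_EXEC is used and
+ * the handler runs directly from the callout interrupt.
+ */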
+
+static bool
+vmbus_chan_poll_cancel_intq(struct vmbus_channel *chan)
+{
+
+	if (chan->ch_poll_intvl == 0) {
+		/* Not enabled. */
+		return (false);
+	}
+
+	/*
+	 * Stop polling callout, so that channel polling task
+	 * will not be enqueued anymore.
+	 */
+	callout_drain(&chan->ch_poll_timeo);
+
+	/*
+	 * Disable polling by resetting polling interval.
+	 *
+	 * NOTE:
+	 * The polling interval MUST be reset only after the
+	 * callout has been drained, mainly to keep the related
+	 * assertions valid.
+	 */
+	chan->ch_poll_intvl = 0;
+
+	/*
+	 * NOTE:
+	 * At this point, this channel polling task will not be
+	 * enqueued by the callout anymore; time to cancel the
+	 * pending one.
+	 */
+	taskqueue_cancel(chan->ch_tq, &chan->ch_poll_task, NULL);
+
+	/* Polling was enabled. */
+	return (true);
+}
+
+static void
+vmbus_chan_polldis_task(void *xchan, int pending __unused)
+{
+	struct vmbus_channel *chan = xchan;
+
+	if (!vmbus_chan_poll_cancel_intq(chan)) {
+		/* Already disabled; done. */
+		return;
+	}
+
+	/*
+	 * Plug this channel back to the channel map and unmask
+	 * the RX bufring interrupt.
+	 */
+	chan->ch_vmbus->vmbus_chmap[chan->ch_id] = chan;
+	__compiler_membar();
+	vmbus_rxbr_intr_unmask(&chan->ch_rxbr);
+
+	/*
+	 * Kick start the interrupt task, just in case unmasking
+	 * interrupt races ISR.
+	 */
+	taskqueue_enqueue(chan->ch_tq, &chan->ch_task);
+}
+
+static __inline void
+vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
+    int flag_cnt)
+{
+	int f;
+
+	for (f = 0; f < flag_cnt; ++f) {
+		uint32_t chid_base;
+		u_long flags;
+		int chid_ofs;
+
+		if (event_flags[f] == 0)
+			continue;
+
+		flags = atomic_swap_long(&event_flags[f], 0);
+		chid_base = f << VMBUS_EVTFLAG_SHIFT;
+
+		while ((chid_ofs = ffsl(flags)) != 0) {
+			struct vmbus_channel *chan;
+
+			--chid_ofs; /* NOTE: ffsl is 1-based */
+			flags &= ~(1UL << chid_ofs);
+
+			chan = sc->vmbus_chmap[chid_base + chid_ofs];
+			if (__predict_false(chan == NULL)) {
+				/* Channel is closed. */
+				continue;
+			}
+			__compiler_membar();
+
+			if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
+				vmbus_rxbr_intr_mask(&chan->ch_rxbr);
+			taskqueue_enqueue(chan->ch_tq, &chan->ch_task);
+		}
+	}
+}
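+
+/*
+ * Decoding example: on LP64 (VMBUS_EVTFLAG_SHIFT == 6, i.e. 64
+ * channels per u_long) event_flags[1] == 0x5 means bits 0 and 2 are
+ * pending, so the tasks of chan64 and chan66 are enqueued; ffsl()
+ * returns 1-based bit positions, hence the decrement above.
+ */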
+
+void
+vmbus_event_proc(struct vmbus_softc *sc, int cpu)
+{
+	struct vmbus_evtflags *eventf;
+
+	/*
+	 * On hosts running Win8 or above, the event page can be checked
+	 * directly to get the id of the channel that has a pending interrupt.
+	 */
+	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
+	vmbus_event_flags_proc(sc, eventf->evt_flags,
+	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
+}
+
+void
+vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
+{
+	struct vmbus_evtflags *eventf;
+
+	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
+	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
+		vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
+		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
+	}
+}
+
+static void
+vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc,
+    const struct vmbus_channel *chan)
+{
+	volatile int *flag_cnt_ptr;
+	int flag_cnt;
+
+	flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1;
+	flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid);
+
+	for (;;) {
+		int old_flag_cnt;
+
+		old_flag_cnt = *flag_cnt_ptr;
+		if (old_flag_cnt >= flag_cnt)
+			break;
+		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
+			if (bootverbose) {
+				vmbus_chan_printf(chan,
+				    "chan%u update cpu%d flag_cnt to %d\n",
+				    chan->ch_id, chan->ch_cpuid, flag_cnt);
+			}
+			break;
+		}
+	}
+}
+
+static struct vmbus_channel *
+vmbus_chan_alloc(struct vmbus_softc *sc)
+{
+	struct vmbus_channel *chan;
+
+	chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO);
+
+	chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
+	    HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param),
+	    &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
+	if (chan->ch_monprm == NULL) {
+		device_printf(sc->vmbus_dev, "monprm alloc failed\n");
+		free(chan, M_DEVBUF);
+		return NULL;
+	}
+
+	chan->ch_refs = 1;
+	chan->ch_vmbus = sc;
+	mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF);
+	sx_init(&chan->ch_orphan_lock, "vmbus chorphan");
+	TAILQ_INIT(&chan->ch_subchans);
+	vmbus_rxbr_init(&chan->ch_rxbr);
+	vmbus_txbr_init(&chan->ch_txbr);
+
+	TASK_INIT(&chan->ch_poll_task, 0, vmbus_chan_poll_task, chan);
+	callout_init(&chan->ch_poll_timeo, 1);
+
+	return chan;
+}
+
+static void
+vmbus_chan_free(struct vmbus_channel *chan)
+{
+
+	KASSERT(TAILQ_EMPTY(&chan->ch_subchans) && chan->ch_subchan_cnt == 0,
+	    ("still owns sub-channels"));
+	KASSERT((chan->ch_stflags &
+	    (VMBUS_CHAN_ST_OPENED |
+	     VMBUS_CHAN_ST_ONPRIL |
+	     VMBUS_CHAN_ST_ONSUBL |
+	     VMBUS_CHAN_ST_ONLIST)) == 0, ("free busy channel"));
+	KASSERT(chan->ch_orphan_xact == NULL,
+	    ("still has orphan xact installed"));
+	KASSERT(chan->ch_refs == 0, ("chan%u: invalid refcnt %d",
+	    chan->ch_id, chan->ch_refs));
+	KASSERT(chan->ch_poll_intvl == 0, ("chan%u: polling is activated",
+	    chan->ch_id));
+
+	hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm);
+	mtx_destroy(&chan->ch_subchan_lock);
+	sx_destroy(&chan->ch_orphan_lock);
+	vmbus_rxbr_deinit(&chan->ch_rxbr);
+	vmbus_txbr_deinit(&chan->ch_txbr);
+	free(chan, M_DEVBUF);
+}
+
+static int
+vmbus_chan_add(struct vmbus_channel *newchan)
+{
+	struct vmbus_softc *sc = newchan->ch_vmbus;
+	struct vmbus_channel *prichan;
+
+	if (newchan->ch_id == 0) {
+		/*
+		 * XXX
+		 * Chan0 is never processed and should never have been
+		 * offered; skip it.
+		 */
+		device_printf(sc->vmbus_dev, "got chan0 offer, discard\n");
+		return EINVAL;
+	} else if (newchan->ch_id >= VMBUS_CHAN_MAX) {
+		device_printf(sc->vmbus_dev, "invalid chan%u offer\n",
+		    newchan->ch_id);
+		return EINVAL;
+	}
+
+	mtx_lock(&sc->vmbus_prichan_lock);
+	TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) {
+		/*
+		 * Sub-channel will have the same type GUID and instance
+		 * GUID as its primary channel.
+		 */
+		if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type,
+		    sizeof(struct hyperv_guid)) == 0 &&
+		    memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst,
+		    sizeof(struct hyperv_guid)) == 0)
+			break;
+	}
+	if (VMBUS_CHAN_ISPRIMARY(newchan)) {
+		if (prichan == NULL) {
+			/* Install the new primary channel */
+			vmbus_chan_ins_prilist(sc, newchan);
+			mtx_unlock(&sc->vmbus_prichan_lock);
+			goto done;
+		} else {
+			mtx_unlock(&sc->vmbus_prichan_lock);
+			device_printf(sc->vmbus_dev,
+			    "duplicated primary chan%u\n", newchan->ch_id);
+			return EINVAL;
+		}
+	} else { /* Sub-channel */
+		if (prichan == NULL) {
+			mtx_unlock(&sc->vmbus_prichan_lock);
+			device_printf(sc->vmbus_dev,
+			    "no primary chan for chan%u\n", newchan->ch_id);
+			return EINVAL;
+		}
+		/*
+		 * Found the primary channel for this sub-channel;
+		 * move on.
+		 *
+		 * XXX refcnt prichan
+		 */
+	}
+	mtx_unlock(&sc->vmbus_prichan_lock);
+
+	/*
+	 * This is a sub-channel; link it with the primary channel.
+	 */
+	KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan),
+	    ("new channel is not sub-channel"));
+	KASSERT(prichan != NULL, ("no primary channel"));
+
+	/*
+	 * Reference count this sub-channel; it will be dereferenced
+	 * when this sub-channel is closed.
+	 */
+	KASSERT(newchan->ch_refs == 1, ("chan%u: invalid refcnt %d",
+	    newchan->ch_id, newchan->ch_refs));
+	atomic_add_int(&newchan->ch_refs, 1);
+
+	newchan->ch_prichan = prichan;
+	newchan->ch_dev = prichan->ch_dev;
+
+	mtx_lock(&prichan->ch_subchan_lock);
+	vmbus_chan_ins_sublist(prichan, newchan);
+	mtx_unlock(&prichan->ch_subchan_lock);
+	/*
+	 * Notify anyone that is interested in this sub-channel,
+	 * after this sub-channel is setup.
+	 */
+	wakeup(prichan);
+done:
+	/*
+	 * Hook this channel up for later revocation.
+	 */
+	mtx_lock(&sc->vmbus_chan_lock);
+	vmbus_chan_ins_list(sc, newchan);
+	mtx_unlock(&sc->vmbus_chan_lock);
+
+	if (bootverbose) {
+		vmbus_chan_printf(newchan, "chan%u subidx%u offer\n",
+		    newchan->ch_id, newchan->ch_subidx);
+	}
+
+	/* Select default cpu for this channel. */
+	vmbus_chan_cpu_default(newchan);
+
+	return 0;
+}
+
+void
+vmbus_chan_cpu_set(struct vmbus_channel *chan, int cpu)
+{
+	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
+
+	if (chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WS2008 ||
+	    chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WIN7) {
+		/* Only cpu0 is supported */
+		cpu = 0;
+	}
+
+	chan->ch_cpuid = cpu;
+	chan->ch_vcpuid = VMBUS_PCPU_GET(chan->ch_vmbus, vcpuid, cpu);
+
+	if (bootverbose) {
+		vmbus_chan_printf(chan,
+		    "chan%u assigned to cpu%u [vcpu%u]\n",
+		    chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid);
+	}
+}
+
+void
+vmbus_chan_cpu_rr(struct vmbus_channel *chan)
+{
+	static uint32_t vmbus_chan_nextcpu;
+	int cpu;
+
+	cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus;
+	vmbus_chan_cpu_set(chan, cpu);
+}
+
+static void
+vmbus_chan_cpu_default(struct vmbus_channel *chan)
+{
+	/*
+	 * By default, pin the channel to cpu0.  Devices having
+	 * special channel-cpu mapping requirements should call
+	 * vmbus_chan_cpu_{set,rr}().
+	 */
+	vmbus_chan_cpu_set(chan, 0);
+}
+
+static void
+vmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
+    const struct vmbus_message *msg)
+{
+	const struct vmbus_chanmsg_choffer *offer;
+	struct vmbus_channel *chan;
+	task_fn_t *detach_fn, *attach_fn;
+	int error;
+
+	offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data;
+
+	chan = vmbus_chan_alloc(sc);
+	if (chan == NULL) {
+		device_printf(sc->vmbus_dev, "allocate chan%u failed\n",
+		    offer->chm_chanid);
+		return;
+	}
+
+	chan->ch_id = offer->chm_chanid;
+	chan->ch_subidx = offer->chm_subidx;
+	chan->ch_guid_type = offer->chm_chtype;
+	chan->ch_guid_inst = offer->chm_chinst;
+
+	/* Batch reading is on by default */
+	chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;
+
+	chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT;
+	if (sc->vmbus_version != VMBUS_VERSION_WS2008)
+		chan->ch_monprm->mp_connid = offer->chm_connid;
+
+	if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
+		int trig_idx;
+
+		/*
+		 * Set up the MNF (Monitor Notification Facility) trigger.
+		 */
+		chan->ch_txflags |= VMBUS_CHAN_TXF_HASMNF;
+
+		trig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN;
+		if (trig_idx >= VMBUS_MONTRIGS_MAX)
+			panic("invalid monitor trigger %u", offer->chm_montrig);
+		chan->ch_montrig =
+		    &sc->vmbus_mnf2->mnf_trigs[trig_idx].mt_pending;
+
+		chan->ch_montrig_mask =
+		    1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
+	}
+
+	/*
+	 * Set up the event flag.
+	 */
+	chan->ch_evtflag =
+	    &sc->vmbus_tx_evtflags[chan->ch_id >> VMBUS_EVTFLAG_SHIFT];
+	chan->ch_evtflag_mask = 1UL << (chan->ch_id & VMBUS_EVTFLAG_MASK);
+
+	/*
+	 * Set up the attach and detach tasks.
+	 */
+	if (VMBUS_CHAN_ISPRIMARY(chan)) {
+		chan->ch_mgmt_tq = sc->vmbus_devtq;
+		attach_fn = vmbus_prichan_attach_task;
+		detach_fn = vmbus_prichan_detach_task;
+	} else {
+		chan->ch_mgmt_tq = sc->vmbus_subchtq;
+		attach_fn = vmbus_subchan_attach_task;
+		detach_fn = vmbus_subchan_detach_task;
+	}
+	TASK_INIT(&chan->ch_attach_task, 0, attach_fn, chan);
+	TASK_INIT(&chan->ch_detach_task, 0, detach_fn, chan);
+
+	error = vmbus_chan_add(chan);
+	if (error) {
+		device_printf(sc->vmbus_dev, "add chan%u failed: %d\n",
+		    chan->ch_id, error);
+		atomic_subtract_int(&chan->ch_refs, 1);
+		vmbus_chan_free(chan);
+		return;
+	}
+	taskqueue_enqueue(chan->ch_mgmt_tq, &chan->ch_attach_task);
+}
+
+static void
+vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc,
+    const struct vmbus_message *msg)
+{
+	const struct vmbus_chanmsg_chrescind *note;
+	struct vmbus_channel *chan;
+
+	note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data;
+	if (note->chm_chanid > VMBUS_CHAN_MAX) {
+		device_printf(sc->vmbus_dev, "invalid revoked chan%u\n",
+		    note->chm_chanid);
+		return;
+	}
+
+	/*
+	 * Find and remove the target channel from the channel list.
+	 */
+	mtx_lock(&sc->vmbus_chan_lock);
+	TAILQ_FOREACH(chan, &sc->vmbus_chans, ch_link) {
+		if (chan->ch_id == note->chm_chanid)
+			break;
+	}
+	if (chan == NULL) {
+		mtx_unlock(&sc->vmbus_chan_lock);
+		device_printf(sc->vmbus_dev, "chan%u is not offered\n",
+		    note->chm_chanid);
+		return;
+	}
+	vmbus_chan_rem_list(sc, chan);
+	mtx_unlock(&sc->vmbus_chan_lock);
+
+	if (VMBUS_CHAN_ISPRIMARY(chan)) {
+		/*
+		 * The target channel is a primary channel; remove the
+		 * target channel from the primary channel list now,
+		 * instead of later, so that it will not be found by
+		 * other sub-channel offers, which are processed in
+		 * this thread.
+		 */
+		mtx_lock(&sc->vmbus_prichan_lock);
+		vmbus_chan_rem_prilist(sc, chan);
+		mtx_unlock(&sc->vmbus_prichan_lock);
+	}
+
+	/*
+	 * NOTE:
+	 * The following processing order is critical:
+	 * Set the REVOKED state flag before orphaning the installed xact.
+	 */
+
+	if (atomic_testandset_int(&chan->ch_stflags,
+	    VMBUS_CHAN_ST_REVOKED_SHIFT))
+		panic("channel has already been revoked");
+
+	sx_xlock(&chan->ch_orphan_lock);
+	if (chan->ch_orphan_xact != NULL)
+		vmbus_xact_ctx_orphan(chan->ch_orphan_xact);
+	sx_xunlock(&chan->ch_orphan_lock);
+
+	if (bootverbose)
+		vmbus_chan_printf(chan, "chan%u revoked\n", note->chm_chanid);
+	vmbus_chan_detach(chan);
+}
+
+static int
+vmbus_chan_release(struct vmbus_channel *chan)
+{
+	struct vmbus_softc *sc = chan->ch_vmbus;
+	struct vmbus_chanmsg_chfree *req;
+	struct vmbus_msghc *mh;
+	int error;
+
+	mh = vmbus_msghc_get(sc, sizeof(*req));
+	if (mh == NULL) {
+		vmbus_chan_printf(chan,
+		    "can not get msg hypercall for chfree(chan%u)\n",
+		    chan->ch_id);
+		return (ENXIO);
+	}
+
+	req = vmbus_msghc_dataptr(mh);
+	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE;
+	req->chm_chanid = chan->ch_id;
+
+	error = vmbus_msghc_exec_noresult(mh);
+	vmbus_msghc_put(sc, mh);
+
+	if (error) {
+		vmbus_chan_printf(chan,
+		    "chfree(chan%u) msg hypercall exec failed: %d\n",
+		    chan->ch_id, error);
+	} else {
+		if (bootverbose)
+			vmbus_chan_printf(chan, "chan%u freed\n", chan->ch_id);
+	}
+	return (error);
+}
+
+static void
+vmbus_prichan_detach_task(void *xchan, int pending __unused)
+{
+	struct vmbus_channel *chan = xchan;
+
+	KASSERT(VMBUS_CHAN_ISPRIMARY(chan),
+	    ("chan%u is not primary channel", chan->ch_id));
+
+	/* Delete and detach the device associated with this channel. */
+	vmbus_delete_child(chan);
+
+	/* Release this channel (back to vmbus). */
+	vmbus_chan_release(chan);
+
+	/* Free this channel's resources. */
+	vmbus_chan_free(chan);
+}
+
+static void
+vmbus_subchan_detach_task(void *xchan, int pending __unused)
+{
+	struct vmbus_channel *chan = xchan;
+	struct vmbus_channel *pri_chan = chan->ch_prichan;
+
+	KASSERT(!VMBUS_CHAN_ISPRIMARY(chan),
+	    ("chan%u is primary channel", chan->ch_id));
+
+	/* Release this channel (back to vmbus). */
+	vmbus_chan_release(chan);
+
+	/* Unlink from its primary channel's sub-channel list. */
+	mtx_lock(&pri_chan->ch_subchan_lock);
+	vmbus_chan_rem_sublist(pri_chan, chan);
+	mtx_unlock(&pri_chan->ch_subchan_lock);
+	/* Notify anyone that is waiting for this sub-channel to vanish. */
+	wakeup(pri_chan);
+
+	/* Free this channel's resources. */
+	vmbus_chan_free(chan);
+}
+
+static void
+vmbus_prichan_attach_task(void *xchan, int pending __unused)
+{
+
+	/*
+	 * Add device for this primary channel.
+	 */
+	vmbus_add_child(xchan);
+}
+
+static void
+vmbus_subchan_attach_task(void *xchan __unused, int pending __unused)
+{
+
+	/* Nothing */
+}
+
+void
+vmbus_chan_destroy_all(struct vmbus_softc *sc)
+{
+
+	/*
+	 * Detach all devices and destroy the corresponding primary
+	 * channels.
+	 */
+	for (;;) {
+		struct vmbus_channel *chan;
+
+		mtx_lock(&sc->vmbus_chan_lock);
+		TAILQ_FOREACH(chan, &sc->vmbus_chans, ch_link) {
+			if (VMBUS_CHAN_ISPRIMARY(chan))
+				break;
+		}
+		if (chan == NULL) {
+			/* No more primary channels; done. */
+			mtx_unlock(&sc->vmbus_chan_lock);
+			break;
+		}
+		vmbus_chan_rem_list(sc, chan);
+		mtx_unlock(&sc->vmbus_chan_lock);
+
+		mtx_lock(&sc->vmbus_prichan_lock);
+		vmbus_chan_rem_prilist(sc, chan);
+		mtx_unlock(&sc->vmbus_prichan_lock);
+
+		taskqueue_enqueue(chan->ch_mgmt_tq, &chan->ch_detach_task);
+	}
+}
+
+struct vmbus_channel **
+vmbus_subchan_get(struct vmbus_channel *pri_chan, int subchan_cnt)
+{
+	struct vmbus_channel **ret, *chan;
+	int i;
+
+	KASSERT(subchan_cnt > 0, ("invalid sub-channel count %d", subchan_cnt));
+
+	ret = malloc(subchan_cnt * sizeof(struct vmbus_channel *), M_TEMP,
+	    M_WAITOK);
+
+	mtx_lock(&pri_chan->ch_subchan_lock);
+
+	while (pri_chan->ch_subchan_cnt < subchan_cnt)
+		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0);
+
+	i = 0;
+	TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) {
+		/* TODO: refcnt chan */
+		ret[i] = chan;
+
+		++i;
+		if (i == subchan_cnt)
+			break;
+	}
+	KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
+	    i, subchan_cnt));
+
+	mtx_unlock(&pri_chan->ch_subchan_lock);
+
+	return ret;
+}
+
+void
+vmbus_subchan_rel(struct vmbus_channel **subchan, int subchan_cnt __unused)
+{
+
+	free(subchan, M_TEMP);
+}
+
+void
+vmbus_subchan_drain(struct vmbus_channel *pri_chan)
+{
+	mtx_lock(&pri_chan->ch_subchan_lock);
+	while (pri_chan->ch_subchan_cnt > 0)
+		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0);
+	mtx_unlock(&pri_chan->ch_subchan_lock);
+}
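+
+/*
+ * Typical sub-channel lifecycle from a driver's point of view (sketch;
+ * "pri" is the opened primary channel and "nsub" sub-channels were
+ * requested from the host):
+ *
+ *	subs = vmbus_subchan_get(pri, nsub);	- blocks until offered
+ *	... open and use subs[0..nsub-1] ...
+ *	vmbus_subchan_rel(subs, nsub);		- frees the array only
+ *	... close the sub-channels ...
+ *	vmbus_subchan_drain(pri);		- wait until all vanish
+ */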
+
+void
+vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
+{
+	vmbus_chanmsg_proc_t msg_proc;
+	uint32_t msg_type;
+
+	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
+	KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX,
+	    ("invalid message type %u", msg_type));
+
+	msg_proc = vmbus_chan_msgprocs[msg_type];
+	if (msg_proc != NULL)
+		msg_proc(sc, msg);
+}
+
+void
+vmbus_chan_set_readbatch(struct vmbus_channel *chan, bool on)
+{
+	if (!on)
+		chan->ch_flags &= ~VMBUS_CHAN_FLAG_BATCHREAD;
+	else
+		chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;
+}
+
+uint32_t
+vmbus_chan_id(const struct vmbus_channel *chan)
+{
+	return chan->ch_id;
+}
+
+uint32_t
+vmbus_chan_subidx(const struct vmbus_channel *chan)
+{
+	return chan->ch_subidx;
+}
+
+bool
+vmbus_chan_is_primary(const struct vmbus_channel *chan)
+{
+	if (VMBUS_CHAN_ISPRIMARY(chan))
+		return true;
+	else
+		return false;
+}
+
+const struct hyperv_guid *
+vmbus_chan_guid_inst(const struct vmbus_channel *chan)
+{
+	return &chan->ch_guid_inst;
+}
+
+int
+vmbus_chan_prplist_nelem(int br_size, int prpcnt_max, int dlen_max)
+{
+	int elem_size;
+
+	elem_size = __offsetof(struct vmbus_chanpkt_prplist,
+	    cp_range[0].gpa_page[prpcnt_max]);
+	elem_size += dlen_max;
+	elem_size = VMBUS_CHANPKT_TOTLEN(elem_size);
+
+	return (vmbus_br_nelem(br_size, elem_size));
+}
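+
+/*
+ * Worked example for the computation above (a sketch under the usual
+ * 8-byte VMBUS_CHANPKT_SIZE_ALIGN assumption): with prpcnt_max = 2 and
+ * dlen_max = 100, elem_size covers the prplist header plus two 64-bit
+ * GPA page numbers plus 100 bytes of data, rounded up to the packet
+ * alignment; vmbus_br_nelem() then derives how many such elements fit
+ * in a bufring of br_size bytes.
+ */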
+
+bool
+vmbus_chan_tx_empty(const struct vmbus_channel *chan)
+{
+
+	return (vmbus_txbr_empty(&chan->ch_txbr));
+}
+
+bool
+vmbus_chan_rx_empty(const struct vmbus_channel *chan)
+{
+
+	return (vmbus_rxbr_empty(&chan->ch_rxbr));
+}
+
+static int
+vmbus_chan_printf(const struct vmbus_channel *chan, const char *fmt, ...)
+{
+	va_list ap;
+	device_t dev;
+	int retval;
+
+	if (chan->ch_dev == NULL || !device_is_alive(chan->ch_dev))
+		dev = chan->ch_vmbus->vmbus_dev;
+	else
+		dev = chan->ch_dev;
+
+	retval = device_print_prettyname(dev);
+	va_start(ap, fmt);
+	retval += vprintf(fmt, ap);
+	va_end(ap);
+
+	return (retval);
+}
+
+void
+vmbus_chan_run_task(struct vmbus_channel *chan, struct task *task)
+{
+
+	taskqueue_enqueue(chan->ch_tq, task);
+	taskqueue_drain(chan->ch_tq, task);
+}
+
+struct taskqueue *
+vmbus_chan_mgmt_tq(const struct vmbus_channel *chan)
+{
+
+	return (chan->ch_mgmt_tq);
+}
+
+bool
+vmbus_chan_is_revoked(const struct vmbus_channel *chan)
+{
+
+	if (chan->ch_stflags & VMBUS_CHAN_ST_REVOKED)
+		return (true);
+	return (false);
+}
+
+void
+vmbus_chan_set_orphan(struct vmbus_channel *chan, struct vmbus_xact_ctx *xact)
+{
+
+	sx_xlock(&chan->ch_orphan_lock);
+	chan->ch_orphan_xact = xact;
+	sx_xunlock(&chan->ch_orphan_lock);
+}
+
+void
+vmbus_chan_unset_orphan(struct vmbus_channel *chan)
+{
+
+	sx_xlock(&chan->ch_orphan_lock);
+	chan->ch_orphan_xact = NULL;
+	sx_xunlock(&chan->ch_orphan_lock);
+}
+
+const void *
+vmbus_chan_xact_wait(const struct vmbus_channel *chan,
+    struct vmbus_xact *xact, size_t *resp_len, bool can_sleep)
+{
+	const void *ret;
+
+	if (can_sleep)
+		ret = vmbus_xact_wait(xact, resp_len);
+	else
+		ret = vmbus_xact_busywait(xact, resp_len);
+	if (vmbus_chan_is_revoked(chan)) {
+		/*
+		 * This xact was probably interrupted, and the
+		 * interruption can race the reply reception, so we
+		 * have to make sure that there is nothing left on
+		 * the RX bufring, i.e. this xact will not be
+		 * touched once this function returns.
+		 *
+		 * Since the hypervisor will not put more data onto
+		 * the RX bufring once the channel is revoked, the
+		 * following loop will terminate once all data have
+		 * been drained by the driver's channel callback.
+		 */
+		while (!vmbus_chan_rx_empty(chan)) {
+			if (can_sleep)
+				pause("chxact", 1);
+			else
+				DELAY(1000);
+		}
+	}
+	return (ret);
+}
+
+void
+vmbus_chan_poll_enable(struct vmbus_channel *chan, u_int pollhz)
+{
+	struct vmbus_chan_pollarg arg;
+	struct task poll_cfg;
+
+	KASSERT(chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD,
+	    ("enable polling on non-batch chan%u", chan->ch_id));
+	KASSERT(pollhz >= VMBUS_CHAN_POLLHZ_MIN &&
+	    pollhz <= VMBUS_CHAN_POLLHZ_MAX, ("invalid pollhz %u", pollhz));
+
+	arg.poll_chan = chan;
+	arg.poll_hz = pollhz;
+	TASK_INIT(&poll_cfg, 0, vmbus_chan_pollcfg_task, &arg);
+	vmbus_chan_run_task(chan, &poll_cfg);
+}
+
+void
+vmbus_chan_poll_disable(struct vmbus_channel *chan)
+{
+	struct task poll_dis;
+
+	KASSERT(chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD,
+	    ("disable polling on non-batch chan%u", chan->ch_id));
+
+	TASK_INIT(&poll_dis, 0, vmbus_chan_polldis_task, chan);
+	vmbus_chan_run_task(chan, &poll_dis);
+}
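+
+/*
+ * Polling sketch (illustrative): a latency-sensitive driver can switch
+ * a batch-read channel to timer-driven polling and back with
+ *
+ *	vmbus_chan_poll_enable(chan, pollhz);
+ *	...
+ *	vmbus_chan_poll_disable(chan);
+ *
+ * Both schedule and then drain a task on the channel's taskqueue (see
+ * vmbus_chan_run_task()), so the caller sleeps until the
+ * reconfiguration has completed.
+ */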


Property changes on: trunk/sys/dev/hyperv/vmbus/vmbus_chan.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/vmbus_chanvar.h
===================================================================
--- trunk/sys/dev/hyperv/vmbus/vmbus_chanvar.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/vmbus_chanvar.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,190 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus_chanvar.h 310802 2016-12-30 02:18:34Z sephe $
+ */
+
+#ifndef _VMBUS_CHANVAR_H_
+#define _VMBUS_CHANVAR_H_
+
+#include <sys/param.h>
+#include <sys/callout.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/sx.h>
+#include <sys/taskqueue.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/hyperv_busdma.h>
+#include <dev/hyperv/include/vmbus.h>
+#include <dev/hyperv/vmbus/vmbus_brvar.h>
+
+struct vmbus_channel {
+	/*
+	 * NOTE:
+	 * Fields before ch_txbr are only accessed on this channel's
+	 * target CPU.
+	 */
+	uint32_t			ch_flags;	/* VMBUS_CHAN_FLAG_ */
+	int				ch_poll_flags;	/* callout flags */
+
+	/*
+	 * RX bufring; immediately following ch_txbr.
+	 */
+	struct vmbus_rxbr		ch_rxbr;
+
+	struct taskqueue		*ch_tq;
+	struct task			ch_task;
+	struct task			ch_poll_task;
+	sbintime_t			ch_poll_intvl;
+	struct callout			ch_poll_timeo;
+	vmbus_chan_callback_t		ch_cb;
+	void				*ch_cbarg;
+
+	/*
+	 * TX bufring; at the beginning of ch_bufring.
+	 *
+	 * NOTE:
+	 * Put the TX bufring and the following MNF/evtflag fields on
+	 * a new cacheline, since they are accessed on all CPUs by
+	 * locking ch_txbr first.
+	 *
+	 * XXX
+	 * The TX bufring and the following MNF/evtflag fields do
+	 * _not_ fit in one 64B cacheline.
+	 */
+	struct vmbus_txbr		ch_txbr __aligned(CACHE_LINE_SIZE);
+	uint32_t			ch_txflags;	/* VMBUS_CHAN_TXF_ */
+
+	/*
+	 * These are based on the vmbus_chanmsg_choffer.chm_montrig.
+	 * Save them here for easy access.
+	 */
+	uint32_t			ch_montrig_mask;/* MNF trig mask */
+	volatile uint32_t		*ch_montrig;	/* MNF trigger loc. */
+
+	/*
+	 * These are based on the vmbus_chanmsg_choffer.chm_chanid.
+	 * Save them here for easy access.
+	 */
+	u_long				ch_evtflag_mask;/* event flag */
+	volatile u_long			*ch_evtflag;	/* event flag loc. */
+
+	/*
+	 * Rarely used fields.
+	 */
+
+	struct hyperv_mon_param		*ch_monprm;
+	struct hyperv_dma		ch_monprm_dma;
+
+	uint32_t			ch_id;		/* channel id */
+	device_t			ch_dev;
+	struct vmbus_softc		*ch_vmbus;
+
+	int				ch_cpuid;	/* owner cpu */
+	/*
+	 * Virtual cpuid for ch_cpuid; it is used to communicate
+	 * cpuid-related information with Hyper-V.  If MSR_HV_VP_INDEX
+	 * does not exist, ch_vcpuid will always be 0 for compatibility.
+	 */
+	uint32_t			ch_vcpuid;
+
+	/*
+	 * If this is a primary channel, ch_subchan* fields
+	 * contain sub-channels belonging to this primary
+	 * channel.
+	 */
+	struct mtx			ch_subchan_lock;
+	TAILQ_HEAD(, vmbus_channel)	ch_subchans;
+	int				ch_subchan_cnt;
+
+	/* If this is a sub-channel */
+	TAILQ_ENTRY(vmbus_channel)	ch_sublink;	/* sub-channel link */
+	struct vmbus_channel		*ch_prichan;	/* owner primary chan */
+
+	void				*ch_bufring;	/* TX+RX bufrings */
+	struct hyperv_dma		ch_bufring_dma;
+	uint32_t			ch_bufring_gpadl;
+
+	struct task			ch_attach_task;	/* run in ch_mgmt_tq */
+	struct task			ch_detach_task;	/* run in ch_mgmt_tq */
+	struct taskqueue		*ch_mgmt_tq;
+
+	/* If this is a primary channel */
+	TAILQ_ENTRY(vmbus_channel)	ch_prilink;	/* primary chan link */
+
+	TAILQ_ENTRY(vmbus_channel)	ch_link;	/* channel link */
+	uint32_t			ch_subidx;	/* subchan index */
+	volatile uint32_t		ch_stflags;	/* atomic-op */
+							/* VMBUS_CHAN_ST_ */
+	struct hyperv_guid		ch_guid_type;
+	struct hyperv_guid		ch_guid_inst;
+
+	struct sx			ch_orphan_lock;
+	struct vmbus_xact_ctx		*ch_orphan_xact;
+
+	int				ch_refs;
+
+	struct sysctl_ctx_list		ch_sysctl_ctx;
+} __aligned(CACHE_LINE_SIZE);
+
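+/*
+ * The primary channel always carries sub-channel index 0; real
+ * sub-channels are numbered from 1, which is what makes this test work.
+ */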
+#define VMBUS_CHAN_ISPRIMARY(chan)	((chan)->ch_subidx == 0)
+
+/*
+ * If this flag is set, this channel's interrupt will be masked in the
+ * ISR, and the RX bufring will be drained before this channel's
+ * interrupt is unmasked.
+ *
+ * This flag is turned on by default.  Drivers can turn it off
+ * according to their own requirements.
+ */
+#define VMBUS_CHAN_FLAG_BATCHREAD	0x0002
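+/*
+ * E.g. a driver that implements its own read loop can opt out of
+ * batched reads with vmbus_chan_set_readbatch(chan, false) after
+ * opening the channel.
+ */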
+
+#define VMBUS_CHAN_TXF_HASMNF		0x0001
+
+#define VMBUS_CHAN_ST_OPENED_SHIFT	0
+#define VMBUS_CHAN_ST_ONPRIL_SHIFT	1
+#define VMBUS_CHAN_ST_ONSUBL_SHIFT	2
+#define VMBUS_CHAN_ST_ONLIST_SHIFT	3
+#define VMBUS_CHAN_ST_REVOKED_SHIFT	4	/* sticky */
+#define VMBUS_CHAN_ST_OPENED		(1 << VMBUS_CHAN_ST_OPENED_SHIFT)
+#define VMBUS_CHAN_ST_ONPRIL		(1 << VMBUS_CHAN_ST_ONPRIL_SHIFT)
+#define VMBUS_CHAN_ST_ONSUBL		(1 << VMBUS_CHAN_ST_ONSUBL_SHIFT)
+#define VMBUS_CHAN_ST_ONLIST		(1 << VMBUS_CHAN_ST_ONLIST_SHIFT)
+#define VMBUS_CHAN_ST_REVOKED		(1 << VMBUS_CHAN_ST_REVOKED_SHIFT)
+
+struct vmbus_softc;
+struct vmbus_message;
+
+void		vmbus_event_proc(struct vmbus_softc *, int);
+void		vmbus_event_proc_compat(struct vmbus_softc *, int);
+void		vmbus_chan_msgproc(struct vmbus_softc *,
+		    const struct vmbus_message *);
+void		vmbus_chan_destroy_all(struct vmbus_softc *);
+
+#endif	/* !_VMBUS_CHANVAR_H_ */


Property changes on: trunk/sys/dev/hyperv/vmbus/vmbus_chanvar.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/vmbus_et.c
===================================================================
--- trunk/sys/dev/hyperv/vmbus/vmbus_et.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/vmbus_et.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,204 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2015,2016-2017 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus_et.c 324461 2017-10-10 02:22:34Z sephe $");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+#include <sys/systm.h>
+#include <sys/timeet.h>
+
+#include <machine/cpu.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/vmbus/hyperv_reg.h>
+#include <dev/hyperv/vmbus/hyperv_var.h>
+#include <dev/hyperv/vmbus/vmbus_var.h>
+
+#define VMBUS_ET_NAME			"hvet"
+
+#define MSR_HV_STIMER0_CFG_SINT		\
+	((((uint64_t)VMBUS_SINT_TIMER) << MSR_HV_STIMER_CFG_SINT_SHIFT) & \
+	 MSR_HV_STIMER_CFG_SINT_MASK)
+
+/*
+ * Additionally required feature:
+ * - SynIC is needed for interrupt generation.
+ */
+#define CPUID_HV_ET_MASK		(CPUID_HV_MSR_SYNIC |		\
+					 CPUID_HV_MSR_SYNTIMER)
+
+static void			vmbus_et_identify(driver_t *, device_t);
+static int			vmbus_et_probe(device_t);
+static int			vmbus_et_attach(device_t);
+static int			vmbus_et_detach(device_t);
+static int			vmbus_et_start(struct eventtimer *, sbintime_t,
+				    sbintime_t);
+
+static struct eventtimer	vmbus_et;
+
+static device_method_t vmbus_et_methods[] = {
+	DEVMETHOD(device_identify,	vmbus_et_identify),
+	DEVMETHOD(device_probe,		vmbus_et_probe),
+	DEVMETHOD(device_attach,	vmbus_et_attach),
+	DEVMETHOD(device_detach,	vmbus_et_detach),
+
+	DEVMETHOD_END
+};
+
+static driver_t vmbus_et_driver = {
+	VMBUS_ET_NAME,
+	vmbus_et_methods,
+	0
+};
+
+static devclass_t vmbus_et_devclass;
+
+DRIVER_MODULE(hv_et, vmbus, vmbus_et_driver, vmbus_et_devclass, NULL, NULL);
+MODULE_VERSION(hv_et, 1);
+
+static __inline uint64_t
+hyperv_sbintime2count(sbintime_t time)
+{
+	struct timespec val;
+
+	val = sbttots(time);
+	return (val.tv_sec * HYPERV_TIMER_FREQ) +
+	    (val.tv_nsec / HYPERV_TIMER_NS_FACTOR);
+}
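+
+/*
+ * Example (a sketch under the usual definitions, where the Hyper-V
+ * timer counts in 100ns units, i.e. HYPERV_TIMER_FREQ == 10000000 and
+ * HYPERV_TIMER_NS_FACTOR == 100): an sbintime_t of one second converts
+ * to 1 * 10000000 + 0/100 == 10000000 timer counts.
+ */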
+
+static int
+vmbus_et_start(struct eventtimer *et __unused, sbintime_t first,
+    sbintime_t period __unused)
+{
+	uint64_t current;
+
+	current = hyperv_tc64();
+	current += hyperv_sbintime2count(first);
+	wrmsr(MSR_HV_STIMER0_COUNT, current);
+
+	return (0);
+}
+
+void
+vmbus_et_intr(struct trapframe *frame)
+{
+	struct trapframe *oldframe;
+	struct thread *td;
+
+	if (vmbus_et.et_active) {
+		td = curthread;
+		td->td_intr_nesting_level++;
+		oldframe = td->td_intr_frame;
+		td->td_intr_frame = frame;
+		vmbus_et.et_event_cb(&vmbus_et, vmbus_et.et_arg);
+		td->td_intr_frame = oldframe;
+		td->td_intr_nesting_level--;
+	}
+}
+
+static void
+vmbus_et_identify(driver_t *driver, device_t parent)
+{
+	if (device_get_unit(parent) != 0 ||
+	    device_find_child(parent, VMBUS_ET_NAME, -1) != NULL ||
+	    (hyperv_features & CPUID_HV_ET_MASK) != CPUID_HV_ET_MASK ||
+	    hyperv_tc64 == NULL)
+		return;
+
+	device_add_child(parent, VMBUS_ET_NAME, -1);
+}
+
+static int
+vmbus_et_probe(device_t dev)
+{
+	if (resource_disabled(VMBUS_ET_NAME, 0))
+		return (ENXIO);
+
+	device_set_desc(dev, "Hyper-V event timer");
+
+	return (BUS_PROBE_NOWILDCARD);
+}
+
+static void
+vmbus_et_config(void *arg __unused)
+{
+	/*
+	 * Make sure that STIMER0 is really disabled before writing
+	 * to STIMER0_CONFIG.
+	 *
+	 * "Writing to the configuration register of a timer that
+	 *  is already enabled may result in undefined behaviour."
+	 */
+	for (;;) {
+		uint64_t val;
+
+		/* Stop counting, and this also implies disabling STIMER0 */
+		wrmsr(MSR_HV_STIMER0_COUNT, 0);
+
+		val = rdmsr(MSR_HV_STIMER0_CONFIG);
+		if ((val & MSR_HV_STIMER_CFG_ENABLE) == 0)
+			break;
+		cpu_spinwait();
+	}
+	wrmsr(MSR_HV_STIMER0_CONFIG,
+	    MSR_HV_STIMER_CFG_AUTOEN | MSR_HV_STIMER0_CFG_SINT);
+}
+
+static int
+vmbus_et_attach(device_t dev)
+{
+	/* TODO: use independent IDT vector */
+
+	vmbus_et.et_name = "Hyper-V";
+	vmbus_et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU;
+	vmbus_et.et_quality = 1000;
+	vmbus_et.et_frequency = HYPERV_TIMER_FREQ;
+	vmbus_et.et_min_period = (0x00000001ULL << 32) / HYPERV_TIMER_FREQ;
+	vmbus_et.et_max_period = (0xfffffffeULL << 32) / HYPERV_TIMER_FREQ;
+	vmbus_et.et_start = vmbus_et_start;
+
+	/*
+	 * Delay a bit to make sure that hyperv_tc64 will not return 0,
+	 * since writing 0 to STIMER0_COUNT will disable STIMER0.
+	 */
+	DELAY(100);
+	smp_rendezvous(NULL, vmbus_et_config, NULL, NULL);
+
+	return (et_register(&vmbus_et));
+}
+
+static int
+vmbus_et_detach(device_t dev)
+{
+	return (et_deregister(&vmbus_et));
+}


Property changes on: trunk/sys/dev/hyperv/vmbus/vmbus_et.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/vmbus_if.m
===================================================================
--- trunk/sys/dev/hyperv/vmbus/vmbus_if.m	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/vmbus_if.m	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,61 @@
+/* $MidnightBSD$ */
+#-
+# Copyright (c) 2016 Microsoft Corp.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice unmodified, this list of conditions, and the following
+#    disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# $FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus_if.m 310768 2016-12-29 09:02:49Z sephe $
+#
+
+#include <sys/param.h>
+#include <sys/bus.h>
+
+INTERFACE vmbus;
+
+HEADER {
+	struct hyperv_guid;
+	struct taskqueue;
+};
+
+METHOD uint32_t get_version {
+	device_t bus;
+	device_t dev;
+};
+
+METHOD int probe_guid {
+	device_t bus;
+	device_t dev;
+	const struct hyperv_guid *guid;
+};
+
+METHOD uint32_t get_vcpu_id {
+	device_t bus;
+	device_t dev;
+	int cpu;
+};
+
+METHOD struct taskqueue * get_event_taskq {
+	device_t bus;
+	device_t dev;
+	int cpu;
+};


Property changes on: trunk/sys/dev/hyperv/vmbus/vmbus_if.m
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/vmbus_reg.h
===================================================================
--- trunk/sys/dev/hyperv/vmbus/vmbus_reg.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/vmbus_reg.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,337 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus_reg.h 307198 2016-10-13 07:42:44Z sephe $
+ */
+
+#ifndef _VMBUS_REG_H_
+#define _VMBUS_REG_H_
+
+#include <sys/param.h>
+#include <dev/hyperv/include/hyperv.h> /* XXX for hyperv_guid */
+#include <dev/hyperv/include/vmbus.h>
+#include <dev/hyperv/vmbus/hyperv_reg.h>
+
+/*
+ * Hyper-V SynIC message format.
+ */
+
+#define VMBUS_MSG_DSIZE_MAX		240
+#define VMBUS_MSG_SIZE			256
+
+struct vmbus_message {
+	uint32_t	msg_type;	/* HYPERV_MSGTYPE_ */
+	uint8_t		msg_dsize;	/* data size */
+	uint8_t		msg_flags;	/* VMBUS_MSGFLAG_ */
+	uint16_t	msg_rsvd;
+	uint64_t	msg_id;
+	uint8_t		msg_data[VMBUS_MSG_DSIZE_MAX];
+} __packed;
+CTASSERT(sizeof(struct vmbus_message) == VMBUS_MSG_SIZE);
+
+#define VMBUS_MSGFLAG_PENDING		0x01
+
+/*
+ * Hyper-V SynIC event flags
+ */
+
+#ifdef __LP64__
+#define VMBUS_EVTFLAGS_MAX	32
+#define VMBUS_EVTFLAG_SHIFT	6
+#else
+#define VMBUS_EVTFLAGS_MAX	64
+#define VMBUS_EVTFLAG_SHIFT	5
+#endif
+#define VMBUS_EVTFLAG_LEN	(1 << VMBUS_EVTFLAG_SHIFT)
+#define VMBUS_EVTFLAG_MASK	(VMBUS_EVTFLAG_LEN - 1)
+#define VMBUS_EVTFLAGS_SIZE	256
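+
+/*
+ * Example: on LP64 (VMBUS_EVTFLAG_SHIFT == 6, 64 flags per u_long),
+ * channel id 70 lives in evt_flags[70 >> 6] == evt_flags[1] at bit
+ * (70 & VMBUS_EVTFLAG_MASK) == 6; this mirrors how ch_evtflag and
+ * ch_evtflag_mask are initialized in vmbus_chan_msgproc_choffer().
+ * Both layouts hold VMBUS_EVTFLAG_LEN * VMBUS_EVTFLAGS_MAX == 2048
+ * flags in VMBUS_EVTFLAGS_SIZE (256) bytes.
+ */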
+
+struct vmbus_evtflags {
+	u_long		evt_flags[VMBUS_EVTFLAGS_MAX];
+} __packed;
+CTASSERT(sizeof(struct vmbus_evtflags) == VMBUS_EVTFLAGS_SIZE);
+
+/*
+ * Hyper-V Monitor Notification Facility
+ */
+
+struct vmbus_mon_trig {
+	uint32_t	mt_pending;
+	uint32_t	mt_armed;
+} __packed;
+
+#define VMBUS_MONTRIGS_MAX	4
+#define VMBUS_MONTRIG_LEN	32
+
+struct vmbus_mnf {
+	uint32_t	mnf_state;
+	uint32_t	mnf_rsvd1;
+
+	struct vmbus_mon_trig mnf_trigs[VMBUS_MONTRIGS_MAX];
+	uint8_t		mnf_rsvd2[536];
+
+	uint16_t	mnf_lat[VMBUS_MONTRIGS_MAX][VMBUS_MONTRIG_LEN];
+	uint8_t		mnf_rsvd3[256];
+
+	struct hyperv_mon_param
+			mnf_param[VMBUS_MONTRIGS_MAX][VMBUS_MONTRIG_LEN];
+	uint8_t		mnf_rsvd4[1984];
+} __packed;
+CTASSERT(sizeof(struct vmbus_mnf) == PAGE_SIZE);
+
+/*
+ * Buffer ring
+ */
+struct vmbus_bufring {
+	/*
+	 * If br_windex == br_rindex, this bufring is empty; this
+	 * means we must _not_ write data to the bufring if the
+	 * write would make br_windex equal to br_rindex.
+	 */
+	volatile uint32_t	br_windex;
+	volatile uint32_t	br_rindex;
+
+	/*
+	 * Interrupt mask {0,1}
+	 *
+	 * For the TX bufring, the host sets this to 1 while it is
+	 * processing the TX bufring, so that we can safely skip the
+	 * TX event notification to the host.
+	 *
+	 * For the RX bufring, once we set this to 1, the host will not
+	 * dispatch further interrupts to us, even if there is data
+	 * pending on the RX bufring.  This effectively disables the
+	 * interrupt of the channel to which this RX bufring is attached.
+	 */
+	volatile uint32_t	br_imask;
+
+	uint8_t			br_rsvd[4084];
+	uint8_t			br_data[];
+} __packed;
+CTASSERT(sizeof(struct vmbus_bufring) == PAGE_SIZE);
+
+/*
+ * Channel
+ */
+
+#define VMBUS_CHAN_MAX_COMPAT	256
+#define VMBUS_CHAN_MAX		(VMBUS_EVTFLAG_LEN * VMBUS_EVTFLAGS_MAX)
+
+/*
+ * Channel packets
+ */
+
+#define VMBUS_CHANPKT_SIZE_ALIGN	(1 << VMBUS_CHANPKT_SIZE_SHIFT)
+
+#define VMBUS_CHANPKT_SETLEN(pktlen, len)		\
+do {							\
+	(pktlen) = (len) >> VMBUS_CHANPKT_SIZE_SHIFT;	\
+} while (0)
+
+#define VMBUS_CHANPKT_TOTLEN(tlen)	\
+	roundup2((tlen), VMBUS_CHANPKT_SIZE_ALIGN)
+
+#define VMBUS_CHANPKT_HLEN_MIN		\
+	(sizeof(struct vmbus_chanpkt_hdr) >> VMBUS_CHANPKT_SIZE_SHIFT)
+
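+/*
+ * Example (a sketch assuming the usual VMBUS_CHANPKT_SIZE_SHIFT of 3,
+ * i.e. 8-byte units): a 61-byte packet has VMBUS_CHANPKT_TOTLEN(61) ==
+ * 64, and VMBUS_CHANPKT_SETLEN(pktlen, 64) stores 64 >> 3 == 8.
+ */
+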
+struct vmbus_chanpkt {
+	struct vmbus_chanpkt_hdr cp_hdr;
+} __packed;
+
+struct vmbus_chanpkt_sglist {
+	struct vmbus_chanpkt_hdr cp_hdr;
+	uint32_t	cp_rsvd;
+	uint32_t	cp_gpa_cnt;
+	struct vmbus_gpa cp_gpa[];
+} __packed;
+
+struct vmbus_chanpkt_prplist {
+	struct vmbus_chanpkt_hdr cp_hdr;
+	uint32_t	cp_rsvd;
+	uint32_t	cp_range_cnt;
+	struct vmbus_gpa_range cp_range[];
+} __packed;
+
+/*
+ * Channel messages
+ * - Embedded in vmbus_message.msg_data, e.g. response and notification.
+ * - Embedded in hypercall_postmsg_in.hc_data, e.g. request.
+ */
+
+#define VMBUS_CHANMSG_TYPE_CHOFFER		1	/* NOTE */
+#define VMBUS_CHANMSG_TYPE_CHRESCIND		2	/* NOTE */
+#define VMBUS_CHANMSG_TYPE_CHREQUEST		3	/* REQ */
+#define VMBUS_CHANMSG_TYPE_CHOFFER_DONE		4	/* NOTE */
+#define VMBUS_CHANMSG_TYPE_CHOPEN		5	/* REQ */
+#define VMBUS_CHANMSG_TYPE_CHOPEN_RESP		6	/* RESP */
+#define VMBUS_CHANMSG_TYPE_CHCLOSE		7	/* REQ */
+#define VMBUS_CHANMSG_TYPE_GPADL_CONN		8	/* REQ */
+#define VMBUS_CHANMSG_TYPE_GPADL_SUBCONN	9	/* REQ */
+#define VMBUS_CHANMSG_TYPE_GPADL_CONNRESP	10	/* RESP */
+#define VMBUS_CHANMSG_TYPE_GPADL_DISCONN	11	/* REQ */
+#define VMBUS_CHANMSG_TYPE_GPADL_DISCONNRESP	12	/* RESP */
+#define VMBUS_CHANMSG_TYPE_CHFREE		13	/* REQ */
+#define VMBUS_CHANMSG_TYPE_CONNECT		14	/* REQ */
+#define VMBUS_CHANMSG_TYPE_CONNECT_RESP		15	/* RESP */
+#define VMBUS_CHANMSG_TYPE_DISCONNECT		16	/* REQ */
+#define VMBUS_CHANMSG_TYPE_MAX			22
+
+struct vmbus_chanmsg_hdr {
+	uint32_t	chm_type;	/* VMBUS_CHANMSG_TYPE_ */
+	uint32_t	chm_rsvd;
+} __packed;
+
+/* VMBUS_CHANMSG_TYPE_CONNECT */
+struct vmbus_chanmsg_connect {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_ver;
+	uint32_t	chm_rsvd;
+	uint64_t	chm_evtflags;
+	uint64_t	chm_mnf1;
+	uint64_t	chm_mnf2;
+} __packed;
+
+/* VMBUS_CHANMSG_TYPE_CONNECT_RESP */
+struct vmbus_chanmsg_connect_resp {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint8_t		chm_done;
+} __packed;
+
+/* VMBUS_CHANMSG_TYPE_CHREQUEST */
+struct vmbus_chanmsg_chrequest {
+	struct vmbus_chanmsg_hdr chm_hdr;
+} __packed;
+
+/* VMBUS_CHANMSG_TYPE_DISCONNECT */
+struct vmbus_chanmsg_disconnect {
+	struct vmbus_chanmsg_hdr chm_hdr;
+} __packed;
+
+/* VMBUS_CHANMSG_TYPE_CHOPEN */
+struct vmbus_chanmsg_chopen {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+	uint32_t	chm_openid;
+	uint32_t	chm_gpadl;
+	uint32_t	chm_vcpuid;
+	uint32_t	chm_txbr_pgcnt;
+#define VMBUS_CHANMSG_CHOPEN_UDATA_SIZE	120
+	uint8_t		chm_udata[VMBUS_CHANMSG_CHOPEN_UDATA_SIZE];
+} __packed;
+
+/* VMBUS_CHANMSG_TYPE_CHOPEN_RESP */
+struct vmbus_chanmsg_chopen_resp {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+	uint32_t	chm_openid;
+	uint32_t	chm_status;
+} __packed;
+
+/* VMBUS_CHANMSG_TYPE_GPADL_CONN */
+struct vmbus_chanmsg_gpadl_conn {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+	uint32_t	chm_gpadl;
+	uint16_t	chm_range_len;
+	uint16_t	chm_range_cnt;
+	struct vmbus_gpa_range chm_range;
+} __packed;
+
+#define VMBUS_CHANMSG_GPADL_CONN_PGMAX		26
+CTASSERT(__offsetof(struct vmbus_chanmsg_gpadl_conn,
+    chm_range.gpa_page[VMBUS_CHANMSG_GPADL_CONN_PGMAX]) <=
+    HYPERCALL_POSTMSGIN_DSIZE_MAX);
+
+/* VMBUS_CHANMSG_TYPE_GPADL_SUBCONN */
+struct vmbus_chanmsg_gpadl_subconn {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_msgno;
+	uint32_t	chm_gpadl;
+	uint64_t	chm_gpa_page[];
+} __packed;
+
+#define VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX	28
+CTASSERT(__offsetof(struct vmbus_chanmsg_gpadl_subconn,
+    chm_gpa_page[VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX]) <=
+    HYPERCALL_POSTMSGIN_DSIZE_MAX);
+
+/* VMBUS_CHANMSG_TYPE_GPADL_CONNRESP */
+struct vmbus_chanmsg_gpadl_connresp {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+	uint32_t	chm_gpadl;
+	uint32_t	chm_status;
+} __packed;
+
+/* VMBUS_CHANMSG_TYPE_CHCLOSE */
+struct vmbus_chanmsg_chclose {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+} __packed;
+
+/* VMBUS_CHANMSG_TYPE_GPADL_DISCONN */
+struct vmbus_chanmsg_gpadl_disconn {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+	uint32_t	chm_gpadl;
+} __packed;
+
+/* VMBUS_CHANMSG_TYPE_CHFREE */
+struct vmbus_chanmsg_chfree {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+} __packed;
+
+/* VMBUS_CHANMSG_TYPE_CHRESCIND */
+struct vmbus_chanmsg_chrescind {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+} __packed;
+
+/* VMBUS_CHANMSG_TYPE_CHOFFER */
+struct vmbus_chanmsg_choffer {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	struct hyperv_guid chm_chtype;
+	struct hyperv_guid chm_chinst;
+	uint64_t	chm_chlat;	/* unit: 100ns */
+	uint32_t	chm_chrev;
+	uint32_t	chm_svrctx_sz;
+	uint16_t	chm_chflags;
+	uint16_t	chm_mmio_sz;	/* unit: MB */
+	uint8_t		chm_udata[120];
+	uint16_t	chm_subidx;
+	uint16_t	chm_rsvd;
+	uint32_t	chm_chanid;
+	uint8_t		chm_montrig;
+	uint8_t		chm_flags1;	/* VMBUS_CHOFFER_FLAG1_ */
+	uint16_t	chm_flags2;
+	uint32_t	chm_connid;
+} __packed;
+CTASSERT(sizeof(struct vmbus_chanmsg_choffer) <= VMBUS_MSG_DSIZE_MAX);
+
+#define VMBUS_CHOFFER_FLAG1_HASMNF	0x01
+
+#endif	/* !_VMBUS_REG_H_ */


Property changes on: trunk/sys/dev/hyperv/vmbus/vmbus_reg.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/vmbus_res.c
===================================================================
--- trunk/sys/dev/hyperv/vmbus/vmbus_res.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/vmbus_res.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,99 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2017 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus_res.c 318393 2017-05-17 02:40:06Z sephe $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/module.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+#include <dev/acpica/acpivar.h>
+
+#include <dev/hyperv/include/hyperv.h>
+
+#include "acpi_if.h"
+#include "bus_if.h"
+
+static int		vmbus_res_probe(device_t);
+static int		vmbus_res_attach(device_t);
+static int		vmbus_res_detach(device_t);
+
+static device_method_t vmbus_res_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,			vmbus_res_probe),
+	DEVMETHOD(device_attach,		vmbus_res_attach),
+	DEVMETHOD(device_detach,		vmbus_res_detach),
+	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
+	DEVMETHOD(device_suspend,		bus_generic_suspend),
+	DEVMETHOD(device_resume,		bus_generic_resume),
+
+	DEVMETHOD_END
+};
+
+static driver_t vmbus_res_driver = {
+	"vmbus_res",
+	vmbus_res_methods,
+	1
+};
+
+static devclass_t vmbus_res_devclass;
+
+DRIVER_MODULE(vmbus_res, acpi, vmbus_res_driver, vmbus_res_devclass,
+    NULL, NULL);
+MODULE_DEPEND(vmbus_res, acpi, 1, 1, 1);
+MODULE_VERSION(vmbus_res, 1);
+
+static int
+vmbus_res_probe(device_t dev)
+{
+	char *id[] = { "VMBUS", NULL };
+
+	if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL ||
+	    device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
+	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
+		return (ENXIO);
+
+	device_set_desc(dev, "Hyper-V Vmbus Resource");
+	return (BUS_PROBE_DEFAULT);
+}
+
+static int
+vmbus_res_attach(device_t dev __unused)
+{
+
+	return (0);
+}
+
+static int
+vmbus_res_detach(device_t dev __unused)
+{
+
+	return (0);
+}


Property changes on: trunk/sys/dev/hyperv/vmbus/vmbus_res.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/vmbus_var.h
===================================================================
--- trunk/sys/dev/hyperv/vmbus/vmbus_var.h	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/vmbus_var.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,173 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus_var.h 311257 2017-01-04 05:41:47Z sephe $
+ */
+
+#ifndef _VMBUS_VAR_H_
+#define _VMBUS_VAR_H_
+
+#include <sys/param.h>
+#include <sys/taskqueue.h>
+#include <sys/rman.h>
+
+#include <dev/hyperv/include/hyperv_busdma.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcib_private.h>
+
+/*
+ * NOTE: DO NOT CHANGE THIS.
+ */
+#define VMBUS_SINT_MESSAGE	2
+/*
+ * NOTE:
+ * - DO NOT set it to the same value as VMBUS_SINT_MESSAGE.
+ * - DO NOT set it to 0.
+ */
+#define VMBUS_SINT_TIMER	4
+
+/*
+ * NOTE: DO NOT CHANGE THESE
+ */
+#define VMBUS_CONNID_MESSAGE		1
+#define VMBUS_CONNID_EVENT		2
+
+struct vmbus_message;
+struct vmbus_softc;
+
+typedef void		(*vmbus_chanmsg_proc_t)(struct vmbus_softc *,
+			    const struct vmbus_message *);
+
+#define VMBUS_CHANMSG_PROC(name, func)	\
+	[VMBUS_CHANMSG_TYPE_##name] = func
+#define VMBUS_CHANMSG_PROC_WAKEUP(name)	\
+	VMBUS_CHANMSG_PROC(name, vmbus_msghc_wakeup)
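+
+/*
+ * These designated-initializer helpers build the channel message
+ * dispatch table; abbreviated sketch of vmbus_chan_msgprocs[] in
+ * vmbus_chan.c:
+ *
+ *	static const vmbus_chanmsg_proc_t
+ *	vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
+ *		VMBUS_CHANMSG_PROC(CHOFFER, vmbus_chan_msgproc_choffer),
+ *		VMBUS_CHANMSG_PROC(CHRESCIND, vmbus_chan_msgproc_chrescind),
+ *		VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP),
+ *		...
+ *	};
+ *
+ * vmbus_chan_msgproc() then dispatches on chm_type through this table.
+ */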
+
+struct vmbus_pcpu_data {
+	u_long			*intr_cnt;	/* Hyper-V interrupt counter */
+	struct vmbus_message	*message;	/* shared messages */
+	uint32_t		vcpuid;		/* virtual cpuid */
+	int			event_flags_cnt;/* # of event flags */
+	struct vmbus_evtflags	*event_flags;	/* event flags from host */
+
+	/* Rarely used fields */
+	struct hyperv_dma	message_dma;	/* busdma glue */
+	struct hyperv_dma	event_flags_dma;/* busdma glue */
+	struct taskqueue	*event_tq;	/* event taskq */
+	struct taskqueue	*message_tq;	/* message taskq */
+	struct task		message_task;	/* message task */
+} __aligned(CACHE_LINE_SIZE);
+
+#if __FreeBSD_version < 1100000
+typedef u_long rman_res_t;
+#endif
+
+struct vmbus_softc {
+	void			(*vmbus_event_proc)(struct vmbus_softc *, int);
+	u_long			*vmbus_tx_evtflags;
+						/* event flags to host */
+	struct vmbus_mnf	*vmbus_mnf2;	/* monitored by host */
+
+	u_long			*vmbus_rx_evtflags;
+						/* compat evtflgs from host */
+	struct vmbus_channel *volatile *vmbus_chmap;
+	struct vmbus_xact_ctx	*vmbus_xc;
+	struct vmbus_pcpu_data	vmbus_pcpu[MAXCPU];
+
+	/*
+	 * Rarely used fields
+	 */
+
+	device_t		vmbus_dev;
+	int			vmbus_idtvec;
+	uint32_t		vmbus_flags;	/* see VMBUS_FLAG_ */
+	uint32_t		vmbus_version;
+	uint32_t		vmbus_gpadl;
+
+	/* Shared memory for vmbus_{rx,tx}_evtflags */
+	void			*vmbus_evtflags;
+	struct hyperv_dma	vmbus_evtflags_dma;
+
+	void			*vmbus_mnf1;	/* monitored by VM, unused */
+	struct hyperv_dma	vmbus_mnf1_dma;
+	struct hyperv_dma	vmbus_mnf2_dma;
+
+	bool			vmbus_scandone;
+	struct task		vmbus_scandone_task;
+
+	struct taskqueue	*vmbus_devtq;	/* for dev attach/detach */
+	struct taskqueue	*vmbus_subchtq;	/* for sub-chan attach/detach */
+
+	/* Primary channels */
+	struct mtx		vmbus_prichan_lock;
+	TAILQ_HEAD(, vmbus_channel) vmbus_prichans;
+
+	/* Complete channel list */
+	struct mtx		vmbus_chan_lock;
+	TAILQ_HEAD(, vmbus_channel) vmbus_chans;
+
+#ifdef NEW_PCIB
+	/* The list of usable MMIO ranges for PCIe pass-through */
+	struct pcib_host_resources vmbus_mmio_res;
+#endif
+};
+
+#define VMBUS_FLAG_ATTACHED	0x0001	/* vmbus was attached */
+#define VMBUS_FLAG_SYNIC	0x0002	/* SynIC was setup */
+
+#define VMBUS_PCPU_GET(sc, field, cpu)	(sc)->vmbus_pcpu[(cpu)].field
+#define VMBUS_PCPU_PTR(sc, field, cpu)	&(sc)->vmbus_pcpu[(cpu)].field
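+/*
+ * E.g. vmbus_chan_cpu_set() reads the target CPU's virtual cpuid with
+ * VMBUS_PCPU_GET(sc, vcpuid, cpu), which expands to
+ * (sc)->vmbus_pcpu[(cpu)].vcpuid.
+ */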
+
+struct vmbus_channel;
+struct trapframe;
+struct vmbus_message;
+struct vmbus_msghc;
+
+void		vmbus_handle_intr(struct trapframe *);
+int		vmbus_add_child(struct vmbus_channel *);
+int		vmbus_delete_child(struct vmbus_channel *);
+void		vmbus_et_intr(struct trapframe *);
+uint32_t	vmbus_gpadl_alloc(struct vmbus_softc *);
+
+struct vmbus_msghc *
+		vmbus_msghc_get(struct vmbus_softc *, size_t);
+void		vmbus_msghc_put(struct vmbus_softc *, struct vmbus_msghc *);
+void		*vmbus_msghc_dataptr(struct vmbus_msghc *);
+int		vmbus_msghc_exec_noresult(struct vmbus_msghc *);
+int		vmbus_msghc_exec(struct vmbus_softc *, struct vmbus_msghc *);
+void		vmbus_msghc_exec_cancel(struct vmbus_softc *,
+		    struct vmbus_msghc *);
+const struct vmbus_message *
+		vmbus_msghc_wait_result(struct vmbus_softc *,
+		    struct vmbus_msghc *);
+const struct vmbus_message *
+		vmbus_msghc_poll_result(struct vmbus_softc *,
+		    struct vmbus_msghc *);
+void		vmbus_msghc_wakeup(struct vmbus_softc *,
+		    const struct vmbus_message *);
+void		vmbus_msghc_reset(struct vmbus_msghc *, size_t);
+
+#endif	/* !_VMBUS_VAR_H_ */


Property changes on: trunk/sys/dev/hyperv/vmbus/vmbus_var.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/hyperv/vmbus/vmbus_xact.c
===================================================================
--- trunk/sys/dev/hyperv/vmbus/vmbus_xact.c	                        (rev 0)
+++ trunk/sys/dev/hyperv/vmbus/vmbus_xact.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,443 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus_xact.c 310761 2016-12-29 07:27:13Z sephe $");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+
+#include <dev/hyperv/include/hyperv_busdma.h>
+#include <dev/hyperv/include/vmbus_xact.h>
+
+struct vmbus_xact {
+	struct vmbus_xact_ctx		*x_ctx;
+	void				*x_priv;
+
+	void				*x_req;
+	struct hyperv_dma		x_req_dma;
+
+	const void			*x_resp;
+	size_t				x_resp_len;
+	void				*x_resp0;
+};
+
+struct vmbus_xact_ctx {
+	size_t				xc_req_size;
+	size_t				xc_resp_size;
+	size_t				xc_priv_size;
+
+	struct mtx			xc_lock;
+	/*
+	 * Protected by xc_lock.
+	 */
+	uint32_t			xc_flags;	/* VMBUS_XACT_CTXF_ */
+	struct vmbus_xact		*xc_free;
+	struct vmbus_xact		*xc_active;
+	struct vmbus_xact		*xc_orphan;
+};
+
+#define VMBUS_XACT_CTXF_DESTROY		0x0001
+
+static struct vmbus_xact	*vmbus_xact_alloc(struct vmbus_xact_ctx *,
+				    bus_dma_tag_t);
+static void			vmbus_xact_free(struct vmbus_xact *);
+static struct vmbus_xact	*vmbus_xact_get1(struct vmbus_xact_ctx *,
+				    uint32_t);
+static const void		*vmbus_xact_wait1(struct vmbus_xact *, size_t *,
+				    bool);
+static const void		*vmbus_xact_return(struct vmbus_xact *,
+				    size_t *);
+static void			vmbus_xact_save_resp(struct vmbus_xact *,
+				    const void *, size_t);
+static void			vmbus_xact_ctx_free(struct vmbus_xact_ctx *);
+
+static struct vmbus_xact *
+vmbus_xact_alloc(struct vmbus_xact_ctx *ctx, bus_dma_tag_t parent_dtag)
+{
+	struct vmbus_xact *xact;
+
+	xact = malloc(sizeof(*xact), M_DEVBUF, M_WAITOK | M_ZERO);
+	xact->x_ctx = ctx;
+
+	/* XXX assume that page alignment is enough */
+	xact->x_req = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
+	    ctx->xc_req_size, &xact->x_req_dma, BUS_DMA_WAITOK);
+	if (xact->x_req == NULL) {
+		free(xact, M_DEVBUF);
+		return (NULL);
+	}
+	if (ctx->xc_priv_size != 0)
+		xact->x_priv = malloc(ctx->xc_priv_size, M_DEVBUF, M_WAITOK);
+	xact->x_resp0 = malloc(ctx->xc_resp_size, M_DEVBUF, M_WAITOK);
+
+	return (xact);
+}
+
+static void
+vmbus_xact_free(struct vmbus_xact *xact)
+{
+
+	hyperv_dmamem_free(&xact->x_req_dma, xact->x_req);
+	free(xact->x_resp0, M_DEVBUF);
+	if (xact->x_priv != NULL)
+		free(xact->x_priv, M_DEVBUF);
+	free(xact, M_DEVBUF);
+}
+
+static struct vmbus_xact *
+vmbus_xact_get1(struct vmbus_xact_ctx *ctx, uint32_t dtor_flag)
+{
+	struct vmbus_xact *xact;
+
+	mtx_lock(&ctx->xc_lock);
+
+	while ((ctx->xc_flags & dtor_flag) == 0 && ctx->xc_free == NULL)
+		mtx_sleep(&ctx->xc_free, &ctx->xc_lock, 0, "gxact", 0);
+	if (ctx->xc_flags & dtor_flag) {
+		/* Being destroyed */
+		xact = NULL;
+	} else {
+		xact = ctx->xc_free;
+		KASSERT(xact != NULL, ("no free xact"));
+		KASSERT(xact->x_resp == NULL, ("xact has pending response"));
+		ctx->xc_free = NULL;
+	}
+
+	mtx_unlock(&ctx->xc_lock);
+
+	return (xact);
+}
+
+struct vmbus_xact_ctx *
+vmbus_xact_ctx_create(bus_dma_tag_t dtag, size_t req_size, size_t resp_size,
+    size_t priv_size)
+{
+	struct vmbus_xact_ctx *ctx;
+
+	KASSERT(req_size > 0, ("request size is 0"));
+	KASSERT(resp_size > 0, ("response size is 0"));
+
+	ctx = malloc(sizeof(*ctx), M_DEVBUF, M_WAITOK | M_ZERO);
+	ctx->xc_req_size = req_size;
+	ctx->xc_resp_size = resp_size;
+	ctx->xc_priv_size = priv_size;
+
+	ctx->xc_free = vmbus_xact_alloc(ctx, dtag);
+	if (ctx->xc_free == NULL) {
+		free(ctx, M_DEVBUF);
+		return (NULL);
+	}
+
+	mtx_init(&ctx->xc_lock, "vmbus xact", NULL, MTX_DEF);
+
+	return (ctx);
+}
+
+bool
+vmbus_xact_ctx_orphan(struct vmbus_xact_ctx *ctx)
+{
+	mtx_lock(&ctx->xc_lock);
+	if (ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) {
+		mtx_unlock(&ctx->xc_lock);
+		return (false);
+	}
+	ctx->xc_flags |= VMBUS_XACT_CTXF_DESTROY;
+	mtx_unlock(&ctx->xc_lock);
+
+	wakeup(&ctx->xc_free);
+	wakeup(&ctx->xc_active);
+
+	ctx->xc_orphan = vmbus_xact_get1(ctx, 0);
+	if (ctx->xc_orphan == NULL)
+		panic("can't get xact");
+	return (true);
+}
+
+static void
+vmbus_xact_ctx_free(struct vmbus_xact_ctx *ctx)
+{
+	KASSERT(ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY,
+	    ("xact ctx was not orphaned"));
+	KASSERT(ctx->xc_orphan != NULL, ("no orphaned xact"));
+
+	vmbus_xact_free(ctx->xc_orphan);
+	mtx_destroy(&ctx->xc_lock);
+	free(ctx, M_DEVBUF);
+}
+
+void
+vmbus_xact_ctx_destroy(struct vmbus_xact_ctx *ctx)
+{
+
+	vmbus_xact_ctx_orphan(ctx);
+	vmbus_xact_ctx_free(ctx);
+}
+
+struct vmbus_xact *
+vmbus_xact_get(struct vmbus_xact_ctx *ctx, size_t req_len)
+{
+	struct vmbus_xact *xact;
+
+	if (req_len > ctx->xc_req_size)
+		panic("invalid request size %zu", req_len);
+
+	xact = vmbus_xact_get1(ctx, VMBUS_XACT_CTXF_DESTROY);
+	if (xact == NULL)
+		return (NULL);
+
+	memset(xact->x_req, 0, req_len);
+	return (xact);
+}
+
+void
+vmbus_xact_put(struct vmbus_xact *xact)
+{
+	struct vmbus_xact_ctx *ctx = xact->x_ctx;
+
+	KASSERT(ctx->xc_active == NULL, ("pending active xact"));
+	xact->x_resp = NULL;
+
+	mtx_lock(&ctx->xc_lock);
+	KASSERT(ctx->xc_free == NULL, ("has free xact"));
+	ctx->xc_free = xact;
+	mtx_unlock(&ctx->xc_lock);
+	wakeup(&ctx->xc_free);
+}
+
+void *
+vmbus_xact_req_data(const struct vmbus_xact *xact)
+{
+
+	return (xact->x_req);
+}
+
+bus_addr_t
+vmbus_xact_req_paddr(const struct vmbus_xact *xact)
+{
+
+	return (xact->x_req_dma.hv_paddr);
+}
+
+void *
+vmbus_xact_priv(const struct vmbus_xact *xact, size_t priv_len)
+{
+
+	if (priv_len > xact->x_ctx->xc_priv_size)
+		panic("invalid priv size %zu", priv_len);
+	return (xact->x_priv);
+}
+
+void
+vmbus_xact_activate(struct vmbus_xact *xact)
+{
+	struct vmbus_xact_ctx *ctx = xact->x_ctx;
+
+	KASSERT(xact->x_resp == NULL, ("xact has pending response"));
+
+	mtx_lock(&ctx->xc_lock);
+	KASSERT(ctx->xc_active == NULL, ("pending active xact"));
+	ctx->xc_active = xact;
+	mtx_unlock(&ctx->xc_lock);
+}
+
+void
+vmbus_xact_deactivate(struct vmbus_xact *xact)
+{
+	struct vmbus_xact_ctx *ctx = xact->x_ctx;
+
+	mtx_lock(&ctx->xc_lock);
+	KASSERT(ctx->xc_active == xact, ("xact mismatch"));
+	ctx->xc_active = NULL;
+	mtx_unlock(&ctx->xc_lock);
+}
+
+static const void *
+vmbus_xact_return(struct vmbus_xact *xact, size_t *resp_len)
+{
+	struct vmbus_xact_ctx *ctx = xact->x_ctx;
+	const void *resp;
+
+	mtx_assert(&ctx->xc_lock, MA_OWNED);
+	KASSERT(ctx->xc_active == xact, ("xact trashed"));
+
+	if ((ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) && xact->x_resp == NULL) {
+		uint8_t b = 0;
+
+		/*
+		 * Orphaned and no response was received yet; fake up
+		 * a one-byte response.
+		 */
+		printf("vmbus: xact ctx was orphaned w/ pending xact\n");
+		vmbus_xact_save_resp(ctx->xc_active, &b, sizeof(b));
+	}
+	KASSERT(xact->x_resp != NULL, ("no response"));
+
+	ctx->xc_active = NULL;
+
+	resp = xact->x_resp;
+	*resp_len = xact->x_resp_len;
+
+	return (resp);
+}
+
+static const void *
+vmbus_xact_wait1(struct vmbus_xact *xact, size_t *resp_len,
+    bool can_sleep)
+{
+	struct vmbus_xact_ctx *ctx = xact->x_ctx;
+	const void *resp;
+
+	mtx_lock(&ctx->xc_lock);
+
+	KASSERT(ctx->xc_active == xact, ("xact mismatch"));
+	while (xact->x_resp == NULL &&
+	    (ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) == 0) {
+		if (can_sleep) {
+			mtx_sleep(&ctx->xc_active, &ctx->xc_lock, 0,
+			    "wxact", 0);
+		} else {
+			mtx_unlock(&ctx->xc_lock);
+			DELAY(1000);
+			mtx_lock(&ctx->xc_lock);
+		}
+	}
+	resp = vmbus_xact_return(xact, resp_len);
+
+	mtx_unlock(&ctx->xc_lock);
+
+	return (resp);
+}
+
+const void *
+vmbus_xact_wait(struct vmbus_xact *xact, size_t *resp_len)
+{
+
+	return (vmbus_xact_wait1(xact, resp_len, true /* can sleep */));
+}
+
+const void *
+vmbus_xact_busywait(struct vmbus_xact *xact, size_t *resp_len)
+{
+
+	return (vmbus_xact_wait1(xact, resp_len, false /* can't sleep */));
+}
+
+const void *
+vmbus_xact_poll(struct vmbus_xact *xact, size_t *resp_len)
+{
+	struct vmbus_xact_ctx *ctx = xact->x_ctx;
+	const void *resp;
+
+	mtx_lock(&ctx->xc_lock);
+
+	KASSERT(ctx->xc_active == xact, ("xact mismatch"));
+	if (xact->x_resp == NULL &&
+	    (ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) == 0) {
+		mtx_unlock(&ctx->xc_lock);
+		*resp_len = 0;
+		return (NULL);
+	}
+	resp = vmbus_xact_return(xact, resp_len);
+
+	mtx_unlock(&ctx->xc_lock);
+
+	return (resp);
+}
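+
+/*
+ * Illustrative summary, not part of the upstream file: a response can
+ * be consumed in three ways.  vmbus_xact_wait() mtx_sleep()s until a
+ * response arrives (or the ctx is orphaned), vmbus_xact_busywait()
+ * spins in DELAY(1000) steps for callers that cannot sleep, and
+ * vmbus_xact_poll() never blocks, returning NULL with *resp_len = 0
+ * while nothing is ready.
+ */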
+
+static void
+vmbus_xact_save_resp(struct vmbus_xact *xact, const void *data, size_t dlen)
+{
+	struct vmbus_xact_ctx *ctx = xact->x_ctx;
+	size_t cplen = dlen;
+
+	mtx_assert(&ctx->xc_lock, MA_OWNED);
+
+	if (cplen > ctx->xc_resp_size) {
+		printf("vmbus: xact response truncated %zu -> %zu\n",
+		    cplen, ctx->xc_resp_size);
+		cplen = ctx->xc_resp_size;
+	}
+
+	KASSERT(ctx->xc_active == xact, ("xact mismatch"));
+	memcpy(xact->x_resp0, data, cplen);
+	xact->x_resp_len = cplen;
+	xact->x_resp = xact->x_resp0;
+}
+
+void
+vmbus_xact_wakeup(struct vmbus_xact *xact, const void *data, size_t dlen)
+{
+	struct vmbus_xact_ctx *ctx = xact->x_ctx;
+	int do_wakeup = 0;
+
+	mtx_lock(&ctx->xc_lock);
+	/*
+	 * NOTE:
+	 * xc_active could be NULL if the ctx has been orphaned.
+	 */
+	if (ctx->xc_active != NULL) {
+		vmbus_xact_save_resp(xact, data, dlen);
+		do_wakeup = 1;
+	} else {
+		KASSERT(ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY,
+		    ("no active xact pending"));
+		printf("vmbus: drop xact response\n");
+	}
+	mtx_unlock(&ctx->xc_lock);
+
+	if (do_wakeup)
+		wakeup(&ctx->xc_active);
+}
+
+void
+vmbus_xact_ctx_wakeup(struct vmbus_xact_ctx *ctx, const void *data, size_t dlen)
+{
+	int do_wakeup = 0;
+
+	mtx_lock(&ctx->xc_lock);
+	/*
+	 * NOTE:
+	 * xc_active could be NULL if the ctx has been orphaned.
+	 */
+	if (ctx->xc_active != NULL) {
+		vmbus_xact_save_resp(ctx->xc_active, data, dlen);
+		do_wakeup = 1;
+	} else {
+		KASSERT(ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY,
+		    ("no active xact pending"));
+		printf("vmbus: drop xact response\n");
+	}
+	mtx_unlock(&ctx->xc_lock);
+
+	if (do_wakeup)
+		wakeup(&ctx->xc_active);
+}
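
The functions above form a small request/response helper API.  A minimal
caller sketch, assuming a ctx created with vmbus_xact_ctx_create() and a
channel-specific send routine; "chan", "my_chan_send" and "struct my_req"
are hypothetical placeholders, not VMBus APIs:

	struct vmbus_xact *xact;
	struct my_req *req;
	const void *resp;
	size_t resp_len;

	xact = vmbus_xact_get(ctx, sizeof(*req));
	if (xact == NULL)
		return (ENXIO);			/* ctx is being destroyed */
	req = vmbus_xact_req_data(xact);	/* zeroed request buffer */
	/* ... fill in *req ... */

	vmbus_xact_activate(xact);		/* arm before sending */
	if (my_chan_send(chan, req, sizeof(*req)) != 0) {
		vmbus_xact_deactivate(xact);	/* roll back on send failure */
		vmbus_xact_put(xact);
		return (EIO);
	}
	resp = vmbus_xact_wait(xact, &resp_len);	/* sleeps for response */
	/* ... consume resp / resp_len ... */
	vmbus_xact_put(xact);			/* return the xact to the ctx */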


Property changes on: trunk/sys/dev/hyperv/vmbus/vmbus_xact.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/dev/ic/cd1400.h
===================================================================
--- trunk/sys/dev/ic/cd1400.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/cd1400.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * cyclades cyclom-y serial driver
  *	Andrew Herbert <andrew at werple.apana.org.au>, 17 August 1993
@@ -27,7 +28,7 @@
  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/cd1400.h 50477 1999-08-28 01:08:13Z peter $
  */
 
 /*

Modified: trunk/sys/dev/ic/cd180.h
===================================================================
--- trunk/sys/dev/ic/cd180.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/cd180.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (C) 1995 by Pavel Antonov, Moscow, Russia.
  * Copyright (C) 1995 by Andrey A. Chernov, Moscow, Russia.
@@ -24,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/cd180.h 139749 2005-01-06 01:43:34Z imp $
  */
 
 /*

Modified: trunk/sys/dev/ic/esp.h
===================================================================
--- trunk/sys/dev/ic/esp.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/esp.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1995 Sean Eric Fagan.
  * All rights reserved.
@@ -26,7 +27,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/esp.h 77961 2001-06-10 04:20:37Z nyan $
  */
 
 #ifndef _IC_ESP_H_

Modified: trunk/sys/dev/ic/hd64570.h
===================================================================
--- trunk/sys/dev/ic/hd64570.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/hd64570.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1995 John Hay.  All rights reserved.
  *
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/hd64570.h 139749 2005-01-06 01:43:34Z imp $
  */
 #ifndef _HD64570_H_
 #define _HD64570_H_

Modified: trunk/sys/dev/ic/i8237.h
===================================================================
--- trunk/sys/dev/ic/i8237.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/i8237.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,7 +1,8 @@
+/* $MidnightBSD$ */
 /*
  * Intel 8237 DMA Controller
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/i8237.h 146214 2005-05-14 10:14:56Z nyan $
  */
 
 #define	DMA37MD_SINGLE	0x40	/* single pass mode */

Modified: trunk/sys/dev/ic/i8251.h
===================================================================
--- trunk/sys/dev/ic/i8251.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/i8251.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1991 The Regents of the University of California.
  * All rights reserved.
@@ -26,7 +27,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/i8251.h 182835 2008-09-07 04:35:04Z nyan $
  */
 
 /*

Modified: trunk/sys/dev/ic/i8253reg.h
===================================================================
--- trunk/sys/dev/ic/i8253reg.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/i8253reg.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1993 The Regents of the University of California.
  * All rights reserved.
@@ -27,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	from: Header: timerreg.h,v 1.2 93/02/28 15:08:58 mccanne Exp
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/i8253reg.h 146215 2005-05-14 10:26:31Z nyan $
  */
 
 /*

Modified: trunk/sys/dev/ic/i8255.h
===================================================================
--- trunk/sys/dev/ic/i8255.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/i8255.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2008 TAKAHASHI Yoshihiro
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/i8255.h 182835 2008-09-07 04:35:04Z nyan $
  */
 
 #ifndef	_DEV_IC_I8255_H_

Modified: trunk/sys/dev/ic/i82586.h
===================================================================
--- trunk/sys/dev/ic/i82586.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/i82586.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1992, University of Vermont and State Agricultural College.
  * Copyright (c) 1992, Garrett A. Wollman.
@@ -31,7 +32,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/i82586.h 112734 2003-03-28 06:27:08Z mdodd $
  */
 
 /*

Modified: trunk/sys/dev/ic/i8259.h
===================================================================
--- trunk/sys/dev/ic/i8259.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/i8259.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2003 Peter Wemm
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/i8259.h 151580 2005-10-23 09:05:51Z glebius $
  */
 
 /*

Modified: trunk/sys/dev/ic/nec765.h
===================================================================
--- trunk/sys/dev/ic/nec765.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/nec765.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1991 The Regents of the University of California.
  * All rights reserved.
@@ -27,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	from: @(#)nec765.h	7.1 (Berkeley) 5/9/91
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/nec765.h 162165 2006-09-08 21:46:01Z jkim $
  */
 
 /*

Modified: trunk/sys/dev/ic/ns16550.h
===================================================================
--- trunk/sys/dev/ic/ns16550.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/ns16550.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1991 The Regents of the University of California.
  * All rights reserved.
@@ -27,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	from: @(#)ns16550.h	7.1 (Berkeley) 5/9/91
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/ns16550.h 318151 2017-05-10 20:12:23Z marius $
  */
 
 /*
@@ -182,8 +183,10 @@
 #define	com_xoff1	6	/* XOFF 1 character (R/W) */
 #define	com_xoff2	7	/* XOFF 2 character (R/W) */
 
+#define DW_REG_USR	31	/* DesignWare derived Uart Status Reg */
 #define com_usr		39	/* Octeon 16750/16550 Uart Status Reg */
 #define REG_USR		com_usr
+#define USR_BUSY	1	/* Uart Busy. Serial transfer in progress */
 #define USR_TXFIFO_NOTFULL 2    /* Uart TX FIFO Not full */
 
 /* 16950 register #1.  Access enabled by ACR[7].  Also requires !LCR[7]. */
@@ -203,6 +206,7 @@
  * requires ACR[6].
  */
 #define	com_icr		5	/* index control register (R/W) */
+#define	REG_ICR		com_icr
 
 /*
  * 16950 register #7.  It is the same as com_scr except it has a different
@@ -218,6 +222,7 @@
  */
 
 #define	com_acr		0	/* additional control register (R/W) */
+#define	REG_ACR		com_acr
 #define	ACR_ASE		0x80	/* ASR/RFL/TFL enable */
 #define	ACR_ICRE	0x40	/* ICR enable */
 #define	ACR_TLE		0x20	/* TTL/RTL enable */

Modified: trunk/sys/dev/ic/quicc.h
===================================================================
--- trunk/sys/dev/ic/quicc.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/quicc.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2006 Juniper Networks
  * All rights reserved.
@@ -23,7 +24,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/quicc.h 176772 2008-03-03 18:20:17Z raj $
  */
 
 #ifndef _DEV_IC_QUICC_H_

Modified: trunk/sys/dev/ic/rsa.h
===================================================================
--- trunk/sys/dev/ic/rsa.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/rsa.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1999 FreeBSD Inc.
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/rsa.h 77962 2001-06-10 04:28:39Z nyan $
  */
 
 /*

Modified: trunk/sys/dev/ic/sab82532.h
===================================================================
--- trunk/sys/dev/ic/sab82532.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/sab82532.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*	$OpenBSD: sab82532reg.h,v 1.2 2002/04/08 17:49:42 jason Exp $	*/
 
 /*-
@@ -34,7 +35,7 @@
  * Agency (DARPA) and Air Force Research Laboratory, Air Force
  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/sab82532.h 139749 2005-01-06 01:43:34Z imp $
  */
 
 #ifndef _DEV_IC_SAB82532_H_

Modified: trunk/sys/dev/ic/via6522reg.h
===================================================================
--- trunk/sys/dev/ic/via6522reg.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/via6522reg.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * Copyright 2004 by Peter Grehan. All rights reserved.
  *
@@ -24,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/via6522reg.h 153030 2005-12-02 22:36:14Z grehan $
  */
 
 #ifndef _VIA6522REG_H_

Modified: trunk/sys/dev/ic/wd33c93reg.h
===================================================================
--- trunk/sys/dev/ic/wd33c93reg.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/wd33c93reg.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/ic/wd33c93reg.h 139749 2005-01-06 01:43:34Z imp $ */
 /*	$NecBSD: wd33c93reg.h,v 1.21.24.1 2001/06/13 05:52:05 honda Exp $	*/
 /*	$NetBSD$	*/
 /*-

Modified: trunk/sys/dev/ic/z8530.h
===================================================================
--- trunk/sys/dev/ic/z8530.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ic/z8530.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2003 Marcel Moolenaar
  * All rights reserved.
@@ -23,7 +24,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ic/z8530.h 155967 2006-02-24 02:03:35Z marcel $
  */
 
 #ifndef _DEV_IC_Z8530_H_

Modified: trunk/sys/dev/ichsmb/ichsmb.c
===================================================================
--- trunk/sys/dev/ichsmb/ichsmb.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ichsmb/ichsmb.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * ichsmb.c
  *
@@ -36,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/ichsmb/ichsmb.c 188077 2009-02-03 16:14:37Z jhb $");
 
 /*
  * Support for the SMBus controller logical device which is part of the

Modified: trunk/sys/dev/ichsmb/ichsmb_pci.c
===================================================================
--- trunk/sys/dev/ichsmb/ichsmb_pci.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ichsmb/ichsmb_pci.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * ichsmb_pci.c
  *
@@ -5,7 +6,7 @@
  * Copyright (c) 2000 Whistle Communications, Inc.
  * All rights reserved.
  * Author: Archie Cobbs <archie at freebsd.org>
- * 
+ *
  * Subject to the following obligations and disclaimer of warranty, use and
  * redistribution of this software, in source or object code forms, with or
  * without modifications are expressly permitted by Whistle Communications;
@@ -16,7 +17,7 @@
  *    Communications, Inc. trademarks, including the mark "WHISTLE
  *    COMMUNICATIONS" on advertising, endorsements, or otherwise except as
  *    such appears in the above copyright notice or in the software.
- * 
+ *
  * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
  * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
  * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
@@ -37,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/ichsmb/ichsmb_pci.c 321932 2017-08-02 15:13:13Z gavin $");
 
 /*
  * Support for the SMBus controller logical device which is part of the
@@ -67,31 +68,89 @@
 #include <dev/ichsmb/ichsmb_reg.h>
 
 /* PCI unique identifiers */
-#define ID_82801AA			0x24138086
-#define ID_82801AB			0x24238086
-#define ID_82801BA			0x24438086
-#define ID_82801CA			0x24838086
-#define ID_82801DC			0x24C38086
-#define ID_82801EB			0x24D38086
-#define ID_82801FB			0x266A8086
-#define ID_82801GB			0x27da8086
-#define ID_82801H			0x283e8086
-#define ID_82801I			0x29308086
-#define ID_82801JI			0x3a308086
-#define ID_PCH				0x3b308086
-#define ID_6300ESB			0x25a48086
-#define	ID_631xESB			0x269b8086
-#define ID_DH89XXCC			0x23308086
-#define ID_PATSBURG			0x1d228086
-#define ID_CPT				0x1c228086
-#define ID_PPT				0x1e228086
-#define ID_AVOTON			0x1f3c8086
-#define ID_COLETOCRK			0x23B08086
-#define ID_LPT				0x8c228086
-#define ID_WCPT				0x8ca28086
-#define ID_WCPTLP			0x9ca28086
+#define	PCI_VENDOR_INTEL		0x8086
+#define	ID_82801AA			0x2413
+#define	ID_82801AB			0x2423
+#define	ID_82801BA			0x2443
+#define	ID_82801CA			0x2483
+#define	ID_82801DC			0x24C3
+#define	ID_82801EB			0x24D3
+#define	ID_82801FB			0x266A
+#define	ID_82801GB			0x27da
+#define	ID_82801H			0x283e
+#define	ID_82801I			0x2930
+#define	ID_EP80579			0x5032
+#define	ID_82801JI			0x3a30
+#define	ID_82801JD			0x3a60
+#define	ID_PCH				0x3b30
+#define	ID_6300ESB			0x25a4
+#define	ID_631xESB			0x269b
+#define	ID_DH89XXCC			0x2330
+#define	ID_PATSBURG			0x1d22
+#define	ID_CPT				0x1c22
+#define	ID_PPT				0x1e22
+#define	ID_AVOTON			0x1f3c
+#define	ID_COLETOCRK			0x23B0
+#define	ID_LPT				0x8c22
+#define	ID_LPTLP			0x9c22
+#define	ID_WCPT				0x8ca2
+#define	ID_WCPTLP			0x9ca2
+#define	ID_BAYTRAIL			0x0f12
+#define	ID_BRASWELL			0x2292
+#define	ID_WELLSBURG			0x8d22
+#define	ID_SRPT				0xa123
+#define	ID_SRPTLP			0x9d23
+#define	ID_DENVERTON			0x19df
+#define	ID_BROXTON			0x5ad4
+#define	ID_LEWISBURG			0xa1a3
+#define	ID_LEWISBURG2			0xa223
+#define	ID_KABYLAKE			0xa2a3
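+
+/*
+ * Illustrative note, not part of the upstream file: the IDs above are
+ * now bare 16-bit PCI device IDs.  The old table packed vendor and
+ * device into one 32-bit devid, (device << 16) | vendor, e.g.
+ *
+ *	0x24138086 == (0x2413 << 16) | 0x8086	(ID_82801AA)
+ *
+ * The rewritten probe below checks pci_get_vendor(dev) against
+ * PCI_VENDOR_INTEL first, then matches pci_get_device(dev) against
+ * the ichsmb_devices table.
+ */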
 
-#define PCIS_SERIALBUS_SMBUS_PROGIF	0x00
+static const struct ichsmb_device {
+	uint16_t	id;
+	const char	*name;
+} ichsmb_devices[] = {
+	{ ID_82801AA,	"Intel 82801AA (ICH) SMBus controller"		},
+	{ ID_82801AB,	"Intel 82801AB (ICH0) SMBus controller"		},
+	{ ID_82801BA,	"Intel 82801BA (ICH2) SMBus controller"		},
+	{ ID_82801CA,	"Intel 82801CA (ICH3) SMBus controller"		},
+	{ ID_82801DC,	"Intel 82801DC (ICH4) SMBus controller"		},
+	{ ID_82801EB,	"Intel 82801EB (ICH5) SMBus controller"		},
+	{ ID_82801FB,	"Intel 82801FB (ICH6) SMBus controller"		},
+	{ ID_82801GB,	"Intel 82801GB (ICH7) SMBus controller"		},
+	{ ID_82801H,	"Intel 82801H (ICH8) SMBus controller"		},
+	{ ID_82801I,	"Intel 82801I (ICH9) SMBus controller"		},
+	{ ID_EP80579,	"Intel EP80579 SMBus controller"		},
+	{ ID_82801JI,	"Intel 82801JI (ICH10) SMBus controller"	},
+	{ ID_82801JD,	"Intel 82801JD (ICH10) SMBus controller"	},
+	{ ID_PCH,	"Intel PCH SMBus controller"			},
+	{ ID_6300ESB,	"Intel 6300ESB (ICH) SMBus controller"		},
+	{ ID_631xESB,	"Intel 631xESB/6321ESB (ESB2) SMBus controller"	},
+	{ ID_DH89XXCC,	"Intel DH89xxCC SMBus controller"		},
+	{ ID_PATSBURG,	"Intel Patsburg SMBus controller"		},
+	{ ID_CPT,	"Intel Cougar Point SMBus controller"		},
+	{ ID_PPT,	"Intel Panther Point SMBus controller"		},
+	{ ID_AVOTON,	"Intel Avoton SMBus controller"			},
+	{ ID_LPT,	"Intel Lynx Point SMBus controller"		},
+	{ ID_LPTLP,	"Intel Lynx Point-LP SMBus controller"		},
+	{ ID_WCPT,	"Intel Wildcat Point SMBus controller"		},
+	{ ID_WCPTLP,	"Intel Wildcat Point-LP SMBus controller"	},
+	{ ID_BAYTRAIL,	"Intel Baytrail SMBus controller"		},
+	{ ID_BRASWELL,	"Intel Braswell SMBus controller"		},
+	{ ID_COLETOCRK,	"Intel Coleto Creek SMBus controller"		},
+	{ ID_WELLSBURG,	"Intel Wellsburg SMBus controller"		},
+	{ ID_SRPT,	"Intel Sunrise Point-H SMBus controller"	},
+	{ ID_SRPTLP,	"Intel Sunrise Point-LP SMBus controller"	},
+	{ ID_DENVERTON,	"Intel Denverton SMBus controller"		},
+	{ ID_BROXTON,	"Intel Broxton SMBus controller"		},
+	{ ID_LEWISBURG,	"Intel Lewisburg SMBus controller"		},
+	{ ID_LEWISBURG2,"Intel Lewisburg SMBus controller"		},
+	{ ID_KABYLAKE,	"Intel Kaby Lake SMBus controller"		},
+	{ 0, NULL },
+};
 
 /* Internal functions */
 static int	ichsmb_pci_probe(device_t dev);
@@ -137,83 +196,19 @@
 static int
 ichsmb_pci_probe(device_t dev)
 {
-	/* Check PCI identifier */
-	switch (pci_get_devid(dev)) {
-	case ID_82801AA:
-		device_set_desc(dev, "Intel 82801AA (ICH) SMBus controller");
-		break;
-	case ID_82801AB:
-		device_set_desc(dev, "Intel 82801AB (ICH0) SMBus controller");
-		break;
-	case ID_82801BA:
-		device_set_desc(dev, "Intel 82801BA (ICH2) SMBus controller");
-		break;
-	case ID_82801CA:
-		device_set_desc(dev, "Intel 82801CA (ICH3) SMBus controller");
-		break;
-	case ID_82801DC:
-		device_set_desc(dev, "Intel 82801DC (ICH4) SMBus controller");
-		break;
-	case ID_82801EB:
-		device_set_desc(dev, "Intel 82801EB (ICH5) SMBus controller");
-		break;
-	case ID_82801FB:
-		device_set_desc(dev, "Intel 82801FB (ICH6) SMBus controller");
-		break;
-	case ID_82801GB:
-		device_set_desc(dev, "Intel 82801GB (ICH7) SMBus controller");
-		break;
-	case ID_82801H:
-		device_set_desc(dev, "Intel 82801H (ICH8) SMBus controller");
-		break;
-	case ID_82801I:
-		device_set_desc(dev, "Intel 82801I (ICH9) SMBus controller");
-		break;
-	case ID_82801JI:
-		device_set_desc(dev, "Intel 82801JI (ICH10) SMBus controller");
-		break;
-	case ID_PCH:
-		device_set_desc(dev, "Intel PCH SMBus controller");
-		break;
-	case ID_6300ESB:
-		device_set_desc(dev, "Intel 6300ESB (ICH) SMBus controller");
-		break;
-	case ID_631xESB:
-		device_set_desc(dev, "Intel 631xESB/6321ESB (ESB2) SMBus controller");
-		break;
-	case ID_DH89XXCC:
-		device_set_desc(dev, "Intel DH89xxCC SMBus controller");
-		break;
-	case ID_PATSBURG:
-		device_set_desc(dev, "Intel Patsburg SMBus controller");
-		break;
-	case ID_CPT:
-		device_set_desc(dev, "Intel Cougar Point SMBus controller");
-		break;
-	case ID_PPT:
-		device_set_desc(dev, "Intel Panther Point SMBus controller");
-		break;
-	case ID_AVOTON:
-		device_set_desc(dev, "Intel Avoton SMBus controller");
-		break;
-	case ID_LPT:
-		device_set_desc(dev, "Intel Lynx Point SMBus controller");
-		break;
-	case ID_WCPT:
-		device_set_desc(dev, "Intel Wildcat Point SMBus controller");
-		break;
-	case ID_WCPTLP:
-		device_set_desc(dev, "Intel Wildcat Point-LP SMBus controller");
-		break;
-	case ID_COLETOCRK:
-		device_set_desc(dev, "Intel Coleto Creek SMBus controller");
-		break;
-	default:
+	const struct ichsmb_device *device;
+
+	if (pci_get_vendor(dev) != PCI_VENDOR_INTEL)
 		return (ENXIO);
+
+	for (device = ichsmb_devices; device->name != NULL; device++) {
+		if (pci_get_device(dev) == device->id) {
+			device_set_desc(dev, device->name);
+			return (ichsmb_probe(dev));
+		}
 	}
 
-	/* Done */
-	return (ichsmb_probe(dev));
+	return (ENXIO);
 }
 
 static int

Modified: trunk/sys/dev/ichsmb/ichsmb_reg.h
===================================================================
--- trunk/sys/dev/ichsmb/ichsmb_reg.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ichsmb/ichsmb_reg.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * ichsmb_reg.h
  *
@@ -35,7 +36,7 @@
  *
  * Author: Archie Cobbs <archie at freebsd.org>
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ichsmb/ichsmb_reg.h 139749 2005-01-06 01:43:34Z imp $
  */
 
 #ifndef _DEV_ICHSMB_ICHSMB_REG_H_

Modified: trunk/sys/dev/ichsmb/ichsmb_var.h
===================================================================
--- trunk/sys/dev/ichsmb/ichsmb_var.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/ichsmb/ichsmb_var.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * ichsmb_var.h
  *
@@ -35,7 +36,7 @@
  *
  * Author: Archie Cobbs <archie at freebsd.org>
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/ichsmb/ichsmb_var.h 179622 2008-06-06 18:29:56Z jhb $
  */
 
 #ifndef _DEV_ICHSMB_ICHSMB_VAR_H

Modified: trunk/sys/dev/iicbus/ad7417.c
===================================================================
--- trunk/sys/dev/iicbus/ad7417.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/ad7417.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2010 Andreas Tobler
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/ad7417.c 239397 2012-08-19 19:31:36Z andreast $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -104,6 +105,7 @@
 	uint32_t                sc_addr;
 	struct ad7417_sensor    *sc_sensors;
 	int                     sc_nsensors;
+	int                     init_done;
 };
 static device_method_t  ad7417_methods[] = {
 	/* Device interface */
@@ -247,7 +249,10 @@
 {
 	uint8_t buf;
 	int err;
+	struct ad7417_softc *sc;
 
+	sc = device_get_softc(dev);
+
 	adc741x_config = 0;
 	/* Clear Config2 */
 	buf = 0;
@@ -267,6 +272,8 @@
 	if (err < 0)
 		return (-1);
 
+	sc->init_done = 1;
+
 	return (0);
 
 }
@@ -430,10 +437,10 @@
 
 		if (sc->sc_sensors[i].type == ADC7417_TEMP_SENSOR) {
 			unit = "temp";
-			desc = "Sensor temp in C";
+			desc = "sensor unit (C)";
 		} else {
 			unit = "volt";
-			desc = "Sensor Volt in V";
+			desc = "sensor unit (mV)";
 		}
 		/* I use i to pass the sensor id. */
 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
@@ -584,9 +591,10 @@
 
 	sc = device_get_softc(sens->dev);
 
-	/* Init the ADC. */
-	if (ad7417_init_adc(sc->sc_dev, sc->sc_addr) < 0)
-		return (-1);
+	/* Init the ADC if not already done.*/
+	if (!sc->init_done)
+		if (ad7417_init_adc(sc->sc_dev, sc->sc_addr) < 0)
+			return (-1);
 
 	if (sens->type == ADC7417_TEMP_SENSOR) {
 		if (ad7417_get_temp(sc->sc_dev, sc->sc_addr, &temp) < 0)

Modified: trunk/sys/dev/iicbus/ad7418.c
===================================================================
--- trunk/sys/dev/iicbus/ad7418.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/ad7418.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2006 Sam Leffler.  All rights reserved.
  *
@@ -23,7 +24,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/ad7418.c 246128 2013-01-30 18:01:20Z sbz $");
 /*
  * Analog Devices AD7418 chip sitting on the I2C bus.
  */
@@ -219,7 +220,7 @@
 	DEVMETHOD(device_probe,		ad7418_probe),
 	DEVMETHOD(device_attach,	ad7418_attach),
 
-	{0, 0},
+	DEVMETHOD_END
 };
 
 static driver_t ad7418_driver = {

Added: trunk/sys/dev/iicbus/adt746x.c
===================================================================
--- trunk/sys/dev/iicbus/adt746x.c	                        (rev 0)
+++ trunk/sys/dev/iicbus/adt746x.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,665 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 Andreas Tobler
+ * Copyright (c) 2014 Justin Hibbits
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/adt746x.c 263197 2014-03-15 00:23:35Z jhibbits $");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/systm.h>
+#include <sys/module.h>
+#include <sys/callout.h>
+#include <sys/conf.h>
+#include <sys/cpu.h>
+#include <sys/ctype.h>
+#include <sys/kernel.h>
+#include <sys/reboot.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/limits.h>
+
+#include <machine/bus.h>
+#include <machine/md_var.h>
+
+#include <dev/iicbus/iicbus.h>
+#include <dev/iicbus/iiconf.h>
+
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_bus.h>
+#include <powerpc/powermac/powermac_thermal.h>
+
+/* ADT746X registers. */
+#define ADT746X_TACH1LOW          0x28
+#define ADT746X_TACH1HIGH         0x29
+#define ADT746X_TACH2LOW          0x2a
+#define ADT746X_TACH2HIGH         0x2b
+#define ADT746X_PWM1              0x30
+#define ADT746X_PWM2              0x31
+#define ADT746X_DEVICE_ID         0x3d
+#define ADT746X_COMPANY_ID        0x3e
+#define ADT746X_REV_ID            0x3f
+#define ADT746X_CONFIG            0x40
+#define ADT746X_PWM1_CONF         0x5c
+#define ADT746X_PWM2_CONF         0x5d
+#define ADT746X_MANUAL_MASK       0xe0
+
+#define ADT7460_DEV_ID            0x27
+#define ADT7467_DEV_ID            0x68
+
+struct adt746x_fan {
+	struct pmac_fan fan;
+	device_t        dev;
+	int             id;
+	int             setpoint;
+	int		pwm_reg;
+	int		conf_reg;
+};
+
+struct adt746x_sensor {
+	struct pmac_therm therm;
+	device_t          dev;
+	int               id;
+	cell_t	          reg;
+	enum {
+		ADT746X_SENSOR_TEMP,
+		ADT746X_SENSOR_VOLT,
+		ADT746X_SENSOR_SPEED
+	} type;
+};
+
+struct adt746x_softc {
+	device_t		sc_dev;
+	struct intr_config_hook enum_hook;
+	uint32_t                sc_addr;
+	/* The 7467 supports up to 4 fans, 2 voltage and 3 temperature sensors. */
+	struct adt746x_fan	sc_fans[4];
+	int			sc_nfans;
+	struct adt746x_sensor   sc_sensors[9];
+	int			sc_nsensors;
+	int                     device_id;
+};
+
+
+/* Regular bus attachment functions */
+
+static int  adt746x_probe(device_t);
+static int  adt746x_attach(device_t);
+
+
+/* Utility functions */
+static void adt746x_attach_fans(device_t dev);
+static void adt746x_attach_sensors(device_t dev);
+static int  adt746x_fill_fan_prop(device_t dev);
+static int  adt746x_fill_sensor_prop(device_t dev);
+
+static int  adt746x_fan_set_pwm(struct adt746x_fan *fan, int pwm);
+static int  adt746x_fan_get_pwm(struct adt746x_fan *fan);
+static int  adt746x_sensor_read(struct adt746x_sensor *sens);
+static void adt746x_start(void *xdev);
+
+/* i2c read/write functions. */
+static int  adt746x_write(device_t dev, uint32_t addr, uint8_t reg,
+			  uint8_t *buf);
+static int  adt746x_read(device_t dev, uint32_t addr, uint8_t reg,
+			 uint8_t *data);
+
+static device_method_t  adt746x_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,	 adt746x_probe),
+	DEVMETHOD(device_attach, adt746x_attach),
+	{ 0, 0 },
+};
+
+static driver_t adt746x_driver = {
+	"adt746x",
+	adt746x_methods,
+	sizeof(struct adt746x_softc)
+};
+
+static devclass_t adt746x_devclass;
+
+DRIVER_MODULE(adt746x, iicbus, adt746x_driver, adt746x_devclass, 0, 0);
+static MALLOC_DEFINE(M_ADT746X, "adt746x", "ADT Sensor Information");
+
+
+/* i2c read/write functions. */
+
+static int
+adt746x_write(device_t dev, uint32_t addr, uint8_t reg, uint8_t *buff)
+{
+	uint8_t buf[4];
+	int try = 0;
+
+	struct iic_msg msg[] = {
+		{addr, IIC_M_WR, 2, buf }
+	};
+
+	/* Prepare the write msg. */
+	buf[0] = reg;
+	memcpy(buf + 1, buff, 1);
+
+	for (;;)
+	{
+		if (iicbus_transfer(dev, msg, 1) == 0)
+			return (0);
+		if (++try > 5) {
+			device_printf(dev, "iicbus write failed\n");
+			return (-1);
+		}
+		pause("adt746x_write", hz);
+	}
+	return (0);
+}
+
+static int
+adt746x_read(device_t dev, uint32_t addr, uint8_t reg, uint8_t *data)
+{
+	uint8_t buf[4];
+	int err, try = 0;
+
+	struct iic_msg msg[2] = {
+		{addr, IIC_M_WR | IIC_M_NOSTOP, 1, &reg},
+		{addr, IIC_M_RD, 1, buf},
+	};
+
+	for (;;)
+	{
+		err = iicbus_transfer(dev, msg, 2);
+		if (err != 0)
+			goto retry;
+
+		*data = *((uint8_t*)buf);
+		return (0);
+	retry:
+		if (++try > 5) {
+			device_printf(dev, "iicbus read failed\n");
+			return (-1);
+		}
+		pause("adt746x_read", hz);
+	}
+}
+
+static int
+adt746x_probe(device_t dev)
+{
+	const char  *name, *compatible;
+	struct adt746x_softc *sc;
+
+	name = ofw_bus_get_name(dev);
+	compatible = ofw_bus_get_compat(dev);
+
+	if (!name)
+		return (ENXIO);
+
+	if (strcmp(name, "fan") != 0 ||
+	    (strcmp(compatible, "adt7460") != 0 &&
+	     strcmp(compatible, "adt7467") != 0))
+		return (ENXIO);
+
+	sc = device_get_softc(dev);
+	sc->sc_dev = dev;
+	sc->sc_addr = iicbus_get_addr(dev);
+
+	device_set_desc(dev, "Apple Thermostat Unit ADT746X");
+
+	return (0);
+}
+
+static int
+adt746x_attach(device_t dev)
+{
+	struct adt746x_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	sc->enum_hook.ich_func = adt746x_start;
+	sc->enum_hook.ich_arg = dev;
+
+	/*
+	 * We have to wait until interrupts are enabled: I2C reads and
+	 * writes only work once interrupts are available.  The unin/i2c
+	 * controller is driven by the htpic on unin, but that is not the
+	 * master; the openpic on mac-io controls the htpic and attaches
+	 * only after mac-io probing, so interrupts become available then.
+	 */
+
+	if (config_intrhook_establish(&sc->enum_hook) != 0)
+		return (ENOMEM);
+
+	return (0);
+}
+
+static void
+adt746x_start(void *xdev)
+{
+	uint8_t did, cid, rev, conf;
+
+	struct adt746x_softc *sc;
+
+	device_t dev = (device_t)xdev;
+
+	sc = device_get_softc(dev);
+
+	adt746x_read(sc->sc_dev, sc->sc_addr, ADT746X_DEVICE_ID, &did);
+	adt746x_read(sc->sc_dev, sc->sc_addr, ADT746X_COMPANY_ID, &cid);
+	adt746x_read(sc->sc_dev, sc->sc_addr, ADT746X_REV_ID, &rev);
+	adt746x_read(sc->sc_dev, sc->sc_addr, ADT746X_CONFIG, &conf);
+
+	device_printf(dev, "Dev ID %#x, Company ID %#x, Rev ID %#x CNF: %#x\n",
+		      did, cid, rev, conf);
+
+	/*
+	 * The device id can be taken either from OF properties or from
+	 * the chip itself; reading it from the chip also verifies that
+	 * the chip is reachable.
+	 */
+
+	sc->device_id = did;
+
+	conf = 1;
+	/* Start the ADT7460.  */
+	if (sc->device_id == ADT7460_DEV_ID)
+		adt746x_write(sc->sc_dev, sc->sc_addr, ADT746X_CONFIG, &conf);
+
+	/* Detect and attach child devices.  */
+	adt746x_attach_fans(dev);
+	adt746x_attach_sensors(dev);
+	config_intrhook_disestablish(&sc->enum_hook);
+}
+
+/*
+ * Sensor and fan management
+ */
+static int
+adt746x_fan_set_pwm(struct adt746x_fan *fan, int pwm)
+{
+	uint8_t reg = 0, manual, mode = 0;
+	struct adt746x_softc *sc;
+	uint8_t buf;
+
+	sc = device_get_softc(fan->dev);
+
+	/* Clamp to allowed range */
+	pwm = max(fan->fan.min_rpm, pwm);
+	pwm = min(fan->fan.max_rpm, pwm);
+
+	reg = fan->pwm_reg;
+	mode = fan->conf_reg;
+
+	/*
+	 * From the 7460 datasheet: the PWM duty cycle can be programmed
+	 * from 0% (0x00) to 100% (0xFF) in steps of 0.39% (256 steps).
+	 */
+	buf = (pwm * 100 / 39) - (pwm ? 1 : 0);
+	fan->setpoint = buf;
+
+	/* Manual mode.  */
+	adt746x_read(sc->sc_dev, sc->sc_addr, mode, &manual);
+	manual |= ADT746X_MANUAL_MASK;
+	adt746x_write(sc->sc_dev, sc->sc_addr, mode, &manual);
+
+	/* Write speed.  */
+	adt746x_write(sc->sc_dev, sc->sc_addr, reg, &buf);
+
+	return (0);
+}
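+
+/*
+ * Illustrative arithmetic, not part of the upstream file: with
+ * pwm = 100, buf = (100 * 100 / 39) - 1 = 256 - 1 = 255 (0xFF, full
+ * duty); with pwm = 50, buf = (50 * 100 / 39) - 1 = 128 - 1 = 127
+ * (about half of 0xFF).  adt746x_fan_get_pwm() below applies the
+ * inverse mapping, (buf * 39 / 100) + (buf ? 1 : 0), so 255 maps
+ * back to 99 + 1 = 100.
+ */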
+
+static int
+adt746x_fan_get_pwm(struct adt746x_fan *fan)
+{
+	uint8_t buf, reg;
+	uint16_t pwm;
+	struct adt746x_softc *sc;
+
+	sc = device_get_softc(fan->dev);
+
+	reg = fan->pwm_reg;
+
+	adt746x_read(sc->sc_dev, sc->sc_addr, reg, &buf);
+
+	pwm = (buf * 39 / 100) + (buf ? 1 : 0);
+	return (pwm);
+}
+
+static int
+adt746x_fill_fan_prop(device_t dev)
+{
+	phandle_t child;
+	struct adt746x_softc *sc;
+	u_int *id;
+	char *location;
+	int i, id_len, len = 0, location_len, prev_len = 0;
+
+	sc = device_get_softc(dev);
+
+	child = ofw_bus_get_node(dev);
+
+	/* Fill the fan location property. */
+	location_len = OF_getprop_alloc(child, "hwctrl-location", 1, (void **)&location);
+	id_len = OF_getprop_alloc(child, "hwctrl-id", sizeof(cell_t), (void **)&id);
+	if (location_len == -1 || id_len == -1) {
+		free(location, M_OFWPROP);
+		free(id, M_OFWPROP);
+		return 0;
+	}
+
+	/* Fill in all the properties for each fan. */
+	for (i = 0; i < id_len; i++) {
+		strlcpy(sc->sc_fans[i].fan.name, location + len, 32);
+		prev_len = strlen(location + len) + 1;
+		len += prev_len;
+		sc->sc_fans[i].id = id[i];
+		if (id[i] == 6) {
+			sc->sc_fans[i].pwm_reg = ADT746X_PWM1;
+			sc->sc_fans[i].conf_reg = ADT746X_PWM1_CONF;
+		} else if (id[i] == 7) {
+			sc->sc_fans[i].pwm_reg = ADT746X_PWM2;
+			sc->sc_fans[i].conf_reg = ADT746X_PWM2_CONF;
+		} else {
+			sc->sc_fans[i].pwm_reg = ADT746X_PWM1 + i;
+			sc->sc_fans[i].conf_reg = ADT746X_PWM1_CONF + i;
+		}
+		sc->sc_fans[i].dev = sc->sc_dev;
+		sc->sc_fans[i].fan.min_rpm = 5;	/* Percent */
+		sc->sc_fans[i].fan.max_rpm = 100;
+		sc->sc_fans[i].fan.read = NULL;
+		sc->sc_fans[i].fan.set =
+			(int (*)(struct pmac_fan *, int))(adt746x_fan_set_pwm);
+		sc->sc_fans[i].fan.default_rpm = sc->sc_fans[i].fan.max_rpm;
+	}
+	free(location, M_OFWPROP);
+	free(id, M_OFWPROP);
+
+	return (i);
+}
+
+static int
+adt746x_fill_sensor_prop(device_t dev)
+{
+	phandle_t child, node;
+	struct adt746x_softc *sc;
+	char sens_type[32];
+	int i = 0, reg, sensid;
+
+	sc = device_get_softc(dev);
+
+	child = ofw_bus_get_node(dev);
+
+	/* Fill in the sensor properties for each child. */
+	for (node = OF_child(child); node != 0; node = OF_peer(node)) {
+		if (OF_getprop(node, "sensor-id", &sensid, sizeof(sensid)) == -1)
+		    continue;
+		OF_getprop(node, "location", sc->sc_sensors[i].therm.name, 32);
+		OF_getprop(node, "device_type", sens_type, sizeof(sens_type));
+		if (strcmp(sens_type, "temperature") == 0)
+			sc->sc_sensors[i].type = ADT746X_SENSOR_TEMP;
+		else if (strcmp(sens_type, "voltage") == 0)
+			sc->sc_sensors[i].type = ADT746X_SENSOR_VOLT;
+		else
+			sc->sc_sensors[i].type = ADT746X_SENSOR_SPEED;
+		OF_getprop(node, "reg", &reg, sizeof(reg));
+		OF_getprop(node, "sensor-id", &sensid,
+			sizeof(sensid));
+		/* This is the i2c register of the sensor.  */
+		sc->sc_sensors[i].reg = reg;
+		sc->sc_sensors[i].id = sensid;
+		OF_getprop(node, "zone", &sc->sc_sensors[i].therm.zone,
+			sizeof(sc->sc_sensors[i].therm.zone));
+		sc->sc_sensors[i].dev = dev;
+		sc->sc_sensors[i].therm.read =
+		    (int (*)(struct pmac_therm *))adt746x_sensor_read;
+		if (sc->sc_sensors[i].type == ADT746X_SENSOR_TEMP) {
+		    /* Make up some ranges */
+		    sc->sc_sensors[i].therm.target_temp = 500 + ZERO_C_TO_K;
+		    sc->sc_sensors[i].therm.max_temp = 800 + ZERO_C_TO_K;
+
+		    pmac_thermal_sensor_register(&sc->sc_sensors[i].therm);
+		}
+		i++;
+	}
+
+	return (i);
+}
+
+static int
+adt746x_fanrpm_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	device_t adt;
+	struct adt746x_softc *sc;
+	struct adt746x_fan *fan;
+	int pwm = 0, error;
+
+	adt = arg1;
+	sc = device_get_softc(adt);
+	fan = &sc->sc_fans[arg2];
+	pwm = adt746x_fan_get_pwm(fan);
+	error = sysctl_handle_int(oidp, &pwm, 0, req);
+
+	if (error || !req->newptr)
+		return (error);
+
+	return (adt746x_fan_set_pwm(fan, pwm));
+}
+
+static void
+adt746x_attach_fans(device_t dev)
+{
+	struct adt746x_softc *sc;
+	struct sysctl_oid *oid, *fanroot_oid;
+	struct sysctl_ctx_list *ctx;
+	phandle_t child;
+	char sysctl_name[32];
+	int i, j;
+
+	sc = device_get_softc(dev);
+
+	sc->sc_nfans = 0;
+
+	child = ofw_bus_get_node(dev);
+
+	/* Count the actual number of fans. */
+	sc->sc_nfans = adt746x_fill_fan_prop(dev);
+
+	device_printf(dev, "%d fans detected!\n", sc->sc_nfans);
+
+	if (sc->sc_nfans == 0) {
+		device_printf(dev, "WARNING: No fans detected!\n");
+		return;
+	}
+
+	ctx = device_get_sysctl_ctx(dev);
+	fanroot_oid = SYSCTL_ADD_NODE(ctx,
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fans",
+	    CTLFLAG_RD, 0, "ADT Fan Information");
+
+	/* Now we can fill the properties into the allocated struct. */
+	sc->sc_nfans = adt746x_fill_fan_prop(dev);
+
+	/* Register fans with pmac_thermal */
+	for (i = 0; i < sc->sc_nfans; i++)
+		pmac_thermal_fan_register(&sc->sc_fans[i].fan);
+
+	/* Add sysctls for the fans. */
+	for (i = 0; i < sc->sc_nfans; i++) {
+		for (j = 0; j < strlen(sc->sc_fans[i].fan.name); j++) {
+			sysctl_name[j] = tolower(sc->sc_fans[i].fan.name[j]);
+			if (isspace(sysctl_name[j]))
+				sysctl_name[j] = '_';
+		}
+		sysctl_name[j] = 0;
+
+		sc->sc_fans[i].setpoint =
+			adt746x_fan_get_pwm(&sc->sc_fans[i]);
+
+		oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(fanroot_oid),
+		    OID_AUTO, sysctl_name, CTLFLAG_RD, 0, "Fan Information");
+
+		/* I use i to pass the fan id. */
+		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
+				"pwm", CTLTYPE_INT | CTLFLAG_RW, dev, i,
+				adt746x_fanrpm_sysctl, "I", "Fan PWM in %");
+	}
+
+	/* Dump fan location & type. */
+	if (bootverbose) {
+		for (i = 0; i < sc->sc_nfans; i++) {
+			device_printf(dev, "Fan location: %s",
+				      sc->sc_fans[i].fan.name);
+			device_printf(dev, " id: %d RPM: %d\n",
+				      sc->sc_fans[i].id,
+				      sc->sc_fans[i].setpoint);
+		}
+	}
+}
+
+static int
+adt746x_sensor_read(struct adt746x_sensor *sens)
+{
+	struct adt746x_softc *sc;
+	uint16_t tmp = 0;
+	uint16_t val;
+	uint8_t temp, data[1], data1[1];
+
+	sc = device_get_softc(sens->dev);
+	if (sens->type != ADT746X_SENSOR_SPEED) {
+		if (adt746x_read(sc->sc_dev, sc->sc_addr, sens->reg,
+				 &temp) < 0)
+			return (-1);
+		if (sens->type == ADT746X_SENSOR_TEMP)
+			tmp = 10 * temp + ZERO_C_TO_K;
+		else
+			tmp = temp;
+	} else {
+		if (adt746x_read(sc->sc_dev, sc->sc_addr, sens->reg,
+				 data) < 0)
+			return (-1);
+		if (adt746x_read(sc->sc_dev, sc->sc_addr, sens->reg + 1,
+				 data1) < 0)
+			return (-1);
+		val = data[0] + (data1[0] << 8);
+		/* A value of 0xffff means the fan is stopped.  */
+		if (val == 0 || val == 0xffff)
+			tmp = 0;
+		else
+			tmp = (90000 * 60) / val;
+	}
+	return (tmp);
+}
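+
+/*
+ * Illustrative arithmetic, not part of the upstream file: the two tach
+ * registers form a 16-bit count of 90 kHz clock periods, so the formula
+ * above works out to RPM = (90000 * 60) / count; e.g. a count of 2700
+ * gives 5400000 / 2700 = 2000 RPM, while 0 or 0xffff is reported as a
+ * stopped fan (0 RPM).
+ */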
+
+static int
+adt746x_sensor_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	device_t dev;
+	struct adt746x_softc *sc;
+	struct adt746x_sensor *sens;
+	int value, error;
+
+	dev = arg1;
+	sc = device_get_softc(dev);
+	sens = &sc->sc_sensors[arg2];
+
+	value = sens->therm.read(&sens->therm);
+	if (value < 0)
+		return (ENXIO);
+
+	error = sysctl_handle_int(oidp, &value, 0, req);
+
+	return (error);
+}
+
+static void
+adt746x_attach_sensors(device_t dev)
+{
+	struct adt746x_softc *sc;
+	struct sysctl_oid *oid, *sensroot_oid;
+	struct sysctl_ctx_list *ctx;
+	phandle_t child;
+	char sysctl_name[40];
+	const char *unit;
+	const char *desc;
+	int i, j;
+
+
+	sc = device_get_softc(dev);
+	sc->sc_nsensors = 0;
+	child = ofw_bus_get_node(dev);
+
+	/* Count the actual number of sensors. */
+	sc->sc_nsensors = adt746x_fill_sensor_prop(dev);
+	device_printf(dev, "%d sensors detected!\n", sc->sc_nsensors);
+	if (sc->sc_nsensors == 0) {
+		device_printf(dev, "WARNING: No sensors detected!\n");
+		return;
+	}
+
+	ctx = device_get_sysctl_ctx(dev);
+	sensroot_oid = SYSCTL_ADD_NODE(ctx,
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "sensors",
+	    CTLFLAG_RD, 0, "ADT Sensor Information");
+
+	/* Add the sysctl for the sensors. */
+	for (i = 0; i < sc->sc_nsensors; i++) {
+		for (j = 0; j < strlen(sc->sc_sensors[i].therm.name); j++) {
+			sysctl_name[j] = tolower(sc->sc_sensors[i].therm.name[j]);
+			if (isspace(sysctl_name[j]))
+				sysctl_name[j] = '_';
+		}
+		sysctl_name[j] = 0;
+		oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sensroot_oid),
+				      OID_AUTO,
+				      sysctl_name, CTLFLAG_RD, 0,
+				      "Sensor Information");
+		if (sc->sc_sensors[i].type == ADT746X_SENSOR_TEMP) {
+			unit = "temp";
+			desc = "sensor unit (C)";
+		} else if (sc->sc_sensors[i].type == ADT746X_SENSOR_VOLT) {
+			unit = "volt";
+			desc = "sensor unit (mV)";
+		} else {
+			unit = "rpm";
+			desc = "sensor unit (RPM)";
+		}
+		/* I use i to pass the sensor id. */
+		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
+				unit, CTLTYPE_INT | CTLFLAG_RD, dev, i,
+				adt746x_sensor_sysctl,
+				sc->sc_sensors[i].type == ADT746X_SENSOR_TEMP ?
+				"IK" : "I", desc);
+	}
+
+	/* Dump sensor location & type. */
+	if (bootverbose) {
+		for (i = 0; i < sc->sc_nsensors; i++) {
+			device_printf(dev, "Sensor location: %s",
+				      sc->sc_sensors[i].therm.name);
+			device_printf(dev, " type: %d id: %d reg: 0x%x\n",
+				      sc->sc_sensors[i].type,
+				      sc->sc_sensors[i].id,
+				      sc->sc_sensors[i].reg);
+		}
+	}
+}


Property changes on: trunk/sys/dev/iicbus/adt746x.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/dev/iicbus/ds133x.c
===================================================================
--- trunk/sys/dev/iicbus/ds133x.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/ds133x.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2008 Stanislav Sedov <stas at FreeBSD.org>,
  *                    Rafal Jaworowski <raj at FreeBSD.org>,
@@ -26,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/ds133x.c 246128 2013-01-30 18:01:20Z sbz $");
 /*
  * Dallas Semiconductor DS133X RTC sitting on the I2C bus.
  */
@@ -347,7 +348,7 @@
 	DEVMETHOD(clock_gettime,	ds133x_gettime),
 	DEVMETHOD(clock_settime,	ds133x_settime),
 
-	{0, 0},
+	DEVMETHOD_END
 };
 
 static driver_t ds133x_driver = {

Added: trunk/sys/dev/iicbus/ds1374.c
===================================================================
--- trunk/sys/dev/iicbus/ds1374.c	                        (rev 0)
+++ trunk/sys/dev/iicbus/ds1374.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,138 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2003-2012 Broadcom Corporation
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY BROADCOM ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/ds1374.c 235652 2012-05-19 17:42:11Z marcel $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/clock.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/rman.h>
+
+#include <dev/iicbus/iiconf.h>
+#include <dev/iicbus/iicbus.h>
+
+#include "iicbus_if.h"
+#include "clock_if.h"
+
+#define	DS1374_RTC_COUNTER	0	/* counter (bytes 0-3) */
+
+struct ds1374_softc {
+	uint32_t	sc_addr;
+	device_t	sc_dev;
+};
+
+static int
+ds1374_probe(device_t dev)
+{
+	device_set_desc(dev, "DS1374 RTC");
+	return (0);
+}
+
+static int
+ds1374_attach(device_t dev)
+{
+	struct ds1374_softc *sc = device_get_softc(dev);
+
+	if (sc == NULL) {
+		printf("ds1374_attach: device_get_softc failed\n");
+		return (ENXIO);
+	}
+	sc->sc_dev = dev;
+	sc->sc_addr = iicbus_get_addr(dev);
+
+	clock_register(dev, 1000);
+	return (0);
+}
+
+static int 
+ds1374_settime(device_t dev, struct timespec *ts)
+{
+	/* NB: register pointer precedes actual data */
+	uint8_t data[5] = { DS1374_RTC_COUNTER };
+	struct ds1374_softc *sc = device_get_softc(dev);
+	struct iic_msg msgs[1] = {
+	     { sc->sc_addr, IIC_M_WR, 5, data },
+	};
+
+	data[1] = (ts->tv_sec >> 0) & 0xff;
+	data[2] = (ts->tv_sec >> 8) & 0xff;
+	data[3] = (ts->tv_sec >> 16) & 0xff;
+	data[4] = (ts->tv_sec >> 24) & 0xff;
+
+	return iicbus_transfer(dev, msgs, 1);
+}
+
+static int
+ds1374_gettime(device_t dev, struct timespec *ts)
+{
+	struct ds1374_softc *sc = device_get_softc(dev);
+	uint8_t addr[1] = { DS1374_RTC_COUNTER };
+	uint8_t secs[4];
+	struct iic_msg msgs[2] = {
+	     { sc->sc_addr, IIC_M_WR, 1, addr },
+	     { sc->sc_addr, IIC_M_RD, 4, secs },
+	};
+	int error;
+
+	error = iicbus_transfer(dev, msgs, 2);
+	if (error == 0) {
+		/* counter has seconds since epoch */
+		ts->tv_sec = (secs[3] << 24) | (secs[2] << 16)
+			   | (secs[1] <<  8) | (secs[0] <<  0);
+		ts->tv_nsec = 0;
+	}
+	return error;
+}
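+
+/*
+ * Illustrative byte layout, not part of the upstream file: the DS1374
+ * keeps time as a little-endian 32-bit seconds counter in registers
+ * 0-3.  For ts->tv_sec = 0x5b0b5f40 (late May 2018), settime() sends
+ * data[] = { DS1374_RTC_COUNTER, 0x40, 0x5f, 0x0b, 0x5b } and
+ * gettime() reassembles the same value from secs[0..3].
+ */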
+
+static device_method_t ds1374_methods[] = {
+	DEVMETHOD(device_probe,		ds1374_probe),
+	DEVMETHOD(device_attach,	ds1374_attach),
+
+	DEVMETHOD(clock_gettime,	ds1374_gettime),
+	DEVMETHOD(clock_settime,	ds1374_settime),
+
+	DEVMETHOD_END
+};
+
+static driver_t ds1374_driver = {
+	"ds1374_rtc",
+	ds1374_methods,
+	sizeof(struct ds1374_softc),
+};
+static devclass_t ds1374_devclass;
+
+DRIVER_MODULE(ds1374, iicbus, ds1374_driver, ds1374_devclass, 0, 0);
+MODULE_VERSION(ds1374, 1);
+MODULE_DEPEND(ds1374, iicbus, 1, 1, 1);


Property changes on: trunk/sys/dev/iicbus/ds1374.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/iicbus/ds1631.c
===================================================================
--- trunk/sys/dev/iicbus/ds1631.c	                        (rev 0)
+++ trunk/sys/dev/iicbus/ds1631.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,414 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 Andreas Tobler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/ds1631.c 260673 2014-01-15 05:52:06Z jhibbits $");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/systm.h>
+#include <sys/module.h>
+#include <sys/callout.h>
+#include <sys/conf.h>
+#include <sys/cpu.h>
+#include <sys/ctype.h>
+#include <sys/kernel.h>
+#include <sys/reboot.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/limits.h>
+
+#include <machine/bus.h>
+#include <machine/md_var.h>
+
+#include <dev/iicbus/iicbus.h>
+#include <dev/iicbus/iiconf.h>
+
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_bus.h>
+#include <powerpc/powermac/powermac_thermal.h>
+
+/* Sensor: Maxim DS1631 */
+
+#define DS1631_STOP            0x22
+#define DS1631_START           0x51
+#define DS1631_RESET           0x54
+#define DS1631_TEMP            0xAA
+#define DS1631_CONTROL         0xAC
+#define DS1631_CONTROL_1SHOT   0x01
+#define DS1631_CONTROL_9BIT    0x00
+#define DS1631_CONTROL_10BIT   0x04
+#define DS1631_CONTROL_11BIT   0x08
+#define DS1631_CONTROL_12BIT   0x0C
+
+
+
+/* Regular bus attachment functions */
+static int  ds1631_probe(device_t);
+static int  ds1631_attach(device_t);
+
+struct ds1631_softc {
+	struct pmac_therm	sc_sensor;
+	device_t		sc_dev;
+	struct intr_config_hook enum_hook;
+	uint32_t                sc_addr;
+	uint32_t		init_done;
+};
+
+struct write_data {
+	uint8_t reg;
+	uint8_t val;
+};
+
+struct read_data {
+	uint8_t reg;
+	uint16_t val;
+};
+
+/* Utility functions */
+static int  ds1631_sensor_read(struct ds1631_softc *sc);
+static int  ds1631_sensor_sysctl(SYSCTL_HANDLER_ARGS);
+static void ds1631_start(void *xdev);
+static int  ds1631_read_1(device_t dev, uint32_t addr, uint8_t reg,
+			  uint8_t *data);
+static int  ds1631_read_2(device_t dev, uint32_t addr, uint8_t reg,
+			  uint16_t *data);
+static int  ds1631_write(device_t dev, uint32_t addr, uint8_t reg,
+			 uint8_t *buff, int len);
+
+static device_method_t  ds1631_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		ds1631_probe),
+	DEVMETHOD(device_attach,	ds1631_attach),
+	{ 0, 0 },
+};
+
+static driver_t ds1631_driver = {
+	"ds1631",
+	ds1631_methods,
+	sizeof(struct ds1631_softc)
+};
+
+static devclass_t ds1631_devclass;
+
+DRIVER_MODULE(ds1631, iicbus, ds1631_driver, ds1631_devclass, 0, 0);
+
+static int
+ds1631_write(device_t dev, uint32_t addr, uint8_t reg, uint8_t *buff, int len)
+{
+	uint8_t buf[4];
+	int try = 0;
+
+	struct iic_msg msg[] = {
+		{ addr, IIC_M_WR, 0, buf }
+	};
+
+	/* Prepare the write msg. */
+	msg[0].len = len + 1;
+	buf[0] = reg;
+	memcpy(buf + 1, buff, len);
+
+	for (;;)
+	{
+		if (iicbus_transfer(dev, msg, 1) == 0)
+			return (0);
+		if (++try > 5) {
+			device_printf(dev, "iicbus write failed\n");
+			return (-1);
+		}
+		pause("ds1631_write", hz);
+	}
+}
+
+static int
+ds1631_read_1(device_t dev, uint32_t addr, uint8_t reg, uint8_t *data)
+{
+	uint8_t buf[4];
+	int err, try = 0;
+
+	struct iic_msg msg[2] = {
+		{ addr, IIC_M_WR, 1, &reg },
+		{ addr, IIC_M_RD, 1, buf },
+	};
+
+	for (;;)
+	{
+		err = iicbus_transfer(dev, msg, 2);
+		if (err != 0)
+			goto retry;
+
+		*data = *((uint8_t*)buf);
+		return (0);
+	retry:
+		if (++try > 5) {
+			device_printf(dev, "iicbus read failed\n");
+			return (-1);
+		}
+		pause("ds1631_read_1", hz);
+	}
+}
+
+static int
+ds1631_read_2(device_t dev, uint32_t addr, uint8_t reg, uint16_t *data)
+{
+	uint8_t buf[4];
+	int err, try = 0;
+
+	struct iic_msg msg[2] = {
+		{ addr, IIC_M_WR, 1, &reg },
+		{ addr, IIC_M_RD, 2, buf },
+	};
+
+	for (;;) {
+		err = iicbus_transfer(dev, msg, 2);
+		if (err != 0)
+			goto retry;
+
+		*data = *((uint16_t*)buf);
+		return (0);
+	retry:
+		if (++try > 5) {
+			device_printf(dev, "iicbus read failed\n");
+			return (-1);
+		}
+		pause("ds1631_read_2", hz);
+	}
+}
+
+static int
+ds1631_probe(device_t dev)
+{
+	const char  *name, *compatible;
+	struct ds1631_softc *sc;
+
+	name = ofw_bus_get_name(dev);
+	compatible = ofw_bus_get_compat(dev);
+
+	if (name == NULL || compatible == NULL)
+		return (ENXIO);
+
+	if (strcmp(name, "temp-monitor") != 0 ||
+	    strcmp(compatible, "ds1631") != 0)
+		return (ENXIO);
+
+	sc = device_get_softc(dev);
+	sc->sc_dev = dev;
+	sc->sc_addr = iicbus_get_addr(dev);
+
+	device_set_desc(dev, "Temp-Monitor DS1631");
+
+	return (0);
+}
+
+static int
+ds1631_attach(device_t dev)
+{
+	struct ds1631_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	sc->enum_hook.ich_func = ds1631_start;
+	sc->enum_hook.ich_arg = dev;
+
+	/*
+	 * We have to wait until interrupts are enabled.  I2C reads and
+	 * writes only work once interrupts are available.  The unin/i2c is
+	 * controlled by the htpic on unin, but that htpic is not the master;
+	 * the openpic on mac-io controls it and only attaches after mac-io
+	 * probing, at which point interrupts become available.
+	 */
+
+	if (config_intrhook_establish(&sc->enum_hook) != 0)
+		return (ENOMEM);
+
+	return (0);
+}
+
+static int
+ds1631_init(device_t dev, uint32_t addr)
+{
+	uint8_t conf;
+	int err;
+	struct ds1631_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	err = ds1631_read_1(dev, addr, DS1631_CONTROL, &conf);
+	if (err < 0) {
+		device_printf(dev, "ds1631 read config failed: %x\n", err);
+		return (-1);
+	}
+
+	/* Stop the conversion if not in 1SHOT mode. */
+	if (conf & ~DS1631_CONTROL_1SHOT)
+		err = ds1631_write(dev, addr, DS1631_STOP, &conf, 0);
+
+	/*
+	 * Setup the resolution, 10-bit is enough. Each bit increase in
+	 * resolution doubles the conversion time.
+	 */
+	conf = DS1631_CONTROL_10BIT;
+
+	err = ds1631_write(dev, addr, DS1631_CONTROL, &conf, 1);
+	if (err < 0) {
+		device_printf(dev, "ds1631 write config failed: %x\n", err);
+		return (-1);
+	}
+
+	/* And now start the conversion. */
+	err = ds1631_write(dev, addr, DS1631_START, &conf, 0);
+
+	if (err < 0) {
+		device_printf(dev, "ds1631 write start failed: %x\n", err);
+		return (-1);
+	}
+
+	sc->init_done = 1;
+
+	return (0);
+}
+
+static void
+ds1631_start(void *xdev)
+{
+	phandle_t child, node;
+	struct ds1631_softc *sc;
+	struct sysctl_oid *oid, *sensroot_oid;
+	struct sysctl_ctx_list *ctx;
+	ssize_t plen;
+	int i;
+	char  sysctl_desc[40], sysctl_name[40];
+
+	device_t dev = (device_t)xdev;
+
+	sc = device_get_softc(dev);
+
+	child = ofw_bus_get_node(dev);
+
+	ctx = device_get_sysctl_ctx(dev);
+	sensroot_oid = SYSCTL_ADD_NODE(ctx,
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "sensor",
+	    CTLFLAG_RD, 0, "DS1631 Sensor Information");
+
+	if (OF_getprop(child, "hwsensor-zone", &sc->sc_sensor.zone,
+		       sizeof(int)) < 0)
+		sc->sc_sensor.zone = 0;
+
+	plen = OF_getprop(child, "hwsensor-location", sc->sc_sensor.name,
+			  sizeof(sc->sc_sensor.name));
+	if (plen == -1) {
+		/*
+		 * Ok, no hwsensor-location property, so let's look for a
+		 * location property on a sub node.
+		 */
+		for (node = OF_child(child); node; node = OF_peer(node))
+			plen = OF_getprop(node, "location", sc->sc_sensor.name,
+					  sizeof(sc->sc_sensor.name));
+	}
+
+	if (plen == -1) {
+		strcpy(sysctl_name, "sensor");
+	} else {
+		for (i = 0; i < strlen(sc->sc_sensor.name); i++) {
+			sysctl_name[i] = tolower(sc->sc_sensor.name[i]);
+			if (isspace(sysctl_name[i]))
+				sysctl_name[i] = '_';
+		}
+		sysctl_name[i] = 0;
+	}
+
+	/* Make up target temperatures. These are low, for the drive bay. */
+	if (sc->sc_sensor.zone == 0) {
+		sc->sc_sensor.target_temp = 400 + ZERO_C_TO_K;
+		sc->sc_sensor.max_temp = 500 + ZERO_C_TO_K;
+	} else {
+		sc->sc_sensor.target_temp = 300 + ZERO_C_TO_K;
+		sc->sc_sensor.max_temp = 500 + ZERO_C_TO_K;
+	}
+
+	sc->sc_sensor.read =
+	    (int (*)(struct pmac_therm *sc))(ds1631_sensor_read);
+	pmac_thermal_sensor_register(&sc->sc_sensor);
+
+	sprintf(sysctl_desc, "%s (C)", sc->sc_sensor.name);
+	oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sensroot_oid),
+			      OID_AUTO, sysctl_name, CTLFLAG_RD, 0,
+			      "Sensor Information");
+	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "temp",
+			CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev,
+			0, ds1631_sensor_sysctl, "IK", sysctl_desc);
+
+	config_intrhook_disestablish(&sc->enum_hook);
+}
+
+static int
+ds1631_sensor_read(struct ds1631_softc *sc)
+{
+	uint16_t buf[2];
+	uint16_t read;
+	int err;
+
+	if (!sc->init_done)
+		ds1631_init(sc->sc_dev, sc->sc_addr);
+
+	err = ds1631_read_2(sc->sc_dev, sc->sc_addr, DS1631_TEMP, buf);
+	if (err < 0) {
+		device_printf(sc->sc_dev, "ds1631 read TEMP failed: %x\n", err);
+		return (-1);
+	}
+
+	read = *((int16_t *)buf);
+
+	/*
+	 * The ADC defaults to 12-bit mode with a resolution of 0.0625 C per
+	 * bit; the temperature we return is in tenths of a kelvin.  We use
+	 * 10-bit resolution, which seems sufficient and gives 0.25 C per bit.
+	 */
+
+	return (((int16_t)(read) >> 6) * 25 / 10 + ZERO_C_TO_K);
+}
+
+static int
+ds1631_sensor_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	device_t dev;
+	struct ds1631_softc *sc;
+	int error;
+	int temp;
+
+	dev = arg1;
+	sc = device_get_softc(dev);
+
+	temp = ds1631_sensor_read(sc);
+	if (temp < 0)
+		return (EIO);
+
+	error = sysctl_handle_int(oidp, &temp, 0, req);
+
+	return (error);
+}


Property changes on: trunk/sys/dev/iicbus/ds1631.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
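
For reference, the conversion at the end of ds1631_sensor_read() maps the
chip's left-justified 16-bit temperature register to the deci-kelvin units
the powermac thermal code expects.  A minimal userland sketch of that
arithmetic, assuming ZERO_C_TO_K is 0 C expressed in tenths of a kelvin
(about 2731) and a big-endian register image as seen on PowerPC:

#include <stdio.h>
#include <stdint.h>

#define ZERO_C_TO_K	2731	/* assumption: 0 C in deci-kelvin */

static int
ds1631_convert(uint16_t raw)
{
	/* Keep the top 10 bits; each step is 0.25 C, i.e. 2.5 deci-degrees. */
	return (((int16_t)raw >> 6) * 25 / 10 + ZERO_C_TO_K);
}

int
main(void)
{
	/* 0x1900 is 25.0 C in the DS1631's 8.8 fixed-point format. */
	printf("%d deci-K\n", ds1631_convert(0x1900));	/* prints 2981 */
	return (0);
}
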
Modified: trunk/sys/dev/iicbus/ds1672.c
===================================================================
--- trunk/sys/dev/iicbus/ds1672.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/ds1672.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2006 Sam Leffler.  All rights reserved.
  *
@@ -23,7 +24,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/ds1672.c 246128 2013-01-30 18:01:20Z sbz $");
 /*
  * Dallas Semiconductor DS1672 RTC sitting on the I2C bus.
  */
@@ -167,7 +168,7 @@
 	DEVMETHOD(clock_gettime,	ds1672_gettime),
 	DEVMETHOD(clock_settime,	ds1672_settime),
 
-	{0, 0},
+	DEVMETHOD_END
 };
 
 static driver_t ds1672_driver = {

Modified: trunk/sys/dev/iicbus/ds1775.c
===================================================================
--- trunk/sys/dev/iicbus/ds1775.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/ds1775.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2010 Andreas Tobler
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/ds1775.c 260673 2014-01-15 05:52:06Z jhibbits $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -87,7 +88,7 @@
 
 static devclass_t ds1775_devclass;
 
-DRIVER_MODULE(ds1755, iicbus, ds1775_driver, ds1775_devclass, 0, 0);
+DRIVER_MODULE(ds1775, iicbus, ds1775_driver, ds1775_devclass, 0, 0);
 
 static int
 ds1775_read_2(device_t dev, uint32_t addr, uint8_t reg, uint16_t *data)
@@ -172,12 +173,11 @@
 {
 	phandle_t child;
 	struct ds1775_softc *sc;
-	struct sysctl_oid *sensroot_oid;
+	struct sysctl_oid *oid, *sensroot_oid;
 	struct sysctl_ctx_list *ctx;
 	ssize_t plen;
 	int i;
 	char sysctl_name[40], sysctl_desc[40];
-	const char *units;
 
 	device_t dev = (device_t)xdev;
 
@@ -186,7 +186,9 @@
 	child = ofw_bus_get_node(dev);
 
 	ctx = device_get_sysctl_ctx(dev);
-	sensroot_oid = device_get_sysctl_tree(dev);
+	sensroot_oid = SYSCTL_ADD_NODE(ctx,
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "sensor",
+	    CTLFLAG_RD, 0, "DS1775 Sensor Information");
 
 	if (OF_getprop(child, "hwsensor-zone", &sc->sc_sensor.zone,
 		       sizeof(int)) < 0)
@@ -194,7 +196,6 @@
 
 	plen = OF_getprop(child, "hwsensor-location", sc->sc_sensor.name,
 			  sizeof(sc->sc_sensor.name));
-	units = "C";
 
 	if (plen == -1) {
 		strcpy(sysctl_name, "sensor");
@@ -221,9 +222,11 @@
 	    (int (*)(struct pmac_therm *sc))(ds1775_sensor_read);
 	pmac_thermal_sensor_register(&sc->sc_sensor);
 
-	sprintf(sysctl_desc,"%s (%s)", sc->sc_sensor.name, units);
-	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(sensroot_oid), OID_AUTO,
-			sysctl_name,
+	sprintf(sysctl_desc, "%s (C)", sc->sc_sensor.name);
+	oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sensroot_oid),
+			      OID_AUTO, sysctl_name, CTLFLAG_RD, 0,
+			      "Sensor Information");
+	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "temp",
 			CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev,
 			0, ds1775_sensor_sysctl, "IK", sysctl_desc);
 
@@ -255,7 +258,7 @@
 	device_t dev;
 	struct ds1775_softc *sc;
 	int error;
-	unsigned int temp;
+	int temp;
 
 	dev = arg1;
 	sc = device_get_softc(dev);

Modified: trunk/sys/dev/iicbus/icee.c
===================================================================
--- trunk/sys/dev/iicbus/icee.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/icee.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2006 Warner Losh.  All rights reserved.
  *
@@ -23,10 +24,13 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/icee.c 294674 2016-01-24 18:54:55Z ian $");
 /*
  * Generic IIC eeprom support, modeled after the AT24C family of products.
  */
+
+#include "opt_platform.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
@@ -37,32 +41,73 @@
 #include <sys/sx.h>
 #include <sys/uio.h>
 #include <machine/bus.h>
+
+#ifdef FDT
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#endif
+
 #include <dev/iicbus/iiconf.h>
 #include <dev/iicbus/iicbus.h>
 
 #include "iicbus_if.h"
 
-#define	IIC_M_WR	0	/* write operation */
+/*
+ * AT24 parts have a "write page size" that differs per-device, and a "read page
+ * size" that is always equal to the full device size.  We define maximum values
+ * here to limit how long we occupy the bus with a single transfer, and because
+ * there are temporary buffers of these sizes allocated on the stack.
+ */
 #define	MAX_RD_SZ	256	/* Largest read size we support */
-#define MAX_WR_SZ	256	/* Largest write size we support */
+#define	MAX_WR_SZ	256	/* Largest write size we support */
 
 struct icee_softc {
-	device_t	sc_dev;		/* Myself */
-	struct sx	sc_lock;	/* basically a perimeter lock */
+	device_t	dev;		/* Myself */
 	struct cdev	*cdev;		/* user interface */
-	int		addr;
+	int		addr;		/* Slave address on the bus */
 	int		size;		/* How big am I? */
-	int		type;		/* What type 8 or 16 bit? */
-	int		rd_sz;		/* What's the read page size */
+	int		type;		/* What address type 8 or 16 bit? */
 	int		wr_sz;		/* What's the write page size */
 };
 
-#define ICEE_LOCK(_sc)		sx_xlock(&(_sc)->sc_lock)
-#define	ICEE_UNLOCK(_sc)	sx_xunlock(&(_sc)->sc_lock)
-#define ICEE_LOCK_INIT(_sc)	sx_init(&_sc->sc_lock, "icee")
-#define ICEE_LOCK_DESTROY(_sc)	sx_destroy(&_sc->sc_lock);
-#define ICEE_ASSERT_LOCKED(_sc)	sx_assert(&_sc->sc_lock, SA_XLOCKED);
-#define ICEE_ASSERT_UNLOCKED(_sc) sx_assert(&_sc->sc_lock, SA_UNLOCKED);
+#ifdef FDT
+struct eeprom_desc {
+	int	    type;
+	int	    size;
+	int	    wr_sz;
+	const char *name;
+};
+
+static struct eeprom_desc type_desc[] = {
+	{ 8,        128,   8, "AT24C01"},
+	{ 8,        256,   8, "AT24C02"},
+	{ 8,        512,  16, "AT24C04"},
+	{ 8,       1024,  16, "AT24C08"},
+	{ 8,   2 * 1024,  16, "AT24C16"},
+	{16,   4 * 1024,  32, "AT24C32"},
+	{16,   8 * 1024,  32, "AT24C64"},
+	{16,  16 * 1024,  64, "AT24C128"},
+	{16,  32 * 1024,  64, "AT24C256"},
+	{16,  64 * 1024, 128, "AT24C512"},
+	{16, 128 * 1024, 256, "AT24CM01"},
+};
+
+static struct ofw_compat_data compat_data[] = {
+	{"atmel,24c01",	  (uintptr_t)(&type_desc[0])},
+	{"atmel,24c02",	  (uintptr_t)(&type_desc[1])},
+	{"atmel,24c04",	  (uintptr_t)(&type_desc[2])},
+	{"atmel,24c08",	  (uintptr_t)(&type_desc[3])},
+	{"atmel,24c16",	  (uintptr_t)(&type_desc[4])},
+	{"atmel,24c32",	  (uintptr_t)(&type_desc[5])},
+	{"atmel,24c64",	  (uintptr_t)(&type_desc[6])},
+	{"atmel,24c128",  (uintptr_t)(&type_desc[7])},
+	{"atmel,24c256",  (uintptr_t)(&type_desc[8])},
+	{"atmel,24c512",  (uintptr_t)(&type_desc[9])},
+	{"atmel,24c1024", (uintptr_t)(&type_desc[10])},
+	{NULL,		  (uintptr_t)NULL},
+};
+#endif
+
 #define CDEV2SOFTC(dev)		((dev)->si_drv1)
 
 /* cdev routines */
@@ -81,45 +126,85 @@
 	.d_write = icee_write
 };
 
+#ifdef FDT
 static int
 icee_probe(device_t dev)
 {
-	/* XXX really probe? -- not until we know the size... */
+	struct eeprom_desc *d;
+
+	if (!ofw_bus_status_okay(dev))
+		return (ENXIO);
+
+	d = (struct eeprom_desc *)
+	    ofw_bus_search_compatible(dev, compat_data)->ocd_data;
+	if (d == NULL)
+		return (ENXIO);
+
+	device_set_desc(dev, d->name);
+	return (BUS_PROBE_DEFAULT);
+}
+
+static void
+icee_init(struct icee_softc *sc)
+{
+	struct eeprom_desc *d;
+
+	d = (struct eeprom_desc *)
+	    ofw_bus_search_compatible(sc->dev, compat_data)->ocd_data;
+	if (d == NULL)
+		return; /* attach will see sc->size == 0 and return error */
+
+	sc->size  = d->size;
+	sc->type  = d->type;
+	sc->wr_sz = d->wr_sz;
+}
+#else /* !FDT */
+static int
+icee_probe(device_t dev)
+{
+
 	device_set_desc(dev, "I2C EEPROM");
 	return (BUS_PROBE_NOWILDCARD);
 }
 
+static void
+icee_init(struct icee_softc *sc)
+{
+	const char *dname;
+	int dunit;
+
+	dname = device_get_name(sc->dev);
+	dunit = device_get_unit(sc->dev);
+	resource_int_value(dname, dunit, "size", &sc->size);
+	resource_int_value(dname, dunit, "type", &sc->type);
+	resource_int_value(dname, dunit, "wr_sz", &sc->wr_sz);
+}
+#endif /* FDT */
+
 static int
 icee_attach(device_t dev)
 {
 	struct icee_softc *sc = device_get_softc(dev);
-	const char *dname;
-	int dunit, err;
 
-	sc->sc_dev = dev;
+	sc->dev = dev;
 	sc->addr = iicbus_get_addr(dev);
-	err = 0;
-	dname = device_get_name(dev);
-	dunit = device_get_unit(dev);
-	resource_int_value(dname, dunit, "size", &sc->size);
-	resource_int_value(dname, dunit, "type", &sc->type);
-	resource_int_value(dname, dunit, "rd_sz", &sc->rd_sz);
-	if (sc->rd_sz > MAX_RD_SZ)
-		sc->rd_sz = MAX_RD_SZ;
-	resource_int_value(dname, dunit, "wr_sz", &sc->wr_sz);
+	icee_init(sc);
+	if (sc->size == 0 || sc->type == 0 || sc->wr_sz == 0) {
+		device_printf(sc->dev, "Missing config data, "
+		    "these cannot be zero: size %d type %d wr_sz %d\n",
+		    sc->size, sc->type, sc->wr_sz);
+		return (EINVAL);
+	}
 	if (bootverbose)
-		device_printf(dev, "size: %d bytes bus_width: %d-bits\n",
+		device_printf(dev, "size: %d bytes, addressing: %d-bits\n",
 		    sc->size, sc->type);
 	sc->cdev = make_dev(&icee_cdevsw, device_get_unit(dev), UID_ROOT,
 	    GID_WHEEL, 0600, "icee%d", device_get_unit(dev));
 	if (sc->cdev == NULL) {
-		err = ENOMEM;
-		goto out;
+		return (ENOMEM);
 	}
 	sc->cdev->si_drv1 = sc;
-	ICEE_LOCK_INIT(sc);
-out:;
-	return (err);
+	return (0);
 }
 
 static int 
@@ -126,7 +211,7 @@
 icee_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
 {
 
-    	return (0);
+	return (0);
 }
 
 static int
@@ -155,12 +240,11 @@
 		return (EIO);
 	if (sc->type != 8 && sc->type != 16)
 		return (EINVAL);
-	ICEE_LOCK(sc);
 	slave = error = 0;
 	while (uio->uio_resid > 0) {
 		if (uio->uio_offset >= sc->size)
 			break;
-		len = MIN(sc->rd_sz - (uio->uio_offset & (sc->rd_sz - 1)),
+		len = MIN(MAX_RD_SZ - (uio->uio_offset & (MAX_RD_SZ - 1)),
 		    uio->uio_resid);
 		switch (sc->type) {
 		case 8:
@@ -179,14 +263,15 @@
 		}
 		for (i = 0; i < 2; i++)
 			msgs[i].slave = slave;
-		error = iicbus_transfer(sc->sc_dev, msgs, 2);
-		if (error)
+		error = iicbus_transfer_excl(sc->dev, msgs, 2, IIC_INTRWAIT);
+		if (error) {
+			error = iic2errno(error);
 			break;
+		}
 		error = uiomove(data, len, uio);
 		if (error)
 			break;
 	}
-	ICEE_UNLOCK(sc);
 	return (error);
 }
 
@@ -214,7 +299,7 @@
 		return (EIO);
 	if (sc->type != 8 && sc->type != 16)
 		return (EINVAL);
-	ICEE_LOCK(sc);
+
 	slave = error = 0;
 	while (uio->uio_resid > 0) {
 		if (uio->uio_offset >= sc->size)
@@ -238,23 +323,23 @@
 		error = uiomove(data + sc->type / 8, len, uio);
 		if (error)
 			break;
-		error = iicbus_transfer(sc->sc_dev, wr, 1);
-		if (error)
+		error = iicbus_transfer_excl(sc->dev, wr, 1, IIC_INTRWAIT);
+		if (error) {
+			error = iic2errno(error);
 			break;
-		// Now wait for the write to be done by trying to read
-		// the part.
+		}
+		/* Read after write to wait for write-done. */
 		waitlimit = 10000;
 		rd[0].slave = slave;
-		do 
-		{
-		    error = iicbus_transfer(sc->sc_dev, rd, 1);
+		do {
+			error = iicbus_transfer_excl(sc->dev, rd, 1,
+			    IIC_INTRWAIT);
 		} while (waitlimit-- > 0 && error != 0);
 		if (error) {
-		    printf("waiting for write failed %d\n", error);
-		    break;
+			error = iic2errno(error);
+			break;
 		}
 	}
-	ICEE_UNLOCK(sc);
 	return error;
 }
 
@@ -262,7 +347,7 @@
 	DEVMETHOD(device_probe,		icee_probe),
 	DEVMETHOD(device_attach,	icee_attach),
 
-	{0, 0},
+	DEVMETHOD_END
 };
 
 static driver_t icee_driver = {

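The rewritten icee read loop above replaces the per-device rd_sz with the
fixed MAX_RD_SZ chunk size, clipping each transfer so it never crosses a
MAX_RD_SZ-aligned boundary.  A standalone sketch of that arithmetic with
made-up offsets (MAX_RD_SZ must be a power of two for the mask to work):

#include <stdio.h>

#define MAX_RD_SZ	256	/* must be a power of two */
#define MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	long offset = 200, resid = 400;	/* hypothetical uio state */

	while (resid > 0) {
		/* Clip the chunk so it ends at the next MAX_RD_SZ boundary. */
		long len = MIN(MAX_RD_SZ - (offset & (MAX_RD_SZ - 1)), resid);

		printf("read %ld bytes at offset %ld\n", len, offset);
		offset += len;
		resid -= len;
	}
	return (0);
}
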
Modified: trunk/sys/dev/iicbus/if_ic.c
===================================================================
--- trunk/sys/dev/iicbus/if_ic.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/if_ic.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998, 2001 Nicolas Souchu
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/if_ic.c 255471 2013-09-11 09:19:44Z glebius $");
 
 /*
  * I2C bus IP driver
@@ -99,7 +100,7 @@
 static int icattach(device_t);
 
 static int icioctl(struct ifnet *, u_long, caddr_t);
-static int icoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+static int icoutput(struct ifnet *, struct mbuf *, const struct sockaddr *,
                struct route *);
 
 static int icintr(device_t, int, char *);
@@ -204,7 +205,6 @@
 
 	switch (cmd) {
 
-	case SIOCSIFDSTADDR:
 	case SIOCAIFADDR:
 	case SIOCSIFADDR:
 		if (ifa->ifa_addr->sa_family != AF_INET)
@@ -351,7 +351,7 @@
  * icoutput()
  */
 static int
-icoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+icoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	struct ic_softc *sc = ifp->if_softc;

Modified: trunk/sys/dev/iicbus/iic.c
===================================================================
--- trunk/sys/dev/iicbus/iic.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/iic.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998, 2001 Nicolas Souchu
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/iicbus/iic.c 300948 2016-05-29 07:14:51Z jah $
  *
  */
 #include <sys/param.h>
@@ -37,6 +38,7 @@
 #include <sys/sx.h>
 #include <sys/systm.h>
 #include <sys/uio.h>
+#include <sys/errno.h>
 
 #include <dev/iicbus/iiconf.h>
 #include <dev/iicbus/iicbus.h>
@@ -44,28 +46,32 @@
 
 #include "iicbus_if.h"
 
-#define BUFSIZE 1024
-
 struct iic_softc {
-
 	device_t sc_dev;
-	u_char sc_addr;			/* 7 bit address on iicbus */
-	int sc_count;			/* >0 if device opened */
-
-	char sc_buffer[BUFSIZE];	/* output buffer */
-	char sc_inbuf[BUFSIZE];		/* input buffer */
-
 	struct cdev *sc_devnode;
-	struct sx sc_lock;
 };
 
-#define	IIC_LOCK(sc)			sx_xlock(&(sc)->sc_lock)
-#define	IIC_UNLOCK(sc)			sx_xunlock(&(sc)->sc_lock)
+struct iic_cdevpriv {
+	struct sx lock;
+	struct iic_softc *sc;
+	bool started;
+	uint8_t addr;
+};
 
+
+#define	IIC_LOCK(cdp)			sx_xlock(&(cdp)->lock)
+#define	IIC_UNLOCK(cdp)			sx_xunlock(&(cdp)->lock)
+
+static MALLOC_DEFINE(M_IIC, "iic", "I2C device data");
+
 static int iic_probe(device_t);
 static int iic_attach(device_t);
 static int iic_detach(device_t);
 static void iic_identify(driver_t *driver, device_t parent);
+static void iicdtor(void *data);
+static int iicuio_move(struct iic_cdevpriv *priv, struct uio *uio, int last);
+static int iicuio(struct cdev *dev, struct uio *uio, int ioflag);
+static int iicrdwr(struct iic_cdevpriv *priv, struct iic_rdwr_data *d, int flags);
 
 static devclass_t iic_devclass;
 
@@ -89,18 +95,13 @@
 };
 
 static	d_open_t	iicopen;
-static	d_close_t	iicclose;
-static	d_write_t	iicwrite;
-static	d_read_t	iicread;
 static	d_ioctl_t	iicioctl;
 
 static struct cdevsw iic_cdevsw = {
 	.d_version =	D_VERSION,
-	.d_flags =	D_TRACKCLOSE,
 	.d_open =	iicopen,
-	.d_close =	iicclose,
-	.d_read =	iicread,
-	.d_write =	iicwrite,
+	.d_read =	iicuio,
+	.d_write =	iicuio,
 	.d_ioctl =	iicioctl,
 	.d_name =	"iic",
 };
@@ -127,16 +128,15 @@
 static int
 iic_attach(device_t dev)
 {
-	struct iic_softc *sc = (struct iic_softc *)device_get_softc(dev);
+	struct iic_softc *sc;
 
+	sc = device_get_softc(dev);
 	sc->sc_dev = dev;
-	sx_init(&sc->sc_lock, "iic");
 	sc->sc_devnode = make_dev(&iic_cdevsw, device_get_unit(dev),
 			UID_ROOT, GID_WHEEL,
 			0600, "iic%d", device_get_unit(dev));
 	if (sc->sc_devnode == NULL) {
 		device_printf(dev, "failed to create character device\n");
-		sx_destroy(&sc->sc_lock);
 		return (ENXIO);
 	}
 	sc->sc_devnode->si_drv1 = sc;
@@ -147,11 +147,12 @@
 static int
 iic_detach(device_t dev)
 {
-	struct iic_softc *sc = (struct iic_softc *)device_get_softc(dev);
+	struct iic_softc *sc;
 
+	sc = device_get_softc(dev);
+
 	if (sc->sc_devnode)
 		destroy_dev(sc->sc_devnode);
-	sx_destroy(&sc->sc_lock);
 
 	return (0);
 }
@@ -159,123 +160,197 @@
 static int
 iicopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
-	struct iic_softc *sc = dev->si_drv1;
+	struct iic_cdevpriv *priv;
+	int error;
 
-	IIC_LOCK(sc);
-	if (sc->sc_count > 0) {
-		IIC_UNLOCK(sc);
-		return (EBUSY);
-	}
+	priv = malloc(sizeof(*priv), M_IIC, M_WAITOK | M_ZERO);
 
-	sc->sc_count++;
-	IIC_UNLOCK(sc);
+	sx_init(&priv->lock, "iic");
+	priv->sc = dev->si_drv1;
 
-	return (0);
+	error = devfs_set_cdevpriv(priv, iicdtor); 
+	if (error != 0)
+		free(priv, M_IIC);
+
+	return (error);
 }
 
-static int
-iicclose(struct cdev *dev, int flags, int fmt, struct thread *td)
+static void
+iicdtor(void *data)
 {
-	struct iic_softc *sc = dev->si_drv1;
+	device_t iicdev, parent;
+	struct iic_cdevpriv *priv;
 
-	IIC_LOCK(sc);
-	if (!sc->sc_count) {
-		/* XXX: I don't think this can happen. */
-		IIC_UNLOCK(sc);
-		return (EINVAL);
-	}
+	priv = data;
+	KASSERT(priv != NULL, ("iic cdevpriv should not be NULL!"));
 
-	sc->sc_count--;
+	iicdev = priv->sc->sc_dev;
+	parent = device_get_parent(iicdev);
 
-	if (sc->sc_count < 0)
-		panic("%s: iic_count < 0!", __func__);
-	IIC_UNLOCK(sc);
+	if (priv->started) {
+		iicbus_stop(parent);
+		iicbus_reset(parent, IIC_UNKNOWN, 0, NULL);
+		iicbus_release_bus(parent, iicdev);
+	}
 
-	return (0);
+	sx_destroy(&priv->lock);
+	free(priv, M_IIC);
 }
 
 static int
-iicwrite(struct cdev *dev, struct uio * uio, int ioflag)
+iicuio_move(struct iic_cdevpriv *priv, struct uio *uio, int last)
 {
-	struct iic_softc *sc = dev->si_drv1;
-	device_t iicdev = sc->sc_dev;
-	int sent, error, count;
+	device_t parent;
+	int error, num_bytes, transferred_bytes, written_bytes;
+	char buffer[128];
 
-	IIC_LOCK(sc);
-	if (!sc->sc_addr) {
-		IIC_UNLOCK(sc);
-		return (EINVAL);
+	parent = device_get_parent(priv->sc->sc_dev);
+	error = 0;
+
+	/*
+	 * We can only transfer up to sizeof(buffer) bytes in one shot, so
+	 * loop until everything has been transferred.
+	 */
+	while ((error == 0) && (uio->uio_resid > 0)) {
+
+		num_bytes = MIN(uio->uio_resid, sizeof(buffer));
+		transferred_bytes = 0;
+
+		if (uio->uio_rw == UIO_WRITE) {
+			error = uiomove(buffer, num_bytes, uio);
+
+			while ((error == 0) && (transferred_bytes < num_bytes)) {
+				written_bytes = 0;
+				error = iicbus_write(parent, &buffer[transferred_bytes],
+				    num_bytes - transferred_bytes, &written_bytes, 0);
+				transferred_bytes += written_bytes;
+			}
+
+		} else if (uio->uio_rw == UIO_READ) {
+			error = iicbus_read(parent, buffer,
+			    num_bytes, &transferred_bytes,
+			    ((uio->uio_resid <= sizeof(buffer)) ? last : 0), 0);
+			if (error == 0)
+				error = uiomove(buffer, transferred_bytes, uio);
+		}
 	}
 
-	if (sc->sc_count == 0) {
-		/* XXX: I don't think this can happen. */
-		IIC_UNLOCK(sc);
-		return (EINVAL);
+	return (error);
+}
+
+static int
+iicuio(struct cdev *dev, struct uio *uio, int ioflag)
+{
+	device_t parent;
+	struct iic_cdevpriv *priv;
+	int error;
+	uint8_t addr;
+
+	priv = NULL;
+	error = devfs_get_cdevpriv((void**)&priv);
+
+	if (error != 0)
+		return (error);
+	KASSERT(priv != NULL, ("iic cdevpriv should not be NULL!"));
+
+	IIC_LOCK(priv);
+	if (priv->started || (priv->addr == 0)) {
+		IIC_UNLOCK(priv);
+		return (ENXIO);
 	}
+	parent = device_get_parent(priv->sc->sc_dev);
 
-	error = iicbus_request_bus(device_get_parent(iicdev), iicdev,
-	    IIC_DONTWAIT);
-	if (error) {
-		IIC_UNLOCK(sc);
+	error = iicbus_request_bus(parent, priv->sc->sc_dev,
+	    (ioflag & O_NONBLOCK) ? IIC_DONTWAIT : (IIC_WAIT | IIC_INTR));
+	if (error != 0) {
+		IIC_UNLOCK(priv);
 		return (error);
 	}
 
-	count = min(uio->uio_resid, BUFSIZE);
-	uiomove(sc->sc_buffer, count, uio);
+	if (uio->uio_rw == UIO_READ)
+		addr = priv->addr | LSB;
+	else
+		addr = priv->addr & ~LSB;
 
-	error = iicbus_block_write(device_get_parent(iicdev), sc->sc_addr,
-					sc->sc_buffer, count, &sent);
+	error = iicbus_start(parent, addr, 0);
+	if (error != 0) {
+		iicbus_release_bus(parent, priv->sc->sc_dev);
+		IIC_UNLOCK(priv);
+		return (error);
+	}
 
-	iicbus_release_bus(device_get_parent(iicdev), iicdev);
-	IIC_UNLOCK(sc);
+	error = iicuio_move(priv, uio, IIC_LAST_READ);
 
+	iicbus_stop(parent);
+	iicbus_release_bus(parent, priv->sc->sc_dev);
+	IIC_UNLOCK(priv);
 	return (error);
 }
 
 static int
-iicread(struct cdev *dev, struct uio * uio, int ioflag)
+iicrdwr(struct iic_cdevpriv *priv, struct iic_rdwr_data *d, int flags)
 {
-	struct iic_softc *sc = dev->si_drv1;
-	device_t iicdev = sc->sc_dev;
-	int len, error = 0;
-	int bufsize;
+	struct iic_msg *buf, *m;
+	void **usrbufs;
+	device_t iicdev, parent;
+	int error, i;
 
-	IIC_LOCK(sc);
-	if (!sc->sc_addr) {
-		IIC_UNLOCK(sc);
-		return (EINVAL);
-	}
+	iicdev = priv->sc->sc_dev;
+	parent = device_get_parent(iicdev);
+	error = 0;
 
-	if (sc->sc_count == 0) {
-		/* XXX: I don't think this can happen. */
-		IIC_UNLOCK(sc);
+	if (d->nmsgs > IIC_RDRW_MAX_MSGS)
 		return (EINVAL);
-	}
 
-	error = iicbus_request_bus(device_get_parent(iicdev), iicdev,
-	    IIC_DONTWAIT);
-	if (error) {
-		IIC_UNLOCK(sc);
+	buf = malloc(sizeof(*d->msgs) * d->nmsgs, M_IIC, M_WAITOK);
+
+	error = copyin(d->msgs, buf, sizeof(*d->msgs) * d->nmsgs);
+	if (error != 0) {
+		free(buf, M_IIC);
 		return (error);
 	}
 
-	/* max amount of data to read */
-	len = min(uio->uio_resid, BUFSIZE);
+	/* Alloc kernel buffers for userland data, copyin write data */
+	usrbufs = malloc(sizeof(void *) * d->nmsgs, M_IIC, M_WAITOK | M_ZERO);
 
-	error = iicbus_block_read(device_get_parent(iicdev), sc->sc_addr,
-	    sc->sc_inbuf, len, &bufsize);
-	if (error) {
-		IIC_UNLOCK(sc);
-		return (error);
+	for (i = 0; i < d->nmsgs; i++) {
+		m = &(buf[i]);
+		usrbufs[i] = m->buf;
+
+		/*
+		 * At least init the buffer to NULL so we can safely free() it
+		 * later.  If a previous copyin() failed, don't try to malloc a
+		 * bogus m->len.
+		 */
+		m->buf = NULL;
+		if (error != 0)
+			continue;
+
+		/* m->len is uint16_t, so allocation size is capped at 64K. */
+		m->buf = malloc(m->len, M_IIC, M_WAITOK);
+		if (!(m->flags & IIC_M_RD))
+			error = copyin(usrbufs[i], m->buf, m->len);
 	}
 
-	if (bufsize > uio->uio_resid)
-		panic("%s: too much data read!", __func__);
+	if (error == 0)
+		error = iicbus_request_bus(parent, iicdev,
+		    (flags & O_NONBLOCK) ? IIC_DONTWAIT : (IIC_WAIT | IIC_INTR));
 
-	iicbus_release_bus(device_get_parent(iicdev), iicdev);
+	if (error == 0) {
+		error = iicbus_transfer(iicdev, buf, d->nmsgs);
+		iicbus_release_bus(parent, iicdev);
+	}
 
-	error = uiomove(sc->sc_inbuf, bufsize, uio);
-	IIC_UNLOCK(sc);
+	/* Copyout all read segments, free up kernel buffers */
+	for (i = 0; i < d->nmsgs; i++) {
+		m = &(buf[i]);
+		if ((error == 0) && (m->flags & IIC_M_RD))
+			error = copyout(m->buf, usrbufs[i], m->len);
+		free(m->buf, M_IIC);
+	}
+
+	free(usrbufs, M_IIC);
+	free(buf, M_IIC);
 	return (error);
 }
 
@@ -282,105 +357,143 @@
 static int
 iicioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags, struct thread *td)
 {
-	struct iic_softc *sc = dev->si_drv1;
-	device_t iicdev = sc->sc_dev;
-	device_t parent = device_get_parent(iicdev);
-	struct iiccmd *s = (struct iiccmd *)data;
-	struct iic_rdwr_data *d = (struct iic_rdwr_data *)data;
-	struct iic_msg *m;
-	int error, count, i;
-	char *buf = NULL;
-	void **usrbufs = NULL;
+	device_t parent, iicdev;
+	struct iiccmd *s;
+	struct uio ubuf;
+	struct iovec uvec;
+	struct iic_cdevpriv *priv;
+	int error;
 
-	if ((error = iicbus_request_bus(parent, iicdev,
-	    (flags & O_NONBLOCK) ? IIC_DONTWAIT : (IIC_WAIT | IIC_INTR))))
+	s = (struct iiccmd *)data;
+	error = devfs_get_cdevpriv((void**)&priv);
+	if (error != 0)
 		return (error);
 
+	KASSERT(priv != NULL, ("iic cdevpriv should not be NULL!"));
+
+	iicdev = priv->sc->sc_dev;
+	parent = device_get_parent(iicdev);
+	IIC_LOCK(priv);
+
 	switch (cmd) {
 	case I2CSTART:
-		IIC_LOCK(sc);
-		error = iicbus_start(parent, s->slave, 0);
+		if (priv->started) {
+			error = EINVAL;
+			break;
+		}
+		error = iicbus_request_bus(parent, iicdev,
+		    (flags & O_NONBLOCK) ? IIC_DONTWAIT : (IIC_WAIT | IIC_INTR));
 
-		/*
-		 * Implicitly set the chip addr to the slave addr passed as
-		 * parameter. Consequently, start/stop shall be called before
-		 * the read or the write of a block.
-		 */
-		if (!error)
-			sc->sc_addr = s->slave;
-		IIC_UNLOCK(sc);
+		if (error == 0)
+			error = iicbus_start(parent, s->slave, 0);
 
+		if (error == 0) {
+			priv->addr = s->slave;
+			priv->started = true;
+		} else
+			iicbus_release_bus(parent, iicdev);
+
 		break;
 
 	case I2CSTOP:
-		error = iicbus_stop(parent);
+		if (priv->started) {
+			error = iicbus_stop(parent);
+			iicbus_release_bus(parent, iicdev);
+			priv->started = false;
+		}
+
 		break;
 
 	case I2CRSTCARD:
-		error = iicbus_reset(parent, IIC_UNKNOWN, 0, NULL);
+		/*
+		 * The bus should be owned before we reset it.  We allow the bus
+		 * to be owned already, as the result of an in-progress
+		 * sequence; however, a bus reset is always followed by a
+		 * release (a new start is presumably needed for I/O anyway).
+		 */
+		if (!priv->started)	
+			error = iicbus_request_bus(parent, iicdev,
+			    (flags & O_NONBLOCK) ? IIC_DONTWAIT : (IIC_WAIT | IIC_INTR));
+
+		if (error == 0) {
+			error = iicbus_reset(parent, IIC_UNKNOWN, 0, NULL);
+			/*
+			 * Ignore IIC_ENOADDR as it only means we have a master-only
+			 * controller.
+			 */
+			if (error == IIC_ENOADDR)
+				error = 0;
+
+			iicbus_release_bus(parent, iicdev);
+			priv->started = false;
+		}
 		break;
 
 	case I2CWRITE:
-		if (s->count <= 0) {
+		if (!priv->started) {
 			error = EINVAL;
 			break;
 		}
-		buf = malloc((unsigned long)s->count, M_TEMP, M_WAITOK);
-		error = copyin(s->buf, buf, s->count);
-		if (error)
-			break;
-		error = iicbus_write(parent, buf, s->count, &count, 10);
+		uvec.iov_base = s->buf;
+		uvec.iov_len = s->count;
+		ubuf.uio_iov = &uvec;
+		ubuf.uio_iovcnt = 1;
+		ubuf.uio_segflg = UIO_USERSPACE;
+		ubuf.uio_td = td;
+		ubuf.uio_resid = s->count;
+		ubuf.uio_offset = 0;
+		ubuf.uio_rw = UIO_WRITE;
+		error = iicuio_move(priv, &ubuf, 0);
 		break;
 
 	case I2CREAD:
-		if (s->count <= 0) {
+		if (!priv->started) {
 			error = EINVAL;
 			break;
 		}
-		buf = malloc((unsigned long)s->count, M_TEMP, M_WAITOK);
-		error = iicbus_read(parent, buf, s->count, &count, s->last, 10);
-		if (error)
-			break;
-		error = copyout(buf, s->buf, s->count);
+		uvec.iov_base = s->buf;
+		uvec.iov_len = s->count;
+		ubuf.uio_iov = &uvec;
+		ubuf.uio_iovcnt = 1;
+		ubuf.uio_segflg = UIO_USERSPACE;
+		ubuf.uio_td = td;
+		ubuf.uio_resid = s->count;
+		ubuf.uio_offset = 0;
+		ubuf.uio_rw = UIO_READ;
+		error = iicuio_move(priv, &ubuf, s->last);
 		break;
 
 	case I2CRDWR:
-		buf = malloc(sizeof(*d->msgs) * d->nmsgs, M_TEMP, M_WAITOK);
-		error = copyin(d->msgs, buf, sizeof(*d->msgs) * d->nmsgs);
-		if (error)
+		/*
+		 * The rdwr list should be a self-contained set of
+		 * transactions.  Fail if another transaction is in progress.
+		 */
+		if (priv->started) {
+			error = EINVAL;
 			break;
-		/* Alloc kernel buffers for userland data, copyin write data */
-		usrbufs = malloc(sizeof(void *) * d->nmsgs, M_TEMP, M_ZERO | M_WAITOK);
-		for (i = 0; i < d->nmsgs; i++) {
-			m = &((struct iic_msg *)buf)[i];
-			usrbufs[i] = m->buf;
-			m->buf = malloc(m->len, M_TEMP, M_WAITOK);
-			if (!(m->flags & IIC_M_RD))
-				copyin(usrbufs[i], m->buf, m->len);
 		}
-		error = iicbus_transfer(iicdev, (struct iic_msg *)buf, d->nmsgs);
-		/* Copyout all read segments, free up kernel buffers */
-		for (i = 0; i < d->nmsgs; i++) {
-			m = &((struct iic_msg *)buf)[i];
-			if (m->flags & IIC_M_RD)
-				copyout(m->buf, usrbufs[i], m->len);
-			free(m->buf, M_TEMP);
-		}
-		free(usrbufs, M_TEMP);
+
+		error = iicrdwr(priv, (struct iic_rdwr_data *)data, flags);
+
 		break;
 
 	case I2CRPTSTART:
+		if (!priv->started) {
+			error = EINVAL;
+			break;
+		}
 		error = iicbus_repeated_start(parent, s->slave, 0);
 		break;
 
+	case I2CSADDR:
+		priv->addr = *((uint8_t*)data);
+		break;
+
 	default:
 		error = ENOTTY;
 	}
 
-	iicbus_release_bus(parent, iicdev);
-
-	if (buf != NULL)
-		free(buf, M_TEMP);
+	IIC_UNLOCK(priv);
 	return (error);
 }
 

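The new iicrdwr() path gives userland a bounded, properly cleaned-up
version of the I2CRDWR ioctl.  A minimal sketch of driving it from
userland; the device node, slave address, and register offset are made-up,
and iicbus_transfer_gen() adjusts the R/W bit of each message's slave
address from IIC_M_RD:

#include <sys/ioctl.h>
#include <dev/iicbus/iic.h>
#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint8_t reg = 0x00, val;
	struct iic_msg msgs[2] = {
		{ 0x90, IIC_M_WR, 1, &reg },	/* select a register */
		{ 0x90, IIC_M_RD, 1, &val },	/* read one byte back */
	};
	struct iic_rdwr_data rdwr = { msgs, 2 };
	int fd;

	if ((fd = open("/dev/iic0", O_RDWR)) < 0)
		err(1, "open");
	if (ioctl(fd, I2CRDWR, &rdwr) < 0)
		err(1, "I2CRDWR");
	printf("reg 0x%02x = 0x%02x\n", reg, val);
	return (0);
}
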
Modified: trunk/sys/dev/iicbus/iic.h
===================================================================
--- trunk/sys/dev/iicbus/iic.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/iic.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998 Nicolas Souchu
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/iicbus/iic.h 300948 2016-05-29 07:14:51Z jah $
  *
  */
 #ifndef __IIC_H
@@ -56,6 +57,8 @@
 	uint32_t nmsgs;
 };
 
+#define IIC_RDRW_MAX_MSGS	42
+
 #define I2CSTART	_IOW('i', 1, struct iiccmd)	/* start condition */
 #define I2CSTOP		_IO('i', 2)			/* stop condition */
 #define I2CRSTCARD	_IOW('i', 3, struct iiccmd)	/* reset the card */
@@ -63,5 +66,6 @@
 #define I2CREAD		_IOW('i', 5, struct iiccmd)	/* receive data */
 #define I2CRDWR		_IOW('i', 6, struct iic_rdwr_data)	/* General read/write interface */
 #define I2CRPTSTART	_IOW('i', 7, struct iiccmd)	/* repeated start */
+#define I2CSADDR	_IOW('i', 8, uint8_t)		/* set slave address for future I/O */
 
 #endif

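With I2CSADDR the slave address survives across calls, so plain read(2)
and write(2) on /dev/iicN go through iicuio() without an explicit
start/stop sequence from the caller.  A minimal sketch, with a made-up
device node and address:

#include <sys/ioctl.h>
#include <dev/iicbus/iic.h>
#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

int
main(void)
{
	uint8_t addr = 0x90;	/* hypothetical 8-bit slave address */
	uint8_t buf[2];
	int fd;

	if ((fd = open("/dev/iic0", O_RDWR)) < 0)
		err(1, "open");
	/* Set the address once; each subsequent read/write uses it. */
	if (ioctl(fd, I2CSADDR, &addr) < 0)
		err(1, "I2CSADDR");
	if (read(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf))
		err(1, "read");
	return (0);
}
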
Modified: trunk/sys/dev/iicbus/iicbb.c
===================================================================
--- trunk/sys/dev/iicbus/iicbb.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/iicbb.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998, 2001 Nicolas Souchu
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/iicbb.c 294490 2016-01-21 08:32:11Z dumbbell $");
 
 /*
  * Generic I2C bit-banging code
@@ -43,6 +44,8 @@
  *
  */
 
+#include "opt_platform.h"
+
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
@@ -50,6 +53,11 @@
 #include <sys/bus.h>
 #include <sys/uio.h>
 
+#ifdef FDT
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#include <dev/fdt/fdt_common.h>
+#endif
 
 #include <dev/iicbus/iiconf.h>
 #include <dev/iicbus/iicbus.h>
@@ -61,6 +69,7 @@
 
 struct iicbb_softc {
 	device_t iicbus;
+	int udelay;		/* signal toggle delay in usec */
 };
 
 static int iicbb_attach(device_t);
@@ -75,6 +84,10 @@
 static int iicbb_write(device_t, const char *, int, int *, int);
 static int iicbb_read(device_t, char *, int, int *, int, int);
 static int iicbb_reset(device_t, u_char, u_char, u_char *);
+static int iicbb_transfer(device_t dev, struct iic_msg *msgs, uint32_t nmsgs);
+#ifdef FDT
+static phandle_t iicbb_get_node(device_t, device_t);
+#endif
 
 static device_method_t iicbb_methods[] = {
 	/* device interface */
@@ -94,8 +107,13 @@
 	DEVMETHOD(iicbus_write,		iicbb_write),
 	DEVMETHOD(iicbus_read,		iicbb_read),
 	DEVMETHOD(iicbus_reset,		iicbb_reset),
-	DEVMETHOD(iicbus_transfer,	iicbus_transfer_gen),
+	DEVMETHOD(iicbus_transfer,	iicbb_transfer),
 
+#ifdef FDT
+	/* ofw_bus interface */
+	DEVMETHOD(ofw_bus_get_node,	iicbb_get_node),
+#endif
+
 	{ 0, 0 }
 };
 
@@ -123,6 +141,7 @@
 	sc->iicbus = device_add_child(dev, "iicbus", -1);
 	if (!sc->iicbus)
 		return (ENXIO);
+	sc->udelay = 10;		/* 10 us default */
 	bus_generic_attach(dev);
 
 	return (0);
@@ -131,26 +150,23 @@
 static int
 iicbb_detach(device_t dev)
 {
-	struct iicbb_softc *sc = (struct iicbb_softc *)device_get_softc(dev);
-	device_t child;
 
-	/*
-	 * We need to save child because the detach indirectly causes
-	 * sc->iicbus to be zeroed.  Since we added the device
-	 * unconditionally in iicbb_attach, we need to make sure we
-	 * delete it here.  See iicbb_child_detached.  We need that
-	 * callback in case newbus detached our children w/o detaching
-	 * us (say iicbus is a module and unloaded w/o iicbb being
-	 * unloaded).
-	 */
-	child = sc->iicbus;
 	bus_generic_detach(dev);
-	if (child)
-		device_delete_child(dev, child);
+	device_delete_children(dev);
 
 	return (0);
 }
 
+#ifdef FDT
+static phandle_t
+iicbb_get_node(device_t bus, device_t dev)
+{
+
+	/* We only have one child, the I2C bus, which needs our own node. */
+	return (ofw_bus_get_node(bus));
+}
+#endif
+
 static void
 iicbb_child_detached( device_t dev, device_t child )
 {
@@ -184,11 +200,9 @@
 	return (retval);
 }
 
-#define IIC_DELAY	10
-
-#define I2C_SETSDA(dev,val) do {			\
+#define I2C_SETSDA(sc,dev,val) do {			\
 	IICBB_SETSDA(device_get_parent(dev), val);	\
-	DELAY(IIC_DELAY);				\
+	DELAY(sc->udelay);				\
 	} while (0)
 
 #define I2C_SETSCL(dev,val) do {			\
@@ -195,9 +209,9 @@
 	iicbb_setscl(dev, val, 100);			\
 	} while (0)
 
-#define I2C_SET(dev,ctrl,data) do {			\
+#define I2C_SET(sc,dev,ctrl,data) do {			\
 	I2C_SETSCL(dev, ctrl);				\
-	I2C_SETSDA(dev, data);				\
+	I2C_SETSDA(sc, dev, data);			\
 	} while (0)
 
 #define I2C_GETSDA(dev) (IICBB_GETSDA(device_get_parent(dev)))
@@ -216,16 +230,17 @@
 static void
 iicbb_setscl(device_t dev, int val, int timeout)
 {
+	struct iicbb_softc *sc = device_get_softc(dev);
 	int k = 0;
 
 	IICBB_SETSCL(device_get_parent(dev), val);
-	DELAY(IIC_DELAY);
+	DELAY(sc->udelay);
 
 	while (val && !I2C_GETSCL(dev) && k++ < timeout) {
 		IICBB_SETSCL(device_get_parent(dev), val);
-		DELAY(IIC_DELAY);
+		DELAY(sc->udelay);
 	}
-		
+
 	return;
 }
 
@@ -232,9 +247,11 @@
 static void
 iicbb_one(device_t dev, int timeout)
 {
-	I2C_SET(dev,0,1);
-	I2C_SET(dev,1,1);
-	I2C_SET(dev,0,1);
+	struct iicbb_softc *sc = device_get_softc(dev);
+
+	I2C_SET(sc,dev,0,1);
+	I2C_SET(sc,dev,1,1);
+	I2C_SET(sc,dev,0,1);
 	return;
 }
 
@@ -241,9 +258,11 @@
 static void
 iicbb_zero(device_t dev, int timeout)
 {
-	I2C_SET(dev,0,0);
-	I2C_SET(dev,1,0);
-	I2C_SET(dev,0,0);
+	struct iicbb_softc *sc = device_get_softc(dev);
+
+	I2C_SET(sc,dev,0,0);
+	I2C_SET(sc,dev,1,0);
+	I2C_SET(sc,dev,0,0);
 	return;
 }
 
@@ -264,20 +283,21 @@
 static int
 iicbb_ack(device_t dev, int timeout)
 {
+	struct iicbb_softc *sc = device_get_softc(dev);
 	int noack;
 	int k = 0;
-    
-	I2C_SET(dev,0,1);
-	I2C_SET(dev,1,1);
+
+	I2C_SET(sc,dev,0,1);
+	I2C_SET(sc,dev,1,1);
 	do {
 		noack = I2C_GETSDA(dev);
 		if (!noack)
 			break;
-		DELAY(10);
-		k += 10;
+		DELAY(1);
+		k++;
 	} while (k < timeout);
 
-	I2C_SET(dev,0,1);
+	I2C_SET(sc,dev,0,1);
 	I2C_DEBUG(printf("%c ",noack?'-':'+'));
 
 	return (noack);
@@ -302,16 +322,17 @@
 static u_char
 iicbb_readbyte(device_t dev, int last, int timeout)
 {
+	struct iicbb_softc *sc = device_get_softc(dev);
 	int i;
 	unsigned char data=0;
-    
-	I2C_SET(dev,0,1);
+
+	I2C_SET(sc,dev,0,1);
 	for (i=7; i>=0; i--) 
 	{
-		I2C_SET(dev,1,1);
+		I2C_SET(sc,dev,1,1);
 		if (I2C_GETSDA(dev))
 			data |= (1<<i);
-		I2C_SET(dev,0,1);
+		I2C_SET(sc,dev,0,1);
 	}
 	if (last) {
 		iicbb_one(dev, timeout);
@@ -337,13 +358,14 @@
 static int
 iicbb_start(device_t dev, u_char slave, int timeout)
 {
+	struct iicbb_softc *sc = device_get_softc(dev);
 	int error;
 
 	I2C_DEBUG(printf("<"));
 
-	I2C_SET(dev,1,1);
-	I2C_SET(dev,1,0);
-	I2C_SET(dev,0,0);
+	I2C_SET(sc,dev,1,1);
+	I2C_SET(sc,dev,1,0);
+	I2C_SET(sc,dev,0,0);
 
 	/* send address */
 	iicbb_sendbyte(dev, slave, timeout);
@@ -364,10 +386,13 @@
 static int
 iicbb_stop(device_t dev)
 {
-	I2C_SET(dev,0,0);
-	I2C_SET(dev,1,0);
-	I2C_SET(dev,1,1);
+	struct iicbb_softc *sc = device_get_softc(dev);
+
+	I2C_SET(sc,dev,0,0);
+	I2C_SET(sc,dev,1,0);
+	I2C_SET(sc,dev,1,1);
 	I2C_DEBUG(printf(">"));
+	I2C_DEBUG(printf("\n"));
 	return (0);
 }
 
@@ -413,6 +438,21 @@
 	return (0);
 }
 
+static int
+iicbb_transfer(device_t dev, struct iic_msg *msgs, uint32_t nmsgs)
+{
+	int error;
+
+	error = IICBB_PRE_XFER(device_get_parent(dev));
+	if (error)
+		return (error);
+
+	error = iicbus_transfer_gen(dev, msgs, nmsgs);
+
+	IICBB_POST_XFER(device_get_parent(dev));
+	return (error);
+}
+
 DRIVER_MODULE(iicbus, iicbb, iicbus_driver, iicbus_devclass, 0, 0);
 
 MODULE_DEPEND(iicbb, iicbus, IICBUS_MINVER, IICBUS_PREFVER, IICBUS_MAXVER);

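Moving the toggle delay into the softc as udelay makes the bit-bang timing
tunable per bus instead of the old compile-time IIC_DELAY.  This version
still hardcodes 10 us at attach; a sketch of how such a delay could be
derived from a target bus frequency instead (an illustration, not what
this code does):

#include <stdio.h>

/*
 * Illustrative only: derive the per-transition delay in microseconds from
 * a target SCL frequency, assuming at least two delayed transitions per
 * SCL cycle.
 */
static unsigned int
udelay_from_freq(unsigned int bus_freq)
{
	unsigned int udelay = 1000000 / 2 / bus_freq;

	return (udelay > 0 ? udelay : 1);
}

int
main(void)
{
	printf("100 kHz -> %u us\n", udelay_from_freq(100000));	/* 5 us */
	return (0);
}
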
Modified: trunk/sys/dev/iicbus/iicbb_if.m
===================================================================
--- trunk/sys/dev/iicbus/iicbb_if.m	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/iicbb_if.m	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 #-
 # Copyright (c) 1998 Nicolas Souchu
 # All rights reserved.
@@ -23,7 +24,7 @@
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
-# $MidnightBSD$
+# $FreeBSD: stable/10/sys/dev/iicbus/iicbb_if.m 232365 2012-03-01 20:58:20Z kan $
 #
 
 #include <sys/bus.h>
@@ -31,6 +32,29 @@
 INTERFACE iicbb;
 
 #
+# Default implementation of optional methods
+#
+CODE {
+	static int
+	null_pre_xfer(device_t dev)
+	{
+		return 0;
+	}
+
+	static void
+	null_post_xfer(device_t dev)
+	{
+	}
+
+	static int
+	null_callback(device_t dev, int index, caddr_t data)
+	{
+		return 0;
+	}
+};
+
+#
 # iicbus callback
 #
 METHOD int callback {
@@ -37,9 +61,23 @@
 	device_t dev;
 	int index;
 	caddr_t data;
-};
+} DEFAULT null_callback;
 
 #
+# Prepare device for I2C transfer
+#
+METHOD int pre_xfer {
+	device_t dev;
+} DEFAULT null_pre_xfer;
+
+#
+# Cleanup device after I2C transfer
+#
+METHOD void post_xfer {
+	device_t dev;
+} DEFAULT null_post_xfer;
+
+#
 # Set I2C bus data line
 #
 METHOD void setsda {


Property changes on: trunk/sys/dev/iicbus/iicbb_if.m
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/dev/iicbus/iicbus.c
===================================================================
--- trunk/sys/dev/iicbus/iicbus.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/iicbus.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998, 2001 Nicolas Souchu
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/iicbus.c 310071 2016-12-14 16:21:10Z avg $");
 
 /*
  * Autoconfiguration and support routines for the Philips serial I2C bus
@@ -38,7 +39,8 @@
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
-#include <sys/bus.h> 
+#include <sys/sysctl.h>
+#include <sys/bus.h>
 
 #include <dev/iicbus/iiconf.h>
 #include <dev/iicbus/iicbus.h>
@@ -59,7 +61,7 @@
 }
 
 #if SCAN_IICBUS
-static int 
+static int
 iic_probe_device(device_t dev, u_char addr)
 {
 	int count;
@@ -92,10 +94,17 @@
 	unsigned char addr;
 #endif
 	struct iicbus_softc *sc = IICBUS_SOFTC(dev);
+	int strict;
 
 	sc->dev = dev;
 	mtx_init(&sc->lock, "iicbus", NULL, MTX_DEF);
+	iicbus_init_frequency(dev, 0);
 	iicbus_reset(dev, IIC_FASTEST, 0, NULL);
+	if (resource_int_value(device_get_name(dev),
+		device_get_unit(dev), "strict", &strict) == 0)
+		sc->strict = strict;
+	else
+		sc->strict = 1;
 
 	/* device probing is meaningless since the bus is supposed to be
 	 * hot-plug. Moreover, some I2C chips do not appreciate random
@@ -118,7 +127,7 @@
 	bus_generic_attach(dev);
         return (0);
 }
-  
+
 static int
 iicbus_detach(device_t dev)
 {
@@ -126,10 +135,11 @@
 
 	iicbus_reset(dev, IIC_FASTEST, 0, NULL);
 	bus_generic_detach(dev);
+	device_delete_children(dev);
 	mtx_destroy(&sc->lock);
 	return (0);
 }
-  
+
 static int
 iicbus_print_child(device_t dev, device_t child)
 {
@@ -183,10 +193,30 @@
 	case IICBUS_IVAR_ADDR:
 		*result = devi->addr;
 		break;
+	case IICBUS_IVAR_NOSTOP:
+		*result = devi->nostop;
+		break;
 	}
 	return (0);
 }
 
+static int
+iicbus_write_ivar(device_t bus, device_t child, int which, uintptr_t value)
+{
+	struct iicbus_ivar *devi = IICBUS_IVAR(child);
+
+	switch (which) {
+	default:
+		return (EINVAL);
+	case IICBUS_IVAR_ADDR:
+		return (EINVAL);
+	case IICBUS_IVAR_NOSTOP:
+		devi->nostop = value;
+		break;
+	}
+	return (0);
+}
+
 static device_t
 iicbus_add_child(device_t dev, u_int order, const char *name, int unit)
 {
@@ -237,6 +267,51 @@
 	return (IIC_ENOTSUPP);
 }
 
+void
+iicbus_init_frequency(device_t dev, u_int bus_freq)
+{
+	struct iicbus_softc *sc = IICBUS_SOFTC(dev);
+
+	/*
+	 * If a bus frequency value was passed in, use it.  Otherwise
+	 * initialize it first to the standard I2C 100 kHz frequency, then
+	 * override that from a hint if one exists.
+	 */
+	if (bus_freq > 0)
+		sc->bus_freq = bus_freq;
+	else {
+		sc->bus_freq = 100000;
+		resource_int_value(device_get_name(dev), device_get_unit(dev),
+		    "frequency", (int *)&sc->bus_freq);
+	}
+	/*
+	 * Set up the sysctl that allows the bus frequency to be changed.
+	 * It is flagged as a tunable so that the user can set the value in
+	 * loader(8), and that will override any other setting from any source.
+	 * The sysctl tunable/value is the one most directly controlled by the
+	 * user and thus the one that always takes precedence.
+	 */
+	SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "frequency", CTLFLAG_RW | CTLFLAG_TUN, &sc->bus_freq,
+	    sc->bus_freq, "Bus frequency in Hz");
+}
+
+static u_int
+iicbus_get_frequency(device_t dev, u_char speed)
+{
+	struct iicbus_softc *sc = IICBUS_SOFTC(dev);
+
+	/*
+	 * If the frequency has not been configured for the bus, or the
+	 * request is specifically for SLOW speed, use the standard 100 kHz
+	 * rate, else use the configured bus speed.
+	 */
+	if (sc->bus_freq == 0 || speed == IIC_SLOW)
+		return (100000);
+	return (sc->bus_freq);
+}
+
 static device_method_t iicbus_methods[] = {
         /* device interface */
         DEVMETHOD(device_probe,         iicbus_probe),
@@ -248,6 +323,7 @@
         DEVMETHOD(bus_print_child,      iicbus_print_child),
 	DEVMETHOD(bus_probe_nomatch,	iicbus_probe_nomatch),
 	DEVMETHOD(bus_read_ivar,	iicbus_read_ivar),
+	DEVMETHOD(bus_write_ivar,	iicbus_write_ivar),
 	DEVMETHOD(bus_child_pnpinfo_str, iicbus_child_pnpinfo_str),
 	DEVMETHOD(bus_child_location_str, iicbus_child_location_str),
 	DEVMETHOD(bus_hinted_child,	iicbus_hinted_child),
@@ -254,6 +330,7 @@
 
 	/* iicbus interface */
 	DEVMETHOD(iicbus_transfer,	iicbus_transfer),
+	DEVMETHOD(iicbus_get_frequency,	iicbus_get_frequency),
 
 	DEVMETHOD_END
 };

Modified: trunk/sys/dev/iicbus/iicbus.h
===================================================================
--- trunk/sys/dev/iicbus/iicbus.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/iicbus.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998 Nicolas Souchu
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/iicbus/iicbus.h 276278 2014-12-27 02:37:52Z ian $
  *
  */
 #ifndef __IICBUS_H
@@ -41,16 +42,21 @@
 	device_t owner;		/* iicbus owner device structure */
 	u_char started;		/* address of the 'started' slave
 				 * 0 if no start condition succeeded */
+	u_char strict;		/* deny operations that violate the
+				 * I2C protocol */
 	struct mtx lock;
+	u_int bus_freq;		/* Configured bus Hz. */
 };
 
 struct iicbus_ivar
 {
 	uint32_t	addr;
+	bool		nostop;
 };
 
 enum {
-	IICBUS_IVAR_ADDR		/* Address or base address */
+	IICBUS_IVAR_ADDR,		/* Address or base address */
+	IICBUS_IVAR_NOSTOP,		/* nostop defaults */
 };
 
 #define IICBUS_ACCESSOR(A, B, T)					\
@@ -57,12 +63,14 @@
 	__BUS_ACCESSOR(iicbus, A, IICBUS, B, T)
 	
 IICBUS_ACCESSOR(addr,		ADDR,		uint32_t)
+IICBUS_ACCESSOR(nostop,		NOSTOP,		bool)
 
 #define	IICBUS_LOCK(sc)			mtx_lock(&(sc)->lock)
 #define	IICBUS_UNLOCK(sc)      		mtx_unlock(&(sc)->lock)
 #define	IICBUS_ASSERT_LOCKED(sc)       	mtx_assert(&(sc)->lock, MA_OWNED)
 
-extern int iicbus_generic_intr(device_t dev, int event, char *buf);
+int  iicbus_generic_intr(device_t dev, int event, char *buf);
+void iicbus_init_frequency(device_t dev, u_int bus_freq);
 
 extern driver_t iicbus_driver;
 extern devclass_t iicbus_devclass;

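The nostop ivar added above gets the usual generated accessors from
IICBUS_ACCESSOR, so a slave driver whose chip cannot tolerate a stop
condition in the middle of a register sequence can flip the default.  A
sketch of such an attach fragment (the foo driver is hypothetical):

#include <sys/param.h>
#include <sys/bus.h>

#include <dev/iicbus/iicbus.h>

/* Hypothetical slave driver: prefer repeated-start between messages. */
static int
foo_attach(device_t dev)
{

	iicbus_set_nostop(dev, true);
	return (0);
}
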
Modified: trunk/sys/dev/iicbus/iicbus_if.m
===================================================================
--- trunk/sys/dev/iicbus/iicbus_if.m	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/iicbus_if.m	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 #-
 # Copyright (c) 1998 Nicolas Souchu
 # All rights reserved.
@@ -23,7 +24,7 @@
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
-# $MidnightBSD$
+# $FreeBSD: stable/10/sys/dev/iicbus/iicbus_if.m 289666 2015-10-20 21:20:34Z ian $
 #
 
 #include <sys/bus.h>
@@ -31,6 +32,15 @@
 
 INTERFACE iicbus;
 
+CODE {
+	static u_int
+	iicbus_default_frequency(device_t bus, u_char speed)
+	{
+
+		return (100000);
+	}
+};
+
 #
 # Interpret interrupt
 #
@@ -42,6 +52,10 @@
 
 #
 # iicbus callback
+# Request ownership of bus
+# index: IIC_REQUEST_BUS or IIC_RELEASE_BUS
+# data: pointer to int containing IIC_WAIT or IIC_DONTWAIT and either IIC_INTR or IIC_NOINTR
+# This function is allowed to sleep if *data contains IIC_WAIT.
 #
 METHOD int callback {
 	device_t dev;
@@ -115,3 +129,14 @@
 	struct iic_msg *msgs;
 	uint32_t nmsgs;
 };
+
+#
+# Return the frequency in Hz for the bus running at the given symbolic
+# speed.  Only the IIC_SLOW speed has meaning; it is always 100 kHz.
+# The UNKNOWN, FAST, and FASTEST rates all map to the configured bus
+# frequency, or 100 kHz when not otherwise configured.
+#
+METHOD u_int get_frequency {
+	device_t dev;
+	u_char speed;
+} DEFAULT iicbus_default_frequency;


Property changes on: trunk/sys/dev/iicbus/iicbus_if.m
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
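
The get_frequency method lets host controllers program their clock
dividers from the configured bus frequency; the OpenCores controller added
below does exactly this kind of computation in iicoc_init(), with
prescale = clock / (5 * SCL) - 1.  A worked standalone example of that
formula (the 133 MHz input clock is an assumed value, not taken from
iicoc.h):

#include <stdio.h>

int
main(void)
{
	unsigned int clockfreq = 133333333;	/* assumed input clock, Hz */
	unsigned int i2cfreq = 100000;		/* standard-mode SCL, Hz */
	unsigned int prescale = clockfreq / (5 * i2cfreq) - 1;

	/* Low byte goes to PRESCALE_LO, high byte to PRESCALE_HI. */
	printf("prescale = %u (lo 0x%02x, hi 0x%02x)\n",
	    prescale, prescale & 0xff, prescale >> 8);
	return (0);
}
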
Added: trunk/sys/dev/iicbus/iicoc.c
===================================================================
--- trunk/sys/dev/iicbus/iicoc.c	                        (rev 0)
+++ trunk/sys/dev/iicbus/iicoc.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,392 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2003-2012 Broadcom Corporation
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY BROADCOM ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/iicoc.c 294490 2016-01-21 08:32:11Z dumbbell $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+
+#include <machine/bus.h>
+
+#include <dev/iicbus/iiconf.h>
+#include <dev/iicbus/iicbus.h>
+#include <dev/iicbus/iicoc.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include "iicbus_if.h"
+
+static devclass_t iicoc_devclass;
+
+/*
+ * Device methods
+ */
+static int iicoc_probe(device_t);
+static int iicoc_attach(device_t);
+static int iicoc_detach(device_t);
+
+static int iicoc_start(device_t dev, u_char slave, int timeout);
+static int iicoc_stop(device_t dev);
+static int iicoc_read(device_t dev, char *buf,
+    int len, int *read, int last, int delay);
+static int iicoc_write(device_t dev, const char *buf, 
+    int len, int *sent, int timeout);
+static int iicoc_repeated_start(device_t dev, u_char slave, int timeout);
+
+struct iicoc_softc {
+	device_t 	dev;		/* Self */
+	u_int		reg_shift;	/* Chip specific */
+	u_int		clockfreq;
+	u_int		i2cfreq;
+	struct resource *mem_res;	/* Memory resource */
+	int		mem_rid;
+	int 		sc_started;
+	uint8_t		i2cdev_addr;
+	device_t	iicbus;
+	struct mtx	sc_mtx;
+};
+
+static void 
+iicoc_dev_write(device_t dev, int reg, int value)
+{
+	struct iicoc_softc *sc;
+
+	sc = device_get_softc(dev);
+	bus_write_1(sc->mem_res, reg<<sc->reg_shift, value);
+}
+
+static int 
+iicoc_dev_read(device_t dev, int reg)
+{
+	uint8_t val;
+	struct iicoc_softc *sc;
+
+	sc = device_get_softc(dev);
+	val = bus_read_1(sc->mem_res, reg<<sc->reg_shift);
+	return (val);
+}
+
+static int
+iicoc_wait_on_status(device_t dev, uint8_t bit)
+{
+	int tries = I2C_TIMEOUT;
+	uint8_t status;
+
+	do {
+		status = iicoc_dev_read(dev, OC_I2C_STATUS_REG);
+	} while ((status & bit) != 0 && --tries > 0);
+
+	return (tries == 0 ? -1 : 0);
+}
+
+static int
+iicoc_rd_cmd(device_t dev, uint8_t cmd)
+{
+	uint8_t data;
+
+	iicoc_dev_write(dev, OC_I2C_CMD_REG, cmd);
+	if (iicoc_wait_on_status(dev, OC_STATUS_TIP) < 0) {
+		device_printf(dev, "read: Timeout waiting for TIP clear.\n");
+		return (-1);
+	}
+	data = iicoc_dev_read(dev, OC_I2C_DATA_REG); 
+	return (data);
+}
+
+static int
+iicoc_wr_cmd(device_t dev, uint8_t data, uint8_t cmd)
+{
+
+	iicoc_dev_write(dev, OC_I2C_DATA_REG, data);
+	iicoc_dev_write(dev, OC_I2C_CMD_REG, cmd);
+	if (iicoc_wait_on_status(dev, OC_STATUS_TIP) < 0) {
+		device_printf(dev, "write: Timeout waiting for TIP clear.\n");
+		return (-1);
+	}
+	return (0);
+}
+
+static int
+iicoc_wr_ack_cmd(device_t dev, uint8_t data, uint8_t cmd)
+{
+	if (iicoc_wr_cmd(dev, data, cmd) < 0) 
+		return (-1);	
+	
+	if (iicoc_dev_read(dev, OC_I2C_STATUS_REG) & OC_STATUS_NACK) {
+		device_printf(dev, "write: I2C command ACK Error.\n");
+		return (IIC_ENOACK);
+	}
+	return (0);
+}
+
+static int 
+iicoc_init(device_t dev)
+{
+	struct iicoc_softc *sc;
+	int value;
+
+	sc = device_get_softc(dev);
+	value = iicoc_dev_read(dev, OC_I2C_CTRL_REG);
+	iicoc_dev_write(dev, OC_I2C_CTRL_REG, 
+	    value & ~(OC_CONTROL_EN | OC_CONTROL_IEN));
+	value = (sc->clockfreq/(5 * sc->i2cfreq)) - 1;
+	iicoc_dev_write(dev, OC_I2C_PRESCALE_LO_REG, value & 0xff);
+	iicoc_dev_write(dev, OC_I2C_PRESCALE_HI_REG, value >> 8);
+	value = iicoc_dev_read(dev, OC_I2C_CTRL_REG);
+	iicoc_dev_write(dev, OC_I2C_CTRL_REG, value | OC_CONTROL_EN);
+
+	value = iicoc_dev_read(dev, OC_I2C_CTRL_REG);
+	/* return 0 on success, 1 on error */
+	return ((value & OC_CONTROL_EN) == 0);
+}
+
+static int
+iicoc_probe(device_t dev)
+{
+	struct iicoc_softc *sc;
+	
+	sc = device_get_softc(dev);
+	if ((pci_get_vendor(dev) == 0x184e) &&
+	    (pci_get_device(dev) == 0x1011)) {
+		sc->clockfreq = XLP_I2C_CLKFREQ;
+		sc->i2cfreq = XLP_I2C_FREQ;
+		sc->reg_shift = 2;
+		device_set_desc(dev, "Netlogic XLP I2C Controller");
+		return (BUS_PROBE_DEFAULT);
+	}
+	return (ENXIO);
+}
+
+/*
+ * We add all the devices which we know about.
+ * The generic attach routine will attach them if they are alive.
+ */
+static int
+iicoc_attach(device_t dev)
+{
+	int bus;
+	struct iicoc_softc *sc;
+
+	sc = device_get_softc(dev);
+	bus = device_get_unit(dev);
+
+	sc->dev = dev;
+	mtx_init(&sc->sc_mtx, "iicoc", "iicoc", MTX_DEF);
+	sc->mem_rid = 0;
+	sc->mem_res = bus_alloc_resource(dev,
+	    SYS_RES_MEMORY, &sc->mem_rid, 0ul, ~0ul, 0x100, RF_ACTIVE);
+
+	if (sc->mem_res == NULL) {
+		device_printf(dev, "Could not allocate bus resource.\n");
+		return (-1);
+	}
+	iicoc_init(dev);
+	sc->iicbus = device_add_child(dev, "iicbus", -1);
+	if (sc->iicbus == NULL) {
+		device_printf(dev, "Could not allocate iicbus instance.\n");
+		return (-1);
+	}
+	bus_generic_attach(dev);
+
+	return (0);
+}
+
+static int
+iicoc_detach(device_t dev)
+{
+	bus_generic_detach(dev);
+	device_delete_children(dev);
+
+	return (0);
+}
+
+static int 
+iicoc_start(device_t dev, u_char slave, int timeout)
+{
+	int error = IIC_EBUSERR;
+	struct iicoc_softc *sc;
+
+	sc = device_get_softc(dev);
+	mtx_lock(&sc->sc_mtx);
+	sc->i2cdev_addr = (slave >> 1);
+
+	/* Verify the bus is idle */
+	if (iicoc_wait_on_status(dev, OC_STATUS_BUSY) < 0)
+		goto i2c_stx_error;
+
+	/* Write Slave Address */
+	if (iicoc_wr_ack_cmd(dev, slave, OC_COMMAND_START)) {
+		device_printf(dev, 
+		    "I2C write slave address [0x%x] failed.\n", slave);
+		error = IIC_ENOACK;
+		goto i2c_stx_error;	
+	}
+	
+	/* Verify Arbitration is not Lost */
+	if (iicoc_dev_read(dev, OC_I2C_STATUS_REG) & OC_STATUS_AL) {
+		device_printf(dev, "I2C Bus Arbitration Lost, Aborting.\n");
+		error = IIC_EBUSERR;
+		goto i2c_stx_error;
+	}
+	error = IIC_NOERR;
+	mtx_unlock(&sc->sc_mtx);
+	return (error);
+i2c_stx_error:
+	iicoc_dev_write(dev, OC_I2C_CMD_REG, OC_COMMAND_STOP);
+	iicoc_wait_on_status(dev, OC_STATUS_BUSY);  /* wait for idle */
+	mtx_unlock(&sc->sc_mtx);
+	return (error);
+}
+
+static int 
+iicoc_stop(device_t dev)
+{
+	int error = 0;
+	struct iicoc_softc *sc;
+
+	sc = device_get_softc(dev);
+	mtx_lock(&sc->sc_mtx);
+	iicoc_dev_write(dev, OC_I2C_CMD_REG, OC_COMMAND_STOP);
+	iicoc_wait_on_status(dev, OC_STATUS_BUSY);  /* wait for idle */
+	mtx_unlock(&sc->sc_mtx);
+	return (error);
+}
+
+static int 
+iicoc_write(device_t dev, const char *buf, int len,
+    int *sent, int timeout /* us */ )
+{
+	uint8_t value;
+	int i;
+
+	value = buf[0];
+	/* Write Slave Offset */
+	if (iicoc_wr_ack_cmd(dev, value, OC_COMMAND_WRITE)) {
+		device_printf(dev, "I2C write slave offset failed.\n");
+		goto i2c_tx_error;	
+	}
+
+	for (i = 1; i < len; i++) {
+		/* Write data byte */
+		value = buf[i];
+		if (iicoc_wr_cmd(dev, value, OC_COMMAND_WRITE)) {
+			device_printf(dev, "I2C write data byte %d failed.\n",
+			    i);
+			goto i2c_tx_error;	
+		}
+	}
+	*sent = len;
+	return (IIC_NOERR);
+
+i2c_tx_error:
+	return (IIC_EBUSERR);
+}
+
+static int 
+iicoc_read(device_t dev, char *buf, int len, int *read, int last,
+    int delay)
+{
+	int data, i;
+	uint8_t cmd;
+
+	for (i = 0; i < len; i++) {
+		/* Read data byte */
+		cmd = (i == len - 1) ? OC_COMMAND_RDNACK : OC_COMMAND_READ;
+		data = iicoc_rd_cmd(dev, cmd);
+		if (data < 0) {
+			device_printf(dev, 
+			    "I2C read data byte %d failed.\n", i);
+			goto i2c_rx_error;
+		}
+		buf[i] = (uint8_t)data;
+	}
+	
+	*read = len;
+	return (IIC_NOERR);
+
+i2c_rx_error:	
+	return (IIC_EBUSERR);
+}
+
+static int
+iicoc_reset(device_t dev, u_char speed, u_char addr, u_char *oldadr)
+{
+	int error;
+	struct iicoc_softc *sc;
+
+	sc = device_get_softc(dev);
+	mtx_lock(&sc->sc_mtx);
+	error = iicoc_init(dev);
+	mtx_unlock(&sc->sc_mtx);
+	return (error);
+}
+
+static int
+iicoc_repeated_start(device_t dev, u_char slave, int timeout)
+{
+	return 0;
+}
+
+static device_method_t iicoc_methods[] = {
+	/* device interface */
+	DEVMETHOD(device_probe, iicoc_probe),
+	DEVMETHOD(device_attach, iicoc_attach),
+	DEVMETHOD(device_detach, iicoc_detach),
+
+	/* iicbus interface */
+	DEVMETHOD(iicbus_callback, iicbus_null_callback),
+	DEVMETHOD(iicbus_repeated_start, iicoc_repeated_start),
+	DEVMETHOD(iicbus_start, iicoc_start),
+	DEVMETHOD(iicbus_stop, iicoc_stop),
+	DEVMETHOD(iicbus_reset, iicoc_reset),	
+	DEVMETHOD(iicbus_write, iicoc_write),
+	DEVMETHOD(iicbus_read, iicoc_read),
+	DEVMETHOD(iicbus_transfer, iicbus_transfer_gen),
+
+	DEVMETHOD_END
+};
+
+static driver_t iicoc_driver = {
+	"iicoc",
+	iicoc_methods,
+	sizeof(struct iicoc_softc),
+};
+
+DRIVER_MODULE(iicoc, pci, iicoc_driver, iicoc_devclass, 0, 0);
+DRIVER_MODULE(iicbus, iicoc, iicbus_driver, iicbus_devclass, 0, 0);


Property changes on: trunk/sys/dev/iicbus/iicoc.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
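
For reference, the prescaler value that iicoc_init() above programs works
out as follows with the XLP constants from iicoc.h; this is a sketch
assuming the usual OpenCores relation SCL = clk / (5 * (prescale + 1)):

    /*
     * prescale = clockfreq / (5 * i2cfreq) - 1
     *          = 133333333 / (5 * 100000) - 1
     *          = 266 - 1 = 265 = 0x0109
     * so OC_I2C_PRESCALE_LO_REG is written with 0x09 and
     * OC_I2C_PRESCALE_HI_REG with 0x01, giving
     * SCL ~= 133333333 / (5 * 266) ~= 100.3 kHz.
     */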
Added: trunk/sys/dev/iicbus/iicoc.h
===================================================================
--- trunk/sys/dev/iicbus/iicoc.h	                        (rev 0)
+++ trunk/sys/dev/iicbus/iicoc.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,79 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2003-2012 Broadcom Corporation
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY BROADCOM ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/iicbus/iicoc.h 233539 2012-03-27 10:44:32Z jchandra $
+ */
+
+#ifndef __OPENCORE_I2C_H__
+#define __OPENCORE_I2C_H__
+
+/* I2C specific registers */
+#define OC_I2C_PRESCALE_LO_REG		0x00
+#define OC_I2C_PRESCALE_HI_REG		0x01
+#define OC_I2C_CTRL_REG			0x02
+#define OC_I2C_TRANSMIT_REG		0x03  /* tx and rx - same reg */
+#define OC_I2C_RECV_REG			0x03  /* tx and rx - same reg */
+#define OC_I2C_DATA_REG			0x03  /* tx and rx - same reg */
+#define OC_I2C_CMD_REG			0x04  /* cmd and status - same reg */
+#define OC_I2C_STATUS_REG		0x04  /* cmd and status - same reg */
+
+#define XLP_I2C_CLKFREQ			133333333 /* XLP 133 MHz IO clock */
+#define XLP_I2C_FREQ			100000	/* default 100kHz */
+#define I2C_TIMEOUT			500000
+
+/*
+ * These defines pertain to the OpenCores
+ * I2C Master Host Controller used in XLP
+ */
+
+#define OC_PRESCALER_LO			0
+#define OC_PRESCALER_HI			1
+
+#define OC_CONTROL			2
+#define OC_CONTROL_EN			0x80
+#define OC_CONTROL_IEN			0x40
+
+#define OC_DATA				3	/* Data TX & RX Reg */
+
+#define OC_COMMAND			4
+#define OC_COMMAND_START		0x90
+#define OC_COMMAND_STOP			0x40
+#define OC_COMMAND_READ			0x20
+#define OC_COMMAND_WRITE		0x10
+#define OC_COMMAND_RDACK		0x20
+#define OC_COMMAND_RDNACK		0x28
+#define OC_COMMAND_IACK			0x01	/* Not used */
+
+#define OC_STATUS			4	/* Same as 'command' */
+#define OC_STATUS_NACK			0x80	/* Did not get an ACK */
+#define OC_STATUS_BUSY			0x40
+#define OC_STATUS_AL			0x20	/* Arbitration Lost */
+#define OC_STATUS_TIP			0x02	/* Transfer in Progress  */
+#define OC_STATUS_IF			0x01	/* Intr. Pending Flag */
+
+#endif


Property changes on: trunk/sys/dev/iicbus/iicoc.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
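
The composite command values in this header decompose into the
single-purpose command bits of the OpenCores i2c core; a sketch assuming
the standard OpenCores bit assignments (STA=0x80, STO=0x40, RD=0x20,
WR=0x10, NACK=0x08):

    /*
     * OC_COMMAND_START  = 0x90 = STA | WR   (start, then send address)
     * OC_COMMAND_RDNACK = 0x28 = RD | NACK  (read and NACK the last byte)
     * OC_COMMAND_STOP   = 0x40 = STO        (release the bus)
     */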
Modified: trunk/sys/dev/iicbus/iiconf.c
===================================================================
--- trunk/sys/dev/iicbus/iiconf.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/iiconf.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998 Nicolas Souchu
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/iiconf.c 294673 2016-01-24 18:54:11Z ian $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -40,6 +41,28 @@
 #include "iicbus_if.h"
 
 /*
+ * Translate IIC_Exxxxx status values to vaguely-equivalent errno values.
+ */
+int
+iic2errno(int iic_status)
+{
+	switch (iic_status) {
+	case IIC_NOERR:         return (0);
+	case IIC_EBUSERR:       return (EALREADY);
+	case IIC_ENOACK:        return (EIO);
+	case IIC_ETIMEOUT:      return (ETIMEDOUT);
+	case IIC_EBUSBSY:       return (EWOULDBLOCK);
+	case IIC_ESTATUS:       return (EPROTO);
+	case IIC_EUNDERFLOW:    return (EIO);
+	case IIC_EOVERFLOW:     return (EOVERFLOW);
+	case IIC_ENOTSUPP:      return (EOPNOTSUPP);
+	case IIC_ENOADDR:       return (EADDRNOTAVAIL);
+	case IIC_ERESOURCE:     return (ENOMEM);
+	default:                return (EIO);
+	}
+}
+
+/*
  * iicbus_intr()
  */
 void
@@ -70,8 +93,7 @@
 		break;
 
 	default:
-		return (EWOULDBLOCK);
-		break;
+		return (IIC_EBUSBSY);
 	}
 
 	return (error);
@@ -90,31 +112,32 @@
 	struct iicbus_softc *sc = (struct iicbus_softc *)device_get_softc(bus);
 	int error = 0;
 
-	/* first, ask the underlying layers if the request is ok */
 	IICBUS_LOCK(sc);
-	do {
-		error = IICBUS_CALLBACK(device_get_parent(bus),
-						IIC_REQUEST_BUS, (caddr_t)&how);
-		if (error)
-			error = iicbus_poll(sc, how);
-	} while (error == EWOULDBLOCK);
 
-	while (!error) {
-		if (sc->owner && sc->owner != dev) {
+	while ((error == 0) && (sc->owner != NULL))
+		error = iicbus_poll(sc, how);
 
-			error = iicbus_poll(sc, how);
-		} else {
-			sc->owner = dev;
+	if (error == 0) {
+		sc->owner = dev;
+		/* 
+		 * Drop the lock around the call to the bus driver. 
+		 * This call should be allowed to sleep in the IIC_WAIT case.
+		 * Drivers might also need to grab locks that would cause LOR
+		 * if our lock is held.
+		 */
+		IICBUS_UNLOCK(sc);
+		/* Ask the underlying layers if the request is ok */
+		error = IICBUS_CALLBACK(device_get_parent(bus),
+		    IIC_REQUEST_BUS, (caddr_t)&how);
+		IICBUS_LOCK(sc);
 
-			IICBUS_UNLOCK(sc);
-			return (0);
+		if (error != 0) {
+			sc->owner = NULL;
+			wakeup_one(sc);
 		}
+	}
 
-		/* free any allocated resource */
-		if (error)
-			IICBUS_CALLBACK(device_get_parent(bus), IIC_RELEASE_BUS,
-					(caddr_t)&how);
-	}
+
 	IICBUS_UNLOCK(sc);
 
 	return (error);
@@ -131,26 +154,33 @@
 	struct iicbus_softc *sc = (struct iicbus_softc *)device_get_softc(bus);
 	int error;
 
-	/* first, ask the underlying layers if the release is ok */
-	error = IICBUS_CALLBACK(device_get_parent(bus), IIC_RELEASE_BUS, NULL);
-
-	if (error)
-		return (error);
-
 	IICBUS_LOCK(sc);
 
 	if (sc->owner != dev) {
 		IICBUS_UNLOCK(sc);
-		return (EACCES);
+		return (IIC_EBUSBSY);
 	}
 
-	sc->owner = NULL;
-
-	/* wakeup waiting processes */
-	wakeup(sc);
+	/* 
+	 * Drop the lock around the call to the bus driver. 
+	 * This call should be allowed to sleep in the IIC_WAIT case.
+	 * Drivers might also need to grab locks that would cause LOR
+	 * if our lock is held.
+	 */
 	IICBUS_UNLOCK(sc);
+	/* Ask the underlying layers if the release is ok */
+	error = IICBUS_CALLBACK(device_get_parent(bus), IIC_RELEASE_BUS, NULL);
 
-	return (0);
+	if (error == 0) {
+		IICBUS_LOCK(sc);
+		sc->owner = NULL;
+
+		/* wakeup a waiting thread */
+		wakeup_one(sc);
+		IICBUS_UNLOCK(sc);
+	}
+
+	return (error);
 }
 
 /*
@@ -178,7 +208,7 @@
 	int error = 0;
 
 	if (sc->started)
-		return (EINVAL);		/* bus already started */
+		return (IIC_ESTATUS); /* protocol error, bus already started */
 
 	if (!(error = IICBUS_START(device_get_parent(bus), slave, timeout)))
 		sc->started = slave;
@@ -200,7 +230,7 @@
 	int error = 0;
 
 	if (!sc->started)
-		return (EINVAL);     /* bus should have been already started */
+		return (IIC_ESTATUS); /* protocol error, bus not started */
 
 	if (!(error = IICBUS_REPEATED_START(device_get_parent(bus), slave, timeout)))
 		sc->started = slave;
@@ -222,7 +252,7 @@
 	int error = 0;
 
 	if (!sc->started)
-		return (EINVAL);		/* bus not started */
+		return (IIC_ESTATUS); /* protocol error, bus not started */
 
 	error = IICBUS_STOP(device_get_parent(bus));
 
@@ -243,9 +273,9 @@
 {
 	struct iicbus_softc *sc = (struct iicbus_softc *)device_get_softc(bus);
 	
-	/* a slave must have been started with the appropriate address */
-	if (!sc->started || (sc->started & LSB))
-		return (EINVAL);
+	/* a slave must have been started for writing */
+	if (sc->started == 0 || (sc->strict != 0 && (sc->started & LSB) != 0))
+		return (IIC_ESTATUS);
 
 	return (IICBUS_WRITE(device_get_parent(bus), buf, len, sent, timeout));
 }
@@ -261,9 +291,9 @@
 {
 	struct iicbus_softc *sc = (struct iicbus_softc *)device_get_softc(bus);
 	
-	/* a slave must have been started with the appropriate address */
-	if (!sc->started || !(sc->started & LSB))
-		return (EINVAL);
+	/* a slave must have been started for reading */
+	if (sc->started == 0 || (sc->strict != 0 && (sc->started & LSB) == 0))
+		return (IIC_ESTATUS);
 
 	return (IICBUS_READ(device_get_parent(bus), buf, len, read, last, delay));
 }
@@ -276,9 +306,14 @@
 int
 iicbus_write_byte(device_t bus, char byte, int timeout)
 {
+	struct iicbus_softc *sc = device_get_softc(bus);
 	char data = byte;
 	int sent;
 
+	/* a slave must have been started for writing */
+	if (sc->started == 0 || (sc->strict != 0 && (sc->started & LSB) != 0))
+		return (IIC_ESTATUS);
+
 	return (iicbus_write(bus, &data, 1, &sent, timeout));
 }
 
@@ -290,8 +325,13 @@
 int
 iicbus_read_byte(device_t bus, char *byte, int timeout)
 {
+	struct iicbus_softc *sc = device_get_softc(bus);
 	int read;
 
+	/* a slave must have been started for reading */
+	if (sc->started == 0 || (sc->strict != 0 && (sc->started & LSB) == 0))
+		return (IIC_ESTATUS);
+
 	return (iicbus_read(bus, byte, 1, &read, IIC_LAST_READ, timeout));
 }
 
@@ -352,9 +392,25 @@
 int
 iicbus_transfer(device_t bus, struct iic_msg *msgs, uint32_t nmsgs)
 {
+
 	return (IICBUS_TRANSFER(device_get_parent(bus), msgs, nmsgs));
 }
 
+int
+iicbus_transfer_excl(device_t dev, struct iic_msg *msgs, uint32_t nmsgs,
+    int how)
+{
+	device_t bus;
+	int error;
+
+	bus = device_get_parent(dev);
+	error = iicbus_request_bus(bus, dev, how);
+	if (error == 0)
+		error = IICBUS_TRANSFER(bus, msgs, nmsgs);
+	iicbus_release_bus(bus, dev);
+	return (error);
+}
+
 /*
  * Generic version of iicbus_transfer that calls the appropriate
  * routines to accomplish this.  See note above about acceptable
@@ -365,16 +421,18 @@
 {
 	int i, error, lenread, lenwrote, nkid, rpstart, addr;
 	device_t *children, bus;
+	bool nostop;
 
 	if ((error = device_get_children(dev, &children, &nkid)) != 0)
-		return (error);
+		return (IIC_ERESOURCE);
 	if (nkid != 1) {
 		free(children, M_TEMP);
-		return (EIO);
+		return (IIC_ENOTSUPP);
 	}
 	bus = children[0];
 	rpstart = 0;
 	free(children, M_TEMP);
+	nostop = iicbus_get_nostop(dev);
 	for (i = 0, error = 0; i < nmsgs && error == 0; i++) {
 		addr = msgs[i].slave;
 		if (msgs[i].flags & IIC_M_RD)
@@ -388,8 +446,7 @@
 			else
 				error = iicbus_start(bus, addr, 0);
 		}
-
-		if (error)
+		if (error != 0)
 			break;
 
 		if (msgs[i].flags & IIC_M_RD)
@@ -398,13 +455,18 @@
 		else
 			error = iicbus_write(bus, msgs[i].buf, msgs[i].len,
 			    &lenwrote, 0);
+		if (error != 0)
+			break;
 
-		if (!(msgs[i].flags & IIC_M_NOSTOP)) {
+		if ((msgs[i].flags & IIC_M_NOSTOP) != 0 ||
+		    (nostop && i + 1 < nmsgs)) {
+			rpstart = 1;	/* Next message gets repeated start */
+		} else {
 			rpstart = 0;
 			iicbus_stop(bus);
-		} else {
-			rpstart = 1;	/* Next message gets repeated start */
 		}
 	}
+	if (error != 0 && !nostop)
+		iicbus_stop(bus);
 	return (error);
 }
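
The new iicbus_transfer_excl() wraps a transfer in a request/release of
the bus on behalf of the slave device. A minimal sketch of a caller; the
names FOO_ADDR and foo_read_reg() are hypothetical:

    /* Read one register: write the register number, then read one byte. */
    static int
    foo_read_reg(device_t dev, uint8_t reg, uint8_t *val)
    {
    	struct iic_msg msgs[2] = {
    		{ FOO_ADDR, IIC_M_WR | IIC_M_NOSTOP, 1, &reg },
    		{ FOO_ADDR, IIC_M_RD, 1, val },
    	};

    	/* Holds the bus across both messages; returns IIC_Exxxxx. */
    	return (iicbus_transfer_excl(dev, msgs, nitems(msgs), IIC_WAIT));
    }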

Modified: trunk/sys/dev/iicbus/iiconf.h
===================================================================
--- trunk/sys/dev/iicbus/iiconf.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/iiconf.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998, 2001 Nicolas Souchu
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/iicbus/iiconf.h 294673 2016-01-24 18:54:11Z ian $
  */
 #ifndef __IICONF_H
 #define __IICONF_H
@@ -43,6 +44,7 @@
 #define IIC_NOINTR	0
 #define IIC_WAIT	0x1
 #define IIC_INTR	0x2
+#define IIC_INTRWAIT	(IIC_INTR | IIC_WAIT)
 
 /*
  * i2c modes
@@ -82,16 +84,22 @@
  * adapter layer errors
  */
 #define IIC_NOERR	0x0	/* no error occurred */
-#define IIC_EBUSERR	0x1	/* bus error */
+#define IIC_EBUSERR	0x1	/* bus error (hardware not in expected state) */
 #define IIC_ENOACK	0x2	/* ack not received until timeout */
 #define IIC_ETIMEOUT	0x3	/* timeout */
-#define IIC_EBUSBSY	0x4	/* bus busy */
+#define IIC_EBUSBSY	0x4	/* bus busy (reserved by another client) */
 #define IIC_ESTATUS	0x5	/* status error */
 #define IIC_EUNDERFLOW	0x6	/* slave ready for more data */
 #define IIC_EOVERFLOW	0x7	/* too much data */
 #define IIC_ENOTSUPP	0x8	/* request not supported */
 #define IIC_ENOADDR	0x9	/* no address assigned to the interface */
+#define IIC_ERESOURCE	0xa	/* resources (memory, whatever) unavailable */
 
+/*
+ * Note that all iicbus functions return IIC_Exxxxx status values,
+ * except iic2errno() (obviously) and iicbus_started() (returns bool).
+ */
+extern int iic2errno(int);
 extern int iicbus_request_bus(device_t, device_t, int);
 extern int iicbus_release_bus(device_t, device_t);
 extern device_t iicbus_alloc_bus(device_t);
@@ -122,6 +130,8 @@
 
 /* vectors of iic operations to pass to bridge */
 int iicbus_transfer(device_t bus, struct iic_msg *msgs, uint32_t nmsgs);
+int iicbus_transfer_excl(device_t bus, struct iic_msg *msgs, uint32_t nmsgs,
+    int how);
 int iicbus_transfer_gen(device_t bus, struct iic_msg *msgs, uint32_t nmsgs);
 
 #define IICBUS_MODVER	1
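
Since the iicbus functions now return IIC_Exxxxx values rather than
errnos, a driver that must hand an errno back to userland converts at
the boundary. A minimal sketch (hypothetical caller):

    error = iicbus_transfer(dev, msgs, nmsgs);
    if (error != 0)
    	return (iic2errno(error));	/* e.g. IIC_ETIMEOUT -> ETIMEDOUT */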

Modified: trunk/sys/dev/iicbus/iicsmb.c
===================================================================
--- trunk/sys/dev/iicbus/iicsmb.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/iicsmb.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998, 2001 Nicolas Souchu
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/iicsmb.c 310521 2016-12-24 14:48:28Z avg $");
 
 /*
  * I2C to SMB bridge
@@ -57,6 +58,7 @@
 #include <dev/iicbus/iiconf.h>
 #include <dev/iicbus/iicbus.h>
 
+#include <dev/smbus/smb.h>
 #include <dev/smbus/smbconf.h>
 
 #include "iicbus_if.h"
@@ -131,8 +133,6 @@
 	sizeof(struct iicsmb_softc),
 };
 
-#define IICBUS_TIMEOUT	100	/* us */
-
 static void
 iicsmb_identify(driver_t *driver, device_t parent)
 {
@@ -167,11 +167,9 @@
 iicsmb_detach(device_t dev)
 {
 	struct iicsmb_softc *sc = (struct iicsmb_softc *)device_get_softc(dev);
-	
+
 	bus_generic_detach(dev);
-	if (sc->smbus) {
-		device_delete_child(dev, sc->smbus);
-	}
+	device_delete_children(dev);
 	mtx_destroy(&sc->lock);
 
 	return (0);
@@ -278,237 +276,202 @@
 }
 
 static int
+iic2smb_error(int error)
+{
+	switch (error) {
+	case IIC_NOERR:
+		return (SMB_ENOERR);
+	case IIC_EBUSERR:
+		return (SMB_EBUSERR);
+	case IIC_ENOACK:
+		return (SMB_ENOACK);
+	case IIC_ETIMEOUT:
+		return (SMB_ETIMEOUT);
+	case IIC_EBUSBSY:
+		return (SMB_EBUSY);
+	case IIC_ESTATUS:
+		return (SMB_EBUSERR);
+	case IIC_EUNDERFLOW:
+		return (SMB_EBUSERR);
+	case IIC_EOVERFLOW:
+		return (SMB_EBUSERR);
+	case IIC_ENOTSUPP:
+		return (SMB_ENOTSUPP);
+	case IIC_ENOADDR:
+		return (SMB_EBUSERR);
+	case IIC_ERESOURCE:
+		return (SMB_EBUSERR);
+	default:
+		return (SMB_EBUSERR);
+	}
+}
+
+#define	TRANSFER_MSGS(dev, msgs)	iicbus_transfer(dev, msgs, nitems(msgs))
+
+static int
 iicsmb_quick(device_t dev, u_char slave, int how)
 {
-	device_t parent = device_get_parent(dev);
+	struct iic_msg msgs[] = {
+	     { slave, how == SMB_QWRITE ? IIC_M_WR : IIC_M_RD, 0, NULL },
+	};
 	int error;
 
 	switch (how) {
 	case SMB_QWRITE:
-		error = iicbus_start(parent, slave & ~LSB, IICBUS_TIMEOUT);
-		break;
-
 	case SMB_QREAD:
-		error = iicbus_start(parent, slave | LSB, IICBUS_TIMEOUT);
 		break;
-
 	default:
-		error = EINVAL;
-		break;
+		return (SMB_EINVAL);
 	}
 
-	if (!error)
-		error = iicbus_stop(parent);
-		
-	return (error);
+	error = TRANSFER_MSGS(dev, msgs);
+	return (iic2smb_error(error));
 }
 
 static int
 iicsmb_sendb(device_t dev, u_char slave, char byte)
 {
-	device_t parent = device_get_parent(dev);
-	int error, sent;
+	struct iic_msg msgs[] = {
+	     { slave, IIC_M_WR, 1, &byte },
+	};
+	int error;
 
-	error = iicbus_start(parent, slave & ~LSB, IICBUS_TIMEOUT);
-
-	if (!error) {
-		error = iicbus_write(parent, &byte, 1, &sent, IICBUS_TIMEOUT);
-
-		iicbus_stop(parent);
-	}
-
-	return (error);
+	error = TRANSFER_MSGS(dev, msgs);
+	return (iic2smb_error(error));
 }
 
 static int
 iicsmb_recvb(device_t dev, u_char slave, char *byte)
 {
-	device_t parent = device_get_parent(dev);
-	int error, read;
+	struct iic_msg msgs[] = {
+	     { slave, IIC_M_RD, 1, byte },
+	};
+	int error;
 
-	error = iicbus_start(parent, slave | LSB, 0);
-
-	if (!error) {
-		error = iicbus_read(parent, byte, 1, &read, IIC_LAST_READ, IICBUS_TIMEOUT);
-
-		iicbus_stop(parent);
-	}
-
-	return (error);
+	error = TRANSFER_MSGS(dev, msgs);
+	return (iic2smb_error(error));
 }
 
 static int
 iicsmb_writeb(device_t dev, u_char slave, char cmd, char byte)
 {
-	device_t parent = device_get_parent(dev);
-	int error, sent;
+	uint8_t bytes[] = { cmd, byte };
+	struct iic_msg msgs[] = {
+	     { slave, IIC_M_WR, nitems(bytes), bytes },
+	};
+	int error;
 
-	error = iicbus_start(parent, slave & ~LSB, 0);
-
-	if (!error) {
-		if (!(error = iicbus_write(parent, &cmd, 1, &sent, IICBUS_TIMEOUT)))
-			error = iicbus_write(parent, &byte, 1, &sent, IICBUS_TIMEOUT);
-
-		iicbus_stop(parent);
-	}
-
-	return (error);
+	error = TRANSFER_MSGS(dev, msgs);
+	return (iic2smb_error(error));
 }
 
 static int
 iicsmb_writew(device_t dev, u_char slave, char cmd, short word)
 {
-	device_t parent = device_get_parent(dev);
-	int error, sent;
+	uint8_t bytes[] = { cmd, word & 0xff, word >> 8 };
+	struct iic_msg msgs[] = {
+	     { slave, IIC_M_WR, nitems(bytes), bytes },
+	};
+	int error;
 
-	char low = (char)(word & 0xff);
-	char high = (char)((word & 0xff00) >> 8);
-
-	error = iicbus_start(parent, slave & ~LSB, 0);
-
-	if (!error) {
-		if (!(error = iicbus_write(parent, &cmd, 1, &sent, IICBUS_TIMEOUT)))
-		  if (!(error = iicbus_write(parent, &low, 1, &sent, IICBUS_TIMEOUT)))
-		    error = iicbus_write(parent, &high, 1, &sent, IICBUS_TIMEOUT);
-
-		iicbus_stop(parent);
-	}
-
-	return (error);
+	error = TRANSFER_MSGS(dev, msgs);
+	return (iic2smb_error(error));
 }
 
 static int
 iicsmb_readb(device_t dev, u_char slave, char cmd, char *byte)
 {
-	device_t parent = device_get_parent(dev);
-	int error, sent, read;
+	struct iic_msg msgs[] = {
+	     { slave, IIC_M_WR | IIC_M_NOSTOP, 1, &cmd },
+	     { slave, IIC_M_RD, 1, byte },
+	};
+	int error;
 
-	if ((error = iicbus_start(parent, slave & ~LSB, IICBUS_TIMEOUT)))
-		return (error);
-
-	if ((error = iicbus_write(parent, &cmd, 1, &sent, IICBUS_TIMEOUT)))
-		goto error;
-
-	if ((error = iicbus_repeated_start(parent, slave | LSB, IICBUS_TIMEOUT)))
-		goto error;
-
-	if ((error = iicbus_read(parent, byte, 1, &read, IIC_LAST_READ, IICBUS_TIMEOUT)))
-		goto error;
-
-error:
-	iicbus_stop(parent);
-	return (error);
+	error = TRANSFER_MSGS(dev, msgs);
+	return (iic2smb_error(error));
 }
 
-#define BUF2SHORT(low,high) \
-	((short)(((high) & 0xff) << 8) | (short)((low) & 0xff))
-
 static int
 iicsmb_readw(device_t dev, u_char slave, char cmd, short *word)
 {
-	device_t parent = device_get_parent(dev);
-	int error, sent, read;
-	char buf[2];
+	uint8_t buf[2];
+	struct iic_msg msgs[] = {
+	     { slave, IIC_M_WR | IIC_M_NOSTOP, 1, &cmd },
+	     { slave, IIC_M_RD, nitems(buf), buf },
+	};
+	int error;
 
-	if ((error = iicbus_start(parent, slave & ~LSB, IICBUS_TIMEOUT)))
-		return (error);
-
-	if ((error = iicbus_write(parent, &cmd, 1, &sent, IICBUS_TIMEOUT)))
-		goto error;
-
-	if ((error = iicbus_repeated_start(parent, slave | LSB, IICBUS_TIMEOUT)))
-		goto error;
-
-	if ((error = iicbus_read(parent, buf, 2, &read, IIC_LAST_READ, IICBUS_TIMEOUT)))
-		goto error;
-
-	/* first, receive low, then high byte */
-	*word = BUF2SHORT(buf[0], buf[1]);
-
-error:
-	iicbus_stop(parent);
-	return (error);
+	error = TRANSFER_MSGS(dev, msgs);
+	if (error == 0)
+		*word = ((uint16_t)buf[1] << 8) | buf[0];
+	return (iic2smb_error(error));
 }
 
 static int
 iicsmb_pcall(device_t dev, u_char slave, char cmd, short sdata, short *rdata)
 {
-	device_t parent = device_get_parent(dev);
-	int error, sent, read;
-	char buf[2];
+	uint8_t in[3] = { cmd, sdata & 0xff, sdata >> 8 };
+	uint8_t out[2];
+	struct iic_msg msgs[] = {
+	     { slave, IIC_M_WR | IIC_M_NOSTOP, nitems(in), in },
+	     { slave, IIC_M_RD, nitems(out), out },
+	};
+	int error;
 
-	if ((error = iicbus_start(parent, slave & ~LSB, IICBUS_TIMEOUT)))
-		return (error);
-
-	if ((error = iicbus_write(parent, &cmd, 1, &sent, IICBUS_TIMEOUT)))
-		goto error;
-
-	/* first, send low, then high byte */
-	buf[0] = (char)(sdata & 0xff);
-	buf[1] = (char)((sdata & 0xff00) >> 8);
-
-	if ((error = iicbus_write(parent, buf, 2, &sent, IICBUS_TIMEOUT)))
-		goto error;
-
-	if ((error = iicbus_repeated_start(parent, slave | LSB, IICBUS_TIMEOUT)))
-		goto error;
-
-	if ((error = iicbus_read(parent, buf, 2, &read, IIC_LAST_READ, IICBUS_TIMEOUT)))
-		goto error;
-
-	/* first, receive low, then high byte */
-	*rdata = BUF2SHORT(buf[0], buf[1]);
-
-error:
-	iicbus_stop(parent);
-	return (error);
+	error = TRANSFER_MSGS(dev, msgs);
+	if (error == 0)
+		*rdata = ((uint16_t)out[1] << 8) | out[0];
+	return (iic2smb_error(error));
 }
 
 static int
 iicsmb_bwrite(device_t dev, u_char slave, char cmd, u_char count, char *buf)
 {
-	device_t parent = device_get_parent(dev);
-	int error, sent;
+	uint8_t bytes[2] = { cmd, count };
+	struct iic_msg msgs[] = {
+	     { slave, IIC_M_WR | IIC_M_NOSTOP, nitems(bytes), bytes },
+	     { slave, IIC_M_WR | IIC_M_NOSTART, count, buf },
+	};
+	int error;
 
-	if ((error = iicbus_start(parent, slave & ~LSB, IICBUS_TIMEOUT)))
-		goto error;
-
-	if ((error = iicbus_write(parent, &cmd, 1, &sent, IICBUS_TIMEOUT)))
-		goto error;
-
-	if ((error = iicbus_write(parent, buf, (int)count, &sent, IICBUS_TIMEOUT)))
-		goto error;
-
-	if ((error = iicbus_stop(parent)))
-		goto error;
-
-error:
-	return (error);
+	if (count > SMB_MAXBLOCKSIZE || count == 0)
+		return (SMB_EINVAL);
+	error = TRANSFER_MSGS(dev, msgs);
+	return (iic2smb_error(error));
 }
 
 static int
 iicsmb_bread(device_t dev, u_char slave, char cmd, u_char *count, char *buf)
 {
+	struct iic_msg msgs[] = {
+	     { slave, IIC_M_WR | IIC_M_NOSTOP, 1, &cmd },
+	     { slave, IIC_M_RD | IIC_M_NOSTOP, 1, count },
+	};
+	struct iic_msg block_msg[] = {
+	     { slave, IIC_M_RD | IIC_M_NOSTART, 0, buf },
+	};
 	device_t parent = device_get_parent(dev);
-	int error, sent, read;
+	int error;
 
-	if ((error = iicbus_start(parent, slave & ~LSB, IICBUS_TIMEOUT)))
-		return (error);
-
-	if ((error = iicbus_write(parent, &cmd, 1, &sent, IICBUS_TIMEOUT)))
-		goto error;
-
-	if ((error = iicbus_repeated_start(parent, slave | LSB, IICBUS_TIMEOUT)))
-		goto error;
-
-	if ((error = iicbus_read(parent, buf, (int)*count, &read,
-						IIC_LAST_READ, IICBUS_TIMEOUT)))
-		goto error;
-	*count = read;
-
-error:
-	iicbus_stop(parent);
-	return (error);
+	/* Have to do this because the command is split into two transfers. */
+	error = iicbus_request_bus(parent, dev, IIC_WAIT);
+	if (error == 0)
+		error = TRANSFER_MSGS(dev, msgs);
+	if (error == 0) {
+		/*
+		 * If the slave offers an empty or an over-long reply,
+		 * read one byte to generate the stop or abort.
+		 */
+		if (*count > SMB_MAXBLOCKSIZE || *count == 0)
+			block_msg[0].len = 1;
+		else
+			block_msg[0].len = *count;
+		error = TRANSFER_MSGS(dev, block_msg);
+		if (*count > SMB_MAXBLOCKSIZE || *count == 0)
+			error = SMB_EINVAL;
+	}
+	(void)iicbus_release_bus(parent, dev);
+	return (iic2smb_error(error));
 }
 
 DRIVER_MODULE(iicsmb, iicbus, iicsmb_driver, iicsmb_devclass, 0, 0);
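
Note that SMBus transfers word data least-significant byte first, which
is why iicsmb_readw() above reassembles the result as buf[1] << 8 | buf[0].
A worked example, assuming the slave returns 0x34 and then 0x12:

    /*
     * buf[0] = 0x34, buf[1] = 0x12
     * *word = ((uint16_t)0x12 << 8) | 0x34 = 0x1234
     */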

Modified: trunk/sys/dev/iicbus/max6690.c
===================================================================
--- trunk/sys/dev/iicbus/max6690.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/max6690.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2010 Andreas Tobler
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/max6690.c 239398 2012-08-19 19:32:38Z andreast $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -251,7 +252,7 @@
 	struct max6690_softc *sc;
 	struct sysctl_oid *oid, *sensroot_oid;
 	struct sysctl_ctx_list *ctx;
-	char sysctl_name[32];
+	char sysctl_desc[40], sysctl_name[32];
 	int i, j;
 
 	device_t dev = (device_t)xdev;
@@ -293,6 +294,8 @@
 		}
 		sysctl_name[j] = 0;
 
+		sprintf(sysctl_desc, "%s %s", sc->sc_sensors[i].therm.name,
+			"(C)");
 		oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sensroot_oid),
 				      OID_AUTO,
 				      sysctl_name, CTLFLAG_RD, 0,
@@ -300,8 +303,7 @@
 		/* I use i to pass the sensor id. */
 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "temp",
 				CTLTYPE_INT | CTLFLAG_RD, dev, i % 2,
-				max6690_sensor_sysctl, "IK",
-				"Sensor Temp in °C");
+				max6690_sensor_sysctl, "IK", sysctl_desc);
 
 	}
 	/* Dump sensor location & ID. */

Modified: trunk/sys/dev/iicbus/pcf8563.c
===================================================================
--- trunk/sys/dev/iicbus/pcf8563.c	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/pcf8563.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2012 Marius Strobl <marius at FreeBSD.org>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/pcf8563.c 241679 2012-10-18 10:29:16Z marius $");
 
 /*
  * Driver for NXP PCF8563 real-time clock/calendar

Modified: trunk/sys/dev/iicbus/pcf8563reg.h
===================================================================
--- trunk/sys/dev/iicbus/pcf8563reg.h	2018-05-27 23:44:07 UTC (rev 10103)
+++ trunk/sys/dev/iicbus/pcf8563reg.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*	$NetBSD: pcf8563reg.h,v 1.1 2011/01/21 19:11:47 jakllsch Exp $	*/
 
 /*-
@@ -5,7 +6,7 @@
  *
  * This file is in the public domain.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/iicbus/pcf8563reg.h 234248 2012-04-13 23:07:32Z marius $
  */
 
 /*

Added: trunk/sys/dev/iicbus/s35390a.c
===================================================================
--- trunk/sys/dev/iicbus/s35390a.c	                        (rev 0)
+++ trunk/sys/dev/iicbus/s35390a.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,330 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 Yusuke Tanaka
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 2011 Frank Wille.
+ * All rights reserved.
+ *
+ * Written by Frank Wille for The NetBSD Project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/iicbus/s35390a.c 241048 2012-09-29 16:15:27Z kevlo $");
+
+/*
+ * Driver for Seiko Instruments S-35390A Real-time Clock
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/clock.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+
+#include <dev/iicbus/iicbus.h>
+#include <dev/iicbus/iiconf.h>
+
+#include "clock_if.h"
+#include "iicbus_if.h"
+
+#define S390_DEVNAME		"s35390a_rtc"
+#define S390_DEVCODE		0x6	/* 0110 */
+/*
+ * S-35390A uses 4-bit device code + 3-bit command in the slave address
+ * field.  The possible combinations are 0x60-0x6f, including the R/W bit.
+ * 0x60 means a write access to status register 1.
+ */
+#define S390_ADDR		(S390_DEVCODE << 4)
+
+/* Registers are encoded into the slave address */
+#define S390_STATUS1		(0 << 1)
+#define S390_STATUS2		(1 << 1)
+#define S390_REALTIME1		(2 << 1)
+#define S390_REALTIME2		(3 << 1)
+#define S390_INT1_1		(4 << 1)
+#define S390_INT1_2		(5 << 1)
+#define S390_CLOCKADJ		(6 << 1)
+#define S390_FREE		(7 << 1)
+
+/* Status1 bits */
+#define S390_ST1_POC		(1 << 7)
+#define S390_ST1_BLD		(1 << 6)
+#define S390_ST1_24H		(1 << 1)
+#define S390_ST1_RESET		(1 << 0)
+
+/* Status2 bits */
+#define S390_ST2_TEST		(1 << 7)
+
+/* Realtime1 data bytes */
+#define S390_RT1_NBYTES		7
+#define S390_RT1_YEAR		0
+#define S390_RT1_MONTH		1
+#define S390_RT1_DAY		2
+#define S390_RT1_WDAY		3
+#define S390_RT1_HOUR		4
+#define S390_RT1_MINUTE		5
+#define S390_RT1_SECOND		6
+
+struct s390rtc_softc {
+	device_t	sc_dev;
+	uint16_t	sc_addr;
+};
+
+/*
+ * S-35390A interprets bits in each byte on SDA in reverse order.
+ * bitreverse() reverses the bits in a uint8_t.
+ */
+static const uint8_t nibbletab[] = {
+	/* 0x0 0000 -> 0000 */	0x0,
+	/* 0x1 0001 -> 1000 */	0x8,
+	/* 0x2 0010 -> 0100 */	0x4,
+	/* 0x3 0011 -> 1100 */	0xc,
+	/* 0x4 0100 -> 0010 */	0x2,
+	/* 0x5 0101 -> 1010 */	0xa,
+	/* 0x6 0110 -> 0110 */	0x6,
+	/* 0x7 0111 -> 1110 */	0xe,
+	/* 0x8 1000 -> 0001 */	0x1,
+	/* 0x9 1001 -> 1001 */	0x9,
+	/* 0xa 1010 -> 0101 */	0x5,
+	/* 0xb 1011 -> 1101 */	0xd,
+	/* 0xc 1100 -> 0011 */	0x3,
+	/* 0xd 1101 -> 1011 */	0xb,
+	/* 0xe 1110 -> 0111 */	0x7,
+	/* 0xf 1111 -> 1111 */	0xf, };
+
+static uint8_t
+bitreverse(uint8_t x)
+{
+
+	return (nibbletab[x & 0xf] << 4) | nibbletab[x >> 4];
+}
+
+static int
+s390rtc_read(device_t dev, uint8_t reg, uint8_t *buf, size_t len)
+{
+	struct s390rtc_softc *sc = device_get_softc(dev);
+	struct iic_msg msg[] = {
+		{
+			.slave = sc->sc_addr | reg,
+			.flags = IIC_M_RD,
+			.len = len,
+			.buf = buf,
+		},
+	};
+	int i;
+	int error;
+
+	error = iicbus_transfer(dev, msg, 1);
+	if (error)
+		return (error);
+
+	/* this chip returns each byte in reverse order */
+	for (i = 0; i < len; ++i)
+		buf[i] = bitreverse(buf[i]);
+
+	return (0);
+}
+
+static int
+s390rtc_write(device_t dev, uint8_t reg, uint8_t *buf, size_t len)
+{
+	struct s390rtc_softc *sc = device_get_softc(dev);
+	struct iic_msg msg[] = {
+		{
+			.slave = sc->sc_addr | reg,
+			.flags = IIC_M_WR,
+			.len = len,
+			.buf = buf,
+		},
+	};
+	int i;
+
+	/* this chip expects each byte in reverse order */
+	for (i = 0; i < len; ++i)
+		buf[i] = bitreverse(buf[i]);
+
+	return (iicbus_transfer(dev, msg, 1));
+}
+
+static int
+s390rtc_probe(device_t dev)
+{
+
+	if (iicbus_get_addr(dev) != S390_ADDR) {
+		if (bootverbose)
+			device_printf(dev, "slave address mismatch. "
+			    "(%02x != %02x)\n", iicbus_get_addr(dev),
+			    S390_ADDR);
+		return (ENXIO);
+	}
+	device_set_desc(dev, "Seiko Instruments S-35390A Real-time Clock");
+
+	return (BUS_PROBE_SPECIFIC);
+}
+
+static int
+s390rtc_attach(device_t dev)
+{
+	struct s390rtc_softc *sc;
+	uint8_t reg;
+	int error;
+
+	sc = device_get_softc(dev);
+	sc->sc_dev = dev;
+	sc->sc_addr = iicbus_get_addr(dev);
+
+	/* Reset the chip and turn on 24h mode, after power-off or battery. */
+	error = s390rtc_read(dev, S390_STATUS1, &reg, 1);
+	if (error) {
+		device_printf(dev, "%s: cannot read status1 register\n",
+		     __func__);
+		return (error);
+	}
+	if (reg & (S390_ST1_POC | S390_ST1_BLD)) {
+		reg |= S390_ST1_24H | S390_ST1_RESET;
+		error = s390rtc_write(dev, S390_STATUS1, &reg, 1);
+		if (error) {
+			device_printf(dev, "%s: cannot initialize\n", __func__);
+			return (error);
+		}
+	}
+
+	/* Disable the test mode, when enabled. */
+	error = s390rtc_read(dev, S390_STATUS2, &reg, 1);
+	if (error) {
+		device_printf(dev, "%s: cannot read status2 register\n",
+		    __func__);
+		return (error);
+	}
+	if (reg & S390_ST2_TEST) {
+		reg &= ~S390_ST2_TEST;
+		error = s390rtc_write(dev, S390_STATUS2, &reg, 1);
+		if (error) {
+			device_printf(dev,
+			    "%s: cannot disable the test mode\n", __func__);
+			return (error);
+		}
+	}
+
+	clock_register(dev, 1000000);   /* 1 second resolution */
+	return (0);
+}
+
+static int
+s390rtc_gettime(device_t dev, struct timespec *ts)
+{
+	uint8_t bcd[S390_RT1_NBYTES];
+	struct clocktime ct;
+	int error;
+
+	error = s390rtc_read(dev, S390_REALTIME1, bcd, S390_RT1_NBYTES);
+	if (error) {
+		device_printf(dev, "%s: cannot read realtime1 register\n",
+		    __func__);
+		return (error);
+	}
+
+	/*
+	 * Convert the register values into something usable.
+	 */
+	ct.nsec = 0;
+	ct.sec = FROMBCD(bcd[S390_RT1_SECOND]);
+	ct.min = FROMBCD(bcd[S390_RT1_MINUTE]);
+	ct.hour = FROMBCD(bcd[S390_RT1_HOUR] & 0x3f);
+	ct.day = FROMBCD(bcd[S390_RT1_DAY]);
+	ct.dow = bcd[S390_RT1_WDAY] & 0x07;
+	ct.mon = FROMBCD(bcd[S390_RT1_MONTH]);
+	ct.year = FROMBCD(bcd[S390_RT1_YEAR]) + 2000;
+
+	return (clock_ct_to_ts(&ct, ts));
+}
+
+static int
+s390rtc_settime(device_t dev, struct timespec *ts)
+{
+	uint8_t bcd[S390_RT1_NBYTES];
+	struct clocktime ct;
+
+	clock_ts_to_ct(ts, &ct);
+
+	/*
+	 * Convert our time representation into something the S-xx390
+	 * can understand.
+	 */
+	bcd[S390_RT1_SECOND] = TOBCD(ct.sec);
+	bcd[S390_RT1_MINUTE] = TOBCD(ct.min);
+	bcd[S390_RT1_HOUR] = TOBCD(ct.hour);
+	bcd[S390_RT1_DAY] = TOBCD(ct.day);
+	bcd[S390_RT1_WDAY] = ct.dow;
+	bcd[S390_RT1_MONTH] = TOBCD(ct.mon);
+	bcd[S390_RT1_YEAR] = TOBCD(ct.year % 100);
+
+	return (s390rtc_write(dev, S390_REALTIME1, bcd, S390_RT1_NBYTES));
+}
+
+static device_method_t s390rtc_methods[] = {
+	DEVMETHOD(device_probe,		s390rtc_probe),
+	DEVMETHOD(device_attach,	s390rtc_attach),
+
+	DEVMETHOD(clock_gettime,	s390rtc_gettime),
+	DEVMETHOD(clock_settime,	s390rtc_settime),
+
+	DEVMETHOD_END
+};
+
+static driver_t s390rtc_driver = {
+	S390_DEVNAME,
+	s390rtc_methods,
+	sizeof(struct s390rtc_softc),
+};
+static devclass_t s390rtc_devclass;
+
+DRIVER_MODULE(s35390a, iicbus, s390rtc_driver, s390rtc_devclass, NULL, NULL);
+MODULE_VERSION(s35390a, 1);
+MODULE_DEPEND(s35390a, iicbus, 1, 1, 1);


Property changes on: trunk/sys/dev/iicbus/s35390a.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
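
A worked example of the bit-mirroring done by bitreverse() in s35390a.c,
using the nibble lookup table defined there:

    /*
     * bitreverse(0x35):  0x35 = 00110101b
     *   nibbletab[0x5] = 0xa  ->  high nibble 0xa0
     *   nibbletab[0x3] = 0xc  ->  low nibble  0x0c
     *   result = 0xac = 10101100b, the byte with its bits mirrored
     */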
Added: trunk/sys/dev/imcsmb/imcsmb.c
===================================================================
--- trunk/sys/dev/imcsmb/imcsmb.c	                        (rev 0)
+++ trunk/sys/dev/imcsmb/imcsmb.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,558 @@
+/* $MidnightBSD$ */
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Authors: Joe Kloss; Ravi Pokala (rpokala at freebsd.org)
+ *
+ * Copyright (c) 2017-2018 Panasas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/imcsmb/imcsmb.c 330304 2018-03-03 01:53:51Z rpokala $
+ */
+
+/* A detailed description of this device is present in imcsmb_pci.c */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/endian.h>
+#include <sys/errno.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/syslog.h>
+#include <sys/bus.h>
+
+#include <machine/bus.h>
+#include <machine/atomic.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <dev/smbus/smbconf.h>
+
+#include "imcsmb_reg.h"
+#include "imcsmb_var.h"
+
+/* Device methods */
+static int imcsmb_attach(device_t dev);
+static int imcsmb_detach(device_t dev);
+static int imcsmb_probe(device_t dev);
+
+/* SMBus methods */
+static int imcsmb_callback(device_t dev, int index, void *data);
+static int imcsmb_readb(device_t dev, u_char slave, char cmd, char *byte);
+static int imcsmb_readw(device_t dev, u_char slave, char cmd, short *word);
+static int imcsmb_writeb(device_t dev, u_char slave, char cmd, char byte);
+static int imcsmb_writew(device_t dev, u_char slave, char cmd, short word);
+
+/* All the read/write methods wrap around this. */
+static int imcsmb_transfer(device_t dev, u_char slave, char cmd, void *data,
+    int word_op, int write_op);
+
+/**
+ * device_attach() method. Set up the softc, including getting the set of the
+ * parent imcsmb_pci's registers that we will use. Create the smbus(4) device,
+ * which any SMBus slave device drivers will connect to.
+ *
+ * @author rpokala
+ *
+ * @param[in,out] dev
+ *      Device being attached.
+ */
+static int
+imcsmb_attach(device_t dev)
+{
+	struct imcsmb_softc *sc;
+	int rc;
+
+	/* Initialize private state */
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+	sc->imcsmb_pci = device_get_parent(dev);
+	sc->regs = device_get_ivars(dev);
+
+	/* Create the smbus child */
+	sc->smbus = device_add_child(dev, "smbus", -1);
+	if (sc->smbus == NULL) {
+		/* Nothing has been allocated, so there's no cleanup. */
+		device_printf(dev, "Child smbus not added\n");
+		rc = ENXIO;
+		goto out;
+	}
+
+	/* Attach the smbus child. */
+	if ((rc = bus_generic_attach(dev)) != 0) {
+		device_printf(dev, "Failed to attach smbus: %d\n", rc);
+	}
+
+out:
+	return (rc);
+}
+
+/**
+ * device_detach() method. attach() didn't do any allocations, so all that's
+ * needed here is to free up any downstream drivers and children.
+ *
+ * @author Joe Kloss
+ *
+ * @param[in] dev
+ *      Device being detached.
+ */
+static int
+imcsmb_detach(device_t dev)
+{
+	int rc;
+
+	/* Detach any attached drivers */
+	rc = bus_generic_detach(dev);
+	if (rc == 0) {
+		/* Remove all children */
+		rc = device_delete_children(dev);
+	}
+
+	return (rc);
+}
+
+/**
+ * device_probe() method. All the actual probing was done by the imcsmb_pci
+ * parent, so just report success.
+ *
+ * @author Joe Kloss
+ *
+ * @param[in,out] dev
+ *      Device being probed.
+ */
+static int
+imcsmb_probe(device_t dev)
+{
+
+	device_set_desc(dev, "iMC SMBus controller");
+	return (BUS_PROBE_DEFAULT);
+}
+
+/**
+ * smbus_callback() method. Call the parent imcsmb_pci's request or release
+ * function to quiesce / restart firmware tasks which might use the SMBus.
+ *
+ * @author rpokala
+ *
+ * @param[in] dev
+ *      Device being requested or released.
+ *
+ * @param[in] index
+ *      Either SMB_REQUEST_BUS or SMB_RELEASE_BUS.
+ *
+ * @param[in] data
+ *      Tells the rest of the SMBus subsystem to allow or disallow waiting;
+ *      this driver only works with SMB_DONTWAIT.
+ */
+static int
+imcsmb_callback(device_t dev, int index, void *data)
+{
+	struct imcsmb_softc *sc;
+	int *how;
+	int rc;
+
+	sc = device_get_softc(dev);
+	how = (int *) data;
+
+	switch (index) {
+	case SMB_REQUEST_BUS: {
+		if (*how != SMB_DONTWAIT) {
+			rc = EINVAL;
+			goto out;
+		}
+		rc = imcsmb_pci_request_bus(sc->imcsmb_pci);
+		break;
+	}
+	case SMB_RELEASE_BUS:
+		imcsmb_pci_release_bus(sc->imcsmb_pci);
+		rc = 0;
+		break;
+	default:
+		rc = EINVAL;
+		break;
+	}
+
+out:
+	return (rc);
+}
+
+/**
+ * smbus_readb() method. Thin wrapper around imcsmb_transfer().
+ *
+ * @author Joe Kloss
+ *
+ * @param[in] dev
+ *
+ * @param[in] slave
+ *      The SMBus address of the target device.
+ *
+ * @param[in] cmd
+ *      The SMBus command for the target device; this is the offset for SPDs,
+ *      or the register number for TSODs.
+ *
+ * @param[out] byte
+ *      The byte which was read.
+ */
+static int
+imcsmb_readb(device_t dev, u_char slave, char cmd, char *byte)
+{
+
+	return (imcsmb_transfer(dev, slave, cmd, byte, FALSE, FALSE));
+}
+
+/**
+ * smbus_readw() method. Thin wrapper around imcsmb_transfer().
+ *
+ * @author Joe Kloss
+ *
+ * @param[in] dev
+ *
+ * @param[in] slave
+ *      The SMBus address of the target device.
+ *
+ * @param[in] cmd
+ *      The SMBus command for the target device; this is the offset for SPDs,
+ *      or the register number for TSODs.
+ *
+ * @param[out] word
+ *      The word which was read.
+ */
+static int
+imcsmb_readw(device_t dev, u_char slave, char cmd, short *word)
+{
+
+	return (imcsmb_transfer(dev, slave, cmd, word, TRUE, FALSE));
+}
+
+/**
+ * smbus_writeb() method. Thin wrapper around imcsmb_transfer().
+ *
+ * @author Joe Kloss
+ *
+ * @param[in] dev
+ *
+ * @param[in] slave
+ *      The SMBus address of the target device.
+ *
+ * @param[in] cmd
+ *      The SMBus command for the target device; this is the offset for SPDs,
+ *      or the register number for TSODs.
+ *
+ * @param[in] byte
+ *      The byte to write.
+ */
+static int
+imcsmb_writeb(device_t dev, u_char slave, char cmd, char byte)
+{
+
+	return (imcsmb_transfer(dev, slave, cmd, &byte, FALSE, TRUE));
+}
+
+/**
+ * smbus_writew() method. Thin wrapper around imcsmb_transfer().
+ *
+ * @author Joe Kloss
+ *
+ * @param[in] dev
+ *
+ * @param[in] slave
+ *      The SMBus address of the target device.
+ *
+ * @param[in] cmd
+ *      The SMBus command for the target device; this is the offset for SPDs,
+ *      or the register number for TSODs.
+ *
+ * @param[in] word
+ *      The word to write.
+ */
+static int
+imcsmb_writew(device_t dev, u_char slave, char cmd, short word)
+{
+
+	return (imcsmb_transfer(dev, slave, cmd, &word, TRUE, TRUE));
+}
+
+/**
+ * Manipulate the PCI control registers to read data from or write data to the
+ * SMBus controller.
+ *
+ * @author Joe Kloss, rpokala
+ *
+ * @param[in] dev
+ *
+ * @param[in] slave
+ *      The SMBus address of the target device.
+ *
+ * @param[in] cmd
+ *      The SMBus command for the target device; this is the offset for SPDs,
+ *      or the register number for TSODs.
+ *
+ * @param[in,out] data
+ *      Pointer to either the value to be written, or where to place the value
+ *      which was read.
+ *
+ * @param[in] word_op
+ *      Bool: is this a word operation?
+ *
+ * @param[in] write_op
+ *      Bool: is this a write operation?
+ */
+static int
+imcsmb_transfer(device_t dev, u_char slave, char cmd, void *data, int word_op,
+    int write_op)
+{
+	struct imcsmb_softc *sc;
+	int i;
+	int rc;
+	uint32_t cmd_val;
+	uint32_t cntl_val;
+	uint32_t orig_cntl_val;
+	uint32_t stat_val;
+	uint16_t *word;
+	uint16_t lword;
+	uint8_t *byte;
+	uint8_t lbyte;
+
+	sc = device_get_softc(dev);
+	byte = data;
+	word = data;
+	lbyte = *byte;
+	lword = *word;
+
+	/* We modify the value of the control register; save the original, so
+	 * we can restore it later
+	 */
+	orig_cntl_val = pci_read_config(sc->imcsmb_pci,
+	    sc->regs->smb_cntl, 4);
+	cntl_val = orig_cntl_val;
+
+	/*
+	 * Set up the SMBCNTL register
+	 */
+
+	/* [31:28] Clear the existing value of the DTI bits, then set them to
+	 * the four high bits of the slave address.
+	 */
+	cntl_val &= ~IMCSMB_CNTL_DTI_MASK;
+	cntl_val |= ((uint32_t) slave & 0xf0) << 24;
+
+	/* [27:27] Set the CLK_OVERRIDE bit, to enable normal operation */
+	cntl_val |= IMCSMB_CNTL_CLK_OVERRIDE;
+
+	/* [26:26] Clear the WRITE_DISABLE bit; the datasheet says this isn't
+	 * necessary, but empirically, it is.
+	 */
+	cntl_val &= ~IMCSMB_CNTL_WRITE_DISABLE_BIT;
+
+	/* [9:9] Clear the POLL_EN bit, to stop the hardware TSOD polling. */
+	cntl_val &= ~IMCSMB_CNTL_POLL_EN;
+
+	/*
+	 * Set up the SMBCMD register
+	 */
+
+	/* [31:31] Set the TRIGGER bit; when this gets written, the controller
+	 * will issue the command.
+	 */
+	cmd_val = IMCSMB_CMD_TRIGGER_BIT;
+
+	/* [29:29] For word operations, set the WORD_ACCESS bit. */
+	if (word_op) {
+		cmd_val |= IMCSMB_CMD_WORD_ACCESS;
+	}
+
+	/* [27:27] For write operations, set the WRITE bit. */
+	if (write_op) {
+		cmd_val |= IMCSMB_CMD_WRITE_BIT;
+	}
+
+	/* [26:24] The three non-DTI, non-R/W bits of the slave address. */
+	cmd_val |= (uint32_t) ((slave & 0xe) << 23);
+
+	/* [23:16] The command (offset in the case of an EEPROM, or register in
+	 * the case of TSOD or NVDIMM controller).
+	 */
+	cmd_val |= (uint32_t) ((uint8_t) cmd << 16);
+
+	/* [15:0] The data to be written for a write operation. */
+	if (write_op) {
+		if (word_op) {
+			/* The datasheet says the controller uses different
+			 * endianness for word operations on I2C vs SMBus!
+			 *      I2C: [15:8] = MSB; [7:0] = LSB
+			 *      SMB: [15:8] = LSB; [7:0] = MSB
+			 * As a practical matter, this controller is used
+			 * specifically with DIMMs: the SPDs (and NVDIMM
+			 * controllers) are only accessed as bytes, the
+			 * temperature sensors are only accessed as words,
+			 * and the temperature sensors are I2C. Thus,
+			 * byte-swap the word.
+			 */
+			lword = htobe16(lword);
+		} else {
+			/* For byte operations, the data goes in the LSB, and
+			 * the MSB is a don't care.
+			 */
+			lword = (uint16_t) (lbyte & 0xff);
+		}
+		cmd_val |= lword;
+	}
+
+	/* Write the updated value to the control register first, to disable
+	 * the hardware TSOD polling.
+	 */
+	pci_write_config(sc->imcsmb_pci, sc->regs->smb_cntl, cntl_val, 4);
+
+	/* Poll on the BUSY bit in the status register until clear, or time out.
+	 * We just cleared the auto-poll bit, so we need to make sure the device
+	 * is idle before issuing a command. We can safely time out after 35 ms,
+	 * as this is the maximum time the SMBus spec allows for a transaction.
+	 */
+	for (i = 4; i != 0; i--) {
+		stat_val = pci_read_config(sc->imcsmb_pci, sc->regs->smb_stat,
+		    4);
+		if ((stat_val & IMCSMB_STATUS_BUSY_BIT) == 0) {
+			break;
+		}
+		pause("imcsmb", 10 * hz / 1000);
+	}
+
+	if (i == 0) {
+		device_printf(sc->dev,
+		    "transfer: timeout waiting for device to settle\n");
+	}
+
+	/* Now that polling has stopped, we can write the command register. This
+	 * starts the SMBus command.
+	 */
+	pci_write_config(sc->imcsmb_pci, sc->regs->smb_cmd, cmd_val, 4);
+
+	/* Wait for WRITE_DATA_DONE/READ_DATA_VALID to be set, or time out and
+	 * fail. We wait up to 35 ms.
+	 */
+	for (i = 35000; i != 0; i -= 10)
+	{
+		DELAY(10);
+		stat_val = pci_read_config(sc->imcsmb_pci, sc->regs->smb_stat,
+		    4);
+		/* For a write, the bits holding the data contain the data being
+		 * written. You'd think that would cause the READ_DATA_VALID bit
+		 * to be cleared, because the data bits no longer contain valid
+		 * data from the most recent read operation. While that would be
+		 * logical, that's not the case here: READ_DATA_VALID is only
+		 * cleared when starting a read operation, and WRITE_DATA_DONE
+		 * is only cleared when starting a write operation.
+		 */
+		if (write_op) {
+			if ((stat_val & IMCSMB_STATUS_WRITE_DATA_DONE) != 0) {
+				break;
+			}
+		} else {
+			if ((stat_val & IMCSMB_STATUS_READ_DATA_VALID) != 0) {
+				break;
+			}
+		}
+	}
+	if (i == 0) {
+		rc = SMB_ETIMEOUT;
+		device_printf(dev, "transfer timeout\n");
+		goto out;
+	}
+
+	/* It is generally the case that this bit indicates non-ACK, but it
+	 * could also indicate other bus errors. There's no way to tell the
+	 * difference.
+	 */
+	if ((stat_val & IMCSMB_STATUS_BUS_ERROR_BIT) != 0) {
+		/* While it is not documented, empirically, SPD page-change
+		 * commands (writes with DTI = 0x60) always complete with the
+		 * error bit set. So, ignore it in those cases.
+		 */
+		if ((slave & 0xf0) != 0x60) {
+			rc = SMB_ENOACK;
+			goto out;
+		}
+	}
+
+	/* For a read operation, copy the data out */
+	if (write_op == 0) {
+		if (word_op) {
+			/* The data is returned in bits [15:0]; as discussed
+			 * above, byte-swap.
+			 */
+			lword = (uint16_t) (stat_val & 0xffff);
+			lword = htobe16(lword);
+			*word = lword;
+		} else {
+			/* The data is returned in bits [7:0] */
+			lbyte = (uint8_t) (stat_val & 0xff);
+			*byte = lbyte;
+		}
+	}
+
+	/* A lack of an error is, de facto, success. */
+	rc = SMB_ENOERR;
+
+out:
+	/* Restore the original value of the control register. */
+	pci_write_config(sc->imcsmb_pci, sc->regs->smb_cntl, orig_cntl_val, 4);
+	return (rc);
+}
+
+/* Our device class */
+static devclass_t imcsmb_devclass;
+
+/* Device methods */
+static device_method_t imcsmb_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_attach,	imcsmb_attach),
+	DEVMETHOD(device_detach,	imcsmb_detach),
+	DEVMETHOD(device_probe,		imcsmb_probe),
+
+	/* smbus methods */
+	DEVMETHOD(smbus_callback,	imcsmb_callback),
+	DEVMETHOD(smbus_readb,		imcsmb_readb),
+	DEVMETHOD(smbus_readw,		imcsmb_readw),
+	DEVMETHOD(smbus_writeb,		imcsmb_writeb),
+	DEVMETHOD(smbus_writew,		imcsmb_writew),
+
+	DEVMETHOD_END
+};
+
+static driver_t imcsmb_driver = {
+	.name = "imcsmb",
+	.methods = imcsmb_methods,
+	.size = sizeof(struct imcsmb_softc),
+};
+
+DRIVER_MODULE(imcsmb, imcsmb_pci, imcsmb_driver, imcsmb_devclass, 0, 0);
+MODULE_DEPEND(imcsmb, smbus, SMBUS_MINVER, SMBUS_PREFVER, SMBUS_MAXVER);
+MODULE_VERSION(imcsmb, 1);
+
+DRIVER_MODULE(smbus, imcsmb, smbus_driver, smbus_devclass, 0, 0);
+
+/* vi: set ts=8 sw=4 sts=8 noet: */


Property changes on: trunk/sys/dev/imcsmb/imcsmb.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/imcsmb/imcsmb_pci.c
===================================================================
--- trunk/sys/dev/imcsmb/imcsmb_pci.c	                        (rev 0)
+++ trunk/sys/dev/imcsmb/imcsmb_pci.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,346 @@
+/* $MidnightBSD$ */
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Authors: Joe Kloss; Ravi Pokala (rpokala at freebsd.org)
+ *
+ * Copyright (c) 2017-2018 Panasas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/imcsmb/imcsmb_pci.c 330304 2018-03-03 01:53:51Z rpokala $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/endian.h>
+#include <sys/errno.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/syslog.h>
+#include <sys/bus.h>
+
+#include <machine/bus.h>
+#include <machine/atomic.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <dev/smbus/smbconf.h>
+
+#include "imcsmb_reg.h"
+#include "imcsmb_var.h"
+
+/* (Sandy,Ivy)bridge-Xeon and (Has,Broad)well-Xeon CPUs contain one or two
+ * "Integrated Memory Controllers" (iMCs), and each iMC contains two separate
+ * SMBus controllers. These are used for reading SPD data from the DIMMs, and
+ * for reading the "Thermal Sensor on DIMM" (TSODs). The iMC SMBus controllers
+ * are very simple devices, and have limited functionality compared to
+ * full-fledged SMBus controllers, like the one in Intel ICHs and PCHs.
+ *
+ * The publicly available documentation for the iMC SMBus controllers can be
+ * found in the CPU datasheets for (Sandy,Ivy)bridge-Xeon and
+ * (Has,Broad)well-Xeon, respectively:
+ *
+ * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/
+ *      Sandybridge     xeon-e5-1600-2600-vol-2-datasheet.pdf
+ *      Ivybridge       xeon-e5-v2-datasheet-vol-2.pdf
+ *      Haswell         xeon-e5-v3-datasheet-vol-2.pdf
+ *      Broadwell       xeon-e5-v4-datasheet-vol-2.pdf
+ *
+ * Another useful resource is the Linux driver. It is not in the main tree.
+ *
+ * https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg840043.html
+ *
+ * The iMC SMBus controllers do not support interrupts (thus, they must be
+ * polled for IO completion). All of the iMC registers are in PCI configuration
+ * space; there is no support for PIO or MMIO. As a result, this driver does
+ * not need to perform any newbus resource manipulation.
+ *
+ * Because there are multiple SMBus controllers sharing the same PCI device,
+ * this driver is actually *two* drivers:
+ *
+ * - "imcsmb" is an smbus(4)-compliant SMBus controller driver
+ *
+ * - "imcsmb_pci" recognizes the PCI device and assigns the appropriate set of
+ *    PCI config registers to a specific "imcsmb" instance.
+ */
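+
+/* The resulting device hierarchy looks roughly like this (unit numbers are
+ * illustrative):
+ *
+ *	imcsmb_pci0 (the PCI device)
+ *	|-- imcsmb0 --> smbus0
+ *	`-- imcsmb1 --> smbus1
+ */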
+
+/* Depending on the motherboard and firmware, the TSODs might be polled by
+ * firmware. Therefore, when this driver accesses these SMBus controllers, the
+ * firmware polling must be disabled as part of requesting the bus, and
+ * re-enabled when releasing the bus. Unfortunately, the details of how to do
+ * this are vendor-specific. Contact your motherboard vendor to get the
+ * information needed for a proper implementation.
+ *
+ * For NVDIMMs which conform to the ACPI "NFIT" standard, the ACPI firmware
+ * manages the NVDIMM; for those which pre-date the standard, the operating
+ * system interacts with the NVDIMM controller using a vendor-proprietary API
+ * over the SMBus. In that case, the NVDIMM driver would be an SMBus slave
+ * device driver, and would interface with the hardware via an SMBus controller
+ * driver such as this one.
+ */
+
+/* PCIe device IDs for (Sandy,Ivy)bridge-Xeon and (Has,Broad)well-Xeon */
+#define PCI_VENDOR_INTEL		0x8086
+#define IMCSMB_PCI_DEV_ID_IMC0_SBX	0x3ca8
+#define IMCSMB_PCI_DEV_ID_IMC0_IBX	0x0ea8
+#define IMCSMB_PCI_DEV_ID_IMC0_HSX	0x2fa8
+#define IMCSMB_PCI_DEV_ID_IMC0_BDX	0x6fa8
+/* (Sandy,Ivy)bridge-Xeon only have a single memory controller per socket */
+#define IMCSMB_PCI_DEV_ID_IMC1_HSX	0x2f68
+#define IMCSMB_PCI_DEV_ID_IMC1_BDX	0x6f68
+
+/* There are two SMBus controllers in each PCI device. This array defines the
+ * register set for each of those two controllers.
+ */
+static struct imcsmb_reg_set imcsmb_regs[] = {
+	{
+		.smb_stat = IMCSMB_REG_STATUS0,
+		.smb_cmd = IMCSMB_REG_COMMAND0,
+		.smb_cntl = IMCSMB_REG_CONTROL0
+	},
+	{
+		.smb_stat = IMCSMB_REG_STATUS1,
+		.smb_cmd = IMCSMB_REG_COMMAND1,
+		.smb_cntl = IMCSMB_REG_CONTROL1
+	},
+};
+
+static struct imcsmb_pci_device {
+	uint16_t	id;
+	char		*name;
+} imcsmb_pci_devices[] = {
+	{IMCSMB_PCI_DEV_ID_IMC0_SBX,
+	    "Intel Sandybridge Xeon iMC 0 SMBus controllers"	},
+	{IMCSMB_PCI_DEV_ID_IMC0_IBX,
+	    "Intel Ivybridge Xeon iMC 0 SMBus controllers"	},
+	{IMCSMB_PCI_DEV_ID_IMC0_HSX,
+	    "Intel Haswell Xeon iMC 0 SMBus controllers"	},
+	{IMCSMB_PCI_DEV_ID_IMC1_HSX,
+	    "Intel Haswell Xeon iMC 1 SMBus controllers"	},
+	{IMCSMB_PCI_DEV_ID_IMC0_BDX,
+	    "Intel Broadwell Xeon iMC 0 SMBus controllers"	},
+	{IMCSMB_PCI_DEV_ID_IMC1_BDX,
+	    "Intel Broadwell Xeon iMC 1 SMBus controllers"	},
+	{0, NULL},
+};
+
+/* Device methods. */
+static int imcsmb_pci_attach(device_t dev);
+static int imcsmb_pci_detach(device_t dev);
+static int imcsmb_pci_probe(device_t dev);
+
+/**
+ * device_attach() method. Set up the PCI device's softc, then explicitly create
+ * children for the actual imcsmbX controllers. Set up the child's ivars to
+ * point to the proper set of the PCI device's config registers.
+ *
+ * @author Joe Kloss, rpokala
+ *
+ * @param[in,out] dev
+ *      Device being attached.
+ */
+static int
+imcsmb_pci_attach(device_t dev)
+{
+	struct imcsmb_pci_softc *sc;
+	device_t child;
+	int rc;
+	int unit;
+
+	/* Initialize private state */
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+	sc->semaphore = 0;
+
+	/* Create the imcsmbX children */
+	for (unit = 0; unit < 2; unit++) {
+		child = device_add_child(dev, "imcsmb", -1);
+		if (child == NULL) {
+			/* Nothing has been allocated, so there's no cleanup. */
+			device_printf(dev, "Child imcsmb not added\n");
+			rc = ENXIO;
+			goto out;
+		}
+		/* Set the child's ivars to point to the appropriate set of
+		 * the PCI device's registers.
+		 */
+		device_set_ivars(child, &imcsmb_regs[unit]);
+	}
+
+	/* Attach the imcsmbX children. */
+	if ((rc = bus_generic_attach(dev)) != 0) {
+		device_printf(dev, "failed to attach children: %d\n", rc);
+		goto out;
+	}
+
+out:
+	return (rc);
+}
+
+/**
+ * device_detach() method. attach() didn't do any allocations, so all that's
+ * needed here is to free up any downstream drivers and children.
+ *
+ * @author Joe Kloss
+ *
+ * @param[in] dev
+ *      Device being detached.
+ */
+static int
+imcsmb_pci_detach(device_t dev)
+{
+	int rc;
+
+	/* Detach any attached drivers */
+	rc = bus_generic_detach(dev);
+	if (rc == 0) {
+		/* Remove all children */
+		rc = device_delete_children(dev);
+	}
+
+	return (rc);
+}
+
+/**
+ * device_probe() method. Look for the right PCI vendor/device IDs.
+ *
+ * @author Joe Kloss, rpokala
+ *
+ * @param[in,out] dev
+ *      Device being probed.
+ */
+static int
+imcsmb_pci_probe(device_t dev)
+{
+	struct imcsmb_pci_device *pci_device;
+	int rc;
+	uint16_t pci_dev_id;
+
+	rc = ENXIO;
+
+	if (pci_get_vendor(dev) != PCI_VENDOR_INTEL) {
+		goto out;
+	}
+
+	pci_dev_id = pci_get_device(dev);
+	for (pci_device = imcsmb_pci_devices;
+	    pci_device->name != NULL;
+	    pci_device++) {
+		if (pci_dev_id == pci_device->id) {
+			device_set_desc(dev, pci_device->name);
+			rc = BUS_PROBE_DEFAULT;
+			goto out;
+		}
+	}
+
+out:
+	return (rc);
+}
+
+/**
+ * Invoked via smbus_callback() -> imcsmb_callback(); clear the semaphore, and
+ * re-enable motherboard-specific DIMM temperature monitoring if needed. This
+ * gets called after the transaction completes.
+ *
+ * @author Joe Kloss
+ *
+ * @param[in,out] dev
+ *      The device whose busses to release.
+ */
+void
+imcsmb_pci_release_bus(device_t dev)
+{
+	struct imcsmb_pci_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	/*
+	 * IF NEEDED, INSERT MOTHERBOARD-SPECIFIC CODE TO RE-ENABLE DIMM
+	 * TEMPERATURE MONITORING HERE.
+	 */
+
+	atomic_store_rel_int(&sc->semaphore, 0);
+}
+
+/**
+ * Invoked via smbus_callback() -> imcsmb_callback(); set the semaphore, and
+ * disable motherboard-specific DIMM temperature monitoring if needed. This gets
+ * called before the transaction starts.
+ *
+ * @author Joe Kloss
+ *
+ * @param[in,out] dev
+ *      The device whose busses to request.
+ */
+int
+imcsmb_pci_request_bus(device_t dev)
+{
+	struct imcsmb_pci_softc *sc;
+	int rc;
+
+	sc = device_get_softc(dev);
+	rc = 0;
+
+	/* We don't want to block. Use a simple test-and-set semaphore to
+	 * protect the bus.
+	 */
+	if (atomic_cmpset_acq_int(&sc->semaphore, 0, 1) == 0) {
+		rc = EWOULDBLOCK;
+	}
+
+	/*
+	 * IF NEEDED, INSERT MOTHERBOARD-SPECIFIC CODE TO DISABLE DIMM
+	 * TEMPERATURE MONITORING HERE.
+	 */
+
+	return (rc);
+}
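+
+/* Sketch of the expected call sequence (assumed from the smbus(4) callback
+ * protocol; the imcsmb side lives in imcsmb.c):
+ *
+ *	smbus_readb()
+ *	  -> imcsmb_callback(..., SMB_REQUEST_BUS) -> imcsmb_pci_request_bus()
+ *	  -> imcsmb_readb() -> transfer via the PCI config registers
+ *	  -> imcsmb_callback(..., SMB_RELEASE_BUS) -> imcsmb_pci_release_bus()
+ */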
+
+/* Our device class */
+static devclass_t imcsmb_pci_devclass;
+
+/* Device methods */
+static device_method_t imcsmb_pci_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_attach,	imcsmb_pci_attach),
+	DEVMETHOD(device_detach,	imcsmb_pci_detach),
+	DEVMETHOD(device_probe,		imcsmb_pci_probe),
+
+	DEVMETHOD_END
+};
+
+static driver_t imcsmb_pci_driver = {
+	.name = "imcsmb_pci",
+	.methods = imcsmb_pci_methods,
+	.size = sizeof(struct imcsmb_pci_softc),
+};
+
+DRIVER_MODULE(imcsmb_pci, pci, imcsmb_pci_driver, imcsmb_pci_devclass, 0, 0);
+MODULE_DEPEND(imcsmb_pci, pci, 1, 1, 1);
+MODULE_VERSION(imcsmb_pci, 1);
+
+/* vi: set ts=8 sw=4 sts=8 noet: */


Property changes on: trunk/sys/dev/imcsmb/imcsmb_pci.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/imcsmb/imcsmb_reg.h
===================================================================
--- trunk/sys/dev/imcsmb/imcsmb_reg.h	                        (rev 0)
+++ trunk/sys/dev/imcsmb/imcsmb_reg.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,87 @@
+/* $MidnightBSD$ */
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Authors: Joe Kloss; Ravi Pokala (rpokala at freebsd.org)
+ *
+ * Copyright (c) 2017-2018 Panasas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/imcsmb/imcsmb_reg.h 330304 2018-03-03 01:53:51Z rpokala $
+ */
+
+#ifndef _DEV__IMCSMB__IMCSMB_REG_H_
+#define _DEV__IMCSMB__IMCSMB_REG_H_
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/endian.h>
+#include <sys/errno.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/syslog.h>
+#include <sys/bus.h>
+
+#include <machine/bus.h>
+#include <machine/atomic.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <dev/smbus/smbconf.h>
+
+/* Intel (Sandy,Ivy)bridge and (Has,Broad)well CPUs have integrated memory
+ * controllers (iMCs), each of which has up to two SMBus controllers. They
+ * are programmed via sets of registers in the same PCI device, which are
+ * identical other than the register numbers.
+ *
+ * The full documentation for these registers can be found in volume two of the
+ * datasheets for the CPUs. Refer to the links in imcsmb_pci.c.
+ */
+
+#define	IMCSMB_REG_STATUS0			0x0180
+#define	IMCSMB_REG_STATUS1			0x0190
+#define		IMCSMB_STATUS_BUSY_BIT		0x10000000
+#define		IMCSMB_STATUS_BUS_ERROR_BIT	0x20000000
+#define		IMCSMB_STATUS_WRITE_DATA_DONE	0x40000000
+#define		IMCSMB_STATUS_READ_DATA_VALID	0x80000000
+
+#define	IMCSMB_REG_COMMAND0			0x0184
+#define	IMCSMB_REG_COMMAND1			0x0194
+#define		IMCSMB_CMD_WORD_ACCESS		0x20000000
+#define		IMCSMB_CMD_WRITE_BIT		0x08000000
+#define		IMCSMB_CMD_TRIGGER_BIT		0x80000000
+
+#define	IMCSMB_REG_CONTROL0			0x0188
+#define	IMCSMB_REG_CONTROL1			0x0198
+#define		IMCSMB_CNTL_POLL_EN		0x00000100
+#define		IMCSMB_CNTL_CLK_OVERRIDE	0x08000000
+#define		IMCSMB_CNTL_DTI_MASK		0xf0000000
+#define		IMCSMB_CNTL_WRITE_DISABLE_BIT	0x04000000
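+
+/* Cross-reference for the bit-numbered comments in imcsmb.c: the masks above
+ * correspond to status bits 31 (READ_DATA_VALID), 30 (WRITE_DATA_DONE),
+ * 29 (BUS_ERROR) and 28 (BUSY); command bits 31 (TRIGGER), 29 (WORD_ACCESS)
+ * and 27 (WRITE); control bits 31:28 (DTI), 27 (CLK_OVERRIDE),
+ * 26 (WRITE_DISABLE) and 8 (POLL_EN).
+ */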
+
+#endif /* _DEV__IMCSMB__IMCSMB_REG_H_ */
+
+/* vi: set ts=8 sw=4 sts=8 noet: */


Property changes on: trunk/sys/dev/imcsmb/imcsmb_reg.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/imcsmb/imcsmb_var.h
===================================================================
--- trunk/sys/dev/imcsmb/imcsmb_var.h	                        (rev 0)
+++ trunk/sys/dev/imcsmb/imcsmb_var.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,108 @@
+/* $MidnightBSD$ */
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Authors: Joe Kloss; Ravi Pokala (rpokala at freebsd.org)
+ *
+ * Copyright (c) 2017-2018 Panasas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/imcsmb/imcsmb_var.h 330304 2018-03-03 01:53:51Z rpokala $
+ */
+
+#ifndef _DEV__IMCSMB__IMCSMB_VAR_H_
+#define _DEV__IMCSMB__IMCSMB_VAR_H_
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/endian.h>
+#include <sys/errno.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/syslog.h>
+#include <sys/bus.h>
+
+#include <machine/bus.h>
+#include <machine/atomic.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <dev/smbus/smbconf.h>
+
+#include "smbus_if.h"
+
+/* A detailed description of this device is present in imcsmb_pci.c */
+
+/**
+ * The softc for a particular instance of the PCI device associated with a pair
+ * of iMC-SMB controllers.
+ *
+ * Ordinarily, locking would be done with a mutex. However, we might have an
+ * NVDIMM connected to this SMBus, and we might need to issue the SAVE command
+ * to the NVDIMM from a panic context. Mutex operations are not allowed while
+ * the scheduler is stopped, so just use a simple semaphore.
+ *
+ * If, as described in the manpage, additional steps are needed to stop/restart
+ * firmware operations before/after using the controller, then additional fields
+ * can be added to this softc.
+ */
+struct imcsmb_pci_softc {
+	device_t dev;
+	volatile int semaphore;
+};
+
+void imcsmb_pci_release_bus(device_t dev);
+int imcsmb_pci_request_bus(device_t dev);
+
+/**
+ * PCI config registers for each individual SMBus controller within the iMC.
+ * Each iMC-SMB has a separate set of registers. There is an array of these
+ * structures for the PCI device, and one of them is passed to the driver for
+ * the actual iMC-SMB as the IVAR.
+ */
+struct imcsmb_reg_set {
+	uint16_t smb_stat;
+	uint16_t smb_cmd;
+	uint16_t smb_cntl;
+};
+
+/**
+ * The softc for the device associated with a particular iMC-SMB controller.
+ * There are two such controllers for each of the PCI devices. The PCI driver
+ * tells the iMC-SMB driver which set of registers to use via the IVAR. This
+ * technique was suggested by John Baldwin (jhb@).
+ */
+struct imcsmb_softc {
+	device_t dev;
+	device_t imcsmb_pci;	/* The SMBus controller's parent iMC */
+	device_t smbus;		/* The child smbusX interface */
+	struct imcsmb_reg_set *regs;	/* The registers this controller uses */
+};
+
+#endif /* _DEV__IMCSMB__IMCSMB_VAR_H_ */
+
+/* vi: set ts=8 sw=4 sts=8 noet: */


Property changes on: trunk/sys/dev/imcsmb/imcsmb_var.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/ioat/ioat.c
===================================================================
--- trunk/sys/dev/ioat/ioat.c	                        (rev 0)
+++ trunk/sys/dev/ioat/ioat.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,2099 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/ioat/ioat.c 301812 2016-06-10 18:40:03Z ngie $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/ioccom.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/sbuf.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/time.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/stdarg.h>
+
+#include "ioat.h"
+#include "ioat_hw.h"
+#include "ioat_internal.h"
+
+#ifndef	BUS_SPACE_MAXADDR_40BIT
+#define	BUS_SPACE_MAXADDR_40BIT	0xFFFFFFFFFFULL
+#endif
+#define	IOAT_INTR_TIMO	(hz / 10)
+#define	IOAT_REFLK	(&ioat->submit_lock)
+
+static int ioat_probe(device_t device);
+static int ioat_attach(device_t device);
+static int ioat_detach(device_t device);
+static int ioat_setup_intr(struct ioat_softc *ioat);
+static int ioat_teardown_intr(struct ioat_softc *ioat);
+static int ioat3_attach(device_t device);
+static int ioat_start_channel(struct ioat_softc *ioat);
+static int ioat_map_pci_bar(struct ioat_softc *ioat);
+static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg,
+    int error);
+static void ioat_interrupt_handler(void *arg);
+static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat);
+static int chanerr_to_errno(uint32_t);
+static void ioat_process_events(struct ioat_softc *ioat);
+static inline uint32_t ioat_get_active(struct ioat_softc *ioat);
+static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat);
+static void ioat_free_ring(struct ioat_softc *, uint32_t size,
+    struct ioat_descriptor **);
+static void ioat_free_ring_entry(struct ioat_softc *ioat,
+    struct ioat_descriptor *desc);
+static struct ioat_descriptor *ioat_alloc_ring_entry(struct ioat_softc *,
+    int mflags);
+static int ioat_reserve_space(struct ioat_softc *, uint32_t, int mflags);
+static struct ioat_descriptor *ioat_get_ring_entry(struct ioat_softc *ioat,
+    uint32_t index);
+static struct ioat_descriptor **ioat_prealloc_ring(struct ioat_softc *,
+    uint32_t size, boolean_t need_dscr, int mflags);
+static int ring_grow(struct ioat_softc *, uint32_t oldorder,
+    struct ioat_descriptor **);
+static int ring_shrink(struct ioat_softc *, uint32_t oldorder,
+    struct ioat_descriptor **);
+static void ioat_halted_debug(struct ioat_softc *, uint32_t);
+static void ioat_timer_callback(void *arg);
+static void dump_descriptor(void *hw_desc);
+static void ioat_submit_single(struct ioat_softc *ioat);
+static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg,
+    int error);
+static int ioat_reset_hw(struct ioat_softc *ioat);
+static void ioat_reset_hw_task(void *, int);
+static void ioat_setup_sysctl(device_t device);
+static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS);
+static inline struct ioat_softc *ioat_get(struct ioat_softc *,
+    enum ioat_ref_kind);
+static inline void ioat_put(struct ioat_softc *, enum ioat_ref_kind);
+static inline void _ioat_putn(struct ioat_softc *, uint32_t,
+    enum ioat_ref_kind, boolean_t);
+static inline void ioat_putn(struct ioat_softc *, uint32_t,
+    enum ioat_ref_kind);
+static inline void ioat_putn_locked(struct ioat_softc *, uint32_t,
+    enum ioat_ref_kind);
+static void ioat_drain_locked(struct ioat_softc *);
+
+#define	ioat_log_message(v, ...) do {					\
+	if ((v) <= g_ioat_debug_level) {				\
+		device_printf(ioat->device, __VA_ARGS__);		\
+	}								\
+} while (0)
+
+MALLOC_DEFINE(M_IOAT, "ioat", "ioat driver memory allocations");
+SYSCTL_NODE(_hw, OID_AUTO, ioat, CTLFLAG_RD, 0, "ioat node");
+
+static int g_force_legacy_interrupts;
+SYSCTL_INT(_hw_ioat, OID_AUTO, force_legacy_interrupts, CTLFLAG_RDTUN,
+    &g_force_legacy_interrupts, 0, "Set to non-zero to force MSI-X disabled");
+
+int g_ioat_debug_level = 0;
+SYSCTL_INT(_hw_ioat, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ioat_debug_level,
+    0, "Set log level (0-3) for ioat(4). Higher is more verbose.");
+
+/*
+ * OS <-> Driver interface structures
+ */
+static device_method_t ioat_pci_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,     ioat_probe),
+	DEVMETHOD(device_attach,    ioat_attach),
+	DEVMETHOD(device_detach,    ioat_detach),
+	DEVMETHOD_END
+};
+
+static driver_t ioat_pci_driver = {
+	"ioat",
+	ioat_pci_methods,
+	sizeof(struct ioat_softc),
+};
+
+static devclass_t ioat_devclass;
+DRIVER_MODULE(ioat, pci, ioat_pci_driver, ioat_devclass, 0, 0);
+MODULE_VERSION(ioat, 1);
+
+/*
+ * Private data structures
+ */
+static struct ioat_softc *ioat_channel[IOAT_MAX_CHANNELS];
+static unsigned ioat_channel_index = 0;
+SYSCTL_UINT(_hw_ioat, OID_AUTO, channels, CTLFLAG_RD, &ioat_channel_index, 0,
+    "Number of IOAT channels attached");
+
+static struct _pcsid
+{
+	u_int32_t   type;
+	const char  *desc;
+} pci_ids[] = {
+	{ 0x34308086, "TBG IOAT Ch0" },
+	{ 0x34318086, "TBG IOAT Ch1" },
+	{ 0x34328086, "TBG IOAT Ch2" },
+	{ 0x34338086, "TBG IOAT Ch3" },
+	{ 0x34298086, "TBG IOAT Ch4" },
+	{ 0x342a8086, "TBG IOAT Ch5" },
+	{ 0x342b8086, "TBG IOAT Ch6" },
+	{ 0x342c8086, "TBG IOAT Ch7" },
+
+	{ 0x37108086, "JSF IOAT Ch0" },
+	{ 0x37118086, "JSF IOAT Ch1" },
+	{ 0x37128086, "JSF IOAT Ch2" },
+	{ 0x37138086, "JSF IOAT Ch3" },
+	{ 0x37148086, "JSF IOAT Ch4" },
+	{ 0x37158086, "JSF IOAT Ch5" },
+	{ 0x37168086, "JSF IOAT Ch6" },
+	{ 0x37178086, "JSF IOAT Ch7" },
+	{ 0x37188086, "JSF IOAT Ch0 (RAID)" },
+	{ 0x37198086, "JSF IOAT Ch1 (RAID)" },
+
+	{ 0x3c208086, "SNB IOAT Ch0" },
+	{ 0x3c218086, "SNB IOAT Ch1" },
+	{ 0x3c228086, "SNB IOAT Ch2" },
+	{ 0x3c238086, "SNB IOAT Ch3" },
+	{ 0x3c248086, "SNB IOAT Ch4" },
+	{ 0x3c258086, "SNB IOAT Ch5" },
+	{ 0x3c268086, "SNB IOAT Ch6" },
+	{ 0x3c278086, "SNB IOAT Ch7" },
+	{ 0x3c2e8086, "SNB IOAT Ch0 (RAID)" },
+	{ 0x3c2f8086, "SNB IOAT Ch1 (RAID)" },
+
+	{ 0x0e208086, "IVB IOAT Ch0" },
+	{ 0x0e218086, "IVB IOAT Ch1" },
+	{ 0x0e228086, "IVB IOAT Ch2" },
+	{ 0x0e238086, "IVB IOAT Ch3" },
+	{ 0x0e248086, "IVB IOAT Ch4" },
+	{ 0x0e258086, "IVB IOAT Ch5" },
+	{ 0x0e268086, "IVB IOAT Ch6" },
+	{ 0x0e278086, "IVB IOAT Ch7" },
+	{ 0x0e2e8086, "IVB IOAT Ch0 (RAID)" },
+	{ 0x0e2f8086, "IVB IOAT Ch1 (RAID)" },
+
+	{ 0x2f208086, "HSW IOAT Ch0" },
+	{ 0x2f218086, "HSW IOAT Ch1" },
+	{ 0x2f228086, "HSW IOAT Ch2" },
+	{ 0x2f238086, "HSW IOAT Ch3" },
+	{ 0x2f248086, "HSW IOAT Ch4" },
+	{ 0x2f258086, "HSW IOAT Ch5" },
+	{ 0x2f268086, "HSW IOAT Ch6" },
+	{ 0x2f278086, "HSW IOAT Ch7" },
+	{ 0x2f2e8086, "HSW IOAT Ch0 (RAID)" },
+	{ 0x2f2f8086, "HSW IOAT Ch1 (RAID)" },
+
+	{ 0x0c508086, "BWD IOAT Ch0" },
+	{ 0x0c518086, "BWD IOAT Ch1" },
+	{ 0x0c528086, "BWD IOAT Ch2" },
+	{ 0x0c538086, "BWD IOAT Ch3" },
+
+	{ 0x6f508086, "BDXDE IOAT Ch0" },
+	{ 0x6f518086, "BDXDE IOAT Ch1" },
+	{ 0x6f528086, "BDXDE IOAT Ch2" },
+	{ 0x6f538086, "BDXDE IOAT Ch3" },
+
+	{ 0x6f208086, "BDX IOAT Ch0" },
+	{ 0x6f218086, "BDX IOAT Ch1" },
+	{ 0x6f228086, "BDX IOAT Ch2" },
+	{ 0x6f238086, "BDX IOAT Ch3" },
+	{ 0x6f248086, "BDX IOAT Ch4" },
+	{ 0x6f258086, "BDX IOAT Ch5" },
+	{ 0x6f268086, "BDX IOAT Ch6" },
+	{ 0x6f278086, "BDX IOAT Ch7" },
+	{ 0x6f2e8086, "BDX IOAT Ch0 (RAID)" },
+	{ 0x6f2f8086, "BDX IOAT Ch1 (RAID)" },
+
+	{ 0x00000000, NULL           }
+};
+
+/*
+ * OS <-> Driver linkage functions
+ */
+static int
+ioat_probe(device_t device)
+{
+	struct _pcsid *ep;
+	u_int32_t type;
+
+	type = pci_get_devid(device);
+	for (ep = pci_ids; ep->type; ep++) {
+		if (ep->type == type) {
+			device_set_desc(device, ep->desc);
+			return (0);
+		}
+	}
+	return (ENXIO);
+}
+
+static int
+ioat_attach(device_t device)
+{
+	struct ioat_softc *ioat;
+	int error;
+
+	ioat = DEVICE2SOFTC(device);
+	ioat->device = device;
+
+	error = ioat_map_pci_bar(ioat);
+	if (error != 0)
+		goto err;
+
+	ioat->version = ioat_read_cbver(ioat);
+	if (ioat->version < IOAT_VER_3_0) {
+		error = ENODEV;
+		goto err;
+	}
+
+	error = ioat3_attach(device);
+	if (error != 0)
+		goto err;
+
+	error = pci_enable_busmaster(device);
+	if (error != 0)
+		goto err;
+
+	error = ioat_setup_intr(ioat);
+	if (error != 0)
+		goto err;
+
+	error = ioat_reset_hw(ioat);
+	if (error != 0)
+		goto err;
+
+	ioat_process_events(ioat);
+	ioat_setup_sysctl(device);
+
+	ioat->chan_idx = ioat_channel_index;
+	ioat_channel[ioat_channel_index++] = ioat;
+	ioat_test_attach();
+
+err:
+	if (error != 0)
+		ioat_detach(device);
+	return (error);
+}
+
+static int
+ioat_detach(device_t device)
+{
+	struct ioat_softc *ioat;
+
+	ioat = DEVICE2SOFTC(device);
+
+	ioat_test_detach();
+	taskqueue_drain(taskqueue_thread, &ioat->reset_task);
+
+	mtx_lock(IOAT_REFLK);
+	ioat->quiescing = TRUE;
+	ioat->destroying = TRUE;
+	wakeup(&ioat->quiescing);
+
+	ioat_channel[ioat->chan_idx] = NULL;
+
+	ioat_drain_locked(ioat);
+	mtx_unlock(IOAT_REFLK);
+
+	ioat_teardown_intr(ioat);
+	callout_drain(&ioat->timer);
+
+	pci_disable_busmaster(device);
+
+	if (ioat->pci_resource != NULL)
+		bus_release_resource(device, SYS_RES_MEMORY,
+		    ioat->pci_resource_id, ioat->pci_resource);
+
+	if (ioat->ring != NULL)
+		ioat_free_ring(ioat, 1 << ioat->ring_size_order, ioat->ring);
+
+	if (ioat->comp_update != NULL) {
+		bus_dmamap_unload(ioat->comp_update_tag, ioat->comp_update_map);
+		bus_dmamem_free(ioat->comp_update_tag, ioat->comp_update,
+		    ioat->comp_update_map);
+		bus_dma_tag_destroy(ioat->comp_update_tag);
+	}
+
+	bus_dma_tag_destroy(ioat->hw_desc_tag);
+
+	return (0);
+}
+
+static int
+ioat_teardown_intr(struct ioat_softc *ioat)
+{
+
+	if (ioat->tag != NULL)
+		bus_teardown_intr(ioat->device, ioat->res, ioat->tag);
+
+	if (ioat->res != NULL)
+		bus_release_resource(ioat->device, SYS_RES_IRQ,
+		    rman_get_rid(ioat->res), ioat->res);
+
+	pci_release_msi(ioat->device);
+	return (0);
+}
+
+static int
+ioat_start_channel(struct ioat_softc *ioat)
+{
+	uint64_t status;
+	uint32_t chanerr;
+	int i;
+
+	ioat_acquire(&ioat->dmaengine);
+	ioat_null(&ioat->dmaengine, NULL, NULL, 0);
+	ioat_release(&ioat->dmaengine);
+
+	for (i = 0; i < 100; i++) {
+		DELAY(1);
+		status = ioat_get_chansts(ioat);
+		if (is_ioat_idle(status))
+			return (0);
+	}
+
+	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
+	ioat_log_message(0, "could not start channel: "
+	    "status = %#jx error = %b\n", (uintmax_t)status, (int)chanerr,
+	    IOAT_CHANERR_STR);
+	return (ENXIO);
+}
+
+/*
+ * Initialize Hardware
+ */
+static int
+ioat3_attach(device_t device)
+{
+	struct ioat_softc *ioat;
+	struct ioat_descriptor **ring;
+	struct ioat_descriptor *next;
+	struct ioat_dma_hw_descriptor *dma_hw_desc;
+	int i, num_descriptors;
+	int error;
+	uint8_t xfercap;
+
+	error = 0;
+	ioat = DEVICE2SOFTC(device);
+	ioat->capabilities = ioat_read_dmacapability(ioat);
+
+	ioat_log_message(0, "Capabilities: %b\n", (int)ioat->capabilities,
+	    IOAT_DMACAP_STR);
+
+	xfercap = ioat_read_xfercap(ioat);
+	ioat->max_xfer_size = 1 << xfercap;
+
+	ioat->intrdelay_supported = (ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) &
+	    IOAT_INTRDELAY_SUPPORTED) != 0;
+	if (ioat->intrdelay_supported)
+		ioat->intrdelay_max = IOAT_INTRDELAY_US_MASK;
+
+	/* TODO: need to check DCA here if we ever do XOR/PQ */
+
+	mtx_init(&ioat->submit_lock, "ioat_submit", NULL, MTX_DEF);
+	mtx_init(&ioat->cleanup_lock, "ioat_cleanup", NULL, MTX_DEF);
+	callout_init(&ioat->timer, 1);
+	TASK_INIT(&ioat->reset_task, 0, ioat_reset_hw_task, ioat);
+
+	/* Establish lock order for Witness */
+	mtx_lock(&ioat->submit_lock);
+	mtx_lock(&ioat->cleanup_lock);
+	mtx_unlock(&ioat->cleanup_lock);
+	mtx_unlock(&ioat->submit_lock);
+
+	ioat->is_resize_pending = FALSE;
+	ioat->is_completion_pending = FALSE;
+	ioat->is_reset_pending = FALSE;
+	ioat->is_channel_running = FALSE;
+
+	bus_dma_tag_create(bus_get_dma_tag(ioat->device), sizeof(uint64_t), 0x0,
+	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
+	    sizeof(uint64_t), 1, sizeof(uint64_t), 0, NULL, NULL,
+	    &ioat->comp_update_tag);
+
+	error = bus_dmamem_alloc(ioat->comp_update_tag,
+	    (void **)&ioat->comp_update, BUS_DMA_ZERO, &ioat->comp_update_map);
+	if (ioat->comp_update == NULL)
+		return (ENOMEM);
+
+	error = bus_dmamap_load(ioat->comp_update_tag, ioat->comp_update_map,
+	    ioat->comp_update, sizeof(uint64_t), ioat_comp_update_map, ioat,
+	    0);
+	if (error != 0)
+		return (error);
+
+	ioat->ring_size_order = IOAT_MIN_ORDER;
+
+	num_descriptors = 1 << ioat->ring_size_order;
+
+	bus_dma_tag_create(bus_get_dma_tag(ioat->device), 0x40, 0x0,
+	    BUS_SPACE_MAXADDR_40BIT, BUS_SPACE_MAXADDR, NULL, NULL,
+	    sizeof(struct ioat_dma_hw_descriptor), 1,
+	    sizeof(struct ioat_dma_hw_descriptor), 0, NULL, NULL,
+	    &ioat->hw_desc_tag);
+
+	ioat->ring = malloc(num_descriptors * sizeof(*ring), M_IOAT,
+	    M_ZERO | M_WAITOK);
+
+	ring = ioat->ring;
+	for (i = 0; i < num_descriptors; i++) {
+		ring[i] = ioat_alloc_ring_entry(ioat, M_WAITOK);
+		if (ring[i] == NULL)
+			return (ENOMEM);
+
+		ring[i]->id = i;
+	}
+
+	for (i = 0; i < num_descriptors - 1; i++) {
+		next = ring[i + 1];
+		dma_hw_desc = ring[i]->u.dma;
+
+		dma_hw_desc->next = next->hw_desc_bus_addr;
+	}
+
+	ring[i]->u.dma->next = ring[0]->hw_desc_bus_addr;
+
+	ioat->head = ioat->hw_head = 0;
+	ioat->tail = 0;
+	ioat->last_seen = 0;
+	return (0);
+}
+
+static int
+ioat_map_pci_bar(struct ioat_softc *ioat)
+{
+
+	ioat->pci_resource_id = PCIR_BAR(0);
+	ioat->pci_resource = bus_alloc_resource_any(ioat->device,
+	    SYS_RES_MEMORY, &ioat->pci_resource_id, RF_ACTIVE);
+
+	if (ioat->pci_resource == NULL) {
+		ioat_log_message(0, "unable to allocate pci resource\n");
+		return (ENODEV);
+	}
+
+	ioat->pci_bus_tag = rman_get_bustag(ioat->pci_resource);
+	ioat->pci_bus_handle = rman_get_bushandle(ioat->pci_resource);
+	return (0);
+}
+
+static void
+ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
+{
+	struct ioat_softc *ioat = arg;
+
+	KASSERT(error == 0, ("%s: error:%d", __func__, error));
+	ioat->comp_update_bus_addr = seg[0].ds_addr;
+}
+
+static void
+ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
+{
+	bus_addr_t *baddr;
+
+	KASSERT(error == 0, ("%s: error:%d", __func__, error));
+	baddr = arg;
+	*baddr = segs->ds_addr;
+}
+
+/*
+ * Interrupt setup and handlers
+ */
+static int
+ioat_setup_intr(struct ioat_softc *ioat)
+{
+	uint32_t num_vectors;
+	int error;
+	boolean_t use_msix;
+	boolean_t force_legacy_interrupts;
+
+	use_msix = FALSE;
+	force_legacy_interrupts = FALSE;
+
+	if (!g_force_legacy_interrupts && pci_msix_count(ioat->device) >= 1) {
+		num_vectors = 1;
+		pci_alloc_msix(ioat->device, &num_vectors);
+		if (num_vectors == 1)
+			use_msix = TRUE;
+	}
+
+	if (use_msix) {
+		ioat->rid = 1;
+		ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ,
+		    &ioat->rid, RF_ACTIVE);
+	} else {
+		ioat->rid = 0;
+		ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ,
+		    &ioat->rid, RF_SHAREABLE | RF_ACTIVE);
+	}
+	if (ioat->res == NULL) {
+		ioat_log_message(0, "bus_alloc_resource failed\n");
+		return (ENOMEM);
+	}
+
+	ioat->tag = NULL;
+	error = bus_setup_intr(ioat->device, ioat->res, INTR_MPSAFE |
+	    INTR_TYPE_MISC, NULL, ioat_interrupt_handler, ioat, &ioat->tag);
+	if (error != 0) {
+		ioat_log_message(0, "bus_setup_intr failed\n");
+		return (error);
+	}
+
+	ioat_write_intrctrl(ioat, IOAT_INTRCTRL_MASTER_INT_EN);
+	return (0);
+}
+
+static boolean_t
+ioat_model_resets_msix(struct ioat_softc *ioat)
+{
+	u_int32_t pciid;
+
+	pciid = pci_get_devid(ioat->device);
+	switch (pciid) {
+		/* BWD: */
+	case 0x0c508086:
+	case 0x0c518086:
+	case 0x0c528086:
+	case 0x0c538086:
+		/* BDXDE: */
+	case 0x6f508086:
+	case 0x6f518086:
+	case 0x6f528086:
+	case 0x6f538086:
+		return (TRUE);
+	}
+
+	return (FALSE);
+}
+
+static void
+ioat_interrupt_handler(void *arg)
+{
+	struct ioat_softc *ioat = arg;
+
+	ioat->stats.interrupts++;
+	ioat_process_events(ioat);
+}
+
+static int
+chanerr_to_errno(uint32_t chanerr)
+{
+
+	if (chanerr == 0)
+		return (0);
+	if ((chanerr & (IOAT_CHANERR_XSADDERR | IOAT_CHANERR_XDADDERR)) != 0)
+		return (EFAULT);
+	if ((chanerr & (IOAT_CHANERR_RDERR | IOAT_CHANERR_WDERR)) != 0)
+		return (EIO);
+	/* This one is probably our fault: */
+	if ((chanerr & IOAT_CHANERR_NDADDERR) != 0)
+		return (EIO);
+	return (EIO);
+}
+
+static void
+ioat_process_events(struct ioat_softc *ioat)
+{
+	struct ioat_descriptor *desc;
+	struct bus_dmadesc *dmadesc;
+	uint64_t comp_update, status;
+	uint32_t completed, chanerr;
+	int error;
+
+	mtx_lock(&ioat->cleanup_lock);
+
+	completed = 0;
+	comp_update = *ioat->comp_update;
+	status = comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;
+
+	CTR0(KTR_IOAT, __func__);
+
+	if (status == ioat->last_seen) {
+		/*
+		 * If we landed in process_events and nothing has been
+		 * completed, check for a timeout due to channel halt.
+		 */
+		comp_update = ioat_get_chansts(ioat);
+		goto out;
+	}
+
+	while (1) {
+		desc = ioat_get_ring_entry(ioat, ioat->tail);
+		dmadesc = &desc->bus_dmadesc;
+		CTR1(KTR_IOAT, "completing desc %d", ioat->tail);
+
+		if (dmadesc->callback_fn != NULL)
+			dmadesc->callback_fn(dmadesc->callback_arg, 0);
+
+		completed++;
+		ioat->tail++;
+		if (desc->hw_desc_bus_addr == status)
+			break;
+	}
+
+	ioat->last_seen = desc->hw_desc_bus_addr;
+
+	if (ioat->head == ioat->tail) {
+		ioat->is_completion_pending = FALSE;
+		callout_reset(&ioat->timer, IOAT_INTR_TIMO,
+		    ioat_timer_callback, ioat);
+	}
+
+	ioat->stats.descriptors_processed += completed;
+
+out:
+	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
+	mtx_unlock(&ioat->cleanup_lock);
+
+	if (completed != 0) {
+		ioat_putn(ioat, completed, IOAT_ACTIVE_DESCR_REF);
+		wakeup(&ioat->tail);
+	}
+
+	if (!is_ioat_halted(comp_update) && !is_ioat_suspended(comp_update))
+		return;
+
+	ioat->stats.channel_halts++;
+
+	/*
+	 * Fatal programming error on this DMA channel.  Flush any outstanding
+	 * work with error status and restart the engine.
+	 */
+	ioat_log_message(0, "Channel halted due to fatal programming error\n");
+	mtx_lock(&ioat->submit_lock);
+	mtx_lock(&ioat->cleanup_lock);
+	ioat->quiescing = TRUE;
+
+	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
+	ioat_halted_debug(ioat, chanerr);
+	ioat->stats.last_halt_chanerr = chanerr;
+
+	while (ioat_get_active(ioat) > 0) {
+		desc = ioat_get_ring_entry(ioat, ioat->tail);
+		dmadesc = &desc->bus_dmadesc;
+		CTR1(KTR_IOAT, "completing err desc %d", ioat->tail);
+
+		if (dmadesc->callback_fn != NULL)
+			dmadesc->callback_fn(dmadesc->callback_arg,
+			    chanerr_to_errno(chanerr));
+
+		ioat_putn_locked(ioat, 1, IOAT_ACTIVE_DESCR_REF);
+		ioat->tail++;
+		ioat->stats.descriptors_processed++;
+		ioat->stats.descriptors_error++;
+	}
+
+	/* Clear error status */
+	ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr);
+
+	mtx_unlock(&ioat->cleanup_lock);
+	mtx_unlock(&ioat->submit_lock);
+
+	ioat_log_message(0, "Resetting channel to recover from error\n");
+	error = taskqueue_enqueue(taskqueue_thread, &ioat->reset_task);
+	KASSERT(error == 0,
+	    ("%s: taskqueue_enqueue failed: %d", __func__, error));
+}
+
+static void
+ioat_reset_hw_task(void *ctx, int pending __unused)
+{
+	struct ioat_softc *ioat;
+	int error;
+
+	ioat = ctx;
+	ioat_log_message(1, "%s: Resetting channel\n", __func__);
+
+	error = ioat_reset_hw(ioat);
+	KASSERT(error == 0, ("%s: reset failed: %d", __func__, error));
+	(void)error;
+}
+
+/*
+ * User API functions
+ */
+unsigned
+ioat_get_nchannels(void)
+{
+
+	return (ioat_channel_index);
+}
+
+bus_dmaengine_t
+ioat_get_dmaengine(uint32_t index, int flags)
+{
+	struct ioat_softc *ioat;
+
+	KASSERT((flags & ~(M_NOWAIT | M_WAITOK)) == 0,
+	    ("invalid flags: 0x%08x", flags));
+	KASSERT((flags & (M_NOWAIT | M_WAITOK)) != (M_NOWAIT | M_WAITOK),
+	    ("invalid wait | nowait"));
+
+	if (index >= ioat_channel_index)
+		return (NULL);
+
+	ioat = ioat_channel[index];
+	if (ioat == NULL || ioat->destroying)
+		return (NULL);
+
+	if (ioat->quiescing) {
+		if ((flags & M_NOWAIT) != 0)
+			return (NULL);
+
+		mtx_lock(IOAT_REFLK);
+		while (ioat->quiescing && !ioat->destroying)
+			msleep(&ioat->quiescing, IOAT_REFLK, 0, "getdma", 0);
+		mtx_unlock(IOAT_REFLK);
+
+		if (ioat->destroying)
+			return (NULL);
+	}
+
+	/*
+	 * There's a race here between the quiescing check and HW reset or
+	 * module destroy.
+	 */
+	return (&ioat_get(ioat, IOAT_DMAENGINE_REF)->dmaengine);
+}
+
+void
+ioat_put_dmaengine(bus_dmaengine_t dmaengine)
+{
+	struct ioat_softc *ioat;
+
+	ioat = to_ioat_softc(dmaengine);
+	ioat_put(ioat, IOAT_DMAENGINE_REF);
+}
+
+int
+ioat_get_hwversion(bus_dmaengine_t dmaengine)
+{
+	struct ioat_softc *ioat;
+
+	ioat = to_ioat_softc(dmaengine);
+	return (ioat->version);
+}
+
+size_t
+ioat_get_max_io_size(bus_dmaengine_t dmaengine)
+{
+	struct ioat_softc *ioat;
+
+	ioat = to_ioat_softc(dmaengine);
+	return (ioat->max_xfer_size);
+}
+
+int
+ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay)
+{
+	struct ioat_softc *ioat;
+
+	ioat = to_ioat_softc(dmaengine);
+	if (!ioat->intrdelay_supported)
+		return (ENODEV);
+	if (delay > ioat->intrdelay_max)
+		return (ERANGE);
+
+	ioat_write_2(ioat, IOAT_INTRDELAY_OFFSET, delay);
+	ioat->cached_intrdelay =
+	    ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_US_MASK;
+	return (0);
+}
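+
+/*
+ * Example (assuming the delay units are microseconds, as the name
+ * IOAT_INTRDELAY_US_MASK suggests): ioat_set_interrupt_coalesce(dmaengine,
+ * 100) asks the channel to hold completion interrupts for up to 100 us, so
+ * a burst of descriptors can be retired under a single interrupt.
+ */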
+
+uint16_t
+ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine)
+{
+	struct ioat_softc *ioat;
+
+	ioat = to_ioat_softc(dmaengine);
+	return (ioat->intrdelay_max);
+}
+
+void
+ioat_acquire(bus_dmaengine_t dmaengine)
+{
+	struct ioat_softc *ioat;
+
+	ioat = to_ioat_softc(dmaengine);
+	mtx_lock(&ioat->submit_lock);
+	CTR0(KTR_IOAT, __func__);
+}
+
+int
+ioat_acquire_reserve(bus_dmaengine_t dmaengine, unsigned n, int mflags)
+{
+	struct ioat_softc *ioat;
+	int error;
+
+	ioat = to_ioat_softc(dmaengine);
+	ioat_acquire(dmaengine);
+
+	error = ioat_reserve_space(ioat, n, mflags);
+	if (error != 0)
+		ioat_release(dmaengine);
+	return (error);
+}
+
+void
+ioat_release(bus_dmaengine_t dmaengine)
+{
+	struct ioat_softc *ioat;
+
+	ioat = to_ioat_softc(dmaengine);
+	CTR0(KTR_IOAT, __func__);
+	ioat_write_2(ioat, IOAT_DMACOUNT_OFFSET, (uint16_t)ioat->hw_head);
+	mtx_unlock(&ioat->submit_lock);
+}
+
+static struct ioat_descriptor *
+ioat_op_generic(struct ioat_softc *ioat, uint8_t op,
+    uint32_t size, uint64_t src, uint64_t dst,
+    bus_dmaengine_callback_t callback_fn, void *callback_arg,
+    uint32_t flags)
+{
+	struct ioat_generic_hw_descriptor *hw_desc;
+	struct ioat_descriptor *desc;
+	int mflags;
+
+	mtx_assert(&ioat->submit_lock, MA_OWNED);
+
+	KASSERT((flags & ~_DMA_GENERIC_FLAGS) == 0,
+	    ("Unrecognized flag(s): %#x", flags & ~_DMA_GENERIC_FLAGS));
+	if ((flags & DMA_NO_WAIT) != 0)
+		mflags = M_NOWAIT;
+	else
+		mflags = M_WAITOK;
+
+	if (size > ioat->max_xfer_size) {
+		ioat_log_message(0, "%s: max_xfer_size = %d, requested = %u\n",
+		    __func__, ioat->max_xfer_size, (unsigned)size);
+		return (NULL);
+	}
+
+	if (ioat_reserve_space(ioat, 1, mflags) != 0)
+		return (NULL);
+
+	desc = ioat_get_ring_entry(ioat, ioat->head);
+	hw_desc = desc->u.generic;
+
+	hw_desc->u.control_raw = 0;
+	hw_desc->u.control_generic.op = op;
+	hw_desc->u.control_generic.completion_update = 1;
+
+	if ((flags & DMA_INT_EN) != 0)
+		hw_desc->u.control_generic.int_enable = 1;
+	if ((flags & DMA_FENCE) != 0)
+		hw_desc->u.control_generic.fence = 1;
+
+	hw_desc->size = size;
+	hw_desc->src_addr = src;
+	hw_desc->dest_addr = dst;
+
+	desc->bus_dmadesc.callback_fn = callback_fn;
+	desc->bus_dmadesc.callback_arg = callback_arg;
+	return (desc);
+}
+
+struct bus_dmadesc *
+ioat_null(bus_dmaengine_t dmaengine, bus_dmaengine_callback_t callback_fn,
+    void *callback_arg, uint32_t flags)
+{
+	struct ioat_dma_hw_descriptor *hw_desc;
+	struct ioat_descriptor *desc;
+	struct ioat_softc *ioat;
+
+	CTR0(KTR_IOAT, __func__);
+	ioat = to_ioat_softc(dmaengine);
+
+	desc = ioat_op_generic(ioat, IOAT_OP_COPY, 8, 0, 0, callback_fn,
+	    callback_arg, flags);
+	if (desc == NULL)
+		return (NULL);
+
+	hw_desc = desc->u.dma;
+	hw_desc->u.control.null = 1;
+	ioat_submit_single(ioat);
+	return (&desc->bus_dmadesc);
+}
+
+struct bus_dmadesc *
+ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst,
+    bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn,
+    void *callback_arg, uint32_t flags)
+{
+	struct ioat_dma_hw_descriptor *hw_desc;
+	struct ioat_descriptor *desc;
+	struct ioat_softc *ioat;
+
+	CTR0(KTR_IOAT, __func__);
+	ioat = to_ioat_softc(dmaengine);
+
+	if (((src | dst) & (0xffffull << 48)) != 0) {
+		ioat_log_message(0, "%s: High 16 bits of src/dst invalid\n",
+		    __func__);
+		return (NULL);
+	}
+
+	desc = ioat_op_generic(ioat, IOAT_OP_COPY, len, src, dst, callback_fn,
+	    callback_arg, flags);
+	if (desc == NULL)
+		return (NULL);
+
+	hw_desc = desc->u.dma;
+	if (g_ioat_debug_level >= 3)
+		dump_descriptor(hw_desc);
+
+	ioat_submit_single(ioat);
+	return (&desc->bus_dmadesc);
+}
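+
+/*
+ * Illustrative caller sketch (the callback, arguments and bus addresses are
+ * hypothetical) showing the acquire/submit/release protocol these entry
+ * points expect:
+ *
+ *	bus_dmaengine_t dma = ioat_get_dmaengine(0, M_WAITOK);
+ *	ioat_acquire(dma);
+ *	(void)ioat_copy(dma, dst_busaddr, src_busaddr, len, my_done_cb,
+ *	    my_arg, DMA_INT_EN);
+ *	ioat_release(dma);	(writes DMACOUNT, kicking the hardware)
+ *	...
+ *	ioat_put_dmaengine(dma);
+ */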
+
+struct bus_dmadesc *
+ioat_copy_8k_aligned(bus_dmaengine_t dmaengine, bus_addr_t dst1,
+    bus_addr_t dst2, bus_addr_t src1, bus_addr_t src2,
+    bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags)
+{
+	struct ioat_dma_hw_descriptor *hw_desc;
+	struct ioat_descriptor *desc;
+	struct ioat_softc *ioat;
+
+	CTR0(KTR_IOAT, __func__);
+	ioat = to_ioat_softc(dmaengine);
+
+	if (((src1 | src2 | dst1 | dst2) & (0xffffull << 48)) != 0) {
+		ioat_log_message(0, "%s: High 16 bits of src/dst invalid\n",
+		    __func__);
+		return (NULL);
+	}
+	if (((src1 | src2 | dst1 | dst2) & PAGE_MASK) != 0) {
+		ioat_log_message(0, "%s: Addresses must be page-aligned\n",
+		    __func__);
+		return (NULL);
+	}
+
+	desc = ioat_op_generic(ioat, IOAT_OP_COPY, 2 * PAGE_SIZE, src1, dst1,
+	    callback_fn, callback_arg, flags);
+	if (desc == NULL)
+		return (NULL);
+
+	hw_desc = desc->u.dma;
+	if (src2 != src1 + PAGE_SIZE) {
+		hw_desc->u.control.src_page_break = 1;
+		hw_desc->next_src_addr = src2;
+	}
+	if (dst2 != dst1 + PAGE_SIZE) {
+		hw_desc->u.control.dest_page_break = 1;
+		hw_desc->next_dest_addr = dst2;
+	}
+
+	if (g_ioat_debug_level >= 3)
+		dump_descriptor(hw_desc);
+
+	ioat_submit_single(ioat);
+	return (&desc->bus_dmadesc);
+}
+
+struct bus_dmadesc *
+ioat_copy_crc(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src,
+    bus_size_t len, uint32_t *initialseed, bus_addr_t crcptr,
+    bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags)
+{
+	struct ioat_crc32_hw_descriptor *hw_desc;
+	struct ioat_descriptor *desc;
+	struct ioat_softc *ioat;
+	uint32_t teststore;
+	uint8_t op;
+
+	CTR0(KTR_IOAT, __func__);
+	ioat = to_ioat_softc(dmaengine);
+
+	if ((ioat->capabilities & IOAT_DMACAP_MOVECRC) == 0) {
+		ioat_log_message(0, "%s: Device lacks MOVECRC capability\n",
+		    __func__);
+		return (NULL);
+	}
+	if (((src | dst) & (0xffffffull << 40)) != 0) {
+		ioat_log_message(0, "%s: High 24 bits of src/dst invalid\n",
+		    __func__);
+		return (NULL);
+	}
+	teststore = (flags & _DMA_CRC_TESTSTORE);
+	if (teststore == _DMA_CRC_TESTSTORE) {
+		ioat_log_message(0, "%s: TEST and STORE invalid\n", __func__);
+		return (NULL);
+	}
+	if (teststore == 0 && (flags & DMA_CRC_INLINE) != 0) {
+		ioat_log_message(0, "%s: INLINE invalid without TEST or STORE\n",
+		    __func__);
+		return (NULL);
+	}
+
+	switch (teststore) {
+	case DMA_CRC_STORE:
+		op = IOAT_OP_MOVECRC_STORE;
+		break;
+	case DMA_CRC_TEST:
+		op = IOAT_OP_MOVECRC_TEST;
+		break;
+	default:
+		KASSERT(teststore == 0, ("bogus"));
+		op = IOAT_OP_MOVECRC;
+		break;
+	}
+
+	if ((flags & DMA_CRC_INLINE) == 0 &&
+	    (crcptr & (0xffffffull << 40)) != 0) {
+		ioat_log_message(0,
+		    "%s: High 24 bits of crcptr invalid\n", __func__);
+		return (NULL);
+	}
+
+	desc = ioat_op_generic(ioat, op, len, src, dst, callback_fn,
+	    callback_arg, flags & ~_DMA_CRC_FLAGS);
+	if (desc == NULL)
+		return (NULL);
+
+	hw_desc = desc->u.crc32;
+
+	if ((flags & DMA_CRC_INLINE) == 0)
+		hw_desc->crc_address = crcptr;
+	else
+		hw_desc->u.control.crc_location = 1;
+
+	if (initialseed != NULL) {
+		hw_desc->u.control.use_seed = 1;
+		hw_desc->seed = *initialseed;
+	}
+
+	if (g_ioat_debug_level >= 3)
+		dump_descriptor(hw_desc);
+
+	ioat_submit_single(ioat);
+	return (&desc->bus_dmadesc);
+}
+
+struct bus_dmadesc *
+ioat_crc(bus_dmaengine_t dmaengine, bus_addr_t src, bus_size_t len,
+    uint32_t *initialseed, bus_addr_t crcptr,
+    bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags)
+{
+	struct ioat_crc32_hw_descriptor *hw_desc;
+	struct ioat_descriptor *desc;
+	struct ioat_softc *ioat;
+	uint32_t teststore;
+	uint8_t op;
+
+	CTR0(KTR_IOAT, __func__);
+	ioat = to_ioat_softc(dmaengine);
+
+	if ((ioat->capabilities & IOAT_DMACAP_CRC) == 0) {
+		ioat_log_message(0, "%s: Device lacks CRC capability\n",
+		    __func__);
+		return (NULL);
+	}
+	if ((src & (0xffffffull << 40)) != 0) {
+		ioat_log_message(0, "%s: High 24 bits of src invalid\n",
+		    __func__);
+		return (NULL);
+	}
+	teststore = (flags & _DMA_CRC_TESTSTORE);
+	if (teststore == _DMA_CRC_TESTSTORE) {
+		ioat_log_message(0, "%s: TEST and STORE invalid\n", __func__);
+		return (NULL);
+	}
+	if (teststore == 0 && (flags & DMA_CRC_INLINE) != 0) {
+		ioat_log_message(0, "%s: INLINE invalid without TEST or STORE\n",
+		    __func__);
+		return (NULL);
+	}
+
+	switch (teststore) {
+	case DMA_CRC_STORE:
+		op = IOAT_OP_CRC_STORE;
+		break;
+	case DMA_CRC_TEST:
+		op = IOAT_OP_CRC_TEST;
+		break;
+	default:
+		KASSERT(teststore == 0, ("bogus"));
+		op = IOAT_OP_CRC;
+		break;
+	}
+
+	if ((flags & DMA_CRC_INLINE) == 0 &&
+	    (crcptr & (0xffffffull << 40)) != 0) {
+		ioat_log_message(0,
+		    "%s: High 24 bits of crcptr invalid\n", __func__);
+		return (NULL);
+	}
+
+	desc = ioat_op_generic(ioat, op, len, src, 0, callback_fn,
+	    callback_arg, flags & ~_DMA_CRC_FLAGS);
+	if (desc == NULL)
+		return (NULL);
+
+	hw_desc = desc->u.crc32;
+
+	if ((flags & DMA_CRC_INLINE) == 0)
+		hw_desc->crc_address = crcptr;
+	else
+		hw_desc->u.control.crc_location = 1;
+
+	if (initialseed != NULL) {
+		hw_desc->u.control.use_seed = 1;
+		hw_desc->seed = *initialseed;
+	}
+
+	if (g_ioat_debug_level >= 3)
+		dump_descriptor(hw_desc);
+
+	ioat_submit_single(ioat);
+	return (&desc->bus_dmadesc);
+}
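+
+/*
+ * Summary of the CRC flag handling shared by ioat_copy_crc() and ioat_crc()
+ * above: DMA_CRC_STORE and DMA_CRC_TEST select the STORE/TEST opcode
+ * variants and are mutually exclusive; DMA_CRC_INLINE is only valid together
+ * with one of them and selects u.control.crc_location instead of an external
+ * crcptr, which must otherwise fit in 40 bits.
+ */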
+
+struct bus_dmadesc *
+ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, uint64_t fillpattern,
+    bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg,
+    uint32_t flags)
+{
+	struct ioat_fill_hw_descriptor *hw_desc;
+	struct ioat_descriptor *desc;
+	struct ioat_softc *ioat;
+
+	CTR0(KTR_IOAT, __func__);
+	ioat = to_ioat_softc(dmaengine);
+
+	if ((ioat->capabilities & IOAT_DMACAP_BFILL) == 0) {
+		ioat_log_message(0, "%s: Device lacks BFILL capability\n",
+		    __func__);
+		return (NULL);
+	}
+
+	if ((dst & (0xffffull << 48)) != 0) {
+		ioat_log_message(0, "%s: High 16 bits of dst invalid\n",
+		    __func__);
+		return (NULL);
+	}
+
+	desc = ioat_op_generic(ioat, IOAT_OP_FILL, len, fillpattern, dst,
+	    callback_fn, callback_arg, flags);
+	if (desc == NULL)
+		return (NULL);
+
+	hw_desc = desc->u.fill;
+	if (g_ioat_debug_level >= 3)
+		dump_descriptor(hw_desc);
+
+	ioat_submit_single(ioat);
+	return (&desc->bus_dmadesc);
+}
+
+/*
+ * Ring Management
+ */
+static inline uint32_t
+ioat_get_active(struct ioat_softc *ioat)
+{
+
+	return ((ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1));
+}
+
+static inline uint32_t
+ioat_get_ring_space(struct ioat_softc *ioat)
+{
+
+	return ((1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1);
+}
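+
+/*
+ * Worked example (sizes are illustrative): with ring_size_order = 8 the ring
+ * has 256 slots; if the free-running counters are head = 260 and tail = 250,
+ * ioat_get_active() = (260 - 250) & 255 = 10 descriptors in flight, and
+ * ioat_get_ring_space() = 256 - 10 - 1 = 245.  The "- 1" keeps one slot
+ * permanently unused, so a full ring is distinguishable from an empty one.
+ */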
+
+static struct ioat_descriptor *
+ioat_alloc_ring_entry(struct ioat_softc *ioat, int mflags)
+{
+	struct ioat_generic_hw_descriptor *hw_desc;
+	struct ioat_descriptor *desc;
+	int error, busdmaflag;
+
+	error = ENOMEM;
+	hw_desc = NULL;
+
+	if ((mflags & M_WAITOK) != 0)
+		busdmaflag = BUS_DMA_WAITOK;
+	else
+		busdmaflag = BUS_DMA_NOWAIT;
+
+	desc = malloc(sizeof(*desc), M_IOAT, mflags);
+	if (desc == NULL)
+		goto out;
+
+	bus_dmamem_alloc(ioat->hw_desc_tag, (void **)&hw_desc,
+	    BUS_DMA_ZERO | busdmaflag, &ioat->hw_desc_map);
+	if (hw_desc == NULL)
+		goto out;
+
+	memset(&desc->bus_dmadesc, 0, sizeof(desc->bus_dmadesc));
+	desc->u.generic = hw_desc;
+
+	error = bus_dmamap_load(ioat->hw_desc_tag, ioat->hw_desc_map, hw_desc,
+	    sizeof(*hw_desc), ioat_dmamap_cb, &desc->hw_desc_bus_addr,
+	    busdmaflag);
+	if (error)
+		goto out;
+
+out:
+	if (error) {
+		ioat_free_ring_entry(ioat, desc);
+		return (NULL);
+	}
+	return (desc);
+}
+
+static void
+ioat_free_ring_entry(struct ioat_softc *ioat, struct ioat_descriptor *desc)
+{
+
+	if (desc == NULL)
+		return;
+
+	if (desc->u.generic)
+		bus_dmamem_free(ioat->hw_desc_tag, desc->u.generic,
+		    ioat->hw_desc_map);
+	free(desc, M_IOAT);
+}
+
+/*
+ * Reserves space in this IOAT descriptor ring by ensuring enough slots remain
+ * for 'num_descs'.
+ *
+ * If mflags contains M_WAITOK, blocks until enough space is available.
+ *
+ * Returns zero on success, or an errno on error.  If num_descs is beyond the
+ * maximum ring size, returns EINVAL; if allocation would block and mflags
+ * contains M_NOWAIT, returns EAGAIN.
+ *
+ * Must be called with the submit_lock held; returns with the lock held.  The
+ * lock may be dropped to allocate the ring.
+ *
+ * (Adding entries to the ring requires the submit_lock, so holding it
+ * assures callers that the reserved room remains available.)
+ */
+static int
+ioat_reserve_space(struct ioat_softc *ioat, uint32_t num_descs, int mflags)
+{
+	struct ioat_descriptor **new_ring;
+	uint32_t order;
+	int error;
+
+	mtx_assert(&ioat->submit_lock, MA_OWNED);
+	error = 0;
+
+	if (num_descs < 1 || num_descs > (1 << IOAT_MAX_ORDER)) {
+		error = EINVAL;
+		goto out;
+	}
+	if (ioat->quiescing) {
+		error = ENXIO;
+		goto out;
+	}
+
+	for (;;) {
+		if (ioat_get_ring_space(ioat) >= num_descs)
+			goto out;
+
+		order = ioat->ring_size_order;
+		if (ioat->is_resize_pending || order == IOAT_MAX_ORDER) {
+			if ((mflags & M_WAITOK) != 0) {
+				msleep(&ioat->tail, &ioat->submit_lock, 0,
+				    "ioat_rsz", 0);
+				continue;
+			}
+
+			error = EAGAIN;
+			break;
+		}
+
+		ioat->is_resize_pending = TRUE;
+		for (;;) {
+			mtx_unlock(&ioat->submit_lock);
+
+			new_ring = ioat_prealloc_ring(ioat, 1 << (order + 1),
+			    TRUE, mflags);
+
+			mtx_lock(&ioat->submit_lock);
+			KASSERT(ioat->ring_size_order == order,
+			    ("is_resize_pending should protect order"));
+
+			if (new_ring == NULL) {
+				KASSERT((mflags & M_WAITOK) == 0,
+				    ("allocation failed"));
+				error = EAGAIN;
+				break;
+			}
+
+			error = ring_grow(ioat, order, new_ring);
+			if (error == 0)
+				break;
+		}
+		ioat->is_resize_pending = FALSE;
+		wakeup(&ioat->tail);
+		if (error)
+			break;
+	}
+
+out:
+	mtx_assert(&ioat->submit_lock, MA_OWNED);
+	return (error);
+}
+
+static struct ioat_descriptor **
+ioat_prealloc_ring(struct ioat_softc *ioat, uint32_t size, boolean_t need_dscr,
+    int mflags)
+{
+	struct ioat_descriptor **ring;
+	uint32_t i;
+	int error;
+
+	KASSERT(size > 0 && powerof2(size), ("bogus size"));
+
+	ring = malloc(size * sizeof(*ring), M_IOAT, M_ZERO | mflags);
+	if (ring == NULL)
+		return (NULL);
+
+	if (need_dscr) {
+		error = ENOMEM;
+		for (i = size / 2; i < size; i++) {
+			ring[i] = ioat_alloc_ring_entry(ioat, mflags);
+			if (ring[i] == NULL)
+				goto out;
+			ring[i]->id = i;
+		}
+	}
+	error = 0;
+
+out:
+	if (error != 0 && ring != NULL) {
+		ioat_free_ring(ioat, size, ring);
+		ring = NULL;
+	}
+	return (ring);
+}
+
+static void
+ioat_free_ring(struct ioat_softc *ioat, uint32_t size,
+    struct ioat_descriptor **ring)
+{
+	uint32_t i;
+
+	for (i = 0; i < size; i++) {
+		if (ring[i] != NULL)
+			ioat_free_ring_entry(ioat, ring[i]);
+	}
+	free(ring, M_IOAT);
+}
+
+static struct ioat_descriptor *
+ioat_get_ring_entry(struct ioat_softc *ioat, uint32_t index)
+{
+
+	return (ioat->ring[index % (1 << ioat->ring_size_order)]);
+}
+
+static int
+ring_grow(struct ioat_softc *ioat, uint32_t oldorder,
+    struct ioat_descriptor **newring)
+{
+	struct ioat_descriptor *tmp, *next;
+	struct ioat_dma_hw_descriptor *hw;
+	uint32_t oldsize, newsize, head, tail, i, end;
+	int error;
+
+	CTR0(KTR_IOAT, __func__);
+
+	mtx_assert(&ioat->submit_lock, MA_OWNED);
+
+	if (oldorder != ioat->ring_size_order || oldorder >= IOAT_MAX_ORDER) {
+		error = EINVAL;
+		goto out;
+	}
+
+	oldsize = (1 << oldorder);
+	newsize = (1 << (oldorder + 1));
+
+	mtx_lock(&ioat->cleanup_lock);
+
+	head = ioat->head & (oldsize - 1);
+	tail = ioat->tail & (oldsize - 1);
+
+	/* Copy old descriptors to new ring */
+	for (i = 0; i < oldsize; i++)
+		newring[i] = ioat->ring[i];
+
+	/*
+	 * If head has wrapped but tail hasn't, we must swap some descriptors
+	 * around so that tail can increment directly to head.
+	 */
+	if (head < tail) {
+		for (i = 0; i <= head; i++) {
+			tmp = newring[oldsize + i];
+
+			newring[oldsize + i] = newring[i];
+			newring[oldsize + i]->id = oldsize + i;
+
+			newring[i] = tmp;
+			newring[i]->id = i;
+		}
+		head += oldsize;
+	}
+
+	KASSERT(head >= tail, ("invariants"));
+
+	/* Head didn't wrap; we only need to link in oldsize..newsize */
+	if (head < oldsize) {
+		i = oldsize - 1;
+		end = newsize;
+	} else {
+		/* Head did wrap; link newhead..newsize and 0..oldhead */
+		i = head;
+		end = newsize + (head - oldsize) + 1;
+	}
+
+	/*
+	 * Fix up hardware ring, being careful not to trample the active
+	 * section (tail -> head).
+	 */
+	for (; i < end; i++) {
+		KASSERT((i & (newsize - 1)) < tail ||
+		    (i & (newsize - 1)) >= head, ("trampling snake"));
+
+		next = newring[(i + 1) & (newsize - 1)];
+		hw = newring[i & (newsize - 1)]->u.dma;
+		hw->next = next->hw_desc_bus_addr;
+	}
+
+	free(ioat->ring, M_IOAT);
+	ioat->ring = newring;
+	ioat->ring_size_order = oldorder + 1;
+	ioat->tail = tail;
+	ioat->head = head;
+	error = 0;
+
+	mtx_unlock(&ioat->cleanup_lock);
+out:
+	if (error)
+		ioat_free_ring(ioat, (1 << (oldorder + 1)), newring);
+	return (error);
+}
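+
+/*
+ * Worked example of the grow path above (commentary only): with oldsize = 4,
+ * masked head = 1 (head has wrapped) and tail = 3, the swap loop exchanges
+ * entries 0..1 with 4..5 in the new 8-entry ring, then head becomes 5, so
+ * the active window runs from tail = 3 to head = 5 without wrapping.  Only
+ * links outside that window are rewritten, leaving in-flight descriptors
+ * untouched.
+ */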
+
+static int
+ring_shrink(struct ioat_softc *ioat, uint32_t oldorder,
+    struct ioat_descriptor **newring)
+{
+	struct ioat_dma_hw_descriptor *hw;
+	struct ioat_descriptor *ent, *next;
+	uint32_t oldsize, newsize, current_idx, new_idx, i;
+	int error;
+
+	CTR0(KTR_IOAT, __func__);
+
+	mtx_assert(&ioat->submit_lock, MA_OWNED);
+
+	if (oldorder != ioat->ring_size_order || oldorder <= IOAT_MIN_ORDER) {
+		error = EINVAL;
+		goto out_unlocked;
+	}
+
+	oldsize = (1 << oldorder);
+	newsize = (1 << (oldorder - 1));
+
+	mtx_lock(&ioat->cleanup_lock);
+
+	/* Can't shrink below current active set! */
+	if (ioat_get_active(ioat) >= newsize) {
+		error = ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Copy current descriptors to the new ring, dropping the removed
+	 * descriptors.
+	 */
+	for (i = 0; i < newsize; i++) {
+		current_idx = (ioat->tail + i) & (oldsize - 1);
+		new_idx = (ioat->tail + i) & (newsize - 1);
+
+		newring[new_idx] = ioat->ring[current_idx];
+		newring[new_idx]->id = new_idx;
+	}
+
+	/* Free deleted descriptors */
+	for (i = newsize; i < oldsize; i++) {
+		ent = ioat_get_ring_entry(ioat, ioat->tail + i);
+		ioat_free_ring_entry(ioat, ent);
+	}
+
+	/* Fix up hardware ring. */
+	hw = newring[(ioat->tail + newsize - 1) & (newsize - 1)]->u.dma;
+	next = newring[(ioat->tail + newsize) & (newsize - 1)];
+	hw->next = next->hw_desc_bus_addr;
+
+	free(ioat->ring, M_IOAT);
+	ioat->ring = newring;
+	ioat->ring_size_order = oldorder - 1;
+	error = 0;
+
+out:
+	mtx_unlock(&ioat->cleanup_lock);
+out_unlocked:
+	if (error)
+		ioat_free_ring(ioat, (1 << (oldorder - 1)), newring);
+	return (error);
+}
+
+static void
+ioat_halted_debug(struct ioat_softc *ioat, uint32_t chanerr)
+{
+	struct ioat_descriptor *desc;
+
+	ioat_log_message(0, "Channel halted (%b)\n", (int)chanerr,
+	    IOAT_CHANERR_STR);
+	if (chanerr == 0)
+		return;
+
+	mtx_assert(&ioat->cleanup_lock, MA_OWNED);
+
+	desc = ioat_get_ring_entry(ioat, ioat->tail + 0);
+	dump_descriptor(desc->u.raw);
+
+	desc = ioat_get_ring_entry(ioat, ioat->tail + 1);
+	dump_descriptor(desc->u.raw);
+}
+
+static void
+ioat_timer_callback(void *arg)
+{
+	struct ioat_descriptor **newring;
+	struct ioat_softc *ioat;
+	uint32_t order;
+
+	ioat = arg;
+	ioat_log_message(1, "%s\n", __func__);
+
+	if (ioat->is_completion_pending) {
+		ioat_process_events(ioat);
+		return;
+	}
+
+	/* Slowly scale the ring down if idle. */
+	mtx_lock(&ioat->submit_lock);
+	order = ioat->ring_size_order;
+	if (ioat->is_resize_pending || order == IOAT_MIN_ORDER) {
+		mtx_unlock(&ioat->submit_lock);
+		goto out;
+	}
+	ioat->is_resize_pending = TRUE;
+	mtx_unlock(&ioat->submit_lock);
+
+	newring = ioat_prealloc_ring(ioat, 1 << (order - 1), FALSE,
+	    M_NOWAIT);
+
+	mtx_lock(&ioat->submit_lock);
+	KASSERT(ioat->ring_size_order == order,
+	    ("resize_pending protects order"));
+
+	if (newring != NULL)
+		ring_shrink(ioat, order, newring);
+
+	ioat->is_resize_pending = FALSE;
+	mtx_unlock(&ioat->submit_lock);
+
+out:
+	if (ioat->ring_size_order > IOAT_MIN_ORDER)
+		callout_reset(&ioat->timer, 10 * hz,
+		    ioat_timer_callback, ioat);
+}
+
+/*
+ * Support Functions
+ */
+static void
+ioat_submit_single(struct ioat_softc *ioat)
+{
+
+	ioat_get(ioat, IOAT_ACTIVE_DESCR_REF);
+	atomic_add_rel_int(&ioat->head, 1);
+	atomic_add_rel_int(&ioat->hw_head, 1);
+
+	if (!ioat->is_completion_pending) {
+		ioat->is_completion_pending = TRUE;
+		callout_reset(&ioat->timer, IOAT_INTR_TIMO,
+		    ioat_timer_callback, ioat);
+	}
+
+	ioat->stats.descriptors_submitted++;
+}
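+
+/*
+ * Note on the release-semantics increments above (added commentary): they
+ * ensure the descriptor contents written by the caller are visible before
+ * the updated counters are observed.  hw_head mirrors the value handed to
+ * the hardware's DMACOUNT register to make new work visible to the engine.
+ */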
+
+static int
+ioat_reset_hw(struct ioat_softc *ioat)
+{
+	uint64_t status;
+	uint32_t chanerr;
+	unsigned timeout;
+	int error;
+
+	mtx_lock(IOAT_REFLK);
+	ioat->quiescing = TRUE;
+	ioat_drain_locked(ioat);
+	mtx_unlock(IOAT_REFLK);
+
+	status = ioat_get_chansts(ioat);
+	if (is_ioat_active(status) || is_ioat_idle(status))
+		ioat_suspend(ioat);
+
+	/* Wait at most 20 ms */
+	for (timeout = 0; (is_ioat_active(status) || is_ioat_idle(status)) &&
+	    timeout < 20; timeout++) {
+		DELAY(1000);
+		status = ioat_get_chansts(ioat);
+	}
+	if (timeout == 20) {
+		error = ETIMEDOUT;
+		goto out;
+	}
+
+	KASSERT(ioat_get_active(ioat) == 0, ("active after quiesce"));
+
+	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
+	ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr);
+
+	/*
+	 * IOAT v3 workaround - program CHANERRMSK_INT with 3E07h to mask out
+	 * errors that can cause stability issues for IOAT v3.
+	 */
+	pci_write_config(ioat->device, IOAT_CFG_CHANERRMASK_INT_OFFSET, 0x3e07,
+	    4);
+	chanerr = pci_read_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, 4);
+	pci_write_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, chanerr, 4);
+
+	/*
+	 * BDXDE and BWD models reset MSI-X registers on device reset.
+	 * Save/restore their contents manually.
+	 */
+	if (ioat_model_resets_msix(ioat)) {
+		ioat_log_message(1, "device resets MSI-X registers; saving\n");
+		pci_save_state(ioat->device);
+	}
+
+	ioat_reset(ioat);
+
+	/* Wait at most 20 ms */
+	for (timeout = 0; ioat_reset_pending(ioat) && timeout < 20; timeout++)
+		DELAY(1000);
+	if (timeout == 20) {
+		error = ETIMEDOUT;
+		goto out;
+	}
+
+	if (ioat_model_resets_msix(ioat)) {
+		ioat_log_message(1, "device resets registers; restored\n");
+		pci_restore_state(ioat->device);
+	}
+
+	/* Reset attempts to return the hardware to "halted." */
+	status = ioat_get_chansts(ioat);
+	if (is_ioat_active(status) || is_ioat_idle(status)) {
+		/* So this really shouldn't happen... */
+		ioat_log_message(0, "Device is active after a reset?\n");
+		ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
+		error = 0;
+		goto out;
+	}
+
+	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
+	if (chanerr != 0) {
+		mtx_lock(&ioat->cleanup_lock);
+		ioat_halted_debug(ioat, chanerr);
+		mtx_unlock(&ioat->cleanup_lock);
+		error = EIO;
+		goto out;
+	}
+
+	/*
+	 * Bring device back online after reset.  Writing CHAINADDR brings the
+	 * device back to active.
+	 *
+	 * The internal ring counter resets to zero, so we have to start over
+	 * at zero as well.
+	 */
+	ioat->tail = ioat->head = ioat->hw_head = 0;
+	ioat->last_seen = 0;
+
+	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
+	ioat_write_chancmp(ioat, ioat->comp_update_bus_addr);
+	ioat_write_chainaddr(ioat, ioat->ring[0]->hw_desc_bus_addr);
+	error = 0;
+
+out:
+	mtx_lock(IOAT_REFLK);
+	ioat->quiescing = FALSE;
+	wakeup(&ioat->quiescing);
+	mtx_unlock(IOAT_REFLK);
+
+	if (error == 0)
+		error = ioat_start_channel(ioat);
+
+	return (error);
+}
+
+static int
+sysctl_handle_chansts(SYSCTL_HANDLER_ARGS)
+{
+	struct ioat_softc *ioat;
+	struct sbuf sb;
+	uint64_t status;
+	int error;
+
+	ioat = arg1;
+
+	status = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS;
+
+	sbuf_new_for_sysctl(&sb, NULL, 256, req);
+	switch (status) {
+	case IOAT_CHANSTS_ACTIVE:
+		sbuf_printf(&sb, "ACTIVE");
+		break;
+	case IOAT_CHANSTS_IDLE:
+		sbuf_printf(&sb, "IDLE");
+		break;
+	case IOAT_CHANSTS_SUSPENDED:
+		sbuf_printf(&sb, "SUSPENDED");
+		break;
+	case IOAT_CHANSTS_HALTED:
+		sbuf_printf(&sb, "HALTED");
+		break;
+	case IOAT_CHANSTS_ARMED:
+		sbuf_printf(&sb, "ARMED");
+		break;
+	default:
+		sbuf_printf(&sb, "UNKNOWN");
+		break;
+	}
+	error = sbuf_finish(&sb);
+	sbuf_delete(&sb);
+
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	return (EINVAL);
+}
+
+static int
+sysctl_handle_dpi(SYSCTL_HANDLER_ARGS)
+{
+	struct ioat_softc *ioat;
+	struct sbuf sb;
+#define	PRECISION	"1"
+	const uintmax_t factor = 10;
+	uintmax_t rate;
+	int error;
+
+	ioat = arg1;
+	sbuf_new_for_sysctl(&sb, NULL, 16, req);
+
+	if (ioat->stats.interrupts == 0) {
+		sbuf_printf(&sb, "NaN");
+		goto out;
+	}
+	rate = ioat->stats.descriptors_processed * factor /
+	    ioat->stats.interrupts;
+	sbuf_printf(&sb, "%ju.%." PRECISION "ju", rate / factor,
+	    rate % factor);
+#undef	PRECISION
+out:
+	error = sbuf_finish(&sb);
+	sbuf_delete(&sb);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	return (EINVAL);
+}
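+
+/*
+ * Worked example of the fixed-point math above: with 1234 descriptors
+ * processed over 100 interrupts, rate = 1234 * 10 / 100 = 123, which is
+ * printed as "12.3" descriptors per interrupt (one decimal digit of
+ * precision, with no floating point in the kernel).
+ */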
+
+static int
+sysctl_handle_error(SYSCTL_HANDLER_ARGS)
+{
+	struct ioat_descriptor *desc;
+	struct ioat_softc *ioat;
+	int error, arg;
+
+	ioat = arg1;
+
+	arg = 0;
+	error = SYSCTL_OUT(req, &arg, sizeof(arg));
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	error = SYSCTL_IN(req, &arg, sizeof(arg));
+	if (error != 0)
+		return (error);
+
+	if (arg != 0) {
+		ioat_acquire(&ioat->dmaengine);
+		desc = ioat_op_generic(ioat, IOAT_OP_COPY, 1,
+		    0xffff000000000000ull, 0xffff000000000000ull, NULL, NULL,
+		    0);
+		if (desc == NULL)
+			error = ENOMEM;
+		else
+			ioat_submit_single(ioat);
+		ioat_release(&ioat->dmaengine);
+	}
+	return (error);
+}
+
+static int
+sysctl_handle_reset(SYSCTL_HANDLER_ARGS)
+{
+	struct ioat_softc *ioat;
+	int error, arg;
+
+	ioat = arg1;
+
+	arg = 0;
+	error = SYSCTL_OUT(req, &arg, sizeof(arg));
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	error = SYSCTL_IN(req, &arg, sizeof(arg));
+	if (error != 0)
+		return (error);
+
+	if (arg != 0)
+		error = ioat_reset_hw(ioat);
+
+	return (error);
+}
+
+static void
+dump_descriptor(void *hw_desc)
+{
+	int i, j;
+
+	for (i = 0; i < 2; i++) {
+		for (j = 0; j < 8; j++)
+			printf("%08x ", ((uint32_t *)hw_desc)[i * 8 + j]);
+		printf("\n");
+	}
+}
+
+static void
+ioat_setup_sysctl(device_t device)
+{
+	struct sysctl_oid_list *par, *statpar, *state, *hammer;
+	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid *tree, *tmp;
+	struct ioat_softc *ioat;
+
+	ioat = DEVICE2SOFTC(device);
+	ctx = device_get_sysctl_ctx(device);
+	tree = device_get_sysctl_tree(device);
+	par = SYSCTL_CHILDREN(tree);
+
+	SYSCTL_ADD_INT(ctx, par, OID_AUTO, "version", CTLFLAG_RD,
+	    &ioat->version, 0, "HW version (0xMM form)");
+	SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "max_xfer_size", CTLFLAG_RD,
+	    &ioat->max_xfer_size, 0, "HW maximum transfer size");
+	SYSCTL_ADD_INT(ctx, par, OID_AUTO, "intrdelay_supported", CTLFLAG_RD,
+	    &ioat->intrdelay_supported, 0, "Is INTRDELAY supported");
+#ifdef notyet
+	SYSCTL_ADD_U16(ctx, par, OID_AUTO, "intrdelay_max", CTLFLAG_RD,
+	    &ioat->intrdelay_max, 0,
+	    "Maximum configurable INTRDELAY on this channel (microseconds)");
+#endif
+
+	tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "state", CTLFLAG_RD, NULL,
+	    "IOAT channel internal state");
+	state = SYSCTL_CHILDREN(tmp);
+
+	SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "ring_size_order", CTLFLAG_RD,
+	    &ioat->ring_size_order, 0, "SW descriptor ring size order");
+	SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "head", CTLFLAG_RD, &ioat->head,
+	    0, "SW descriptor head pointer index");
+	SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "tail", CTLFLAG_RD, &ioat->tail,
+	    0, "SW descriptor tail pointer index");
+	SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "hw_head", CTLFLAG_RD,
+	    &ioat->hw_head, 0, "HW DMACOUNT");
+
+	SYSCTL_ADD_UQUAD(ctx, state, OID_AUTO, "last_completion", CTLFLAG_RD,
+	    ioat->comp_update, "HW addr of last completion");
+
+	SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_resize_pending", CTLFLAG_RD,
+	    &ioat->is_resize_pending, 0, "resize pending");
+	SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_completion_pending",
+	    CTLFLAG_RD, &ioat->is_completion_pending, 0, "completion pending");
+	SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_reset_pending", CTLFLAG_RD,
+	    &ioat->is_reset_pending, 0, "reset pending");
+	SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_channel_running", CTLFLAG_RD,
+	    &ioat->is_channel_running, 0, "channel running");
+
+	SYSCTL_ADD_PROC(ctx, state, OID_AUTO, "chansts",
+	    CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_chansts, "A",
+	    "String of the channel status");
+
+#ifdef notyet
+	SYSCTL_ADD_U16(ctx, state, OID_AUTO, "intrdelay", CTLFLAG_RD,
+	    &ioat->cached_intrdelay, 0,
+	    "Current INTRDELAY on this channel (cached, microseconds)");
+#endif
+
+	tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "hammer", CTLFLAG_RD, NULL,
+	    "Big hammers (mostly for testing)");
+	hammer = SYSCTL_CHILDREN(tmp);
+
+	SYSCTL_ADD_PROC(ctx, hammer, OID_AUTO, "force_hw_reset",
+	    CTLTYPE_INT | CTLFLAG_RW, ioat, 0, sysctl_handle_reset, "I",
+	    "Set to non-zero to reset the hardware");
+	SYSCTL_ADD_PROC(ctx, hammer, OID_AUTO, "force_hw_error",
+	    CTLTYPE_INT | CTLFLAG_RW, ioat, 0, sysctl_handle_error, "I",
+	    "Set to non-zero to inject a recoverable hardware error");
+
+	tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "stats", CTLFLAG_RD, NULL,
+	    "IOAT channel statistics");
+	statpar = SYSCTL_CHILDREN(tmp);
+
+	SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "interrupts", CTLFLAG_RW,
+	    &ioat->stats.interrupts,
+	    "Number of interrupts processed on this channel");
+	SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "descriptors", CTLFLAG_RW,
+	    &ioat->stats.descriptors_processed,
+	    "Number of descriptors processed on this channel");
+	SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "submitted", CTLFLAG_RW,
+	    &ioat->stats.descriptors_submitted,
+	    "Number of descriptors submitted to this channel");
+	SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "errored", CTLFLAG_RW,
+	    &ioat->stats.descriptors_error,
+	    "Number of descriptors failed by channel errors");
+#ifdef notyet
+	SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "halts", CTLFLAG_RW,
+	    &ioat->stats.channel_halts, 0,
+	    "Number of times the channel has halted");
+	SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "last_halt_chanerr", CTLFLAG_RW,
+	    &ioat->stats.last_halt_chanerr, 0,
+	    "The raw CHANERR when the channel was last halted");
+#endif
+
+	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "desc_per_interrupt",
+	    CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_dpi, "A",
+	    "Descriptors per interrupt");
+}
+
+static inline struct ioat_softc *
+ioat_get(struct ioat_softc *ioat, enum ioat_ref_kind kind)
+{
+	uint32_t old;
+
+	KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus"));
+
+	old = atomic_fetchadd_32(&ioat->refcnt, 1);
+	KASSERT(old < UINT32_MAX, ("refcnt overflow"));
+
+#ifdef INVARIANTS
+	old = atomic_fetchadd_32(&ioat->refkinds[kind], 1);
+	KASSERT(old < UINT32_MAX, ("refcnt kind overflow"));
+#endif
+
+	return (ioat);
+}
+
+static inline void
+ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind)
+{
+
+	_ioat_putn(ioat, n, kind, FALSE);
+}
+
+static inline void
+ioat_putn_locked(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind)
+{
+
+	_ioat_putn(ioat, n, kind, TRUE);
+}
+
+static inline void
+_ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind,
+    boolean_t locked)
+{
+	uint32_t old;
+
+	KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus"));
+
+	if (n == 0)
+		return;
+
+#ifdef INVARIANTS
+	old = atomic_fetchadd_32(&ioat->refkinds[kind], -n);
+	KASSERT(old >= n, ("refcnt kind underflow"));
+#endif
+
+	/* Skip acquiring the lock if resulting refcnt > 0. */
+	for (;;) {
+		old = ioat->refcnt;
+		if (old <= n)
+			break;
+		if (atomic_cmpset_32(&ioat->refcnt, old, old - n))
+			return;
+	}
+
+	if (locked)
+		mtx_assert(IOAT_REFLK, MA_OWNED);
+	else
+		mtx_lock(IOAT_REFLK);
+
+	old = atomic_fetchadd_32(&ioat->refcnt, -n);
+	KASSERT(old >= n, ("refcnt error"));
+
+	if (old == n)
+		wakeup(IOAT_REFLK);
+	if (!locked)
+		mtx_unlock(IOAT_REFLK);
+}
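+
+/*
+ * Commentary on the fast path above: as long as dropping n references leaves
+ * the count positive, a bare compare-and-swap suffices.  Only a decrement
+ * that may reach zero takes IOAT_REFLK, so the final release and the wakeup
+ * observed by ioat_drain_locked() are serialized by the lock.
+ */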
+
+static inline void
+ioat_put(struct ioat_softc *ioat, enum ioat_ref_kind kind)
+{
+
+	ioat_putn(ioat, 1, kind);
+}
+
+static void
+ioat_drain_locked(struct ioat_softc *ioat)
+{
+
+	mtx_assert(IOAT_REFLK, MA_OWNED);
+	while (ioat->refcnt > 0)
+		msleep(IOAT_REFLK, IOAT_REFLK, 0, "ioat_drain", 0);
+}


Property changes on: trunk/sys/dev/ioat/ioat.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/ioat/ioat.h
===================================================================
--- trunk/sys/dev/ioat/ioat.h	                        (rev 0)
+++ trunk/sys/dev/ioat/ioat.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,221 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+__FBSDID("$FreeBSD: stable/10/sys/dev/ioat/ioat.h 301812 2016-06-10 18:40:03Z ngie $");
+
+#ifndef __IOAT_H__
+#define __IOAT_H__
+
+#include <sys/param.h>
+#include <machine/bus.h>
+
+/*
+ * This file defines the public interface to the IOAT driver.
+ */
+
+/*
+ * Enables an interrupt for this operation.  Typically, you would only enable
+ * this on the last operation in a group.
+ */
+#define	DMA_INT_EN	0x1
+/*
+ * Like M_NOWAIT.  Operations will return NULL if they cannot allocate a
+ * descriptor without blocking.
+ */
+#define	DMA_NO_WAIT	0x2
+/*
+ * Disallow prefetching the source of the following operation.  Ordinarily, DMA
+ * operations can be pipelined on some hardware.  E.g., operation 2's source
+ * may be prefetched before operation 1 completes.
+ */
+#define	DMA_FENCE	0x4
+#define	_DMA_GENERIC_FLAGS	(DMA_INT_EN | DMA_NO_WAIT | DMA_FENCE)
+
+/*
+ * Emit a CRC32C as the result of an ioat_copy_crc() or ioat_crc().
+ */
+#define	DMA_CRC_STORE	0x8
+
+/*
+ * Compare the CRC32C of an ioat_copy_crc() or ioat_crc() against an expected
+ * value.  It is invalid to specify both TEST and STORE.
+ */
+#define	DMA_CRC_TEST	0x10
+#define	_DMA_CRC_TESTSTORE	(DMA_CRC_STORE | DMA_CRC_TEST)
+
+/*
+ * Use an inline comparison CRC32C or emit an inline CRC32C result.  Invalid
+ * without one of STORE or TEST.
+ */
+#define	DMA_CRC_INLINE	0x20
+#define	_DMA_CRC_FLAGS	(DMA_CRC_STORE | DMA_CRC_TEST | DMA_CRC_INLINE)
+
+/*
+ * Hardware revision number.  Different hardware revisions support different
+ * features.  For example, 3.2 cannot read from MMIO space, while 3.3 can.
+ */
+#define	IOAT_VER_3_0			0x30
+#define	IOAT_VER_3_2			0x32
+#define	IOAT_VER_3_3			0x33
+
+typedef void *bus_dmaengine_t;
+struct bus_dmadesc;
+typedef void (*bus_dmaengine_callback_t)(void *arg, int error);
+
+unsigned ioat_get_nchannels(void);
+
+/*
+ * Called first to acquire a reference to the DMA channel
+ *
+ * Flags may be M_WAITOK or M_NOWAIT.
+ */
+bus_dmaengine_t ioat_get_dmaengine(uint32_t channel_index, int flags);
+
+/* Release the DMA channel */
+void ioat_put_dmaengine(bus_dmaengine_t dmaengine);
+
+/* Check the DMA engine's HW version */
+int ioat_get_hwversion(bus_dmaengine_t dmaengine);
+size_t ioat_get_max_io_size(bus_dmaengine_t dmaengine);
+
+/*
+ * Set interrupt coalescing on a DMA channel.
+ *
+ * The argument is in microseconds.  A zero value disables coalescing.  Any
+ * other value delays interrupt generation for N microseconds to provide
+ * opportunity to coalesce multiple operations into a single interrupt.
+ *
+ * Returns an error status, or zero on success.
+ *
+ * - ERANGE if the given value exceeds the delay supported by the hardware.
+ *   (All current hardware supports a maximum of 0x3fff microseconds delay.)
+ * - ENODEV if the hardware does not support interrupt coalescing.
+ */
+int ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay);
+
+/*
+ * Return the maximum supported coalescing period, for use in
+ * ioat_set_interrupt_coalesce().  If the hardware does not support coalescing,
+ * returns zero.
+ */
+uint16_t ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine);
+
+/*
+ * Acquire must be called before issuing an operation; Release is called
+ * after.  Multiple operations can be issued within the context of one
+ * acquire and release.
+ */
+void ioat_acquire(bus_dmaengine_t dmaengine);
+void ioat_release(bus_dmaengine_t dmaengine);
+
+/*
+ * Acquire_reserve can be called to ensure there is room for N descriptors.  If
+ * it succeeds, the next N valid operations will successfully enqueue.
+ *
+ * It may fail with:
+ *   - ENXIO if the channel is in an errored state, or the driver is being
+ *     unloaded
+ *   - EAGAIN if mflags included M_NOWAIT
+ *
+ * On failure, the caller does not hold the dmaengine.
+ */
+int ioat_acquire_reserve(bus_dmaengine_t dmaengine, unsigned n, int mflags);
+
+/*
+ * Issue a blockfill operation.  The 64-bit pattern 'fillpattern' is written to
+ * 'len' physically contiguous bytes at 'dst'.
+ *
+ * Only supported on devices with the BFILL capability.
+ */
+struct bus_dmadesc *ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst,
+    uint64_t fillpattern, bus_size_t len, bus_dmaengine_callback_t callback_fn,
+    void *callback_arg, uint32_t flags);
+
+/* Issues the copy data operation */
+struct bus_dmadesc *ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst,
+    bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn,
+    void *callback_arg, uint32_t flags);
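+
+/*
+ * Typical usage sketch for this interface (illustrative only; done_cb, arg,
+ * and the bus addresses are hypothetical):
+ *
+ *	bus_dmaengine_t eng = ioat_get_dmaengine(0, M_WAITOK);
+ *
+ *	ioat_acquire(eng);
+ *	(void)ioat_copy(eng, dst_busaddr, src_busaddr, len, done_cb, arg,
+ *	    DMA_INT_EN);
+ *	ioat_release(eng);	(operations are handed to hardware here)
+ *	...
+ *	ioat_put_dmaengine(eng);
+ */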
+
+/*
+ * Issue a copy data operation, with constraints:
+ *  - src1, src2, dst1, dst2 are all page-aligned addresses
+ *  - The quantity to copy is exactly 2 pages;
+ *  - src1 -> dst1, src2 -> dst2
+ *
+ * Why use this instead of normal _copy()?  You can copy two non-contiguous
+ * pages (src, dst, or both) with one descriptor.
+ */
+struct bus_dmadesc *ioat_copy_8k_aligned(bus_dmaengine_t dmaengine,
+    bus_addr_t dst1, bus_addr_t dst2, bus_addr_t src1, bus_addr_t src2,
+    bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags);
+
+/*
+ * Copy len bytes from src to dst, like ioat_copy().
+ *
+ * Additionally, accumulate a CRC32C of the data.
+ *
+ * If initialseed is not NULL, the value it points to is used to seed the
+ * initial value of the CRC32C.
+ *
+ * If flags include DMA_CRC_STORE and not DMA_CRC_INLINE, crcptr is written
+ * with the 32-bit CRC32C result (in wire format).
+ *
+ * If flags include DMA_CRC_TEST and not DMA_CRC_INLINE, the computed CRC32C is
+ * compared with the 32-bit CRC32C pointed to by crcptr.  If they do not match,
+ * a channel error is raised.
+ *
+ * If the DMA_CRC_INLINE flag is set, crcptr is ignored and the DMA engine uses
+ * the 4 bytes trailing the source data (TEST) or the destination data (STORE).
+ */
+struct bus_dmadesc *ioat_copy_crc(bus_dmaengine_t dmaengine, bus_addr_t dst,
+    bus_addr_t src, bus_size_t len, uint32_t *initialseed, bus_addr_t crcptr,
+    bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags);
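+
+/*
+ * Example flag combinations (illustrative): DMA_CRC_STORE | DMA_CRC_INLINE
+ * copies the data and appends the CRC32C in the 4 bytes after the
+ * destination data; DMA_CRC_TEST with a separate crcptr verifies the copied
+ * data against an expected CRC32C and raises a channel error on mismatch.
+ */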
+
+/*
+ * ioat_crc() is nearly identical to ioat_copy_crc(), but does not actually
+ * move data around.
+ *
+ * Like ioat_copy_crc, ioat_crc computes a CRC32C over len bytes pointed to by
+ * src.  The flags affect its operation in the same way, with one exception:
+ *
+ * If flags includes both DMA_CRC_STORE and DMA_CRC_INLINE, the computed CRC32C
+ * is written to the 4 bytes trailing the *source* data.
+ */
+struct bus_dmadesc *ioat_crc(bus_dmaengine_t dmaengine, bus_addr_t src,
+    bus_size_t len, uint32_t *initialseed, bus_addr_t crcptr,
+    bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags);
+
+/*
+ * Issues a null operation: the descriptor is submitted to the hardware, but
+ * the hardware does not move any data for it.
+ */
+struct bus_dmadesc *ioat_null(bus_dmaengine_t dmaengine,
+    bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags);
+
+
+#endif /* __IOAT_H__ */
+


Property changes on: trunk/sys/dev/ioat/ioat.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/ioat/ioat_hw.h
===================================================================
--- trunk/sys/dev/ioat/ioat_hw.h	                        (rev 0)
+++ trunk/sys/dev/ioat/ioat_hw.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,168 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+__FBSDID("$FreeBSD: stable/10/sys/dev/ioat/ioat_hw.h 300661 2016-05-25 07:09:54Z mav $");
+
+#ifndef __IOAT_HW_H__
+#define __IOAT_HW_H__
+
+#define	IOAT_MAX_CHANNELS		32
+
+#define	IOAT_CHANCNT_OFFSET		0x00
+
+#define	IOAT_XFERCAP_OFFSET		0x01
+/* Only bits [4:0] are valid. */
+#define	IOAT_XFERCAP_VALID_MASK		0x1f
+
+#define	IOAT_GENCTRL_OFFSET		0x02
+
+#define	IOAT_INTRCTRL_OFFSET		0x03
+#define	IOAT_INTRCTRL_MASTER_INT_EN	0x01
+
+#define	IOAT_ATTNSTATUS_OFFSET		0x04
+
+#define	IOAT_CBVER_OFFSET		0x08
+
+#define	IOAT_INTRDELAY_OFFSET		0x0C
+#define	IOAT_INTRDELAY_SUPPORTED	(1 << 15)
+/* Reserved.				(1 << 14) */
+/* [13:0] is the coalesce period, in microseconds. */
+#define	IOAT_INTRDELAY_US_MASK		((1 << 14) - 1)
+
+#define	IOAT_CS_STATUS_OFFSET		0x0E
+
+#define	IOAT_DMACAPABILITY_OFFSET	0x10
+#define	IOAT_DMACAP_PB			(1 << 0)
+#define	IOAT_DMACAP_CRC			(1 << 1)
+#define	IOAT_DMACAP_MARKER_SKIP		(1 << 2)
+#define	IOAT_DMACAP_OLD_XOR		(1 << 3)
+#define	IOAT_DMACAP_DCA			(1 << 4)
+#define	IOAT_DMACAP_MOVECRC		(1 << 5)
+#define	IOAT_DMACAP_BFILL		(1 << 6)
+#define	IOAT_DMACAP_EXT_APIC		(1 << 7)
+#define	IOAT_DMACAP_XOR			(1 << 8)
+#define	IOAT_DMACAP_PQ			(1 << 9)
+#define	IOAT_DMACAP_DMA_DIF		(1 << 10)
+#define	IOAT_DMACAP_DWBES		(1 << 13)
+#define	IOAT_DMACAP_RAID16SS		(1 << 17)
+#define	IOAT_DMACAP_DMAMC		(1 << 18)
+#define	IOAT_DMACAP_CTOS		(1 << 19)
+
+#define	IOAT_DMACAP_STR \
+    "\20\24Completion_Timeout_Support\23DMA_with_Multicasting_Support" \
+    "\22RAID_Super_descriptors\16Descriptor_Write_Back_Error_Support" \
+    "\13DMA_with_DIF\12PQ\11XOR\10Extended_APIC_ID\07Block_Fill\06Move_CRC" \
+    "\05DCA\04Old_XOR\03Marker_Skipping\02CRC\01Page_Break"
+
+/* DMA Channel Registers */
+#define	IOAT_CHANCTRL_OFFSET			0x80
+#define	IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK	0xF000
+#define	IOAT_CHANCTRL_COMPL_DCA_EN		0x0200
+#define	IOAT_CHANCTRL_CHANNEL_IN_USE		0x0100
+#define	IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL	0x0020
+#define	IOAT_CHANCTRL_ERR_INT_EN		0x0010
+#define	IOAT_CHANCTRL_ANY_ERR_ABORT_EN		0x0008
+#define	IOAT_CHANCTRL_ERR_COMPLETION_EN		0x0004
+#define	IOAT_CHANCTRL_INT_REARM			0x0001
+#define	IOAT_CHANCTRL_RUN			(IOAT_CHANCTRL_INT_REARM |\
+						 IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+						 IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\
+						 IOAT_CHANCTRL_ERR_INT_EN)
+
+#define	IOAT_CHANCMD_OFFSET		0x84
+#define	IOAT_CHANCMD_RESET		0x20
+#define	IOAT_CHANCMD_SUSPEND		0x04
+
+#define	IOAT_DMACOUNT_OFFSET		0x86
+
+#define	IOAT_CHANSTS_OFFSET_LOW		0x88
+#define	IOAT_CHANSTS_OFFSET_HIGH	0x8C
+#define	IOAT_CHANSTS_OFFSET		0x88
+
+#define	IOAT_CHANSTS_STATUS		0x7ULL
+#define	IOAT_CHANSTS_ACTIVE		0x0
+#define	IOAT_CHANSTS_IDLE		0x1
+#define	IOAT_CHANSTS_SUSPENDED		0x2
+#define	IOAT_CHANSTS_HALTED		0x3
+#define	IOAT_CHANSTS_ARMED		0x4
+
+#define	IOAT_CHANSTS_UNAFFILIATED_ERROR	0x8ULL
+#define	IOAT_CHANSTS_SOFT_ERROR		0x10ULL
+
+#define	IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK	(~0x3FULL)
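+
+/*
+ * CHANSTS packs the channel state into its low bits; masking with the value
+ * above strips them to recover the 64-byte-aligned bus address of the last
+ * completed hardware descriptor.
+ */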
+
+#define	IOAT_CHAINADDR_OFFSET_LOW	0x90
+#define	IOAT_CHAINADDR_OFFSET_HIGH	0x94
+
+#define	IOAT_CHANCMP_OFFSET_LOW		0x98
+#define	IOAT_CHANCMP_OFFSET_HIGH	0x9C
+
+#define	IOAT_CHANERR_OFFSET		0xA8
+
+#define	IOAT_CHANERR_XSADDERR		(1 << 0)
+#define	IOAT_CHANERR_XDADDERR		(1 << 1)
+#define	IOAT_CHANERR_NDADDERR		(1 << 2)
+#define	IOAT_CHANERR_DERR		(1 << 3)
+#define	IOAT_CHANERR_CHADDERR		(1 << 4)
+#define	IOAT_CHANERR_CCMDERR		(1 << 5)
+#define	IOAT_CHANERR_CUNCORERR		(1 << 6)
+#define	IOAT_CHANERR_DUNCORERR		(1 << 7)
+#define	IOAT_CHANERR_RDERR		(1 << 8)
+#define	IOAT_CHANERR_WDERR		(1 << 9)
+#define	IOAT_CHANERR_DCERR		(1 << 10)
+#define	IOAT_CHANERR_DXSERR		(1 << 11)
+#define	IOAT_CHANERR_CMPADDERR		(1 << 12)
+#define	IOAT_CHANERR_INTCFGERR		(1 << 13)
+#define	IOAT_CHANERR_SEDERR		(1 << 14)
+#define	IOAT_CHANERR_UNAFFERR		(1 << 15)
+#define	IOAT_CHANERR_CXPERR		(1 << 16)
+/* Reserved.				(1 << 17) */
+#define	IOAT_CHANERR_DCNTERR		(1 << 18)
+#define	IOAT_CHANERR_DIFFERR		(1 << 19)
+#define	IOAT_CHANERR_GTVERR		(1 << 20)
+#define	IOAT_CHANERR_ATVERR		(1 << 21)
+#define	IOAT_CHANERR_RTVERR		(1 << 22)
+#define	IOAT_CHANERR_BBERR		(1 << 23)
+#define	IOAT_CHANERR_RDIFFERR		(1 << 24)
+#define	IOAT_CHANERR_RGTVERR		(1 << 25)
+#define	IOAT_CHANERR_RATVERR		(1 << 26)
+#define	IOAT_CHANERR_RRTVERR		(1 << 27)
+
+#define	IOAT_CHANERR_STR \
+    "\20\34RRTVERR\33RATVERR\32RGTVERR\31RDIFFERR\30BBERR\27RTVERR\26ATVERR" \
+    "\25GTVERR\24DIFFERR\23DCNTERR\21CXPERR\20UNAFFERR\17SEDERR\16INTCFGERR" \
+    "\15CMPADDERR\14DXSERR\13DCERR\12WDERR\11RDERR\10DUNCORERR\07CUNCORERR" \
+    "\06CCMDERR\05CHADDERR\04DERR\03NDADDERR\02XDADDERR\01XSADDERR"
+
+
+#define	IOAT_CFG_CHANERR_INT_OFFSET		0x180
+#define	IOAT_CFG_CHANERRMASK_INT_OFFSET		0x184
+
+#define	IOAT_MIN_ORDER			4
+#define	IOAT_MAX_ORDER			16
+
+#endif /* __IOAT_HW_H__ */


Property changes on: trunk/sys/dev/ioat/ioat_hw.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/ioat/ioat_internal.h
===================================================================
--- trunk/sys/dev/ioat/ioat_internal.h	                        (rev 0)
+++ trunk/sys/dev/ioat/ioat_internal.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,601 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+__FBSDID("$FreeBSD: stable/10/sys/dev/ioat/ioat_internal.h 301812 2016-06-10 18:40:03Z ngie $");
+
+#ifndef __IOAT_INTERNAL_H__
+#define __IOAT_INTERNAL_H__
+
+#include <sys/_task.h>
+
+#define	DEVICE2SOFTC(dev)	((struct ioat_softc *) device_get_softc(dev))
+#define	KTR_IOAT		KTR_SPARE3
+
+#define	ioat_read_chancnt(ioat) \
+	ioat_read_1((ioat), IOAT_CHANCNT_OFFSET)
+
+#define	ioat_read_xfercap(ioat) \
+	(ioat_read_1((ioat), IOAT_XFERCAP_OFFSET) & IOAT_XFERCAP_VALID_MASK)
+
+#define	ioat_write_intrctrl(ioat, value) \
+	ioat_write_1((ioat), IOAT_INTRCTRL_OFFSET, (value))
+
+#define	ioat_read_cbver(ioat) \
+	(ioat_read_1((ioat), IOAT_CBVER_OFFSET) & 0xFF)
+
+#define	ioat_read_dmacapability(ioat) \
+	ioat_read_4((ioat), IOAT_DMACAPABILITY_OFFSET)
+
+#define	ioat_write_chanctrl(ioat, value) \
+	ioat_write_2((ioat), IOAT_CHANCTRL_OFFSET, (value))
+
+static __inline uint64_t
+ioat_bus_space_read_8_lower_first(bus_space_tag_t tag,
+    bus_space_handle_t handle, bus_size_t offset)
+{
+	return (bus_space_read_4(tag, handle, offset) |
+	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
+}
+
+static __inline void
+ioat_bus_space_write_8_lower_first(bus_space_tag_t tag,
+    bus_space_handle_t handle, bus_size_t offset, uint64_t val)
+{
+	bus_space_write_4(tag, handle, offset, val);
+	bus_space_write_4(tag, handle, offset + 4, val >> 32);
+}
+
+#ifdef __i386__
+#define ioat_bus_space_read_8 ioat_bus_space_read_8_lower_first
+#define ioat_bus_space_write_8 ioat_bus_space_write_8_lower_first
+#else
+#define ioat_bus_space_read_8(tag, handle, offset) \
+	bus_space_read_8((tag), (handle), (offset))
+#define ioat_bus_space_write_8(tag, handle, offset, val) \
+	bus_space_write_8((tag), (handle), (offset), (val))
+#endif
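+
+/*
+ * i386 has no native 8-byte bus_space accessors, so 64-bit registers are
+ * accessed as two 4-byte operations, low half first; ioat_get_chansts()
+ * relies on the same low-then-high ordering for pre-3.3 hardware.
+ */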
+
+#define ioat_read_1(ioat, offset) \
+	bus_space_read_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+	    (offset))
+
+#define ioat_read_2(ioat, offset) \
+	bus_space_read_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+	    (offset))
+
+#define ioat_read_4(ioat, offset) \
+	bus_space_read_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+	    (offset))
+
+#define ioat_read_8(ioat, offset) \
+	ioat_bus_space_read_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+	    (offset))
+
+#define ioat_read_double_4(ioat, offset) \
+	ioat_bus_space_read_8_lower_first((ioat)->pci_bus_tag, \
+	    (ioat)->pci_bus_handle, (offset))
+
+#define ioat_write_1(ioat, offset, value) \
+	bus_space_write_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+	    (offset), (value))
+
+#define ioat_write_2(ioat, offset, value) \
+	bus_space_write_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+	    (offset), (value))
+
+#define ioat_write_4(ioat, offset, value) \
+	bus_space_write_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+	    (offset), (value))
+
+#define ioat_write_8(ioat, offset, value) \
+	ioat_bus_space_write_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+	    (offset), (value))
+
+#define ioat_write_double_4(ioat, offset, value) \
+	ioat_bus_space_write_8_lower_first((ioat)->pci_bus_tag, \
+	    (ioat)->pci_bus_handle, (offset), (value))
+
+MALLOC_DECLARE(M_IOAT);
+
+SYSCTL_DECL(_hw_ioat);
+
+extern int g_ioat_debug_level;
+
+struct generic_dma_control {
+	uint32_t int_enable:1;
+	uint32_t src_snoop_disable:1;
+	uint32_t dest_snoop_disable:1;
+	uint32_t completion_update:1;
+	uint32_t fence:1;
+	uint32_t reserved1:1;
+	uint32_t src_page_break:1;
+	uint32_t dest_page_break:1;
+	uint32_t bundle:1;
+	uint32_t dest_dca:1;
+	uint32_t hint:1;
+	uint32_t reserved2:13;
+	uint32_t op:8;
+};
+
+struct ioat_generic_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct generic_dma_control control_generic;
+	} u;
+	uint64_t src_addr;
+	uint64_t dest_addr;
+	uint64_t next;
+	uint64_t reserved[4];
+};
+
+struct ioat_dma_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct generic_dma_control control_generic;
+		struct {
+			uint32_t int_enable:1;
+			uint32_t src_snoop_disable:1;
+			uint32_t dest_snoop_disable:1;
+			uint32_t completion_update:1;
+			uint32_t fence:1;
+			uint32_t null:1;
+			uint32_t src_page_break:1;
+			uint32_t dest_page_break:1;
+			uint32_t bundle:1;
+			uint32_t dest_dca:1;
+			uint32_t hint:1;
+			uint32_t reserved:13;
+			#define IOAT_OP_COPY 0x00
+			uint32_t op:8;
+		} control;
+	} u;
+	uint64_t src_addr;
+	uint64_t dest_addr;
+	uint64_t next;
+	uint64_t next_src_addr;
+	uint64_t next_dest_addr;
+	uint64_t user1;
+	uint64_t user2;
+};
+
+struct ioat_fill_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct generic_dma_control control_generic;
+		struct {
+			uint32_t int_enable:1;
+			uint32_t reserved:1;
+			uint32_t dest_snoop_disable:1;
+			uint32_t completion_update:1;
+			uint32_t fence:1;
+			uint32_t reserved2:2;
+			uint32_t dest_page_break:1;
+			uint32_t bundle:1;
+			uint32_t reserved3:15;
+			#define IOAT_OP_FILL 0x01
+			uint32_t op:8;
+		} control;
+	} u;
+	uint64_t src_data;
+	uint64_t dest_addr;
+	uint64_t next;
+	uint64_t reserved;
+	uint64_t next_dest_addr;
+	uint64_t user1;
+	uint64_t user2;
+};
+
+struct ioat_crc32_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct generic_dma_control control_generic;
+		struct {
+			uint32_t int_enable:1;
+			uint32_t src_snoop_disable:1;
+			uint32_t dest_snoop_disable:1;
+			uint32_t completion_update:1;
+			uint32_t fence:1;
+			uint32_t reserved1:3;
+			uint32_t bundle:1;
+			uint32_t dest_dca:1;
+			uint32_t hint:1;
+			uint32_t use_seed:1;
+			/*
+			 * crc_location:
+			 * For IOAT_OP_MOVECRC_TEST and IOAT_OP_CRC_TEST:
+			 * 0: comparison value is pointed to by CRC Address
+			 *    field.
+			 * 1: comparison value follows data in wire format
+			 *    ("inverted reflected bit order") in the 4 bytes
+			 *    following the source data.
+			 *
+			 * For IOAT_OP_CRC_STORE:
+			 * 0: Result will be stored at location pointed to by
+			 *    CRC Address field (in wire format).
+			 * 1: Result will be stored directly following the
+			 *    source data.
+			 *
+			 * For IOAT_OP_MOVECRC_STORE:
+			 * 0: Result will be stored at location pointed to by
+			 *    CRC Address field (in wire format).
+			 * 1: Result will be stored directly following the
+			 *    *destination* data.
+			 */
+			uint32_t crc_location:1;
+			uint32_t reserved2:11;
+			/*
+			 * MOVECRC - Move data in the same way as standard copy
+			 * operation, but also compute CRC32.
+			 *
+			 * CRC - Only compute CRC on source data.
+			 *
+			 * There is a CRC accumulator register in the hardware.
+			 * If 'initial' is set, it is initialized to the value
+			 * in 'seed.'
+			 *
+			 * In all modes, these operators accumulate size bytes
+			 * at src_addr into the running CRC32C.
+			 *
+			 * Store mode emits the accumulated CRC, in wire
+			 * format, as specified by the crc_location bit above.
+			 *
+			 * Test mode compares the accumulated CRC against the
+			 * reference CRC, as described in crc_location above.
+			 * On failure, halts the DMA engine with a CRC error
+			 * status.
+			 */
+			#define	IOAT_OP_MOVECRC		0x41
+			#define	IOAT_OP_MOVECRC_TEST	0x42
+			#define	IOAT_OP_MOVECRC_STORE	0x43
+			#define	IOAT_OP_CRC		0x81
+			#define	IOAT_OP_CRC_TEST	0x82
+			#define	IOAT_OP_CRC_STORE	0x83
+			uint32_t op:8;
+		} control;
+	} u;
+	uint64_t src_addr;
+	uint64_t dest_addr;
+	uint64_t next;
+	uint64_t next_src_addr;
+	uint64_t next_dest_addr;
+	uint32_t seed;
+	uint32_t reserved;
+	uint64_t crc_address;
+};
+
+struct ioat_xor_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct generic_dma_control control_generic;
+		struct {
+			uint32_t int_enable:1;
+			uint32_t src_snoop_disable:1;
+			uint32_t dest_snoop_disable:1;
+			uint32_t completion_update:1;
+			uint32_t fence:1;
+			uint32_t src_count:3;
+			uint32_t bundle:1;
+			uint32_t dest_dca:1;
+			uint32_t hint:1;
+			uint32_t reserved:13;
+			#define IOAT_OP_XOR 0x87
+			#define IOAT_OP_XOR_VAL 0x88
+			uint32_t op:8;
+		} control;
+	} u;
+	uint64_t src_addr;
+	uint64_t dest_addr;
+	uint64_t next;
+	uint64_t src_addr2;
+	uint64_t src_addr3;
+	uint64_t src_addr4;
+	uint64_t src_addr5;
+};
+
+struct ioat_xor_ext_hw_descriptor {
+	uint64_t src_addr6;
+	uint64_t src_addr7;
+	uint64_t src_addr8;
+	uint64_t next;
+	uint64_t reserved[4];
+};
+
+struct ioat_pq_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct generic_dma_control control_generic;
+		struct {
+			uint32_t int_enable:1;
+			uint32_t src_snoop_disable:1;
+			uint32_t dest_snoop_disable:1;
+			uint32_t completion_update:1;
+			uint32_t fence:1;
+			uint32_t src_count:3;
+			uint32_t bundle:1;
+			uint32_t dest_dca:1;
+			uint32_t hint:1;
+			uint32_t p_disable:1;
+			uint32_t q_disable:1;
+			uint32_t reserved:11;
+			#define IOAT_OP_PQ 0x89
+			#define IOAT_OP_PQ_VAL 0x8a
+			uint32_t op:8;
+		} control;
+	} u;
+	uint64_t src_addr;
+	uint64_t p_addr;
+	uint64_t next;
+	uint64_t src_addr2;
+	uint64_t src_addr3;
+	uint8_t  coef[8];
+	uint64_t q_addr;
+};
+
+struct ioat_pq_ext_hw_descriptor {
+	uint64_t src_addr4;
+	uint64_t src_addr5;
+	uint64_t src_addr6;
+	uint64_t next;
+	uint64_t src_addr7;
+	uint64_t src_addr8;
+	uint64_t reserved[2];
+};
+
+struct ioat_pq_update_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct generic_dma_control control_generic;
+		struct {
+			uint32_t int_enable:1;
+			uint32_t src_snoop_disable:1;
+			uint32_t dest_snoop_disable:1;
+			uint32_t completion_update:1;
+			uint32_t fence:1;
+			uint32_t src_cnt:3;
+			uint32_t bundle:1;
+			uint32_t dest_dca:1;
+			uint32_t hint:1;
+			uint32_t p_disable:1;
+			uint32_t q_disable:1;
+			uint32_t reserved:3;
+			uint32_t coef:8;
+			#define IOAT_OP_PQ_UP 0x8b
+			uint32_t op:8;
+		} control;
+	} u;
+	uint64_t src_addr;
+	uint64_t p_addr;
+	uint64_t next;
+	uint64_t src_addr2;
+	uint64_t p_src;
+	uint64_t q_src;
+	uint64_t q_addr;
+};
+
+struct ioat_raw_hw_descriptor {
+	uint64_t field[8];
+};
+
+struct bus_dmadesc {
+	bus_dmaengine_callback_t callback_fn;
+	void			 *callback_arg;
+};
+
+struct ioat_descriptor {
+	struct bus_dmadesc	bus_dmadesc;
+	union {
+		struct ioat_generic_hw_descriptor	*generic;
+		struct ioat_dma_hw_descriptor		*dma;
+		struct ioat_fill_hw_descriptor		*fill;
+		struct ioat_crc32_hw_descriptor		*crc32;
+		struct ioat_xor_hw_descriptor		*xor;
+		struct ioat_xor_ext_hw_descriptor	*xor_ext;
+		struct ioat_pq_hw_descriptor		*pq;
+		struct ioat_pq_ext_hw_descriptor	*pq_ext;
+		struct ioat_raw_hw_descriptor		*raw;
+	} u;
+	uint32_t		id;
+	bus_addr_t		hw_desc_bus_addr;
+};
+
+/* Unused by this driver at this time. */
+#define	IOAT_OP_MARKER		0x84
+
+/*
+ * Deprecated OPs -- v3 DMA generates an abort if given these.  And this driver
+ * doesn't support anything older than v3.
+ */
+#define	IOAT_OP_OLD_XOR		0x85
+#define	IOAT_OP_OLD_XOR_VAL	0x86
+
+enum ioat_ref_kind {
+	IOAT_DMAENGINE_REF = 0,
+	IOAT_ACTIVE_DESCR_REF,
+	IOAT_NUM_REF_KINDS
+};
+
+/* One of these per allocated PCI device. */
+struct ioat_softc {
+	bus_dmaengine_t		dmaengine;
+#define	to_ioat_softc(_dmaeng)						\
+({									\
+	bus_dmaengine_t *_p = (_dmaeng);				\
+	(struct ioat_softc *)((char *)_p -				\
+	    offsetof(struct ioat_softc, dmaengine));			\
+})
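+
+	/*
+	 * to_ioat_softc() is the usual container-of idiom: subtract the
+	 * offset of the 'dmaengine' member from a pointer to that member to
+	 * recover the enclosing softc.  The member is currently first in the
+	 * struct, so the offset is zero, but offsetof() keeps the macro
+	 * correct if the layout ever changes.
+	 */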
+
+	int			version;
+	unsigned		chan_idx;
+
+	struct mtx		submit_lock;
+	device_t		device;
+	bus_space_tag_t		pci_bus_tag;
+	bus_space_handle_t	pci_bus_handle;
+	int			pci_resource_id;
+	struct resource		*pci_resource;
+	uint32_t		max_xfer_size;
+	uint32_t		capabilities;
+	uint16_t		intrdelay_max;
+	uint16_t		cached_intrdelay;
+
+	struct resource		*res;
+	int			rid;
+	void			*tag;
+
+	bus_dma_tag_t		hw_desc_tag;
+	bus_dmamap_t		hw_desc_map;
+
+	bus_dma_tag_t		comp_update_tag;
+	bus_dmamap_t		comp_update_map;
+	uint64_t		*comp_update;
+	bus_addr_t		comp_update_bus_addr;
+
+	struct callout		timer;
+	struct task		reset_task;
+
+	boolean_t		quiescing;
+	boolean_t		destroying;
+	boolean_t		is_resize_pending;
+	boolean_t		is_completion_pending;
+	boolean_t		is_reset_pending;
+	boolean_t		is_channel_running;
+	boolean_t		intrdelay_supported;
+
+	uint32_t		head;
+	uint32_t		tail;
+	uint32_t		hw_head;
+	uint32_t		ring_size_order;
+	bus_addr_t		last_seen;
+
+	struct ioat_descriptor	**ring;
+
+	struct mtx		cleanup_lock;
+	volatile uint32_t	refcnt;
+#ifdef INVARIANTS
+	volatile uint32_t	refkinds[IOAT_NUM_REF_KINDS];
+#endif
+
+	struct {
+		uint64_t	interrupts;
+		uint64_t	descriptors_processed;
+		uint64_t	descriptors_error;
+		uint64_t	descriptors_submitted;
+
+		uint32_t	channel_halts;
+		uint32_t	last_halt_chanerr;
+	} stats;
+};
+
+void ioat_test_attach(void);
+void ioat_test_detach(void);
+
+static inline uint64_t
+ioat_get_chansts(struct ioat_softc *ioat)
+{
+	uint64_t status;
+
+	if (ioat->version >= IOAT_VER_3_3)
+		status = ioat_read_8(ioat, IOAT_CHANSTS_OFFSET);
+	else
+		/* Must read lower 4 bytes before upper 4 bytes. */
+		status = ioat_read_double_4(ioat, IOAT_CHANSTS_OFFSET);
+	return (status);
+}
+
+static inline void
+ioat_write_chancmp(struct ioat_softc *ioat, uint64_t addr)
+{
+
+	if (ioat->version >= IOAT_VER_3_3)
+		ioat_write_8(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
+	else
+		ioat_write_double_4(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
+}
+
+static inline void
+ioat_write_chainaddr(struct ioat_softc *ioat, uint64_t addr)
+{
+
+	if (ioat->version >= IOAT_VER_3_3)
+		ioat_write_8(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
+	else
+		ioat_write_double_4(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
+}
+
+static inline boolean_t
+is_ioat_active(uint64_t status)
+{
+	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+static inline boolean_t
+is_ioat_idle(uint64_t status)
+{
+	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_IDLE);
+}
+
+static inline boolean_t
+is_ioat_halted(uint64_t status)
+{
+	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
+}
+
+static inline boolean_t
+is_ioat_suspended(uint64_t status)
+{
+	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
+}
+
+static inline void
+ioat_suspend(struct ioat_softc *ioat)
+{
+	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_SUSPEND);
+}
+
+static inline void
+ioat_reset(struct ioat_softc *ioat)
+{
+	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
+}
+
+static inline boolean_t
+ioat_reset_pending(struct ioat_softc *ioat)
+{
+	uint8_t cmd;
+
+	cmd = ioat_read_1(ioat, IOAT_CHANCMD_OFFSET);
+	return ((cmd & IOAT_CHANCMD_RESET) != 0);
+}
+
+#endif /* __IOAT_INTERNAL_H__ */


Property changes on: trunk/sys/dev/ioat/ioat_internal.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/ioat/ioat_test.c
===================================================================
--- trunk/sys/dev/ioat/ioat_test.c	                        (rev 0)
+++ trunk/sys/dev/ioat/ioat_test.c	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,603 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/ioat/ioat_test.c 315071 2017-03-11 15:26:41Z avg $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/ioccom.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/stdarg.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+#include "ioat.h"
+#include "ioat_hw.h"
+#include "ioat_internal.h"
+#include "ioat_test.h"
+
+#ifndef time_after
+#define	time_after(a,b)		((long)(b) - (long)(a) < 0)
+#endif
+
+MALLOC_DEFINE(M_IOAT_TEST, "ioat_test", "ioat test allocations");
+
+#define	IOAT_MAX_BUFS	256
+
+struct test_transaction {
+	void			*buf[IOAT_MAX_BUFS];
+	uint32_t		length;
+	uint32_t		depth;
+	struct ioat_test	*test;
+	TAILQ_ENTRY(test_transaction)	entry;
+};
+
+#define	IT_LOCK()	mtx_lock(&ioat_test_lk)
+#define	IT_UNLOCK()	mtx_unlock(&ioat_test_lk)
+#define	IT_ASSERT()	mtx_assert(&ioat_test_lk, MA_OWNED)
+static struct mtx ioat_test_lk;
+MTX_SYSINIT(ioat_test_lk, &ioat_test_lk, "test coordination mtx", MTX_DEF);
+
+static int g_thread_index = 1;
+static struct cdev *g_ioat_cdev = NULL;
+
+#define	ioat_test_log(v, ...)	_ioat_test_log((v), "ioat_test: " __VA_ARGS__)
+static void _ioat_test_log(int verbosity, const char *fmt, ...);
+
+static void
+ioat_test_transaction_destroy(struct test_transaction *tx)
+{
+	struct ioat_test *test;
+	int i;
+
+	test = tx->test;
+
+	for (i = 0; i < IOAT_MAX_BUFS; i++) {
+		if (tx->buf[i] != NULL) {
+			if (test->testkind == IOAT_TEST_DMA_8K)
+				free(tx->buf[i], M_IOAT_TEST);
+			else
+				contigfree(tx->buf[i], tx->length, M_IOAT_TEST);
+			tx->buf[i] = NULL;
+		}
+	}
+
+	free(tx, M_IOAT_TEST);
+}
+
+static struct test_transaction *
+ioat_test_transaction_create(struct ioat_test *test, unsigned num_buffers)
+{
+	struct test_transaction *tx;
+	unsigned i;
+
+	tx = malloc(sizeof(*tx), M_IOAT_TEST, M_NOWAIT | M_ZERO);
+	if (tx == NULL)
+		return (NULL);
+
+	tx->length = test->buffer_size;
+
+	for (i = 0; i < num_buffers; i++) {
+		if (test->testkind == IOAT_TEST_DMA_8K)
+			tx->buf[i] = malloc(test->buffer_size, M_IOAT_TEST,
+			    M_NOWAIT);
+		else
+			tx->buf[i] = contigmalloc(test->buffer_size,
+			    M_IOAT_TEST, M_NOWAIT, 0, BUS_SPACE_MAXADDR,
+			    PAGE_SIZE, 0);
+
+		if (tx->buf[i] == NULL) {
+			ioat_test_transaction_destroy(tx);
+			return (NULL);
+		}
+	}
+	return (tx);
+}
+
+static void
+dump_hex(void *p, size_t chunks)
+{
+	size_t i, j;
+
+	for (i = 0; i < chunks; i++) {
+		for (j = 0; j < 8; j++)
+			printf("%08x ", ((uint32_t *)p)[i * 8 + j]);
+		printf("\n");
+	}
+}
+
+static bool
+ioat_compare_ok(struct test_transaction *tx)
+{
+	struct ioat_test *test;
+	char *dst, *src;
+	uint32_t i, j;
+
+	test = tx->test;
+
+	for (i = 0; i < tx->depth; i++) {
+		dst = tx->buf[2 * i + 1];
+		src = tx->buf[2 * i];
+
+		if (test->testkind == IOAT_TEST_FILL) {
+			for (j = 0; j < tx->length; j += sizeof(uint64_t)) {
+				if (memcmp(src, &dst[j],
+					MIN(sizeof(uint64_t), tx->length - j))
+				    != 0)
+					return (false);
+			}
+		} else if (test->testkind == IOAT_TEST_DMA) {
+			if (memcmp(src, dst, tx->length) != 0)
+				return (false);
+		} else if (test->testkind == IOAT_TEST_RAW_DMA) {
+			/*
+			 * Raw DMA transactions are not verified; dump the
+			 * target buffer for manual inspection instead.
+			 */
+			if (test->raw_write)
+				dst = test->raw_vtarget;
+			dump_hex(dst, tx->length / 32);
+		}
+	}
+	return (true);
+}
+
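+/*
+ * Completion callback: tally the result for the whole chain and move the
+ * transaction from the pending queue back to the free queue, waking any
+ * thread sleeping in ioat_test_submit_1_tx() or in the drain loop of
+ * ioat_dma_test().
+ */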
+static void
+ioat_dma_test_callback(void *arg, int error)
+{
+	struct test_transaction *tx;
+	struct ioat_test *test;
+
+	if (error != 0)
+		ioat_test_log(0, "%s: Got error: %d\n", __func__, error);
+
+	tx = arg;
+	test = tx->test;
+
+	if (test->verify && !ioat_compare_ok(tx)) {
+		ioat_test_log(0, "miscompare found\n");
+		atomic_add_32(&test->status[IOAT_TEST_MISCOMPARE], tx->depth);
+	} else if (!test->too_late)
+		atomic_add_32(&test->status[IOAT_TEST_OK], tx->depth);
+
+	IT_LOCK();
+	TAILQ_REMOVE(&test->pend_q, tx, entry);
+	TAILQ_INSERT_TAIL(&test->free_q, tx, entry);
+	wakeup(&test->free_q);
+	IT_UNLOCK();
+}
+
+static int
+ioat_test_prealloc_memory(struct ioat_test *test, int index)
+{
+	uint32_t i, j, k;
+	struct test_transaction *tx;
+
+	for (i = 0; i < test->transactions; i++) {
+		tx = ioat_test_transaction_create(test, test->chain_depth * 2);
+		if (tx == NULL) {
+			ioat_test_log(0, "tx == NULL - memory exhausted\n");
+			test->status[IOAT_TEST_NO_MEMORY]++;
+			return (ENOMEM);
+		}
+
+		TAILQ_INSERT_HEAD(&test->free_q, tx, entry);
+
+		tx->test = test;
+		tx->depth = test->chain_depth;
+
+		/*
+		 * Fill in the buffer pairs: each destination word gets val
+		 * (with the thread index in the top nibble) and each source
+		 * word its complement, so a pair compares equal only after
+		 * a successful copy.
+		 */
+		for (j = 0; j < (tx->length / sizeof(uint32_t)); j++) {
+			uint32_t val = j + (index << 28);
+
+			for (k = 0; k < test->chain_depth; k++) {
+				((uint32_t *)tx->buf[2*k])[j] = ~val;
+				((uint32_t *)tx->buf[2*k+1])[j] = val;
+			}
+		}
+	}
+	return (0);
+}
+
+static void
+ioat_test_release_memory(struct ioat_test *test)
+{
+	struct test_transaction *tx, *s;
+
+	TAILQ_FOREACH_SAFE(tx, &test->free_q, entry, s)
+		ioat_test_transaction_destroy(tx);
+	TAILQ_INIT(&test->free_q);
+
+	TAILQ_FOREACH_SAFE(tx, &test->pend_q, entry, s)
+		ioat_test_transaction_destroy(tx);
+	TAILQ_INIT(&test->pend_q);
+}
+
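+/*
+ * Take one transaction off the free queue (sleeping until one is
+ * available) and issue its chain.  Only the final link requests an
+ * interrupt and a callback, so completion status is accounted once per
+ * transaction.
+ */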
+static void
+ioat_test_submit_1_tx(struct ioat_test *test, bus_dmaengine_t dma)
+{
+	struct test_transaction *tx;
+	struct bus_dmadesc *desc;
+	bus_dmaengine_callback_t cb;
+	bus_addr_t src, dest;
+	uint64_t fillpattern;
+	uint32_t i, flags;
+
+	desc = NULL;
+
+	IT_LOCK();
+	while (TAILQ_EMPTY(&test->free_q))
+		msleep(&test->free_q, &ioat_test_lk, 0, "test_submit", 0);
+
+	tx = TAILQ_FIRST(&test->free_q);
+	TAILQ_REMOVE(&test->free_q, tx, entry);
+	TAILQ_INSERT_HEAD(&test->pend_q, tx, entry);
+	IT_UNLOCK();
+
+	if (test->testkind != IOAT_TEST_MEMCPY)
+		ioat_acquire(dma);
+	for (i = 0; i < tx->depth; i++) {
+		if (test->testkind == IOAT_TEST_MEMCPY) {
+			memcpy(tx->buf[2 * i + 1], tx->buf[2 * i], tx->length);
+			if (i == tx->depth - 1)
+				ioat_dma_test_callback(tx, 0);
+			continue;
+		}
+
+		src = vtophys((vm_offset_t)tx->buf[2*i]);
+		dest = vtophys((vm_offset_t)tx->buf[2*i+1]);
+
+		if (test->testkind == IOAT_TEST_RAW_DMA) {
+			if (test->raw_write)
+				dest = test->raw_target;
+			else
+				src = test->raw_target;
+		}
+
+		if (i == tx->depth - 1) {
+			cb = ioat_dma_test_callback;
+			flags = DMA_INT_EN;
+		} else {
+			cb = NULL;
+			flags = 0;
+		}
+
+		if (test->testkind == IOAT_TEST_DMA ||
+		    test->testkind == IOAT_TEST_RAW_DMA)
+			desc = ioat_copy(dma, dest, src, tx->length, cb, tx,
+			    flags);
+		else if (test->testkind == IOAT_TEST_FILL) {
+			fillpattern = *(uint64_t *)tx->buf[2*i];
+			desc = ioat_blockfill(dma, dest, fillpattern,
+			    tx->length, cb, tx, flags);
+		} else if (test->testkind == IOAT_TEST_DMA_8K) {
+			bus_addr_t src2, dst2;
+
+			src2 = vtophys((vm_offset_t)tx->buf[2*i] + PAGE_SIZE);
+			dst2 = vtophys((vm_offset_t)tx->buf[2*i+1] + PAGE_SIZE);
+
+			desc = ioat_copy_8k_aligned(dma, dest, dst2, src, src2,
+			    cb, tx, flags);
+		}
+		if (desc == NULL)
+			break;
+	}
+	if (test->testkind == IOAT_TEST_MEMCPY)
+		return;	/* no DMA engine was acquired for pure memcpy */
+	ioat_release(dma);
+
+	/*
+	 * We couldn't issue an IO -- either the device is being detached or
+	 * the HW was reset.  Essentially spin until the device comes back up
+	 * or our timer expires.
+	 */
+	if (desc == NULL && tx->depth > 0) {
+		atomic_add_32(&test->status[IOAT_TEST_NO_DMA_ENGINE], tx->depth);
+		IT_LOCK();
+		TAILQ_REMOVE(&test->pend_q, tx, entry);
+		TAILQ_INSERT_HEAD(&test->free_q, tx, entry);
+		IT_UNLOCK();
+	}
+}
+
+static void
+ioat_dma_test(void *arg)
+{
+	struct ioat_softc *ioat;
+	struct ioat_test *test;
+	bus_dmaengine_t dmaengine;
+	uint32_t loops;
+	int index, rc, start, end, error;
+
+	test = arg;
+	memset(__DEVOLATILE(void *, test->status), 0, sizeof(test->status));
+
+	if (test->testkind == IOAT_TEST_DMA_8K &&
+	    test->buffer_size != 2 * PAGE_SIZE) {
+		ioat_test_log(0, "Asked for 8k test and buffer size isn't 8k\n");
+		test->status[IOAT_TEST_INVALID_INPUT]++;
+		return;
+	}
+
+	if (test->buffer_size > 1024 * 1024) {
+		ioat_test_log(0, "Buffer size too large >1MB\n");
+		test->status[IOAT_TEST_NO_MEMORY]++;
+		return;
+	}
+
+	if (test->chain_depth * 2 > IOAT_MAX_BUFS) {
+		ioat_test_log(0, "Depth too large (> %u)\n",
+		    (unsigned)IOAT_MAX_BUFS / 2);
+		test->status[IOAT_TEST_NO_MEMORY]++;
+		return;
+	}
+
+	if (btoc((uint64_t)test->buffer_size * test->chain_depth *
+	    test->transactions) > (physmem / 4)) {
+		ioat_test_log(0, "Sanity check failed -- test would "
+		    "use more than 1/4 of phys mem.\n");
+		test->status[IOAT_TEST_NO_MEMORY]++;
+		return;
+	}
+
+	if ((uint64_t)test->transactions * test->chain_depth > (1<<16)) {
+		ioat_test_log(0, "Sanity check failed -- test would "
+		    "use more than available IOAT ring space.\n");
+		test->status[IOAT_TEST_NO_MEMORY]++;
+		return;
+	}
+
+	if (test->testkind >= IOAT_NUM_TESTKINDS) {
+		ioat_test_log(0, "Invalid kind %u\n",
+		    (unsigned)test->testkind);
+		test->status[IOAT_TEST_INVALID_INPUT]++;
+		return;
+	}
+
+	dmaengine = ioat_get_dmaengine(test->channel_index, M_NOWAIT);
+	if (dmaengine == NULL) {
+		ioat_test_log(0, "Couldn't acquire dmaengine\n");
+		test->status[IOAT_TEST_NO_DMA_ENGINE]++;
+		return;
+	}
+	ioat = to_ioat_softc(dmaengine);
+
+	if (test->testkind == IOAT_TEST_FILL &&
+	    (ioat->capabilities & IOAT_DMACAP_BFILL) == 0) {
+		ioat_test_log(0,
+		    "Hardware doesn't support block fill, aborting test\n");
+		test->status[IOAT_TEST_INVALID_INPUT]++;
+		goto out;
+	}
+
+	if (test->coalesce_period > ioat->intrdelay_max) {
+		ioat_test_log(0,
+		    "Hardware doesn't support intrdelay of %u us.\n",
+		    (unsigned)test->coalesce_period);
+		test->status[IOAT_TEST_INVALID_INPUT]++;
+		goto out;
+	}
+	error = ioat_set_interrupt_coalesce(dmaengine, test->coalesce_period);
+	if (error == ENODEV && test->coalesce_period == 0)
+		error = 0;
+	if (error != 0) {
+		ioat_test_log(0, "ioat_set_interrupt_coalesce: %d\n", error);
+		test->status[IOAT_TEST_INVALID_INPUT]++;
+		goto out;
+	}
+
+	if (test->zero_stats)
+		memset(&ioat->stats, 0, sizeof(ioat->stats));
+
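+	/*
+	 * The caller supplies the raw DMA target directly: a virtual
+	 * address is translated with vtophys(), while a physical address
+	 * is mapped with pmap_mapdev() so ioat_compare_ok() can dump it.
+	 */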
+	if (test->testkind == IOAT_TEST_RAW_DMA) {
+		if (test->raw_is_virtual) {
+			test->raw_vtarget = (void *)test->raw_target;
+			test->raw_target = vtophys(test->raw_vtarget);
+		} else {
+			test->raw_vtarget = pmap_mapdev(test->raw_target,
+			    test->buffer_size);
+		}
+	}
+
+	index = g_thread_index++;
+	TAILQ_INIT(&test->free_q);
+	TAILQ_INIT(&test->pend_q);
+
+	if (test->duration == 0)
+		ioat_test_log(1, "Thread %d: num_loops remaining: 0x%08x\n",
+		    index, test->transactions);
+	else
+		ioat_test_log(1, "Thread %d: starting\n", index);
+
+	rc = ioat_test_prealloc_memory(test, index);
+	if (rc != 0) {
+		ioat_test_log(0, "prealloc_memory: %d\n", rc);
+		goto out;
+	}
+	wmb();
+
+	test->too_late = false;
+	start = ticks;
+	end = start + (((sbintime_t)test->duration * hz) / 1000);
+
+	for (loops = 0;; loops++) {
+		if (test->duration == 0 && loops >= test->transactions)
+			break;
+		else if (test->duration != 0 && time_after(ticks, end)) {
+			test->too_late = true;
+			break;
+		}
+
+		ioat_test_submit_1_tx(test, dmaengine);
+	}
+
+	ioat_test_log(1, "Test Elapsed: %d ticks (overrun %d), %d sec.\n",
+	    ticks - start, ticks - end, (ticks - start) / hz);
+
+	IT_LOCK();
+	while (!TAILQ_EMPTY(&test->pend_q))
+		msleep(&test->free_q, &ioat_test_lk, 0, "ioattestcompl", hz);
+	IT_UNLOCK();
+
+	ioat_test_log(1, "Test Elapsed2: %d ticks (overrun %d), %d sec.\n",
+	    ticks - start, ticks - end, (ticks - start) / hz);
+
+	ioat_test_release_memory(test);
+out:
+	if (test->testkind == IOAT_TEST_RAW_DMA && !test->raw_is_virtual)
+		pmap_unmapdev((vm_offset_t)test->raw_vtarget,
+		    test->buffer_size);
+	ioat_put_dmaengine(dmaengine);
+}
+
+static int
+ioat_test_open(struct cdev *dev, int flags, int fmt, struct thread *td)
+{
+
+	return (0);
+}
+
+static int
+ioat_test_close(struct cdev *dev, int flags, int fmt, struct thread *td)
+{
+
+	return (0);
+}
+
+static int
+ioat_test_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, int flag,
+    struct thread *td)
+{
+
+	switch (cmd) {
+	case IOAT_DMATEST:
+		ioat_dma_test(arg);
+		break;
+	default:
+		return (EINVAL);
+	}
+	return (0);
+}
+
+static struct cdevsw ioat_cdevsw = {
+	.d_version =	D_VERSION,
+	.d_flags =	0,
+	.d_open =	ioat_test_open,
+	.d_close =	ioat_test_close,
+	.d_ioctl =	ioat_test_ioctl,
+	.d_name =	"ioat_test",
+};
+
+static int
+enable_ioat_test(bool enable)
+{
+
+	mtx_assert(&Giant, MA_OWNED);
+
+	if (enable && g_ioat_cdev == NULL) {
+		g_ioat_cdev = make_dev(&ioat_cdevsw, 0, UID_ROOT, GID_WHEEL,
+		    0600, "ioat_test");
+	} else if (!enable && g_ioat_cdev != NULL) {
+		destroy_dev(g_ioat_cdev);
+		g_ioat_cdev = NULL;
+	}
+	return (0);
+}
+
+static int
+sysctl_enable_ioat_test(SYSCTL_HANDLER_ARGS)
+{
+	int error, enabled;
+
+	enabled = (g_ioat_cdev != NULL);
+	error = sysctl_handle_int(oidp, &enabled, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	enable_ioat_test(enabled);
+	return (0);
+}
+SYSCTL_PROC(_hw_ioat, OID_AUTO, enable_ioat_test, CTLTYPE_INT | CTLFLAG_RW,
+    0, 0, sysctl_enable_ioat_test, "I",
+    "Non-zero: Enable the /dev/ioat_test device");
+
+void
+ioat_test_attach(void)
+{
+#ifdef notyet
+	char *val;
+
+	val = kern_getenv("hw.ioat.enable_ioat_test");
+	if (val != NULL && strcmp(val, "0") != 0) {
+#else
+	int val = 0;
+
+	TUNABLE_INT_FETCH("hw.ioat.enable_ioat_test", &val);
+	if (val != 0) {
+#endif
+		mtx_lock(&Giant);
+		enable_ioat_test(true);
+		mtx_unlock(&Giant);
+	}
+#ifdef notyet
+	freeenv(val);
+#endif
+}
+
+void
+ioat_test_detach(void)
+{
+
+	mtx_lock(&Giant);
+	enable_ioat_test(false);
+	mtx_unlock(&Giant);
+}
+
+static void
+_ioat_test_log(int verbosity, const char *fmt, ...)
+{
+	va_list argp;
+
+	if (verbosity > g_ioat_debug_level)
+		return;
+
+	va_start(argp, fmt);
+	vprintf(fmt, argp);
+	va_end(argp);
+}


Property changes on: trunk/sys/dev/ioat/ioat_test.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/ioat/ioat_test.h
===================================================================
--- trunk/sys/dev/ioat/ioat_test.h	                        (rev 0)
+++ trunk/sys/dev/ioat/ioat_test.h	2018-05-27 23:46:32 UTC (rev 10104)
@@ -0,0 +1,91 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+__FBSDID("$FreeBSD: stable/10/sys/dev/ioat/ioat_test.h 300661 2016-05-25 07:09:54Z mav $");
+
+#ifndef __IOAT_TEST_H__
+#define __IOAT_TEST_H__
+
+enum ioat_res {
+	IOAT_TEST_OK = 0,
+	IOAT_TEST_NO_DMA_ENGINE,
+	IOAT_TEST_NO_MEMORY,
+	IOAT_TEST_MISCOMPARE,
+	IOAT_TEST_INVALID_INPUT,
+	IOAT_NUM_RES
+};
+
+enum ioat_test_kind {
+	IOAT_TEST_FILL = 0,
+	IOAT_TEST_DMA,
+	IOAT_TEST_RAW_DMA,
+	IOAT_TEST_DMA_8K,
+	IOAT_TEST_MEMCPY,
+	IOAT_NUM_TESTKINDS
+};
+
+struct test_transaction;
+
+struct ioat_test {
+	volatile uint32_t status[IOAT_NUM_RES];
+	uint32_t channel_index;
+
+	enum ioat_test_kind testkind;
+
+	/* HW max of 1MB */
+	uint32_t buffer_size;
+	uint32_t chain_depth;
+	uint32_t transactions;
+
+	/*
+	 * If non-zero, duration is time in ms;
+	 * If zero, bounded by 'transactions' above.
+	 */
+	uint32_t duration;
+
+	/* If true, check for miscompares after a copy. */
+	bool verify;
+
+	/* DMA directly to/from some memory address */
+	uint64_t raw_target;
+	void *raw_vtarget;
+	bool raw_write;
+	bool raw_is_virtual;
+
+	bool zero_stats;
+	/* Configure coalesce period */
+	uint16_t coalesce_period;
+
+	/* Internal usage -- not test inputs */
+	TAILQ_HEAD(, test_transaction) free_q;
+	TAILQ_HEAD(, test_transaction) pend_q;
+	volatile bool too_late;
+};
+
+#define	IOAT_DMATEST	_IOWR('i', 0, struct ioat_test)
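+
+/*
+ * Minimal userland sketch (illustrative only; field values are arbitrary
+ * examples):
+ *
+ *	struct ioat_test t = {
+ *		.channel_index = 0,
+ *		.testkind = IOAT_TEST_DMA,
+ *		.buffer_size = 4096,
+ *		.chain_depth = 1,
+ *		.transactions = 1000,
+ *		.duration = 0,		// bounded by 'transactions'
+ *		.verify = true,
+ *	};
+ *	int fd = open("/dev/ioat_test", O_RDWR);
+ *
+ *	if (fd >= 0 && ioctl(fd, IOAT_DMATEST, &t) == 0)
+ *		printf("ok: %u\n", t.status[IOAT_TEST_OK]);
+ */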
+
+#endif /* __IOAT_TEST_H__ */


Property changes on: trunk/sys/dev/ioat/ioat_test.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property

