[Midnightbsd-cvs] src [9998] trunk/sys/i386: sync with freebsd

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Sun May 27 12:35:42 EDT 2018


Revision: 9998
          http://svnweb.midnightbsd.org/src/?rev=9998
Author:   laffer1
Date:     2018-05-27 12:35:41 -0400 (Sun, 27 May 2018)
Log Message:
-----------
sync with freebsd

Modified Paths:
--------------
    trunk/sys/i386/Makefile
    trunk/sys/i386/acpica/Makefile
    trunk/sys/i386/acpica/acpi_machdep.c
    trunk/sys/i386/acpica/acpi_wakecode.S
    trunk/sys/i386/bios/apm.c
    trunk/sys/i386/bios/apm.h
    trunk/sys/i386/bios/mca_machdep.c
    trunk/sys/i386/bios/mca_machdep.h
    trunk/sys/i386/bios/smapi.c
    trunk/sys/i386/bios/smapi_bios.S
    trunk/sys/i386/i386/apic_vector.s
    trunk/sys/i386/i386/atomic.c
    trunk/sys/i386/i386/atpic_vector.s
    trunk/sys/i386/i386/autoconf.c
    trunk/sys/i386/i386/bios.c
    trunk/sys/i386/i386/bioscall.s
    trunk/sys/i386/i386/bpf_jit_machdep.c
    trunk/sys/i386/i386/bpf_jit_machdep.h
    trunk/sys/i386/i386/db_disasm.c
    trunk/sys/i386/i386/db_interface.c
    trunk/sys/i386/i386/db_trace.c
    trunk/sys/i386/i386/elan-mmcr.c
    trunk/sys/i386/i386/elf_machdep.c
    trunk/sys/i386/i386/exception.s
    trunk/sys/i386/i386/gdb_machdep.c
    trunk/sys/i386/i386/genassym.c
    trunk/sys/i386/i386/geode.c
    trunk/sys/i386/i386/i686_mem.c
    trunk/sys/i386/i386/identcpu.c
    trunk/sys/i386/i386/in_cksum.c
    trunk/sys/i386/i386/initcpu.c
    trunk/sys/i386/i386/io.c
    trunk/sys/i386/i386/k6_mem.c
    trunk/sys/i386/i386/legacy.c
    trunk/sys/i386/i386/locore.s
    trunk/sys/i386/i386/longrun.c
    trunk/sys/i386/i386/machdep.c
    trunk/sys/i386/i386/mem.c
    trunk/sys/i386/i386/minidump_machdep.c
    trunk/sys/i386/i386/mp_clock.c
    trunk/sys/i386/i386/mp_machdep.c
    trunk/sys/i386/i386/mp_watchdog.c
    trunk/sys/i386/i386/mpboot.s
    trunk/sys/i386/i386/perfmon.c
    trunk/sys/i386/i386/pmap.c
    trunk/sys/i386/i386/ptrace_machdep.c
    trunk/sys/i386/i386/stack_machdep.c
    trunk/sys/i386/i386/support.s
    trunk/sys/i386/i386/swtch.s
    trunk/sys/i386/i386/symbols.raw
    trunk/sys/i386/i386/sys_machdep.c
    trunk/sys/i386/i386/trap.c
    trunk/sys/i386/i386/uio_machdep.c
    trunk/sys/i386/i386/vm86.c
    trunk/sys/i386/i386/vm86bios.s
    trunk/sys/i386/i386/vm_machdep.c

Added Paths:
-----------
    trunk/sys/i386/include/counter.h
    trunk/sys/i386/include/fdt.h
    trunk/sys/i386/include/ofw_machdep.h

Property Changed:
----------------
    trunk/sys/i386/acpica/acpi_wakecode.S
    trunk/sys/i386/bios/smapi_bios.S
    trunk/sys/i386/i386/apic_vector.s
    trunk/sys/i386/i386/atpic_vector.s
    trunk/sys/i386/i386/bioscall.s
    trunk/sys/i386/i386/exception.s
    trunk/sys/i386/i386/locore.s
    trunk/sys/i386/i386/mpboot.s
    trunk/sys/i386/i386/support.s
    trunk/sys/i386/i386/swtch.s
    trunk/sys/i386/i386/symbols.raw
    trunk/sys/i386/i386/vm86bios.s

Modified: trunk/sys/i386/Makefile
===================================================================
--- trunk/sys/i386/Makefile	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/Makefile	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,4 +1,5 @@
-# $FreeBSD$
+# $MidnightBSD$
+# $FreeBSD: stable/10/sys/i386/Makefile 224063 2011-07-15 17:27:26Z mckusick $
 #	@(#)Makefile	8.1 (Berkeley) 6/11/93
 
 # Makefile for i386 links, tags file

Modified: trunk/sys/i386/acpica/Makefile
===================================================================
--- trunk/sys/i386/acpica/Makefile	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/acpica/Makefile	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+# $MidnightBSD$
 # $FreeBSD: src/sys/i386/acpica/Makefile,v 1.8 2006/10/19 05:55:09 ru Exp $
 
 # Correct path for kernel builds

Modified: trunk/sys/i386/acpica/acpi_machdep.c
===================================================================
--- trunk/sys/i386/acpica/acpi_machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/acpica/acpi_machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/i386/acpica/acpi_machdep.c 237822 2012-06-29 21:24:56Z jhb $");
+__FBSDID("$FreeBSD: stable/10/sys/i386/acpica/acpi_machdep.c 246855 2013-02-15 22:43:08Z jkim $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -140,7 +140,7 @@
 	void *data;
 
 	off = pa & PAGE_MASK;
-	length = roundup(length + off, PAGE_SIZE);
+	length = round_page(length + off);
 	pa = pa & PG_FRAME;
 	va = (vm_offset_t)pmap_kenter_temporary(pa, offset) +
 	    (offset * PAGE_SIZE);
@@ -164,7 +164,7 @@
 
 	va = (vm_offset_t)data;
 	off = va & PAGE_MASK;
-	length = roundup(length + off, PAGE_SIZE);
+	length = round_page(length + off);
 	va &= ~PAGE_MASK;
 	while (length > 0) {
 		pmap_kremove(va);

Modified: trunk/sys/i386/acpica/acpi_wakecode.S
===================================================================
--- trunk/sys/i386/acpica/acpi_wakecode.S	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/acpica/acpi_wakecode.S	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2001 Takanori Watanabe <takawata at jp.freebsd.org>
  * Copyright (c) 2001-2012 Mitsuru IWASAKI <iwasaki at jp.freebsd.org>
@@ -26,7 +27,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: release/9.2.0/sys/i386/acpica/acpi_wakecode.S 235796 2012-05-22 17:44:01Z iwasaki $
+ * $FreeBSD: stable/10/sys/i386/acpica/acpi_wakecode.S 237027 2012-06-13 21:03:01Z jkim $
  */
 
 #include <machine/asmacros.h>
@@ -142,8 +143,8 @@
 	mov	%ax, %ds
 
 	/* Get PCB and return address. */
-	movl	wakeup_pcb - wakeup_start(%ebx), %esi
-	movl	wakeup_ret - wakeup_start(%ebx), %edi
+	movl	wakeup_pcb - wakeup_start(%ebx), %ecx
+	movl	wakeup_ret - wakeup_start(%ebx), %edx
 
 	/* Restore CR4 and CR3. */
 	movl	wakeup_cr4 - wakeup_start(%ebx), %eax
@@ -166,7 +167,7 @@
 	jmp	1f
 1:
 	/* Jump to return address. */
-	jmp	*%edi
+	jmp	*%edx
 
 	.data
 
@@ -202,4 +203,7 @@
 	.long	0
 wakeup_ret:
 	.long	0
+wakeup_gdt:		/* not used */
+	.word	0
+	.long	0
 dummy:


Property changes on: trunk/sys/i386/acpica/acpi_wakecode.S
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/bios/apm.c
===================================================================
--- trunk/sys/i386/bios/apm.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/bios/apm.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * APM (Advanced Power Management) BIOS Device Driver
  *
@@ -17,7 +18,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/bios/apm.c 241885 2012-10-22 13:06:09Z eadler $");
 
 #include <sys/param.h>
 #include <sys/systm.h>

Modified: trunk/sys/i386/bios/apm.h
===================================================================
--- trunk/sys/i386/bios/apm.h	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/bios/apm.h	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * APM (Advanced Power Management) BIOS Device Driver
  *
@@ -15,7 +16,7 @@
  *
  * Sep, 1994	Implemented on FreeBSD 1.1.5.1R (Toshiba AVS001WD)
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/bios/apm.h 183328 2008-09-24 18:46:39Z jhb $
  */
 
 #ifndef __APM_H__

Modified: trunk/sys/i386/bios/mca_machdep.c
===================================================================
--- trunk/sys/i386/bios/mca_machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/bios/mca_machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1999 Matthew N. Dodd <winter at jurai.net>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/bios/mca_machdep.c 134582 2004-08-31 21:51:51Z mdodd $");
 
 #include <sys/param.h>
 #include <sys/systm.h>

Modified: trunk/sys/i386/bios/mca_machdep.h
===================================================================
--- trunk/sys/i386/bios/mca_machdep.h	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/bios/mca_machdep.h	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1999 Matthew N. Dodd <winter at jurai.net>
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/bios/mca_machdep.h 50823 1999-09-03 02:04:28Z mdodd $
  */
 
 extern int	MCA_system;

Modified: trunk/sys/i386/bios/smapi.c
===================================================================
--- trunk/sys/i386/bios/smapi.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/bios/smapi.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2003 Matthew N. Dodd <winter at jurai.net>
  * All rights reserved.
@@ -25,11 +26,12 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/bios/smapi.c 299230 2016-05-08 09:02:51Z kib $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
+#include <sys/malloc.h>
 
 #include <sys/module.h>
 #include <sys/bus.h>
@@ -79,7 +81,7 @@
 	.d_version =	D_VERSION,
 	.d_ioctl =	smapi_ioctl,
 	.d_name =	"smapi",
-	.d_flags =	D_MEM | D_NEEDGIANT,
+	.d_flags =	D_NEEDGIANT,
 };
 
 static void	smapi_identify(driver_t *, device_t);
@@ -292,6 +294,7 @@
 		for (i = 0; i < count; i++) {
 			device_delete_child(device_get_parent(devs[i]), devs[i]);
 		}
+		free(devs, M_TEMP);
 		break;
 	default:
 		break;

Modified: trunk/sys/i386/bios/smapi_bios.S
===================================================================
--- trunk/sys/i386/bios/smapi_bios.S	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/bios/smapi_bios.S	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 #include <machine/asm.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/bios/smapi_bios.S 239869 2012-08-29 18:22:52Z dim $");
 
 /*
  * This is cribbed from the Linux thinkpad-4.1 driver by


Property changes on: trunk/sys/i386/bios/smapi_bios.S
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/apic_vector.s
===================================================================
--- trunk/sys/i386/i386/apic_vector.s	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/apic_vector.s	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1990 William F. Jolitz.
  * Copyright (c) 1990 The Regents of the University of California.
@@ -28,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	from: vector.s, 386BSD 0.1 unknown origin
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/apic_vector.s 302041 2016-06-21 04:51:55Z sephe $
  */
 
 /*
@@ -138,6 +139,25 @@
 	MEXITCOUNT
 	jmp	doreti
 
+#ifdef XENHVM
+/*
+ * Xen event channel upcall interrupt handler.
+ * Only used when the hypervisor supports direct vector callbacks.
+ */
+	.text
+	SUPERALIGN_TEXT
+IDTVEC(xen_intr_upcall)
+	PUSH_FRAME
+	SET_KERNEL_SREGS
+	cld
+	FAKE_MCOUNT(TF_EIP(%esp))
+	pushl	%esp
+	call	xen_intr_handle_upcall
+	add	$4, %esp
+	MEXITCOUNT
+	jmp	doreti
+#endif
+
 #ifdef SMP
 /*
  * Global address space TLB shootdown.
@@ -144,40 +164,22 @@
  */
 	.text
 	SUPERALIGN_TEXT
-IDTVEC(invltlb)
-	pushl	%eax
-	pushl	%ds
-	movl	$KDSEL, %eax		/* Kernel data selector */
-	movl	%eax, %ds
-
-#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
-	pushl	%fs
-	movl	$KPSEL, %eax		/* Private space selector */
-	movl	%eax, %fs
-	movl	PCPU(CPUID), %eax
-	popl	%fs
-#ifdef COUNT_XINVLTLB_HITS
-	incl	xhits_gbl(,%eax,4)
-#endif
-#ifdef COUNT_IPIS
-	movl	ipi_invltlb_counts(,%eax,4),%eax
-	incl	(%eax)
-#endif
-#endif
-
-	movl	%cr3, %eax		/* invalidate the TLB */
-	movl	%eax, %cr3
-
+invltlb_ret:
 	movl	lapic, %eax
 	movl	$0, LA_EOI(%eax)	/* End Of Interrupt to APIC */
+	POP_FRAME
+	iret
 
-	lock
-	incl	smp_tlb_wait
+	SUPERALIGN_TEXT
+IDTVEC(invltlb)
+	PUSH_FRAME
+	SET_KERNEL_SREGS
+	cld
 
-	popl	%ds
-	popl	%eax
-	iret
+	call	invltlb_handler
 
+	jmp	invltlb_ret
+
 /*
  * Single page TLB shootdown
  */
@@ -184,39 +186,14 @@
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(invlpg)
-	pushl	%eax
-	pushl	%ds
-	movl	$KDSEL, %eax		/* Kernel data selector */
-	movl	%eax, %ds
+	PUSH_FRAME
+	SET_KERNEL_SREGS
+	cld
 
-#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
-	pushl	%fs
-	movl	$KPSEL, %eax		/* Private space selector */
-	movl	%eax, %fs
-	movl	PCPU(CPUID), %eax
-	popl	%fs
-#ifdef COUNT_XINVLTLB_HITS
-	incl	xhits_pg(,%eax,4)
-#endif
-#ifdef COUNT_IPIS
-	movl	ipi_invlpg_counts(,%eax,4),%eax
-	incl	(%eax)
-#endif
-#endif
+	call	invlpg_handler
 
-	movl	smp_tlb_addr1, %eax
-	invlpg	(%eax)			/* invalidate single page */
+	jmp	invltlb_ret
 
-	movl	lapic, %eax
-	movl	$0, LA_EOI(%eax)	/* End Of Interrupt to APIC */
-
-	lock
-	incl	smp_tlb_wait
-
-	popl	%ds
-	popl	%eax
-	iret
-
 /*
  * Page range TLB shootdown.
  */
@@ -223,45 +200,14 @@
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(invlrng)
-	pushl	%eax
-	pushl	%edx
-	pushl	%ds
-	movl	$KDSEL, %eax		/* Kernel data selector */
-	movl	%eax, %ds
+	PUSH_FRAME
+	SET_KERNEL_SREGS
+	cld
 
-#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
-	pushl	%fs
-	movl	$KPSEL, %eax		/* Private space selector */
-	movl	%eax, %fs
-	movl	PCPU(CPUID), %eax
-	popl	%fs
-#ifdef COUNT_XINVLTLB_HITS
-	incl	xhits_rng(,%eax,4)
-#endif
-#ifdef COUNT_IPIS
-	movl	ipi_invlrng_counts(,%eax,4),%eax
-	incl	(%eax)
-#endif
-#endif
+	call	invlrng_handler
 
-	movl	smp_tlb_addr1, %edx
-	movl	smp_tlb_addr2, %eax
-1:	invlpg	(%edx)			/* invalidate single page */
-	addl	$PAGE_SIZE, %edx
-	cmpl	%eax, %edx
-	jb	1b
+	jmp	invltlb_ret
 
-	movl	lapic, %eax
-	movl	$0, LA_EOI(%eax)	/* End Of Interrupt to APIC */
-
-	lock
-	incl	smp_tlb_wait
-
-	popl	%ds
-	popl	%edx
-	popl	%eax
-	iret
-
 /*
  * Invalidate cache.
  */
@@ -268,33 +214,14 @@
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(invlcache)
-	pushl	%eax
-	pushl	%ds
-	movl	$KDSEL, %eax		/* Kernel data selector */
-	movl	%eax, %ds
+	PUSH_FRAME
+	SET_KERNEL_SREGS
+	cld
 
-#ifdef COUNT_IPIS
-	pushl	%fs
-	movl	$KPSEL, %eax		/* Private space selector */
-	movl	%eax, %fs
-	movl	PCPU(CPUID), %eax
-	popl	%fs
-	movl	ipi_invlcache_counts(,%eax,4),%eax
-	incl	(%eax)
-#endif
+	call	invlcache_handler
 
-	wbinvd
+	jmp	invltlb_ret
 
-	movl	lapic, %eax
-	movl	$0, LA_EOI(%eax)	/* End Of Interrupt to APIC */
-
-	lock
-	incl	smp_tlb_wait
-
-	popl	%ds
-	popl	%eax
-	iret
-
 /*
  * Handler for IPIs sent via the per-cpu IPI bitmap.
  */


Property changes on: trunk/sys/i386/i386/apic_vector.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/atomic.c
===================================================================
--- trunk/sys/i386/i386/atomic.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/atomic.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1999 Peter Jeremy
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/atomic.c 119452 2003-08-25 09:48:48Z obrien $");
 
 /* This file creates publically callable functions to perform various
  * simple arithmetic on memory which is atomic in the presence of

Modified: trunk/sys/i386/i386/atpic_vector.s
===================================================================
--- trunk/sys/i386/i386/atpic_vector.s	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/atpic_vector.s	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1990 William F. Jolitz.
  * Copyright (c) 1990 The Regents of the University of California.
@@ -28,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	from: vector.s, 386BSD 0.1 unknown origin
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/atpic_vector.s 209483 2010-06-23 20:44:07Z kib $
  */
 
 /*


Property changes on: trunk/sys/i386/i386/atpic_vector.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/autoconf.c
===================================================================
--- trunk/sys/i386/i386/autoconf.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/autoconf.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
@@ -33,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/autoconf.c 146794 2005-05-29 23:44:22Z marcel $");
 
 /*
  * Setup the system to run on the current machine.

Modified: trunk/sys/i386/i386/bios.c
===================================================================
--- trunk/sys/i386/i386/bios.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/bios.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1997 Michael Smith
  * Copyright (c) 1998 Jonathan Lemon
@@ -26,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/bios.c 282065 2015-04-27 08:02:12Z kib $");
 
 /*
  * Code for dealing with the BIOS in x86 PC systems.
@@ -372,9 +373,11 @@
 	    break;
 
 	default:
+	    va_end(ap);
 	    return (EINVAL);
 	}
     }
+    va_end(ap);
 
     if (flags & BIOSARGS_FLAG) {
 	if (arg_end - arg_start > ctob(16))
@@ -387,7 +390,7 @@
     args->seg.code32.limit = 0xffff;	
 
     ptd = (pd_entry_t *)rcr3();
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
     if (ptd == IdlePDPT)
 #else
     if (ptd == IdlePTD)
@@ -448,9 +451,11 @@
 	    break;
 
 	default:
+	    va_end(ap);
 	    return (EINVAL);
 	}
     }
+    va_end(ap);
 
     set_bios_selectors(&args->seg, flags);
     bioscall_vector.vec16.offset = (u_short)args->entry;

Modified: trunk/sys/i386/i386/bioscall.s
===================================================================
--- trunk/sys/i386/i386/bioscall.s	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/bioscall.s	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1997 Jonathan Lemon
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/bioscall.s 103436 2002-09-17 01:49:00Z peter $
  */
 
 /*


Property changes on: trunk/sys/i386/i386/bioscall.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/bpf_jit_machdep.c
===================================================================
--- trunk/sys/i386/i386/bpf_jit_machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/bpf_jit_machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (C) 2002-2003 NetGroup, Politecnico di Torino (Italy)
  * Copyright (C) 2005-2009 Jung-uk Kim <jkim at FreeBSD.org>
@@ -30,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/bpf_jit_machdep.c 207081 2010-04-22 23:47:19Z jkim $");
 
 #ifdef _KERNEL
 #include "opt_bpf.h"

Modified: trunk/sys/i386/i386/bpf_jit_machdep.h
===================================================================
--- trunk/sys/i386/i386/bpf_jit_machdep.h	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/bpf_jit_machdep.h	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (C) 2002-2003 NetGroup, Politecnico di Torino (Italy)
  * Copyright (C) 2005-2009 Jung-uk Kim <jkim at FreeBSD.org>
@@ -28,7 +29,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/bpf_jit_machdep.h 207081 2010-04-22 23:47:19Z jkim $
  */
 
 #ifndef _BPF_JIT_MACHDEP_H_

Modified: trunk/sys/i386/i386/db_disasm.c
===================================================================
--- trunk/sys/i386/i386/db_disasm.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/db_disasm.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Mach Operating System
  * Copyright (c) 1991,1990 Carnegie Mellon University
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/db_disasm.c 280272 2015-03-19 23:13:19Z markj $");
 
 /*
  * Instruction disassembler.
@@ -195,6 +196,26 @@
 /*0f*/	{ "",      FALSE, NONE,  0,	      0 },
 };
 
+static const struct inst db_inst_0f1x[] = {
+/*10*/	{ "",      FALSE, NONE,  0,	      0 },
+/*11*/	{ "",      FALSE, NONE,  0,	      0 },
+/*12*/	{ "",      FALSE, NONE,  0,	      0 },
+/*13*/	{ "",      FALSE, NONE,  0,	      0 },
+/*14*/	{ "",      FALSE, NONE,  0,	      0 },
+/*15*/	{ "",      FALSE, NONE,  0,	      0 },
+/*16*/	{ "",      FALSE, NONE,  0,	      0 },
+/*17*/	{ "",      FALSE, NONE,  0,	      0 },
+
+/*18*/	{ "",      FALSE, NONE,  0,	      0 },
+/*19*/	{ "",      FALSE, NONE,  0,	      0 },
+/*1a*/	{ "",      FALSE, NONE,  0,	      0 },
+/*1b*/	{ "",      FALSE, NONE,  0,	      0 },
+/*1c*/	{ "",      FALSE, NONE,  0,	      0 },
+/*1d*/	{ "",      FALSE, NONE,  0,	      0 },
+/*1e*/	{ "",      FALSE, NONE,  0,	      0 },
+/*1f*/	{ "nopl",  TRUE,  SDEP,  0,	      "nopw" },
+};
+
 static const struct inst db_inst_0f2x[] = {
 /*20*/	{ "mov",   TRUE,  LONG,  op2(CR,El),  0 },
 /*21*/	{ "mov",   TRUE,  LONG,  op2(DR,El),  0 },
@@ -356,7 +377,7 @@
 
 static const struct inst * const db_inst_0f[] = {
 	db_inst_0f0x,
-	0,
+	db_inst_0f1x,
 	db_inst_0f2x,
 	db_inst_0f3x,
 	db_inst_0f4x,
@@ -782,7 +803,7 @@
 /*c7*/	{ "mov",   TRUE,  LONG,  op2(I, E),   0 },
 
 /*c8*/	{ "enter", FALSE, NONE,  op2(Iw, Ib), 0 },
-/*c9*/	{ "leave", FALSE, NONE,  0,           0 },
+/*c9*/	{ "leave", FALSE, NONE,  0,	      0 },
 /*ca*/	{ "lret",  FALSE, NONE,  op1(Iw),     0 },
 /*cb*/	{ "lret",  FALSE, NONE,  0,	      0 },
 /*cc*/	{ "int",   FALSE, NONE,  op1(o3),     0 },
@@ -1266,7 +1287,7 @@
 		case 0xc8:
 			i_name = "monitor";
 			i_size = NONE;
-			i_mode = 0;			
+			i_mode = 0;
 			break;
 		case 0xc9:
 			i_name = "mwait";

Modified: trunk/sys/i386/i386/db_interface.c
===================================================================
--- trunk/sys/i386/i386/db_interface.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/db_interface.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Mach Operating System
  * Copyright (c) 1991,1990 Carnegie Mellon University
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/db_interface.c 139724 2005-01-05 19:10:48Z imp $");
 
 /*
  * Interface to new debugger.

Modified: trunk/sys/i386/i386/db_trace.c
===================================================================
--- trunk/sys/i386/i386/db_trace.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/db_trace.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Mach Operating System
  * Copyright (c) 1991,1990 Carnegie Mellon University
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/db_trace.c 290731 2015-11-12 23:49:47Z jhb $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -48,16 +49,10 @@
 #include <ddb/db_sym.h>
 #include <ddb/db_variables.h>
 
-static db_varfcn_t db_dr0;
-static db_varfcn_t db_dr1;
-static db_varfcn_t db_dr2;
-static db_varfcn_t db_dr3;
-static db_varfcn_t db_dr4;
-static db_varfcn_t db_dr5;
-static db_varfcn_t db_dr6;
-static db_varfcn_t db_dr7;
 static db_varfcn_t db_esp;
 static db_varfcn_t db_frame;
+static db_varfcn_t db_frame_seg;
+static db_varfcn_t db_gs;
 static db_varfcn_t db_ss;
 
 /*
@@ -65,10 +60,11 @@
  */
 #define	DB_OFFSET(x)	(db_expr_t *)offsetof(struct trapframe, x)
 struct db_variable db_regs[] = {
-	{ "cs",		DB_OFFSET(tf_cs),	db_frame },
-	{ "ds",		DB_OFFSET(tf_ds),	db_frame },
-	{ "es",		DB_OFFSET(tf_es),	db_frame },
-	{ "fs",		DB_OFFSET(tf_fs),	db_frame },
+	{ "cs",		DB_OFFSET(tf_cs),	db_frame_seg },
+	{ "ds",		DB_OFFSET(tf_ds),	db_frame_seg },
+	{ "es",		DB_OFFSET(tf_es),	db_frame_seg },
+	{ "fs",		DB_OFFSET(tf_fs),	db_frame_seg },
+	{ "gs",		NULL,			db_gs },
 	{ "ss",		NULL,			db_ss },
 	{ "eax",	DB_OFFSET(tf_eax),	db_frame },
 	{ "ecx",	DB_OFFSET(tf_ecx),	db_frame },
@@ -80,41 +76,9 @@
 	{ "edi",	DB_OFFSET(tf_edi),	db_frame },
 	{ "eip",	DB_OFFSET(tf_eip),	db_frame },
 	{ "efl",	DB_OFFSET(tf_eflags),	db_frame },
-#define	DB_N_SHOW_REGS	15	/* Don't show registers after here. */
-	{ "dr0",	NULL,			db_dr0 },
-	{ "dr1",	NULL,			db_dr1 },
-	{ "dr2",	NULL,			db_dr2 },
-	{ "dr3",	NULL,			db_dr3 },
-	{ "dr4",	NULL,			db_dr4 },
-	{ "dr5",	NULL,			db_dr5 },
-	{ "dr6",	NULL,			db_dr6 },
-	{ "dr7",	NULL,			db_dr7 },
 };
-struct db_variable *db_eregs = db_regs + DB_N_SHOW_REGS;
+struct db_variable *db_eregs = db_regs + nitems(db_regs);
 
-#define DB_DRX_FUNC(reg)		\
-static int				\
-db_ ## reg (vp, valuep, op)		\
-	struct db_variable *vp;		\
-	db_expr_t * valuep;		\
-	int op;				\
-{					\
-	if (op == DB_VAR_GET)		\
-		*valuep = r ## reg ();	\
-	else				\
-		load_ ## reg (*valuep); \
-	return (1);			\
-}
-
-DB_DRX_FUNC(dr0)
-DB_DRX_FUNC(dr1)
-DB_DRX_FUNC(dr2)
-DB_DRX_FUNC(dr3)
-DB_DRX_FUNC(dr4)
-DB_DRX_FUNC(dr5)
-DB_DRX_FUNC(dr6)
-DB_DRX_FUNC(dr7)
-
 static __inline int
 get_esp(struct trapframe *tf)
 {
@@ -139,6 +103,22 @@
 }
 
 static int
+db_frame_seg(struct db_variable *vp, db_expr_t *valuep, int op)
+{
+	uint16_t *reg;
+
+	if (kdb_frame == NULL)
+		return (0);
+
+	reg = (uint16_t *)((uintptr_t)kdb_frame + (db_expr_t)vp->valuep);
+	if (op == DB_VAR_GET)
+		*valuep = *reg;
+	else
+		*reg = *valuep;
+	return (1);
+}
+
+static int
 db_esp(struct db_variable *vp, db_expr_t *valuep, int op)
 {
 
@@ -153,6 +133,17 @@
 }
 
 static int
+db_gs(struct db_variable *vp, db_expr_t *valuep, int op)
+{
+
+	if (op == DB_VAR_GET)
+		*valuep = rgs();
+	else
+		load_gs(*valuep);
+	return (1);
+}
+
+static int
 db_ss(struct db_variable *vp, db_expr_t *valuep, int op)
 {
 
@@ -390,7 +381,7 @@
 
 static int
 db_backtrace(struct thread *td, struct trapframe *tf, struct i386_frame *frame,
-    db_addr_t pc, int count)
+    db_addr_t pc, register_t sp, int count)
 {
 	struct i386_frame *actframe;
 #define MAXNARG	16
@@ -447,7 +438,21 @@
 		 */
 		actframe = frame;
 		if (first) {
-			if (tf != NULL) {
+			first = FALSE;
+			if (sym == C_DB_SYM_NULL && sp != 0) {
+				/*
+				 * If a symbol couldn't be found, we've probably
+				 * jumped to a bogus location, so try and use
+				 * the return address to find our caller.
+				 */
+				db_print_stack_entry(name, 0, 0, 0, pc,
+				    NULL);
+				pc = db_get_value(sp, 4, FALSE);
+				if (db_search_symbol(pc, DB_STGY_PROC,
+				    &offset) == C_DB_SYM_NULL)
+					break;
+				continue;
+			} else if (tf != NULL) {
 				instr = db_get_value(pc, 4, FALSE);
 				if ((instr & 0xffffff) == 0x00e58955) {
 					/* pushl %ebp; movl %esp, %ebp */
@@ -475,7 +480,6 @@
 				    actframe);
 				break;
 			}
-			first = FALSE;
 		}
 
 		argp = &actframe->f_arg0;
@@ -522,7 +526,7 @@
 	frame = (struct i386_frame *)ebp;
 	callpc = (db_addr_t)db_get_value((int)&frame->f_retaddr, 4, FALSE);
 	frame = frame->f_frame;
-	db_backtrace(curthread, NULL, frame, callpc, -1);
+	db_backtrace(curthread, NULL, frame, callpc, 0, -1);
 }
 
 int
@@ -529,10 +533,12 @@
 db_trace_thread(struct thread *thr, int count)
 {
 	struct pcb *ctx;
+	struct trapframe *tf;
 
 	ctx = kdb_thr_ctx(thr);
-	return (db_backtrace(thr, NULL, (struct i386_frame *)ctx->pcb_ebp,
-		    ctx->pcb_eip, count));
+	tf = thr == kdb_thread ? kdb_frame : NULL;
+	return (db_backtrace(thr, tf, (struct i386_frame *)ctx->pcb_ebp,
+	    ctx->pcb_eip, ctx->pcb_esp, count));
 }
 
 int

Modified: trunk/sys/i386/i386/elan-mmcr.c
===================================================================
--- trunk/sys/i386/i386/elan-mmcr.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/elan-mmcr.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * ----------------------------------------------------------------------------
  * "THE BEER-WARE LICENSE" (Revision 42):
@@ -39,7 +40,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/elan-mmcr.c 214346 2010-10-25 15:28:03Z jhb $");
 
 #include "opt_cpu.h"
 #include <sys/param.h>

Modified: trunk/sys/i386/i386/elf_machdep.c
===================================================================
--- trunk/sys/i386/i386/elf_machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/elf_machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright 1996-1998 John D. Polstra.
  * All rights reserved.
@@ -24,8 +25,10 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/elf_machdep.c 294136 2016-01-16 07:56:49Z dchagin $");
 
+#include "opt_cpu.h"
+
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
@@ -45,7 +48,12 @@
 
 #include <machine/elf.h>
 #include <machine/md_var.h>
+#include <machine/npx.h>
 
+#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
+#define CPU_ENABLE_SSE
+#endif
+
 struct sysentvec elf32_freebsd_sysvec = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
@@ -81,6 +89,8 @@
 	.sv_shared_page_base = SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= NULL,
+	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec);
 
@@ -116,14 +126,49 @@
 	(sysinit_cfunc_t) elf32_insert_brand_entry,
 	&freebsd_brand_oinfo);
 
+static Elf32_Brandinfo kfreebsd_brand_info = {
+	.brand		= ELFOSABI_FREEBSD,
+	.machine	= EM_386,
+	.compat_3_brand	= "FreeBSD",
+	.emul_path	= NULL,
+	.interp_path	= "/lib/ld.so.1",
+	.sysvec		= &elf32_freebsd_sysvec,
+	.interp_newpath	= NULL,
+	.brand_note	= &elf32_kfreebsd_brandnote,
+	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY
+};
 
+SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY,
+	(sysinit_cfunc_t) elf32_insert_brand_entry,
+	&kfreebsd_brand_info);
+
+
 void
-elf32_dump_thread(struct thread *td __unused, void *dst __unused,
-    size_t *off __unused)
+elf32_dump_thread(struct thread *td, void *dst, size_t *off)
 {
+#ifdef CPU_ENABLE_SSE
+	void *buf;
+#endif
+	size_t len;
+
+	len = 0;
+#ifdef CPU_ENABLE_SSE
+	if (use_xsave) {
+		if (dst != NULL) {
+			npxgetregs(td);
+			len += elf32_populate_note(NT_X86_XSTATE,
+			    get_pcb_user_save_td(td), dst,
+			    cpu_max_ext_state_size, &buf);
+			*(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) =
+			    xsave_mask;
+		} else
+			len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL,
+			    cpu_max_ext_state_size, NULL);
+	}
+#endif
+	*off = len;
 }
 
-
 /* Process one elf relocation with addend. */
 static int
 elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
@@ -135,6 +180,7 @@
 	Elf_Word rtype, symidx;
 	const Elf_Rel *rel;
 	const Elf_Rela *rela;
+	int error;
 
 	switch (type) {
 	case ELF_RELOC_REL:
@@ -170,8 +216,8 @@
 			break;
 
 		case R_386_32:		/* S + A */
-			addr = lookup(lf, symidx, 1);
-			if (addr == 0)
+			error = lookup(lf, symidx, 1, &addr);
+			if (error != 0)
 				return -1;
 			addr += addend;
 			if (*where != addr)
@@ -179,8 +225,8 @@
 			break;
 
 		case R_386_PC32:	/* S + A - P */
-			addr = lookup(lf, symidx, 1);
-			if (addr == 0)
+			error = lookup(lf, symidx, 1, &addr);
+			if (error != 0)
 				return -1;
 			addr += addend - (Elf_Addr)where;
 			if (*where != addr)
@@ -197,8 +243,8 @@
 			break;
 
 		case R_386_GLOB_DAT:	/* S */
-			addr = lookup(lf, symidx, 1);
-			if (addr == 0)
+			error = lookup(lf, symidx, 1, &addr);
+			if (error != 0)
 				return -1;
 			if (*where != addr)
 				*where = addr;

Modified: trunk/sys/i386/i386/exception.s
===================================================================
--- trunk/sys/i386/i386/exception.s	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/exception.s	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1990 William F. Jolitz.
  * Copyright (c) 1990 The Regents of the University of California.
@@ -31,7 +32,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/exception.s 322755 2017-08-21 15:44:57Z kib $
  */
 
 #include "opt_apic.h"
@@ -54,13 +55,13 @@
 	.globl	dtrace_invop_jump_addr
 	.align	4
 	.type	dtrace_invop_jump_addr, @object
-        .size	dtrace_invop_jump_addr, 4
+	.size	dtrace_invop_jump_addr, 4
 dtrace_invop_jump_addr:
 	.zero	4
 	.globl	dtrace_invop_calltrap_addr
 	.align	4
 	.type	dtrace_invop_calltrap_addr, @object
-        .size	dtrace_invop_calltrap_addr, 4
+	.size	dtrace_invop_calltrap_addr, 4
 dtrace_invop_calltrap_addr:
 	.zero	8
 #endif
@@ -75,22 +76,22 @@
  * Trap and fault vector routines.
  *
  * Most traps are 'trap gates', SDT_SYS386TGT.  A trap gate pushes state on
- * the stack that mostly looks like an interrupt, but does not disable 
- * interrupts.  A few of the traps we are use are interrupt gates, 
+ * the stack that mostly looks like an interrupt, but does not disable
+ * interrupts.  A few of the traps we use are interrupt gates,
  * SDT_SYS386IGT, which are nearly the same thing except interrupts are
  * disabled on entry.
  *
  * The cpu will push a certain amount of state onto the kernel stack for
- * the current process.  The amount of state depends on the type of trap 
- * and whether the trap crossed rings or not.  See i386/include/frame.h.  
- * At the very least the current EFLAGS (status register, which includes 
+ * the current process.  The amount of state depends on the type of trap
+ * and whether the trap crossed rings or not.  See i386/include/frame.h.
+ * At the very least the current EFLAGS (status register, which includes
  * the interrupt disable state prior to the trap), the code segment register,
- * and the return instruction pointer are pushed by the cpu.  The cpu 
- * will also push an 'error' code for certain traps.  We push a dummy 
- * error code for those traps where the cpu doesn't in order to maintain 
+ * and the return instruction pointer are pushed by the cpu.  The cpu
+ * will also push an 'error' code for certain traps.  We push a dummy
+ * error code for those traps where the cpu doesn't in order to maintain
  * a consistent frame.  We also push a contrived 'trap number'.
  *
- * The cpu does not push the general registers, we must do that, and we 
+ * The cpu does not push the general registers, we must do that, and we
  * must restore them prior to calling 'iret'.  The cpu adjusts the %cs and
  * %ss segment registers, but does not mess with %ds, %es, or %fs.  Thus we
  * must load them with appropriate values for supervisor mode operation.
@@ -145,21 +146,25 @@
 	pushl $0; TRAP(T_XMMFLT)
 
 	/*
-	 * alltraps entry point.  Interrupts are enabled if this was a trap
-	 * gate (TGT), else disabled if this was an interrupt gate (IGT).
-	 * Note that int0x80_syscall is a trap gate.   Interrupt gates are
-	 * used by page faults, non-maskable interrupts, debug and breakpoint
+	 * All traps except ones for syscalls jump to alltraps.  If
+	 * interrupts were enabled when the trap occurred, then interrupts
+	 * are enabled now if the trap was through a trap gate, else
+	 * disabled if the trap was through an interrupt gate.  Note that
+	 * int0x80_syscall is a trap gate.   Interrupt gates are used by
+	 * page faults, non-maskable interrupts, debug and breakpoint
 	 * exceptions.
 	 */
-
 	SUPERALIGN_TEXT
 	.globl	alltraps
 	.type	alltraps,@function
 alltraps:
 	pushal
-	pushl	%ds
-	pushl	%es
-	pushl	%fs
+	pushl	$0
+	movw	%ds,(%esp)
+	pushl	$0
+	movw	%es,(%esp)
+	pushl	$0
+	movw	%fs,(%esp)
 alltraps_with_regs_pushed:
 	SET_KERNEL_SREGS
 	cld
@@ -168,7 +173,7 @@
 	pushl	%esp
 	call	trap
 	add	$4, %esp
-	
+
 	/*
 	 * Return via doreti to handle ASTs.
 	 */
@@ -181,21 +186,29 @@
 #ifdef KDTRACE_HOOKS
 	SUPERALIGN_TEXT
 IDTVEC(ill)
-	/* Check if there is no DTrace hook registered. */
-	cmpl	$0,dtrace_invop_jump_addr
+	/*
+	 * Check if a DTrace hook is registered.  The default (data) segment
+	 * cannot be used for this since %ds is not known good until we
+	 * verify that the entry was from kernel mode.
+	 */
+	cmpl	$0,%ss:dtrace_invop_jump_addr
 	je	norm_ill
 
-	/* Check if this is a user fault. */
-	cmpl	$GSEL_KPL, 4(%esp)	/* Check the code segment. */
-              
-	/* If so, just handle it as a normal trap. */
+	/*
+	 * Check if this is a user fault.  If so, just handle it as a normal
+	 * trap.
+	 */
+	cmpl	$GSEL_KPL, 4(%esp)	/* Check the code segment */
 	jne	norm_ill
-              
+	testl	$PSL_VM, 8(%esp)	/* and vm86 mode. */
+	jnz	norm_ill
+
 	/*
 	 * This is a kernel instruction fault that might have been caused
 	 * by a DTrace provider.
 	 */
-	pushal				/* Push all registers onto the stack. */
+	pushal
+	cld
 
 	/*
 	 * Set our jump address for the jump back in the event that
@@ -215,10 +228,10 @@
 #endif
 
 /*
- * SYSCALL CALL GATE (old entry point for a.out binaries)
+ * Call gate entry for syscalls (lcall 7,0).
+ * This is used by FreeBSD 1.x a.out executables and "old" NetBSD executables.
  *
  * The intersegment call has been set up to specify one dummy parameter.
- *
  * This leaves a place to put eflags so that the call frame can be
  * converted to a trap frame. Note that the eflags is (semi-)bogusly
  * pushed into (what will be) tf_err and then copied later into the
@@ -231,11 +244,14 @@
 	pushfl				/* save eflags */
 	popl	8(%esp)			/* shuffle into tf_eflags */
 	pushl	$7			/* sizeof "lcall 7,0" */
-	subl	$4,%esp			/* skip over tf_trapno */
+	pushl	$0			/* tf_trapno */
 	pushal
-	pushl	%ds
-	pushl	%es
-	pushl	%fs
+	pushl	$0
+	movw	%ds,(%esp)
+	pushl	$0
+	movw	%es,(%esp)
+	pushl	$0
+	movw	%fs,(%esp)
 	SET_KERNEL_SREGS
 	cld
 	FAKE_MCOUNT(TF_EIP(%esp))
@@ -246,20 +262,25 @@
 	jmp	doreti
 
 /*
- * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
+ * Trap gate entry for syscalls (int 0x80).
+ * This is used by FreeBSD ELF executables, "new" NetBSD executables, and all
+ * Linux executables.
  *
- * Even though the name says 'int0x80', this is actually a TGT (trap gate)
- * rather then an IGT (interrupt gate).  Thus interrupts are enabled on
- * entry just as they are for a normal syscall.
+ * Even though the name says 'int0x80', this is actually a trap gate, not an
+ * interrupt gate.  Thus interrupts are enabled on entry just as they are for
+ * a normal syscall.
  */
 	SUPERALIGN_TEXT
 IDTVEC(int0x80_syscall)
 	pushl	$2			/* sizeof "int 0x80" */
-	subl	$4,%esp			/* skip over tf_trapno */
+	pushl	$0			/* tf_trapno */
 	pushal
-	pushl	%ds
-	pushl	%es
-	pushl	%fs
+	pushl	$0
+	movw	%ds,(%esp)
+	pushl	$0
+	movw	%es,(%esp)
+	pushl	$0
+	movw	%fs,(%esp)
 	SET_KERNEL_SREGS
 	cld
 	FAKE_MCOUNT(TF_EIP(%esp))
@@ -332,6 +353,7 @@
 	.text
 	SUPERALIGN_TEXT
 	.type	doreti,@function
+	.globl	doreti
 doreti:
 	FAKE_MCOUNT($bintr)		/* init "from" bintr -> doreti */
 doreti_next:
@@ -348,13 +370,14 @@
 	/*
 	 * PSL_VM must be checked first since segment registers only
 	 * have an RPL in non-VM86 mode.
+	 * ASTs can not be handled now if we are in a vm86 call.
 	 */
-	testl	$PSL_VM,TF_EFLAGS(%esp)	/* are we in vm86 mode? */
+	testl	$PSL_VM,TF_EFLAGS(%esp)
 	jz	doreti_notvm86
 	movl	PCPU(CURPCB),%ecx
-	testl	$PCB_VM86CALL,PCB_FLAGS(%ecx)	/* are we in a vm86 call? */
-	jz	doreti_ast		/* can handle ASTS now if not */
-  	jmp	doreti_exit
+	testl	$PCB_VM86CALL,PCB_FLAGS(%ecx)
+	jz	doreti_ast
+	jmp	doreti_exit
 
 doreti_notvm86:
 	testb	$SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
@@ -401,7 +424,7 @@
 doreti_iret:
 	iret
 
-  	/*
+	/*
 	 * doreti_iret_fault and friends.  Alternative return code for
 	 * the case where we get a fault in the doreti_exit code
 	 * above.  trap() (i386/i386/trap.c) catches this specific
@@ -413,13 +436,16 @@
 doreti_iret_fault:
 	subl	$8,%esp
 	pushal
-	pushl	%ds
+	pushl	$0
+	movw	%ds,(%esp)
 	.globl	doreti_popl_ds_fault
 doreti_popl_ds_fault:
-	pushl	%es
+	pushl	$0
+	movw	%es,(%esp)
 	.globl	doreti_popl_es_fault
 doreti_popl_es_fault:
-	pushl	%fs
+	pushl	$0
+	movw	%fs,(%esp)
 	.globl	doreti_popl_fs_fault
 doreti_popl_fs_fault:
 	sti


Property changes on: trunk/sys/i386/i386/exception.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/gdb_machdep.c
===================================================================
--- trunk/sys/i386/i386/gdb_machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/gdb_machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2004 Marcel Moolenaar
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/gdb_machdep.c 290734 2015-11-13 00:50:34Z jhb $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -45,14 +46,22 @@
 void *
 gdb_cpu_getreg(int regnum, size_t *regsz)
 {
+	static uint32_t _kcodesel = GSEL(GCODE_SEL, SEL_KPL);
+	static uint32_t _kdatasel = GSEL(GDATA_SEL, SEL_KPL);
+	static uint32_t _kprivsel = GSEL(GPRIV_SEL, SEL_KPL);
 
 	*regsz = gdb_cpu_regsz(regnum);
 
-	if (kdb_thread  == curthread) {
+	if (kdb_thread == curthread) {
 		switch (regnum) {
 		case 0:	return (&kdb_frame->tf_eax);
 		case 1:	return (&kdb_frame->tf_ecx);
 		case 2:	return (&kdb_frame->tf_edx);
+		case 9: return (&kdb_frame->tf_eflags);
+		case 10: return (&kdb_frame->tf_cs);
+		case 12: return (&kdb_frame->tf_ds);
+		case 13: return (&kdb_frame->tf_es);
+		case 14: return (&kdb_frame->tf_fs);
 		}
 	}
 	switch (regnum) {
@@ -62,6 +71,12 @@
 	case 6:  return (&kdb_thrctx->pcb_esi);
 	case 7:  return (&kdb_thrctx->pcb_edi);
 	case 8:  return (&kdb_thrctx->pcb_eip);
+	case 10: return (&_kcodesel);
+	case 11: return (&_kdatasel);
+	case 12: return (&_kdatasel);
+	case 13: return (&_kdatasel);
+	case 14: return (&_kprivsel);
+	case 15: return (&kdb_thrctx->pcb_gs);
 	}
 	return (NULL);
 }

Modified: trunk/sys/i386/i386/genassym.c
===================================================================
--- trunk/sys/i386/i386/genassym.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/genassym.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1982, 1990 The Regents of the University of California.
  * All rights reserved.
@@ -33,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/genassym.c 286878 2015-08-18 09:09:39Z kib $");
 
 #include "opt_apic.h"
 #include "opt_compat.h"
@@ -103,6 +104,7 @@
 ASSYM(V_INTR, offsetof(struct vmmeter, v_intr));
 /* ASSYM(UPAGES, UPAGES);*/
 ASSYM(KSTACK_PAGES, KSTACK_PAGES);
+ASSYM(TD0_KSTACK_PAGES, TD0_KSTACK_PAGES);
 ASSYM(PAGE_SIZE, PAGE_SIZE);
 ASSYM(NPTEPG, NPTEPG);
 ASSYM(NPDEPG, NPDEPG);
@@ -144,7 +146,6 @@
 ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
 ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
 ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
-ASSYM(PCB_USERFPU, offsetof(struct pcb, pcb_user_save));
 ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl));
 ASSYM(PCB_DBREGS, PCB_DBREGS);
 ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
@@ -154,7 +155,6 @@
 ASSYM(PCB_VM86, offsetof(struct pcb, pcb_vm86));
 ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
 ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
-ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
 ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
 
 ASSYM(PCB_SIZE, sizeof(struct pcb));
@@ -246,9 +246,8 @@
 #endif
 
 #ifdef XEN
-#include <xen/hypervisor.h>
 ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3));
-ASSYM(HYPERVISOR_VIRT_START, __HYPERVISOR_VIRT_START);
+ASSYM(XEN_HYPERVISOR_VIRT_START, HYPERVISOR_VIRT_START);
 #endif
 
 #ifdef	HWPMC_HOOKS

Modified: trunk/sys/i386/i386/geode.c
===================================================================
--- trunk/sys/i386/i386/geode.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/geode.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2003-2004 Poul-Henning Kamp
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/geode.c 208111 2010-05-15 10:31:11Z phk $");
 
 #include <sys/param.h>
 #include <sys/systm.h>

Modified: trunk/sys/i386/i386/i686_mem.c
===================================================================
--- trunk/sys/i386/i386/i686_mem.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/i686_mem.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1999 Michael Smith <msmith at freebsd.org>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/i686_mem.c 217506 2011-01-17 17:30:35Z jkim $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>

Modified: trunk/sys/i386/i386/identcpu.c
===================================================================
--- trunk/sys/i386/i386/identcpu.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/identcpu.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.

Modified: trunk/sys/i386/i386/in_cksum.c
===================================================================
--- trunk/sys/i386/i386/in_cksum.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/in_cksum.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
@@ -31,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/in_cksum.c 189572 2009-03-09 13:11:16Z rwatson $");
 
 #include <sys/param.h>
 #include <sys/systm.h>

Modified: trunk/sys/i386/i386/initcpu.c
===================================================================
--- trunk/sys/i386/i386/initcpu.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/initcpu.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) KATO Takenori, 1997, 1998.
  * 
@@ -28,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/initcpu.c 313150 2017-02-03 12:20:44Z kib $");
 
 #include "opt_cpu.h"
 
@@ -48,12 +49,6 @@
 #define CPU_ENABLE_SSE
 #endif
 
-#if defined(I586_CPU) && defined(CPU_WT_ALLOC)
-void	enable_K5_wt_alloc(void);
-void	enable_K6_wt_alloc(void);
-void	enable_K6_2_wt_alloc(void);
-#endif
-
 #ifdef I486_CPU
 static void init_5x86(void);
 static void init_bluelightning(void);
@@ -65,6 +60,12 @@
 static void init_6x86(void);
 #endif /* I486_CPU */
 
+#if defined(I586_CPU) && defined(CPU_WT_ALLOC)
+static void	enable_K5_wt_alloc(void);
+static void	enable_K6_wt_alloc(void);
+static void	enable_K6_2_wt_alloc(void);
+#endif
+
 #ifdef I686_CPU
 static void	init_6x86MX(void);
 static void	init_ppro(void);
@@ -81,22 +82,34 @@
  */
 static int	hw_clflush_disable = -1;
 
-/* Must *NOT* be BSS or locore will bzero these after setting them */
-int	cpu = 0;		/* Are we 386, 386sx, 486, etc? */
-u_int	cpu_feature = 0;	/* Feature flags */
-u_int	cpu_feature2 = 0;	/* Feature flags */
-u_int	amd_feature = 0;	/* AMD feature flags */
-u_int	amd_feature2 = 0;	/* AMD feature flags */
-u_int	amd_pminfo = 0;		/* AMD advanced power management info */
-u_int	via_feature_rng = 0;	/* VIA RNG features */
-u_int	via_feature_xcrypt = 0;	/* VIA ACE features */
-u_int	cpu_high = 0;		/* Highest arg to CPUID */
-u_int	cpu_id = 0;		/* Stepping ID */
-u_int	cpu_procinfo = 0;	/* HyperThreading Info / Brand Index / CLFUSH */
-u_int	cpu_procinfo2 = 0;	/* Multicore info */
-char	cpu_vendor[20] = "";	/* CPU Origin code */
-u_int	cpu_vendor_id = 0;	/* CPU vendor ID */
+int	cpu;			/* Are we 386, 386sx, 486, etc? */
+u_int	cpu_feature;		/* Feature flags */
+u_int	cpu_feature2;		/* Feature flags */
+u_int	amd_feature;		/* AMD feature flags */
+u_int	amd_feature2;		/* AMD feature flags */
+u_int	amd_pminfo;		/* AMD advanced power management info */
+u_int	via_feature_rng;	/* VIA RNG features */
+u_int	via_feature_xcrypt;	/* VIA ACE features */
+u_int	cpu_high;		/* Highest arg to CPUID */
+u_int	cpu_exthigh;		/* Highest arg to extended CPUID */
+u_int	cpu_id;			/* Stepping ID */
+u_int	cpu_procinfo;		/* HyperThreading Info / Brand Index / CLFLUSH */
+u_int	cpu_procinfo2;		/* Multicore info */
+char	cpu_vendor[20];		/* CPU Origin code */
+u_int	cpu_vendor_id;		/* CPU vendor ID */
+#ifdef CPU_ENABLE_SSE
+u_int	cpu_fxsr;		/* SSE enabled */
+u_int	cpu_mxcsr_mask;		/* Valid bits in mxcsr */
+#endif
 u_int	cpu_clflush_line_size = 32;
+u_int	cpu_stdext_feature;
+u_int	cpu_stdext_feature2;
+u_int	cpu_max_ext_state_size;
+u_int	cpu_mon_mwait_flags;	/* MONITOR/MWAIT flags (CPUID.05H.ECX) */
+u_int	cpu_mon_min_size;	/* MONITOR minimum range size, bytes */
+u_int	cpu_mon_max_size;	/* MONITOR maximum range size, bytes */
+u_int	cyrix_did;		/* Device ID of Cyrix CPU */
+u_int	cpu_maxphyaddr;		/* Max phys addr width in bits */
 
 SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
 	&via_feature_rng, 0, "VIA RNG feature available in CPU");
@@ -103,11 +116,6 @@
 SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD,
 	&via_feature_xcrypt, 0, "VIA xcrypt feature available in CPU");
 
-#ifdef CPU_ENABLE_SSE
-u_int	cpu_fxsr;		/* SSE enabled */
-u_int	cpu_mxcsr_mask;		/* valid bits in mxcsr */
-#endif
-
 #ifdef I486_CPU
 /*
  * IBM Blue Lightning
@@ -421,6 +429,19 @@
 
 #ifdef I586_CPU
 /*
+ * Rise mP6
+ */
+static void
+init_rise(void)
+{
+
+	/*
+	 * The CMPXCHG8B instruction is always available but hidden.
+	 */
+	cpu_feature |= CPUID_CX8;
+}
+
+/*
  * IDT WinChip C6/2/2A/2B/3
  *
  * http://www.centtech.com/winchip_bios_writers_guide_v4_0.pdf
@@ -440,7 +461,7 @@
 	fcr &= ~(1ULL << 11);
 
 	/*
-	 * Additioanlly, set EBRPRED, E2MMX and EAMD3D for WinChip 2 and 3.
+	 * Additionally, set EBRPRED, E2MMX and EAMD3D for WinChip 2 and 3.
 	 */
 	if (CPUID_TO_MODEL(cpu_id) >= 8)
 		fcr |= (1 << 12) | (1 << 19) | (1 << 20);
@@ -516,6 +537,8 @@
 	intr_restore(saveintr);
 }
 
+static int ppro_apic_used = -1;
+
 static void
 init_ppro(void)
 {
@@ -524,12 +547,32 @@
 	/*
 	 * Local APIC should be disabled if it is not going to be used.
 	 */
-	apicbase = rdmsr(MSR_APICBASE);
-	apicbase &= ~APICBASE_ENABLED;
-	wrmsr(MSR_APICBASE, apicbase);
+	if (ppro_apic_used != 1) {
+		apicbase = rdmsr(MSR_APICBASE);
+		apicbase &= ~APICBASE_ENABLED;
+		wrmsr(MSR_APICBASE, apicbase);
+		ppro_apic_used = 0;
+	}
 }
 
 /*
+ * If the local APIC is going to be used after being disabled above,
+ * re-enable it and don't disable it in the future.
+ */
+void
+ppro_reenable_apic(void)
+{
+	u_int64_t	apicbase;
+
+	if (ppro_apic_used == 0) {
+		apicbase = rdmsr(MSR_APICBASE);
+		apicbase |= APICBASE_ENABLED;
+		wrmsr(MSR_APICBASE, apicbase);
+		ppro_apic_used = 1;
+	}
+}
+
+/*
  * Initialize BBL_CR_CTL3 (Control register 3: used to configure the
  * L2 cache).
  */
@@ -635,20 +678,6 @@
 }
 #endif
 
-/*
- * Initialize CR4 (Control register 4) to enable SSE instructions.
- */
-void
-enable_sse(void)
-{
-#if defined(CPU_ENABLE_SSE)
-	if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
-		load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
-		cpu_fxsr = hw_instruction_sse = 1;
-	}
-#endif
-}
-
 extern int elf32_nxstack;
 
 void
@@ -681,6 +710,27 @@
 #ifdef I586_CPU
 	case CPU_586:
 		switch (cpu_vendor_id) {
+		case CPU_VENDOR_AMD:
+#ifdef CPU_WT_ALLOC
+			if (((cpu_id & 0x0f0) > 0) &&
+			    ((cpu_id & 0x0f0) < 0x60) &&
+			    ((cpu_id & 0x00f) > 3))
+				enable_K5_wt_alloc();
+			else if (((cpu_id & 0x0f0) > 0x80) ||
+			    (((cpu_id & 0x0f0) == 0x80) &&
+				(cpu_id & 0x00f) > 0x07))
+				enable_K6_2_wt_alloc();
+			else if ((cpu_id & 0x0f0) > 0x50)
+				enable_K6_wt_alloc();
+#endif
+			if ((cpu_id & 0xf0) == 0xa0)
+				/*
+				 * Make sure the TSC runs through
+				 * suspension, otherwise we can't use
+				 * it as timecounter
+				 */
+				wrmsr(0x1900, rdmsr(0x1900) | 0x20ULL);
+			break;
 		case CPU_VENDOR_CENTAUR:
 			init_winchip();
 			break;
@@ -687,6 +737,9 @@
 		case CPU_VENDOR_TRANSMETA:
 			init_transmeta();
 			break;
+		case CPU_VENDOR_RISE:
+			init_rise();
+			break;
 		}
 		break;
 #endif
@@ -733,23 +786,33 @@
 			init_transmeta();
 			break;
 		}
-#ifdef PAE
-		if ((amd_feature & AMDID_NX) != 0) {
-			uint64_t msr;
-
-			msr = rdmsr(MSR_EFER) | EFER_NXE;
-			wrmsr(MSR_EFER, msr);
-			pg_nx = PG_NX;
-			elf32_nxstack = 1;
-		}
-#endif
 		break;
 #endif
 	default:
 		break;
 	}
-	enable_sse();
+#if defined(CPU_ENABLE_SSE)
+	if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
+		load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
+		cpu_fxsr = hw_instruction_sse = 1;
+	}
+#endif
+#if defined(PAE) || defined(PAE_TABLES)
+	if ((amd_feature & AMDID_NX) != 0) {
+		uint64_t msr;
 
+		msr = rdmsr(MSR_EFER) | EFER_NXE;
+		wrmsr(MSR_EFER, msr);
+		pg_nx = PG_NX;
+		elf32_nxstack = 1;
+	}
+#endif
+}
+
+void
+initializecpucache(void)
+{
+
 	/*
 	 * CPUID with %eax = 1, %ebx returns
 	 * Bits 15-8: CLFLUSH line size
@@ -764,14 +827,18 @@
 	 * CPUID_SS feature even though the native CPU supports it.
 	 */
 	TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
-	if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1)
+	if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
 		cpu_feature &= ~CPUID_CLFSH;
+		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+	}
 	/*
-	 * Allow to disable CLFLUSH feature manually by
-	 * hw.clflush_disable tunable.
+	 * The kernel's use of CLFLUSH{,OPT} can be disabled manually
+	 * by setting the hw.clflush_disable tunable.
 	 */
-	if (hw_clflush_disable == 1)
+	if (hw_clflush_disable == 1) {
 		cpu_feature &= ~CPUID_CLFSH;
+		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+	}
 
 #if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
 	/*
@@ -825,7 +892,7 @@
  * Enable write allocate feature of AMD processors.
  * Following two functions require the Maxmem variable being set.
  */
-void
+static void
 enable_K5_wt_alloc(void)
 {
 	u_int64_t	msr;
@@ -871,7 +938,7 @@
 	}
 }
 
-void
+static void
 enable_K6_wt_alloc(void)
 {
 	quad_t	size;
@@ -931,7 +998,7 @@
 	intr_restore(saveintr);
 }
 
-void
+static void
 enable_K6_2_wt_alloc(void)
 {
 	quad_t	size;

Modified: trunk/sys/i386/i386/io.c
===================================================================
--- trunk/sys/i386/i386/io.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/io.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2004 Mark R V Murray
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/io.c 207329 2010-04-28 15:38:01Z attilio $");
 
 #include <sys/param.h>
 #include <sys/proc.h>

Modified: trunk/sys/i386/i386/k6_mem.c
===================================================================
--- trunk/sys/i386/i386/k6_mem.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/k6_mem.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1999 Brian Fundakowski Feldman
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/k6_mem.c 189903 2009-03-17 00:48:11Z jkim $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>

Modified: trunk/sys/i386/i386/legacy.c
===================================================================
--- trunk/sys/i386/i386/legacy.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/legacy.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright 1998 Massachusetts Institute of Technology
  *

Modified: trunk/sys/i386/i386/locore.s
===================================================================
--- trunk/sys/i386/i386/locore.s	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/locore.s	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
@@ -30,7 +31,7 @@
  * SUCH DAMAGE.
  *
  *	from: @(#)locore.s	7.3 (Berkeley) 5/13/91
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/locore.s 286878 2015-08-18 09:09:39Z kib $
  *
  *		originally from: locore.s, by William F. Jolitz
  *
@@ -99,7 +100,7 @@
 	.globl	IdlePTD
 IdlePTD:	.long	0		/* phys addr of kernel PTD */
 
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	.globl	IdlePDPT
 IdlePDPT:	.long	0		/* phys addr of kernel PDPT */
 #endif
@@ -281,7 +282,7 @@
 1:
 
 /* Now enable paging */
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	movl	R(IdlePDPT), %eax
 	movl	%eax, %cr3
 	movl	%cr4, %eax
@@ -302,18 +303,15 @@
 begin:
 	/* set up bootstrap stack */
 	movl	proc0kstack,%eax	/* location of in-kernel stack */
-			/* bootstrap stack end location */
-	leal	(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp
 
+	/*
+	 * Only use bottom page for init386().  init386() calculates the
+	 * PCB + FPU save area size and returns the true top of stack.
+	 */
+	leal	PAGE_SIZE(%eax),%esp
+
 	xorl	%ebp,%ebp		/* mark end of frames */
 
-#ifdef PAE
-	movl	IdlePDPT,%esi
-#else
-	movl	IdlePTD,%esi
-#endif
-	movl	%esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
-
 	pushl	physfree		/* value of first for init386(first) */
 	call	init386			/* wire 386 chip for unix operation */
 
@@ -324,6 +322,9 @@
 	 */
 	addl	$4,%esp
 
+	/* Switch to true top of stack. */
+	movl	%eax,%esp
+
 	call	mi_startup		/* autoconfiguration, mountroot etc */
 	/* NOTREACHED */
 	addl	$0,%esp			/* for db_numargs() again */
@@ -722,7 +723,7 @@
 	movl	%esi,R(KPTmap)
 
 /* Allocate Page Table Directory */
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	/* XXX only need 32 bytes (easier for now) */
 	ALLOCPAGES(1)
 	movl	%esi,R(IdlePDPT)
@@ -731,7 +732,7 @@
 	movl	%esi,R(IdlePTD)
 
 /* Allocate KSTACK */
-	ALLOCPAGES(KSTACK_PAGES)
+	ALLOCPAGES(TD0_KSTACK_PAGES)
 	movl	%esi,R(p0kpa)
 	addl	$KERNBASE, %esi
 	movl	%esi, R(proc0kstack)
@@ -775,8 +776,7 @@
  * if we've enabled PSE above, we'll just switch the corresponding kernel
  * PDEs before we turn on paging.
  *
- * XXX: We waste some pages here in the PSE case!  DON'T BLINDLY REMOVE
- * THIS!  SMP needs the page table to be there to map the kernel P==V.
+ * XXX: We waste some pages here in the PSE case!
  */
 	xorl	%eax, %eax
 	movl	R(KERNend),%ecx
@@ -789,7 +789,7 @@
 	fillkptphys($PG_RW)
 
 /* Map page directory. */
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	movl	R(IdlePDPT), %eax
 	movl	$1, %ecx
 	fillkptphys($PG_RW)
@@ -801,7 +801,7 @@
 
 /* Map proc0's KSTACK in the physical way ... */
 	movl	R(p0kpa), %eax
-	movl	$(KSTACK_PAGES), %ecx
+	movl	$(TD0_KSTACK_PAGES), %ecx
 	fillkptphys($PG_RW)
 
 /* Map ISA hole */
@@ -891,7 +891,7 @@
 	movl	$NPGPTD,%ecx
 	fillkpt(R(IdlePTD), $PG_RW)
 
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	movl	R(IdlePTD), %eax
 	xorl	%ebx, %ebx
 	movl	$NPGPTD, %ecx


Property changes on: trunk/sys/i386/i386/locore.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/longrun.c
===================================================================
--- trunk/sys/i386/i386/longrun.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/longrun.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2001 Tamotsu Hattori.
  * Copyright (c) 2001 Mitsuru IWASAKI.
@@ -34,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/longrun.c 214346 2010-10-25 15:28:03Z jhb $");
 
 #include "opt_cpu.h"
 

Modified: trunk/sys/i386/i386/machdep.c
===================================================================
--- trunk/sys/i386/i386/machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
@@ -38,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/machdep.c 325543 2017-11-08 11:39:42Z kib $");
 
 #include "opt_apic.h"
 #include "opt_atalk.h"
@@ -54,6 +55,7 @@
 #include "opt_mp_watchdog.h"
 #include "opt_npx.h"
 #include "opt_perfmon.h"
+#include "opt_platform.h"
 #include "opt_xbox.h"
 #include "opt_kdtrace.h"
 
@@ -81,6 +83,7 @@
 #include <sys/pcpu.h>
 #include <sys/ptrace.h>
 #include <sys/reboot.h>
+#include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #ifdef SMP
@@ -110,7 +113,11 @@
 #include <ddb/db_sym.h>
 #endif
 
+#ifdef PC98
+#include <pc98/pc98/pc98_machdep.h>
+#else
 #include <isa/rtc.h>
+#endif
 
 #include <net/netisr.h>
 
@@ -137,6 +144,9 @@
 #ifdef SMP
 #include <machine/smp.h>
 #endif
+#ifdef FDT
+#include <x86/fdt.h>
+#endif
 
 #ifdef DEV_APIC
 #include <machine/apicvar.h>
@@ -155,9 +165,8 @@
 
 #ifdef XEN
 /* XEN includes */
-#include <machine/xen/xen-os.h>
+#include <xen/xen-os.h>
 #include <xen/hypervisor.h>
-#include <machine/xen/xen-os.h>
 #include <machine/xen/xenvar.h>
 #include <machine/xen/xenfunc.h>
 #include <xen/xen_intr.h>
@@ -175,13 +184,9 @@
 /* Sanity check for __curthread() */
 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
 
-extern void init386(int first);
+extern register_t init386(int first);
 extern void dblfault_handler(void);
 
-extern void printcpuinfo(void);	/* XXX header file */
-extern void finishidentcpu(void);
-extern void panicifcpuunsupported(void);
-
 #define	CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
 #define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 
@@ -191,8 +196,10 @@
 
 static void cpu_startup(void *);
 static void fpstate_drop(struct thread *td);
-static void get_fpcontext(struct thread *td, mcontext_t *mcp);
-static int  set_fpcontext(struct thread *td, const mcontext_t *mcp);
+static void get_fpcontext(struct thread *td, mcontext_t *mcp,
+    char *xfpusave, size_t xfpusave_len);
+static int  set_fpcontext(struct thread *td, mcontext_t *mcp,
+    char *xfpustate, size_t xfpustate_len);
 #ifdef CPU_ENABLE_SSE
 static void set_fpregs_xmm(struct save87 *, struct savexmm *);
 static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
@@ -210,6 +217,14 @@
 int	_udatasel, _ucodesel;
 u_int	basemem;
 
+#ifdef PC98
+int	need_pre_dma_flush;	/* If 1, use wbinvd before DMA transfer. */
+int	need_post_dma_flush;	/* If 1, use invd after DMA transfer. */
+
+static int	ispc98 = 1;
+SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, "");
+#endif
+
 int cold = 1;
 
 #ifdef COMPAT_43
@@ -256,7 +271,8 @@
 {
 	uintmax_t memsize;
 	char *sysenv;
-	
+
+#ifndef PC98
 	/*
 	 * On MacBooks, we need to disallow the legacy USB circuit to
 	 * generate an SMI# because this can cause several problems,
@@ -269,9 +285,11 @@
 	if (sysenv != NULL) {
 		if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
 		    strncmp(sysenv, "MacBook3,1", 10) == 0 ||
+		    strncmp(sysenv, "MacBook4,1", 10) == 0 ||
 		    strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
 		    strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
 		    strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
+		    strncmp(sysenv, "MacBookPro4,1", 13) == 0 ||
 		    strncmp(sysenv, "Macmini1,1", 10) == 0) {
 			if (bootverbose)
 				printf("Disabling LEGACY_USB_EN bit on "
@@ -280,6 +298,7 @@
 		}
 		freeenv(sysenv);
 	}
+#endif /* !PC98 */
 
 	/*
 	 * Good {morning,afternoon,evening,night}.
@@ -290,7 +309,6 @@
 #ifdef PERFMON
 	perfmon_init();
 #endif
-	realmem = Maxmem;
 
 	/*
 	 * Display physical memory if SMBIOS reports reasonable amount.
@@ -304,6 +322,7 @@
 	if (memsize < ptoa((uintmax_t)cnt.v_free_count))
 		memsize = ptoa((uintmax_t)Maxmem);
 	printf("real memory  = %ju (%ju MB)\n", memsize, memsize >> 20);
+	realmem = atop(memsize);
 
 	/*
 	 * Display any holes after the first chunk of extended memory.
@@ -338,11 +357,6 @@
 #ifndef XEN
 	cpu_setregs();
 #endif
-
-	/*
-	 * Add BSP as an interrupt target.
-	 */
-	intr_add_cpu(0);
 }
 
 /*
@@ -349,7 +363,7 @@
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
- * at top to call routine, followed by kcall
+ * at top to call routine, followed by call
  * to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
@@ -387,10 +401,6 @@
 	} else
 		fp = (struct osigframe *)regs->tf_esp - 1;
 
-	/* Translate the signal if appropriate. */
-	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
-		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
-
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
@@ -538,10 +548,6 @@
 	} else
 		sfp = (struct sigframe4 *)regs->tf_esp - 1;
 
-	/* Translate the signal if appropriate. */
-	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
-		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
-
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_ucontext = (register_t)&sfp->sf_uc;
@@ -628,6 +634,8 @@
 	char *sp;
 	struct trapframe *regs;
 	struct segment_descriptor *sdp;
+	char *xfpusave;
+	size_t xfpusave_len;
 	int sig;
 	int oonstack;
 
@@ -652,6 +660,18 @@
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
+#ifdef CPU_ENABLE_SSE
+	if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) {
+		xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu);
+		xfpusave = __builtin_alloca(xfpusave_len);
+	} else {
+#else
+	{
+#endif
+		xfpusave_len = 0;
+		xfpusave = NULL;
+	}
+
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
@@ -662,7 +682,7 @@
 	sf.sf_uc.uc_mcontext.mc_gs = rgs();
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
 	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
-	get_fpcontext(td, &sf.sf_uc.uc_mcontext);
+	get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
 	fpstate_drop(td);
 	/*
 	 * Unconditionally fill the fsbase and gsbase into the mcontext.
@@ -673,7 +693,6 @@
 	sdp = &td->td_pcb->pcb_gsd;
 	sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 |
 	    sdp->sd_lobase;
-	sf.sf_uc.uc_mcontext.mc_flags = 0;
 	bzero(sf.sf_uc.uc_mcontext.mc_spare2,
 	    sizeof(sf.sf_uc.uc_mcontext.mc_spare2));
 	bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
@@ -681,13 +700,19 @@
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
-		sp = td->td_sigstk.ss_sp +
-		    td->td_sigstk.ss_size - sizeof(struct sigframe);
+		sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
-		sp = (char *)regs->tf_esp - sizeof(struct sigframe);
+		sp = (char *)regs->tf_esp - 128;
+	if (xfpusave != NULL) {
+		sp -= xfpusave_len;
+		sp = (char *)((unsigned int)sp & ~0x3F);
+		sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
+	}
+	sp -= sizeof(struct sigframe);
+
 	/* Align to 16 bytes. */
 	sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
 
@@ -748,7 +773,10 @@
 	/*
 	 * Copy the sigframe out to the user's stack.
 	 */
-	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
+	if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
+	    (xfpusave != NULL && copyout(xfpusave,
+	    (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
+	    != 0)) {
 #ifdef DEBUG
 		printf("process %ld has trashed its stack\n", (long)p->p_pid);
 #endif
@@ -758,6 +786,8 @@
 
 	regs->tf_esp = (int)sfp;
 	regs->tf_eip = p->p_sysent->sv_sigcode_base;
+	if (regs->tf_eip == 0)
+		regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode;
 	regs->tf_eflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
@@ -841,17 +871,7 @@
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
-		/*
-		 * XXX do allow users to change the privileged flag PSL_RF.
-		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
-		 * should sometimes set it there too.  tf_eflags is kept in
-		 * the signal context during signal handling and there is no
-		 * other place to remember it, so the PSL_RF bit may be
-		 * corrupted by the signal handler without us knowing.
-		 * Corruption of the PSL_RF bit at worst causes one more or
-		 * one less debugger trap, so allowing it is fairly harmless.
-		 */
-		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
+		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 	    		return (EINVAL);
 		}
 
@@ -967,17 +987,7 @@
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
-		/*
-		 * XXX do allow users to change the privileged flag PSL_RF.
-		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
-		 * should sometimes set it there too.  tf_eflags is kept in
-		 * the signal context during signal handling and there is no
-		 * other place to remember it, so the PSL_RF bit may be
-		 * corrupted by the signal handler without us knowing.
-		 * Corruption of the PSL_RF bit at worst causes one more or
-		 * one less debugger trap, so allowing it is fairly harmless.
-		 */
-		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
+		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 			uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n",
 			    td->td_proc->p_pid, td->td_name, eflags);
 	    		return (EINVAL);
@@ -1026,15 +1036,25 @@
 	} */ *uap;
 {
 	ucontext_t uc;
+	struct proc *p;
 	struct trapframe *regs;
 	ucontext_t *ucp;
+	char *xfpustate;
+	size_t xfpustate_len;
 	int cs, eflags, error, ret;
 	ksiginfo_t ksi;
 
+	p = td->td_proc;
+
 	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
 	if (error != 0)
 		return (error);
 	ucp = &uc;
+	if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
+		uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
+		    td->td_name, ucp->uc_mcontext.mc_flags);
+		return (EINVAL);
+	}
 	regs = td->td_frame;
 	eflags = ucp->uc_mcontext.mc_eflags;
 	if (eflags & PSL_VM) {
@@ -1081,17 +1101,7 @@
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
-		/*
-		 * XXX do allow users to change the privileged flag PSL_RF.
-		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
-		 * should sometimes set it there too.  tf_eflags is kept in
-		 * the signal context during signal handling and there is no
-		 * other place to remember it, so the PSL_RF bit may be
-		 * corrupted by the signal handler without us knowing.
-		 * Corruption of the PSL_RF bit at worst causes one more or
-		 * one less debugger trap, so allowing it is fairly harmless.
-		 */
-		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
+		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 			uprintf("pid %d (%s): sigreturn eflags = 0x%x\n",
 			    td->td_proc->p_pid, td->td_name, eflags);
 	    		return (EINVAL);
@@ -1115,7 +1125,30 @@
 			return (EINVAL);
 		}
 
-		ret = set_fpcontext(td, &ucp->uc_mcontext);
+		if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
+			xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
+			if (xfpustate_len > cpu_max_ext_state_size -
+			    sizeof(union savefpu)) {
+				uprintf(
+			    "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
+				    p->p_pid, td->td_name, xfpustate_len);
+				return (EINVAL);
+			}
+			xfpustate = __builtin_alloca(xfpustate_len);
+			error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
+			    xfpustate, xfpustate_len);
+			if (error != 0) {
+				uprintf(
+	"pid %d (%s): sigreturn copying xfpustate failed\n",
+				    p->p_pid, td->td_name);
+				return (error);
+			}
+		} else {
+			xfpustate = NULL;
+			xfpustate_len = 0;
+		}
+		ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
+		    xfpustate_len);
 		if (ret != 0)
 			return (ret);
 		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
@@ -1216,6 +1249,13 @@
 
 #ifdef XEN
 
+static void
+idle_block(void)
+{
+
+	HYPERVISOR_sched_op(SCHEDOP_block, 0);
+}
+
 void
 cpu_halt(void)
 {
@@ -1225,7 +1265,7 @@
 int scheduler_running;
 
 static void
-cpu_idle_hlt(int busy)
+cpu_idle_hlt(sbintime_t sbt)
 {
 
 	scheduler_running = 1;
@@ -1241,12 +1281,12 @@
 cpu_halt(void)
 {
 	for (;;)
-		__asm__ ("hlt");
+		halt();
 }
 
 #endif
 
-void (*cpu_idle_hook)(void) = NULL;	/* ACPI idle hook. */
+void (*cpu_idle_hook)(sbintime_t) = NULL;	/* ACPI idle hook. */
 static int	cpu_ident_amdc1e = 0;	/* AMD C1E supported. */
 static int	idle_mwait = 1;		/* Use MONITOR/MWAIT for short idle. */
 TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
@@ -1257,34 +1297,51 @@
 #define	STATE_MWAIT	0x1
 #define	STATE_SLEEPING	0x2
 
+#ifndef PC98
 static void
-cpu_idle_acpi(int busy)
+cpu_idle_acpi(sbintime_t sbt)
 {
 	int *state;
 
 	state = (int *)PCPU_PTR(monitorbuf);
 	*state = STATE_SLEEPING;
+
+	/* See comments in cpu_idle_hlt(). */
 	disable_intr();
 	if (sched_runnable())
 		enable_intr();
 	else if (cpu_idle_hook)
-		cpu_idle_hook();
+		cpu_idle_hook(sbt);
 	else
 		__asm __volatile("sti; hlt");
 	*state = STATE_RUNNING;
 }
+#endif /* !PC98 */
 
 #ifndef XEN
 static void
-cpu_idle_hlt(int busy)
+cpu_idle_hlt(sbintime_t sbt)
 {
 	int *state;
 
 	state = (int *)PCPU_PTR(monitorbuf);
 	*state = STATE_SLEEPING;
+
 	/*
-	 * We must absolutely guarentee that hlt is the next instruction
-	 * after sti or we introduce a timing window.
+	 * Since we may be in a critical section from cpu_idle(), if
+	 * an interrupt fires during that critical section we may have
+	 * a pending preemption.  If the CPU halts, then that thread
+	 * may not execute until a later interrupt awakens the CPU.
+	 * To handle this race, check for a runnable thread after
+	 * disabling interrupts and immediately return if one is
+	 * found.  Also, we must absolutely guarantee that hlt is
+	 * the next instruction after sti.  This ensures that any
+	 * interrupt that fires after the call to disable_intr() will
+	 * immediately awaken the CPU from hlt.  Finally, please note
+	 * that on x86 this works fine because interrupts are enabled only
+	 * after the instruction following sti takes place, while IF is set
+	 * to 1 immediately, allowing the hlt instruction to acknowledge
+	 * the interrupt.
 	 */
 	disable_intr();
 	if (sched_runnable())
@@ -1295,32 +1352,31 @@
 }
 #endif
 
-/*
- * MWAIT cpu power states.  Lower 4 bits are sub-states.
- */
-#define	MWAIT_C0	0xf0
-#define	MWAIT_C1	0x00
-#define	MWAIT_C2	0x10
-#define	MWAIT_C3	0x20
-#define	MWAIT_C4	0x30
-
 static void
-cpu_idle_mwait(int busy)
+cpu_idle_mwait(sbintime_t sbt)
 {
 	int *state;
 
 	state = (int *)PCPU_PTR(monitorbuf);
 	*state = STATE_MWAIT;
-	if (!sched_runnable()) {
-		cpu_monitor(state, 0, 0);
-		if (*state == STATE_MWAIT)
-			cpu_mwait(0, MWAIT_C1);
+
+	/* See comments in cpu_idle_hlt(). */
+	disable_intr();
+	if (sched_runnable()) {
+		enable_intr();
+		*state = STATE_RUNNING;
+		return;
 	}
+	cpu_monitor(state, 0, 0);
+	if (*state == STATE_MWAIT)
+		__asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
+	else
+		enable_intr();
 	*state = STATE_RUNNING;
 }
 
 static void
-cpu_idle_spin(int busy)
+cpu_idle_spin(sbintime_t sbt)
 {
 	int *state;
 	int i;
@@ -1327,6 +1383,12 @@
 
 	state = (int *)PCPU_PTR(monitorbuf);
 	*state = STATE_RUNNING;
+
+	/*
+	 * The sched_runnable() call is racy, but since we are in a loop,
+	 * missing it one time will have just a little impact, if any
+	 * (and it is much better than not checking at all).
+	 */
 	for (i = 0; i < 1000; i++) {
 		if (sched_runnable())
 			return;
@@ -1363,10 +1425,10 @@
 	}
 }
 
-#ifdef XEN
-void (*cpu_idle_fn)(int) = cpu_idle_hlt;
+#if defined(PC98) || defined(XEN)
+void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt;
 #else
-void (*cpu_idle_fn)(int) = cpu_idle_acpi;
+void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi;
 #endif
 
 void
@@ -1375,6 +1437,7 @@
 #ifndef XEN
 	uint64_t msr;
 #endif
+	sbintime_t sbt = -1;
 
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
 	    busy, curcpu);
@@ -1394,12 +1457,12 @@
 	/* If we have time - switch timers into idle mode. */
 	if (!busy) {
 		critical_enter();
-		cpu_idleclock();
+		sbt = cpu_idleclock();
 	}
 
 #ifndef XEN
 	/* Apply AMD APIC timer C1E workaround. */
-	if (cpu_ident_amdc1e && cpu_disable_deep_sleep) {
+	if (cpu_ident_amdc1e && cpu_disable_c3_sleep) {
 		msr = rdmsr(MSR_AMDK8_IPM);
 		if (msr & AMDK8_CMPHALT)
 			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
@@ -1407,7 +1470,7 @@
 #endif
 
 	/* Call main idle method. */
-	cpu_idle_fn(busy);
+	cpu_idle_fn(sbt);
 
 	/* Switch timers mack into active mode. */
 	if (!busy) {
@@ -1450,7 +1513,9 @@
 	{ cpu_idle_spin, "spin" },
 	{ cpu_idle_mwait, "mwait" },
 	{ cpu_idle_hlt, "hlt" },
+#ifndef PC98
 	{ cpu_idle_acpi, "acpi" },
+#endif
 	{ NULL, NULL }
 };
 
@@ -1467,9 +1532,11 @@
 		if (strstr(idle_tbl[i].id_name, "mwait") &&
 		    (cpu_feature2 & CPUID2_MON) == 0)
 			continue;
+#ifndef PC98
 		if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
 		    cpu_idle_hook == NULL)
 			continue;
+#endif
 		p += sprintf(p, "%s%s", p != avail ? ", " : "",
 		    idle_tbl[i].id_name);
 	}
@@ -1504,9 +1571,11 @@
 		if (strstr(idle_tbl[i].id_name, "mwait") &&
 		    (cpu_feature2 & CPUID2_MON) == 0)
 			continue;
+#ifndef PC98
 		if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
 		    cpu_idle_hook == NULL)
 			continue;
+#endif
 		if (strcmp(idle_tbl[i].id_name, buf))
 			continue;
 		cpu_idle_fn = idle_tbl[i].id_fn;
@@ -1518,22 +1587,6 @@
 SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
     idle_sysctl, "A", "currently selected idle function");
 
-uint64_t (*atomic_load_acq_64)(volatile uint64_t *) =
-    atomic_load_acq_64_i386;
-void (*atomic_store_rel_64)(volatile uint64_t *, uint64_t) =
-    atomic_store_rel_64_i386;
-
-static void
-cpu_probe_cmpxchg8b(void)
-{
-
-	if ((cpu_feature & CPUID_CX8) != 0 ||
-	    cpu_vendor_id == CPU_VENDOR_RISE) {
-		atomic_load_acq_64 = atomic_load_acq_64_i586;
-		atomic_store_rel_64 = atomic_store_rel_64_i586;
-	}
-}
-
 /*
  * Reset registers to default values on exec.
  */
@@ -1585,17 +1638,9 @@
 			 */
 		        reset_dbregs();
                 }
-                pcb->pcb_flags &= ~PCB_DBREGS;
+		pcb->pcb_flags &= ~PCB_DBREGS;
         }
 
-	/*
-	 * Initialize the math emulator (if any) for the current process.
-	 * Actually, just clear the bit that says that the emulator has
-	 * been initialized.  Initialization is delayed until the process
-	 * traps to the emulator (if it is done at all) mainly because
-	 * emulators don't provide an entry point for initialization.
-	 */
-	td->td_pcb->pcb_flags &= ~FP_SOFTFP;
 	pcb->pcb_initial_npxcw = __INITIAL_NPXCW__;
 
 	/*
@@ -1644,6 +1689,10 @@
 SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
 	CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)");
 
+static char bootmethod[16] = "BIOS";
+SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
+    "System firmware boot method");
+
 /*
  * Initialize 386 and configure to run kernel
  */
@@ -1666,10 +1715,6 @@
 struct region_descriptor r_gdt, r_idt;	/* table descriptors */
 struct mtx dt_lock;			/* lock for GDT and LDT */
 
-#if defined(I586_CPU) && !defined(NO_F00F_HACK)
-extern int has_f00f_bug;
-#endif
-
 static struct i386tss dblfault_tss;
 static char dblfault_stack[PAGE_SIZE];
 
@@ -1946,6 +1991,9 @@
 #ifdef KDTRACE_HOOKS
 	IDTVEC(dtrace_ret),
 #endif
+#ifdef XENHVM
+	IDTVEC(xen_intr_upcall),
+#endif
 	IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
 
 #ifdef DDB
@@ -1988,7 +2036,30 @@
 	db_printf("cr2\t0x%08x\n", rcr2());
 	db_printf("cr3\t0x%08x\n", rcr3());
 	db_printf("cr4\t0x%08x\n", rcr4());
+	if (rcr4() & CR4_XSAVE)
+		db_printf("xcr0\t0x%016llx\n", rxcr(0));
+	if (amd_feature & (AMDID_NX | AMDID_LM))
+		db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER));
+	if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
+		db_printf("FEATURES_CTL\t0x%016llx\n",
+		    rdmsr(MSR_IA32_FEATURE_CONTROL));
+	if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
+	    cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6)
+		db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR));
+	if (cpu_feature & CPUID_PAT)
+		db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT));
 }
+
+DB_SHOW_COMMAND(dbregs, db_show_dbregs)
+{
+
+	db_printf("dr0\t0x%08x\n", rdr0());
+	db_printf("dr1\t0x%08x\n", rdr1());
+	db_printf("dr2\t0x%08x\n", rdr2());
+	db_printf("dr3\t0x%08x\n", rdr3());
+	db_printf("dr6\t0x%08x\n", rdr6());
+	db_printf("dr7\t0x%08x\n", rdr7());	
+}
 #endif
 
 void
@@ -2005,7 +2076,7 @@
 	ssd->ssd_gran  = sd->sd_gran;
 }
 
-#ifndef XEN
+#if !defined(PC98) && !defined(XEN)
 static int
 add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp)
 {
@@ -2084,7 +2155,9 @@
 	physmap[insert_idx + 1] = smap->base + smap->length;
 	return (1);
 }
+#endif /* !PC98 && !XEN */
 
+#ifndef XEN
 static void
 basemem_setup(void)
 {
@@ -2132,7 +2205,7 @@
 	for (i = basemem / 4; i < 160; i++)
 		pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
 }
-#endif
+#endif /* !XEN */
 
 /*
  * Populate the (physmap) array with base/bound pairs describing the
@@ -2147,9 +2220,274 @@
  *
  * XXX first should be vm_paddr_t.
  */
+#ifdef PC98
 static void
 getmemsize(int first)
 {
+	int off, physmap_idx, pa_indx, da_indx;
+	u_long physmem_tunable, memtest;
+	vm_paddr_t physmap[PHYSMAP_SIZE];
+	pt_entry_t *pte;
+	quad_t dcons_addr, dcons_size;
+	int i;
+	int pg_n;
+	u_int extmem;
+	u_int under16;
+	vm_paddr_t pa;
+
+	bzero(physmap, sizeof(physmap));
+
+	/* XXX - some of EPSON machines can't use PG_N */
+	pg_n = PG_N;
+	if (pc98_machine_type & M_EPSON_PC98) {
+		switch (epson_machine_id) {
+#ifdef WB_CACHE
+		default:
+#endif
+		case EPSON_PC486_HX:
+		case EPSON_PC486_HG:
+		case EPSON_PC486_HA:
+			pg_n = 0;
+			break;
+		}
+	}
+
+	under16 = pc98_getmemsize(&basemem, &extmem);
+	basemem_setup();
+
+	physmap[0] = 0;
+	physmap[1] = basemem * 1024;
+	physmap_idx = 2;
+	physmap[physmap_idx] = 0x100000;
+	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
+
+	/*
+	 * Now, physmap contains a map of physical memory.
+	 */
+
+#ifdef SMP
+	/* make hole for AP bootstrap code */
+	physmap[1] = mp_bootaddress(physmap[1]);
+#endif
+
+	/*
+	 * Maxmem isn't the "maximum memory", it's one larger than the
+	 * highest page of the physical address space.  It should be
+	 * called something like "Maxphyspage".  We may adjust this 
+	 * based on ``hw.physmem'' and the results of the memory test.
+	 */
+	Maxmem = atop(physmap[physmap_idx + 1]);
+
+#ifdef MAXMEM
+	Maxmem = MAXMEM / 4;
+#endif
+
+	if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
+		Maxmem = atop(physmem_tunable);
+
+	/*
+	 * By default keep the memtest enabled.  Use a general name so that
+	 * one could eventually do more with the code than just disable it.
+	 */
+	memtest = 1;
+	TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);
+
+	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
+	    (boothowto & RB_VERBOSE))
+		printf("Physical memory use set to %ldK\n", Maxmem * 4);
+
+	/*
+	 * If Maxmem has been increased beyond what the system has detected,
+	 * extend the last memory segment to the new limit.
+	 */ 
+	if (atop(physmap[physmap_idx + 1]) < Maxmem)
+		physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
+
+	/*
+	 * We need to divide chunk if Maxmem is larger than 16MB and
+	 * under 16MB area is not full of memory.
+	 * (1) system area (15-16MB region) is cut off
+	 * (2) extended memory is only over 16MB area (ex. Melco "HYPERMEMORY")
+	 */
+	if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) {
+		/* 15M - 16M region is cut off, so need to divide chunk */
+		physmap[physmap_idx + 1] = under16 * 1024;
+		physmap_idx += 2;
+		physmap[physmap_idx] = 0x1000000;
+		physmap[physmap_idx + 1] = physmap[2] + extmem * 1024;
+	}
+
+	/* call pmap initialization to make new kernel address space */
+	pmap_bootstrap(first);
+
+	/*
+	 * Size up each available chunk of physical memory.
+	 */
+	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
+	pa_indx = 0;
+	da_indx = 1;
+	phys_avail[pa_indx++] = physmap[0];
+	phys_avail[pa_indx] = physmap[0];
+	dump_avail[da_indx] = physmap[0];
+	pte = CMAP3;
+
+	/*
+	 * Get dcons buffer address
+	 */
+	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
+	    getenv_quad("dcons.size", &dcons_size) == 0)
+		dcons_addr = 0;
+
+	/*
+	 * physmap is in bytes, so when converting to page boundaries,
+	 * round up the start address and round down the end address.
+	 */
+	for (i = 0; i <= physmap_idx; i += 2) {
+		vm_paddr_t end;
+
+		end = ptoa((vm_paddr_t)Maxmem);
+		if (physmap[i + 1] < end)
+			end = trunc_page(physmap[i + 1]);
+		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
+			int tmp, page_bad, full;
+			int *ptr = (int *)CADDR3;
+
+			full = FALSE;
+			/*
+			 * block out kernel memory as not available.
+			 */
+			if (pa >= KERNLOAD && pa < first)
+				goto do_dump_avail;
+
+			/*
+			 * block out dcons buffer
+			 */
+			if (dcons_addr > 0
+			    && pa >= trunc_page(dcons_addr)
+			    && pa < dcons_addr + dcons_size)
+				goto do_dump_avail;
+
+			page_bad = FALSE;
+			if (memtest == 0)
+				goto skip_memtest;
+
+			/*
+			 * map page into kernel: valid, read/write,non-cacheable
+			 */
+			*pte = pa | PG_V | PG_RW | pg_n;
+			invltlb();
+
+			tmp = *(int *)ptr;
+			/*
+			 * Test for alternating 1's and 0's
+			 */
+			*(volatile int *)ptr = 0xaaaaaaaa;
+			if (*(volatile int *)ptr != 0xaaaaaaaa)
+				page_bad = TRUE;
+			/*
+			 * Test for alternating 0's and 1's
+			 */
+			*(volatile int *)ptr = 0x55555555;
+			if (*(volatile int *)ptr != 0x55555555)
+				page_bad = TRUE;
+			/*
+			 * Test for all 1's
+			 */
+			*(volatile int *)ptr = 0xffffffff;
+			if (*(volatile int *)ptr != 0xffffffff)
+				page_bad = TRUE;
+			/*
+			 * Test for all 0's
+			 */
+			*(volatile int *)ptr = 0x0;
+			if (*(volatile int *)ptr != 0x0)
+				page_bad = TRUE;
+			/*
+			 * Restore original value.
+			 */
+			*(int *)ptr = tmp;
+
+skip_memtest:
+			/*
+			 * Adjust array of valid/good pages.
+			 */
+			if (page_bad == TRUE)
+				continue;
+			/*
+			 * If this good page is a continuation of the
+			 * previous set of good pages, then just increase
+			 * the end pointer. Otherwise start a new chunk.
+			 * Note that "end" points one higher than end,
+			 * making the range >= start and < end.
+			 * If we're also doing a speculative memory
+			 * test and we are at or past the end, bump up Maxmem
+			 * so that we keep going. The first bad page
+			 * will terminate the loop.
+			 */
+			if (phys_avail[pa_indx] == pa) {
+				phys_avail[pa_indx] += PAGE_SIZE;
+			} else {
+				pa_indx++;
+				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
+					printf(
+		"Too many holes in the physical address space, giving up\n");
+					pa_indx--;
+					full = TRUE;
+					goto do_dump_avail;
+				}
+				phys_avail[pa_indx++] = pa;	/* start */
+				phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
+			}
+			physmem++;
+do_dump_avail:
+			if (dump_avail[da_indx] == pa) {
+				dump_avail[da_indx] += PAGE_SIZE;
+			} else {
+				da_indx++;
+				if (da_indx == DUMP_AVAIL_ARRAY_END) {
+					da_indx--;
+					goto do_next;
+				}
+				dump_avail[da_indx++] = pa;	/* start */
+				dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
+			}
+do_next:
+			if (full)
+				break;
+		}
+	}
+	*pte = 0;
+	invltlb();
+	
+	/*
+	 * XXX
+	 * The last chunk must contain at least one page plus the message
+	 * buffer to avoid complicating other code (message buffer address
+	 * calculation, etc.).
+	 */
+	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
+	    round_page(msgbufsize) >= phys_avail[pa_indx]) {
+		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
+		phys_avail[pa_indx--] = 0;
+		phys_avail[pa_indx--] = 0;
+	}
+
+	Maxmem = atop(phys_avail[pa_indx]);
+
+	/* Trim off space for the message buffer. */
+	phys_avail[pa_indx] -= round_page(msgbufsize);
+
+	/* Map the message buffer. */
+	for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
+		pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
+		    off);
+
+	PT_UPDATES_FLUSH();
+}
+#else /* PC98 */
+static void
+getmemsize(int first)
+{
 	int has_smap, off, physmap_idx, pa_indx, da_indx;
 	u_long physmem_tunable, memtest;
 	vm_paddr_t physmap[PHYSMAP_SIZE];
@@ -2156,7 +2494,7 @@
 	pt_entry_t *pte;
 	quad_t dcons_addr, dcons_size;
 #ifndef XEN
-	int hasbrokenint12, i;
+	int hasbrokenint12, i, res;
 	u_int extmem;
 	struct vm86frame vmf;
 	struct vm86context vmc;
@@ -2241,7 +2579,8 @@
 	pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT);
 	vmc.npages = 0;
 	smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
-	vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
+	res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
+	KASSERT(res != 0, ("vm86_getptr() failed: address not found"));
 
 	vmf.vmf_ebx = 0;
 	do {
@@ -2391,7 +2730,7 @@
 	phys_avail[pa_indx++] = physmap[0];
 	phys_avail[pa_indx] = physmap[0];
 	dump_avail[da_indx] = physmap[0];
-	pte = CMAP1;
+	pte = CMAP3;
 
 	/*
 	 * Get dcons buffer address
@@ -2413,7 +2752,7 @@
 			end = trunc_page(physmap[i + 1]);
 		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 			int tmp, page_bad, full;
-			int *ptr = (int *)CADDR1;
+			int *ptr = (int *)CADDR3;
 
 			full = FALSE;
 			/*
@@ -2554,18 +2893,21 @@
 
 	PT_UPDATES_FLUSH();
 }
+#endif /* PC98 */
 
 #ifdef XEN
 #define MTOPSIZE (1<<(14 + PAGE_SHIFT))
 
-void
+register_t
 init386(first)
 	int first;
 {
 	unsigned long gdtmachpfn;
 	int error, gsel_tss, metadata_missing, x, pa;
-	size_t kstack0_sz;
 	struct pcpu *pc;
+#ifdef CPU_ENABLE_SSE
+	struct xstate_hdr *xhdr;
+#endif
 	struct callback_register event = {
 		.type = CALLBACKTYPE_event,
 		.address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback },
@@ -2577,8 +2919,6 @@
 
 	thread0.td_kstack = proc0kstack;
 	thread0.td_kstack_pages = KSTACK_PAGES;
-	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
-	thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1;
 
 	/*
  	 * This may be done better later if it gets more high level
@@ -2658,7 +2998,6 @@
 
 	PCPU_SET(prvspace, pc);
 	PCPU_SET(curthread, &thread0);
-	PCPU_SET(curpcb, thread0.td_pcb);
 
 	/*
 	 * Initialize mutexes.
@@ -2739,16 +3078,8 @@
 	setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	initializecpu();	/* Initialize CPU registers */
+	initializecpucache();
 
-	/* make an initial tss so cpu can get interrupt stack on syscall! */
-	/* Note: -16 is so we can grow the trapframe if we came from vm86 */
-	PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
-	    kstack0_sz - sizeof(struct pcb) - 16);
-	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
-	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
-	HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
-	    PCPU_GET(common_tss.tss_esp0));
-	
 	/* pointer to selector slot for %fs/%gs */
 	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
 
@@ -2756,7 +3087,7 @@
 	    dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
 	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
 	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	dblfault_tss.tss_cr3 = (int)IdlePDPT;
 #else
 	dblfault_tss.tss_cr3 = (int)IdlePTD;
@@ -2776,6 +3107,32 @@
 	/* now running on new page tables, configured,and u/iom is accessible */
 
 	msgbufinit(msgbufp, msgbufsize);
+#ifdef DEV_NPX
+	npxinit(true);
+#endif
+	/*
+	 * Set up thread0 pcb after npxinit calculated pcb + fpu save
+	 * area size.  Zero out the extended state header in fpu save
+	 * area.
+	 */
+	thread0.td_pcb = get_pcb_td(&thread0);
+	bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
+#ifdef CPU_ENABLE_SSE
+	if (use_xsave) {
+		xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
+		    1);
+		xhdr->xstate_bv = xsave_mask;
+	}
+#endif
+	PCPU_SET(curpcb, thread0.td_pcb);
+	/* make an initial tss so cpu can get interrupt stack on syscall! */
+	/* Note: -16 is so we can grow the trapframe if we came from vm86 */
+	PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
+	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
+	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+	HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
+	    PCPU_GET(common_tss.tss_esp0));
+	
 	/* transfer to user mode */
 
 	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
@@ -2783,7 +3140,7 @@
 
 	/* setup proc 0's pcb */
 	thread0.td_pcb->pcb_flags = 0;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
 #else
 	thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
@@ -2794,23 +3151,24 @@
 	thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
 
 	cpu_probe_amdc1e();
-	cpu_probe_cmpxchg8b();
+
+	/* Location of kernel stack for locore */
+	return ((register_t)thread0.td_pcb);
 }
 
 #else
-void
-init386(first)
-	int first;
+register_t
+init386(int first)
 {
 	struct gate_descriptor *gdp;
 	int gsel_tss, metadata_missing, x, pa;
-	size_t kstack0_sz;
 	struct pcpu *pc;
+#ifdef CPU_ENABLE_SSE
+	struct xstate_hdr *xhdr;
+#endif
 
 	thread0.td_kstack = proc0kstack;
-	thread0.td_kstack_pages = KSTACK_PAGES;
-	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
-	thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1;
+	thread0.td_kstack_pages = TD0_KSTACK_PAGES;
 
 	/*
  	 * This may be done better later if it gets more high level
@@ -2818,6 +3176,13 @@
 	 */
 	proc_linkup0(&proc0, &thread0);
 
+#ifdef PC98
+	/*
+	 * Initialize DMAC
+	 */
+	pc98_init_dmac();
+#endif
+
 	metadata_missing = 0;
 	if (bootinfo.bi_modulep) {
 		preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
@@ -2825,11 +3190,16 @@
 	} else {
 		metadata_missing = 1;
 	}
-	if (envmode == 1)
-		kern_envp = static_env;
-	else if (bootinfo.bi_envp)
-		kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
 
+	if (bootinfo.bi_envp)
+		init_static_kenv((caddr_t)bootinfo.bi_envp + KERNBASE, 0);
+	else
+		init_static_kenv(NULL, 0);
+
+#ifndef XEN
+	identify_hypervisor();
+#endif
+
 	/* Init basic tunables, hz etc */
 	init_param1();
 
@@ -2864,7 +3234,6 @@
 	first += DPCPU_SIZE;
 	PCPU_SET(prvspace, pc);
 	PCPU_SET(curthread, &thread0);
-	PCPU_SET(curpcb, thread0.td_pcb);
 
 	/*
 	 * Initialize mutexes.
@@ -2934,6 +3303,10 @@
 	setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 #endif
+#ifdef XENHVM
+	setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_UPL,
+	    GSEL(GCODE_SEL, SEL_KPL));
+#endif
 
 	r_idt.rd_limit = sizeof(idt0) - 1;
 	r_idt.rd_base = (int) idt;
@@ -2967,6 +3340,40 @@
 	 */
 	i8254_init();
 
+	finishidentcpu();	/* Final stage of CPU initialization */
+	setidt(IDT_UD, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
+	    GSEL(GCODE_SEL, SEL_KPL));
+	setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
+	    GSEL(GCODE_SEL, SEL_KPL));
+	initializecpu();	/* Initialize CPU registers */
+	initializecpucache();
+
+	/* pointer to selector slot for %fs/%gs */
+	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
+
+	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
+	    dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
+	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
+	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
+#if defined(PAE) || defined(PAE_TABLES)
+	dblfault_tss.tss_cr3 = (int)IdlePDPT;
+#else
+	dblfault_tss.tss_cr3 = (int)IdlePTD;
+#endif
+	dblfault_tss.tss_eip = (int)dblfault_handler;
+	dblfault_tss.tss_eflags = PSL_KERNEL;
+	dblfault_tss.tss_ds = dblfault_tss.tss_es =
+	    dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
+	dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
+	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
+	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+
+	vm86_initialize();
+	getmemsize(first);
+	init_param2(physmem);
+
+	/* now running on new page tables, configured, and u/iom is accessible */
+
 	/*
 	 * Initialize the console before we print anything out.
 	 */
@@ -2977,7 +3384,9 @@
 
 #ifdef DEV_ISA
 #ifdef DEV_ATPIC
+#ifndef PC98
 	elcr_probe();
+#endif
 	atpic_startup();
 #else
 	/* Reset and mask the atpics and leave them shut down. */
@@ -3006,17 +3415,29 @@
 		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
 #endif
 
-	finishidentcpu();	/* Final stage of CPU initialization */
-	setidt(IDT_UD, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
-	    GSEL(GCODE_SEL, SEL_KPL));
-	setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
-	    GSEL(GCODE_SEL, SEL_KPL));
-	initializecpu();	/* Initialize CPU registers */
-
+	msgbufinit(msgbufp, msgbufsize);
+#ifdef DEV_NPX
+	npxinit(true);
+#endif
+	/*
+	 * Set up thread0 pcb after npxinit calculated pcb + fpu save
+	 * area size.  Zero out the extended state header in fpu save
+	 * area.
+	 */
+	thread0.td_pcb = get_pcb_td(&thread0);
+	thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
+	bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
+#ifdef CPU_ENABLE_SSE
+	if (use_xsave) {
+		xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
+		    1);
+		xhdr->xstate_bv = xsave_mask;
+	}
+#endif
+	PCPU_SET(curpcb, thread0.td_pcb);
 	/* make an initial tss so cpu can get interrupt stack on syscall! */
 	/* Note: -16 is so we can grow the trapframe if we came from vm86 */
-	PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
-	    kstack0_sz - sizeof(struct pcb) - 16);
+	PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
@@ -3024,34 +3445,6 @@
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	ltr(gsel_tss);
 
-	/* pointer to selector slot for %fs/%gs */
-	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
-
-	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
-	    dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
-	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
-	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
-#ifdef PAE
-	dblfault_tss.tss_cr3 = (int)IdlePDPT;
-#else
-	dblfault_tss.tss_cr3 = (int)IdlePTD;
-#endif
-	dblfault_tss.tss_eip = (int)dblfault_handler;
-	dblfault_tss.tss_eflags = PSL_KERNEL;
-	dblfault_tss.tss_ds = dblfault_tss.tss_es =
-	    dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
-	dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
-	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
-	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
-
-	vm86_initialize();
-	getmemsize(first);
-	init_param2(physmem);
-
-	/* now running on new page tables, configured,and u/iom is accessible */
-
-	msgbufinit(msgbufp, msgbufsize);
-
 	/* make a call gate to reenter kernel with */
 	gdp = &ldt[LSYS5CALLS_SEL].gd;
 
@@ -3076,7 +3469,7 @@
 
 	/* setup proc 0's pcb */
 	thread0.td_pcb->pcb_flags = 0;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
 #else
 	thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
@@ -3085,7 +3478,13 @@
 	thread0.td_frame = &proc0_tf;
 
 	cpu_probe_amdc1e();
-	cpu_probe_cmpxchg8b();
+
+#ifdef FDT
+	x86_init_fdt();
+#endif
+
+	/* Location of kernel stack for locore */
+	return ((register_t)thread0.td_pcb);
 }
 #endif
 
@@ -3096,6 +3495,46 @@
 	pcpu->pc_acpi_id = 0xffffffff;
 }
 
+#ifndef PC98
+static int
+smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+	struct bios_smap *smapbase;
+	struct bios_smap_xattr smap;
+	caddr_t kmdp;
+	uint32_t *smapattr;
+	int count, error, i;
+
+	/* Retrieve the system memory map from the loader. */
+	kmdp = preload_search_by_type("elf kernel");
+	if (kmdp == NULL)
+		kmdp = preload_search_by_type("elf32 kernel");
+	if (kmdp == NULL)
+		return (0);
+	smapbase = (struct bios_smap *)preload_search_info(kmdp,
+	    MODINFO_METADATA | MODINFOMD_SMAP);
+	if (smapbase == NULL)
+		return (0);
+	smapattr = (uint32_t *)preload_search_info(kmdp,
+	    MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
+	count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase);
+	error = 0;
+	for (i = 0; i < count; i++) {
+		smap.base = smapbase[i].base;
+		smap.length = smapbase[i].length;
+		smap.type = smapbase[i].type;
+		if (smapattr != NULL)
+			smap.xattr = smapattr[i];
+		else
+			smap.xattr = 0;
+		error = SYSCTL_OUT(req, &smap, sizeof(smap));
+	}
+	return (error);
+}
+SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
+    smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data");
+#endif /* !PC98 */
+
 void
 spinlock_enter(void)
 {
@@ -3143,9 +3582,9 @@
 
 	printf("Intel Pentium detected, installing workaround for F00F bug\n");
 
-	tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2);
+	tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO);
 	if (tmp == 0)
-		panic("kmem_alloc returned 0");
+		panic("kmem_malloc returned 0");
 
 	/* Put the problematic entry (#6) at the end of the lower page. */
 	new_idt = (struct gate_descriptor*)
@@ -3154,9 +3593,7 @@
 	r_idt.rd_base = (u_int)new_idt;
 	lidt(&r_idt);
 	idt = new_idt;
-	if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
-			   VM_PROT_READ, FALSE) != KERN_SUCCESS)
-		panic("vm_map_protect failed");
+	pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ);
 }
 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
 
@@ -3177,6 +3614,7 @@
 	pcb->pcb_ebx = tf->tf_ebx;
 	pcb->pcb_eip = tf->tf_eip;
 	pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8;
+	pcb->pcb_gs = rgs();
 }
 
 int
@@ -3330,11 +3768,11 @@
 #endif
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr)
-		fill_fpregs_xmm(&td->td_pcb->pcb_user_save.sv_xmm,
+		fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm,
 		    (struct save87 *)fpregs);
 	else
 #endif /* CPU_ENABLE_SSE */
-		bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs,
+		bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs,
 		    sizeof(*fpregs));
 	return (0);
 }
@@ -3346,10 +3784,10 @@
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr)
 		set_fpregs_xmm((struct save87 *)fpregs,
-		    &td->td_pcb->pcb_user_save.sv_xmm);
+		    &get_pcb_user_save_td(td)->sv_xmm);
 	else
 #endif /* CPU_ENABLE_SSE */
-		bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87,
+		bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87,
 		    sizeof(*fpregs));
 #ifdef DEV_NPX
 	npxuserinited(td);
@@ -3395,12 +3833,14 @@
 	mcp->mc_esp = tp->tf_esp;
 	mcp->mc_ss = tp->tf_ss;
 	mcp->mc_len = sizeof(*mcp);
-	get_fpcontext(td, mcp);
+	get_fpcontext(td, mcp, NULL, 0);
 	sdp = &td->td_pcb->pcb_fsd;
 	mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
 	sdp = &td->td_pcb->pcb_gsd;
 	mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
 	mcp->mc_flags = 0;
+	mcp->mc_xfpustate = 0;
+	mcp->mc_xfpustate_len = 0;
 	bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
 	return (0);
 }
@@ -3412,40 +3852,57 @@
  * touch the cs selector.
  */
 int
-set_mcontext(struct thread *td, const mcontext_t *mcp)
+set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	struct trapframe *tp;
+	char *xfpustate;
 	int eflags, ret;
 
 	tp = td->td_frame;
-	if (mcp->mc_len != sizeof(*mcp))
+	if (mcp->mc_len != sizeof(*mcp) ||
+	    (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
 		return (EINVAL);
 	eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
 	    (tp->tf_eflags & ~PSL_USERCHANGE);
-	if ((ret = set_fpcontext(td, mcp)) == 0) {
-		tp->tf_fs = mcp->mc_fs;
-		tp->tf_es = mcp->mc_es;
-		tp->tf_ds = mcp->mc_ds;
-		tp->tf_edi = mcp->mc_edi;
-		tp->tf_esi = mcp->mc_esi;
-		tp->tf_ebp = mcp->mc_ebp;
-		tp->tf_ebx = mcp->mc_ebx;
-		tp->tf_edx = mcp->mc_edx;
-		tp->tf_ecx = mcp->mc_ecx;
-		tp->tf_eax = mcp->mc_eax;
-		tp->tf_eip = mcp->mc_eip;
-		tp->tf_eflags = eflags;
-		tp->tf_esp = mcp->mc_esp;
-		tp->tf_ss = mcp->mc_ss;
-		td->td_pcb->pcb_gs = mcp->mc_gs;
-		ret = 0;
-	}
-	return (ret);
+	if (mcp->mc_flags & _MC_HASFPXSTATE) {
+		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
+		    sizeof(union savefpu))
+			return (EINVAL);
+		xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+		ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
+		    mcp->mc_xfpustate_len);
+		if (ret != 0)
+			return (ret);
+	} else
+		xfpustate = NULL;
+	ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
+	if (ret != 0)
+		return (ret);
+	tp->tf_fs = mcp->mc_fs;
+	tp->tf_es = mcp->mc_es;
+	tp->tf_ds = mcp->mc_ds;
+	tp->tf_edi = mcp->mc_edi;
+	tp->tf_esi = mcp->mc_esi;
+	tp->tf_ebp = mcp->mc_ebp;
+	tp->tf_ebx = mcp->mc_ebx;
+	tp->tf_edx = mcp->mc_edx;
+	tp->tf_ecx = mcp->mc_ecx;
+	tp->tf_eax = mcp->mc_eax;
+	tp->tf_eip = mcp->mc_eip;
+	tp->tf_eflags = eflags;
+	tp->tf_esp = mcp->mc_esp;
+	tp->tf_ss = mcp->mc_ss;
+	td->td_pcb->pcb_gs = mcp->mc_gs;
+	return (0);
 }
 
 static void
-get_fpcontext(struct thread *td, mcontext_t *mcp)
+get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
+    size_t xfpusave_len)
 {
+#ifdef CPU_ENABLE_SSE
+	size_t max_len, len;
+#endif
 
 #ifndef DEV_NPX
 	mcp->mc_fpformat = _MC_FPFMT_NODEV;
@@ -3453,15 +3910,30 @@
 	bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
 #else
 	mcp->mc_ownedfp = npxgetregs(td);
-	bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate[0],
+	bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
 	    sizeof(mcp->mc_fpstate));
 	mcp->mc_fpformat = npxformat();
+#ifdef CPU_ENABLE_SSE
+	if (!use_xsave || xfpusave_len == 0)
+		return;
+	max_len = cpu_max_ext_state_size - sizeof(union savefpu);
+	len = xfpusave_len;
+	if (len > max_len) {
+		len = max_len;
+		bzero(xfpusave + max_len, len - max_len);
+	}
+	mcp->mc_flags |= _MC_HASFPXSTATE;
+	mcp->mc_xfpustate_len = len;
+	bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 #endif
+#endif
 }
 
 static int
-set_fpcontext(struct thread *td, const mcontext_t *mcp)
+set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
+    size_t xfpustate_len)
 {
+	int error;
 
 	if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 		return (0);
@@ -3468,22 +3940,21 @@
 	else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
 	    mcp->mc_fpformat != _MC_FPFMT_XMM)
 		return (EINVAL);
-	else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
+	else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
 		/* We don't care what state is left in the FPU or PCB. */
 		fpstate_drop(td);
-	else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
+		error = 0;
+	} else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
 	    mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 #ifdef DEV_NPX
-#ifdef CPU_ENABLE_SSE
-		if (cpu_fxsr)
-			((union savefpu *)&mcp->mc_fpstate)->sv_xmm.sv_env.
-			    en_mxcsr &= cpu_mxcsr_mask;
+		error = npxsetregs(td, (union savefpu *)&mcp->mc_fpstate,
+		    xfpustate, xfpustate_len);
+#else
+		error = EINVAL;
 #endif
-		npxsetregs(td, (union savefpu *)&mcp->mc_fpstate);
-#endif
 	} else
 		return (EINVAL);
-	return (0);
+	return (error);
 }
 
 static void

Modified: trunk/sys/i386/i386/mem.c
===================================================================
--- trunk/sys/i386/i386/mem.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/mem.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1988 University of Utah.
  * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
@@ -37,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/mem.c 309426 2016-12-02 19:02:12Z jhb $");
 
 /*
  * Memory special file
@@ -86,10 +87,6 @@
 	int error = 0;
 	vm_offset_t addr;
 
-	/* XXX UPS Why ? */
-	GIANT_REQUIRED;
-
-
 	if (dev2unit(dev) != CDEV_MINOR_MEM && dev2unit(dev) != CDEV_MINOR_KMEM)
 		return EIO;
 
@@ -112,8 +109,11 @@
 			continue;
 		}
 		if (dev2unit(dev) == CDEV_MINOR_MEM) {
-			pa = uio->uio_offset;
-			pa &= ~PAGE_MASK;
+			if (uio->uio_offset > cpu_getmaxphyaddr()) {
+				error = EFAULT;
+				break;
+			}
+			pa = trunc_page(uio->uio_offset);
 		} else {
 			/*
 			 * Extract the physical page since the mapping may
@@ -165,9 +165,11 @@
 memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
     int prot __unused, vm_memattr_t *memattr __unused)
 {
-	if (dev2unit(dev) == CDEV_MINOR_MEM)
+	if (dev2unit(dev) == CDEV_MINOR_MEM) {
+		if (offset > cpu_getmaxphyaddr())
+			return (-1);
 		*paddr = offset;
-	else if (dev2unit(dev) == CDEV_MINOR_KMEM)
+	} else if (dev2unit(dev) == CDEV_MINOR_KMEM)
         	*paddr = vtophys(offset);
 	/* else panic! */
 	return (0);

Modified: trunk/sys/i386/i386/minidump_machdep.c
===================================================================
--- trunk/sys/i386/i386/minidump_machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/minidump_machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2006 Peter Wemm
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/minidump_machdep.c 282065 2015-04-27 08:02:12Z kib $");
 
 #include "opt_watchdog.h"
 
@@ -265,7 +266,7 @@
 	mdhdr.bitmapsize = vm_page_dump_size;
 	mdhdr.ptesize = ptesize;
 	mdhdr.kernbase = KERNBASE;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	mdhdr.paemode = 1;
 #endif
 

Modified: trunk/sys/i386/i386/mp_clock.c
===================================================================
--- trunk/sys/i386/i386/mp_clock.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/mp_clock.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * ----------------------------------------------------------------------------
  * "THE BEER-WARE LICENSE" (Revision 42):
@@ -8,7 +9,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/mp_clock.c 170289 2007-06-04 18:25:08Z dwmalone $");
 
 /*-
  * Just when we thought life were beautiful, reality pops its grim face over

Modified: trunk/sys/i386/i386/mp_machdep.c
===================================================================
--- trunk/sys/i386/i386/mp_machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/mp_machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1996, by Steve Passe
  * All rights reserved.
@@ -24,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/mp_machdep.c 331910 2018-04-03 07:52:06Z avg $");
 
 #include "opt_apic.h"
 #include "opt_cpu.h"
@@ -81,6 +82,7 @@
 #include <machine/psl.h>
 #include <machine/smp.h>
 #include <machine/specialreg.h>
+#include <machine/cpu.h>
 
 #define WARMBOOT_TARGET		0
 #define WARMBOOT_OFF		(KERNBASE + 0x0467)
@@ -145,11 +147,8 @@
 void *bootstacks[MAXCPU];
 static void *dpcpu;
 
-/* Hotwire a 0->4MB V==P mapping */
-extern pt_entry_t *KPTphys;
-
 struct pcb stoppcbs[MAXCPU];
-struct pcb **susppcbs = NULL;
+struct susppcb **susppcbs;
 
 /* Variables needed for SMP tlb shootdown. */
 vm_offset_t smp_tlb_addr1;
@@ -169,6 +168,11 @@
 static u_long *ipi_hardclock_counts[MAXCPU];
 #endif
 
+/* Default cpu_ops implementation. */
+struct cpu_ops cpu_ops = {
+	.ipi_vectored = lapic_ipi_vectored
+};
+
 /*
  * Local data and functions.
  */
@@ -175,6 +179,9 @@
 
 static volatile cpuset_t ipi_nmi_pending;
 
+volatile cpuset_t resuming_cpus;
+volatile cpuset_t toresume_cpus;
+
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
 
@@ -195,7 +202,7 @@
 int apic_cpuids[MAX_APIC_ID + 1];
 
 /* Holds pending bitmap based IPIs per CPU */
-static volatile u_int cpu_ipi_pending[MAXCPU];
+volatile u_int cpu_ipi_pending[MAXCPU];
 
 static u_int boot_address;
 static int cpu_logical;			/* logical cpus per core */
@@ -624,7 +631,7 @@
 	const char *hyperthread;
 	int i;
 
-	printf("MidnightBSD/SMP: %d package(s) x %d core(s)",
+	printf("FreeBSD/SMP: %d package(s) x %d core(s)",
 	    mp_ncpus / (cpu_cores * cpu_logical), cpu_cores);
 	if (hyperthreading_cpus > 1)
 	    printf(" x %d HTT threads", cpu_logical);
@@ -681,6 +688,8 @@
 	pc->pc_prvspace = pc;
 	pc->pc_curthread = 0;
 
+	fix_cpuid();
+
 	gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
 	gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
 
@@ -744,22 +753,15 @@
 	/* set up CPU registers and state */
 	cpu_setregs();
 
+	/* set up SSE/NX */
+	initializecpu();
+
 	/* set up FPU state on the AP */
-	npxinit();
+	npxinit(false);
 
-	/* set up SSE registers */
-	enable_sse();
+	if (cpu_ops.cpu_init)
+		cpu_ops.cpu_init();
 
-#ifdef PAE
-	/* Enable the PTE no-execute bit. */
-	if ((amd_feature & AMDID_NX) != 0) {
-		uint64_t msr;
-
-		msr = rdmsr(MSR_EFER) | EFER_NXE;
-		wrmsr(MSR_EFER, msr);
-	}
-#endif
-
 	/* A quick check from sanity claus */
 	cpuid = PCPU_GET(cpuid);
 	if (PCPU_GET(apic_id) != lapic_id()) {
@@ -799,7 +801,6 @@
 	if (smp_cpus == mp_ncpus) {
 		/* enable IPI's, tlb shootdown, freezes etc */
 		atomic_store_rel_int(&smp_started, 1);
-		smp_active = 1;	 /* historic */
 	}
 
 	mtx_unlock_spin(&ap_boot_mtx);
@@ -826,6 +827,8 @@
  * We tell the I/O APIC code about all the CPUs we want to receive
  * interrupts.  If we don't want certain CPUs to receive IRQs we
  * can simply not tell the I/O APIC code about them in this function.
+ * We also do not tell it about the BSP since it tells itself about
+ * the BSP internally to work with UP kernels and on UP machines.
  */
 static void
 set_interrupt_apic_ids(void)
@@ -836,6 +839,8 @@
 		apic_id = cpu_apic_ids[i];
 		if (apic_id == -1)
 			continue;
+		if (cpu_info[apic_id].cpu_bsp)
+			continue;
 		if (cpu_info[apic_id].cpu_disabled)
 			continue;
 
@@ -931,7 +936,6 @@
 #ifndef PC98
 	u_char mpbiosreason;
 #endif
-	uintptr_t kptbase;
 	u_int32_t mpbioswarmvec;
 	int apic_id, cpu, i;
 
@@ -949,11 +953,8 @@
 
 	/* set up temporary P==V mapping for AP boot */
 	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
-
-	kptbase = (uintptr_t)(void *)KPTphys;
 	for (i = TMPMAP_START; i < NKPT; i++)
-		PTD[i] = (pd_entry_t)(PG_V | PG_RW |
-		    ((kptbase + i * PAGE_SIZE) & PG_FRAME));
+		PTD[i] = PTD[KPTDI + i];
 	invltlb();
 
 	/* start each AP */
@@ -962,8 +963,10 @@
 
 		/* allocate and set up a boot stack data page */
 		bootstacks[cpu] =
-		    (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
-		dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE);
+		    (char *)kmem_malloc(kernel_arena, KSTACK_PAGES * PAGE_SIZE,
+		    M_WAITOK | M_ZERO);
+		dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
+		    M_WAITOK | M_ZERO);
 		/* setup a vector to our boot code */
 		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
 		*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
@@ -1088,57 +1091,8 @@
 	/* used as a watchpoint to signal AP startup */
 	cpus = mp_naps;
 
-	/*
-	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
-	 * and running the target CPU. OR this INIT IPI might be latched (P5
-	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
-	 * ignored.
-	 */
+	ipi_startup(apic_id, vector);
 
-	/* do an INIT IPI: assert RESET */
-	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
-	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
-
-	/* wait for pending status end */
-	lapic_ipi_wait(-1);
-
-	/* do an INIT IPI: deassert RESET */
-	lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
-	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
-
-	/* wait for pending status end */
-	DELAY(10000);		/* wait ~10mS */
-	lapic_ipi_wait(-1);
-
-	/*
-	 * next we do a STARTUP IPI: the previous INIT IPI might still be
-	 * latched, (P5 bug) this 1st STARTUP would then terminate
-	 * immediately, and the previously started INIT IPI would continue. OR
-	 * the previous INIT IPI has already run. and this STARTUP IPI will
-	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
-	 * will run.
-	 */
-
-	/* do a STARTUP IPI */
-	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
-	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
-	    vector, apic_id);
-	lapic_ipi_wait(-1);
-	DELAY(200);		/* wait ~200uS */
-
-	/*
-	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
-	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
-	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
-	 * recognized after hardware RESET or INIT IPI.
-	 */
-
-	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
-	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
-	    vector, apic_id);
-	lapic_ipi_wait(-1);
-	DELAY(200);		/* wait ~200uS */
-
 	/* Wait up to 5 seconds for it to start. */
 	for (ms = 0; ms < 5000; ms++) {
 		if (mp_naps > cpus)
@@ -1185,6 +1139,69 @@
 #endif /* COUNT_XINVLTLB_HITS */
 
 /*
+ * Init and startup IPI.
+ */
+void
+ipi_startup(int apic_id, int vector)
+{
+
+	/*
+	 * This attempts to follow the algorithm described in the
+	 * Intel Multiprocessor Specification v1.4 in section B.4.
+	 * For each IPI, we allow the local APIC ~20us to deliver the
+	 * IPI.  If delivery of a STARTUP IPI times out, we panic.
+	 */
+
+	/*
+	 * first we do an INIT IPI: this INIT IPI might be run, resetting
+	 * and running the target CPU. OR this INIT IPI might be latched (P5
+	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
+	 * ignored.
+	 */
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
+	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
+	lapic_ipi_wait(100);
+
+	/* Explicitly deassert the INIT IPI. */
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
+	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
+	    apic_id);
+
+	DELAY(10000);		/* wait ~10mS */
+
+	/*
+	 * next we do a STARTUP IPI: the previous INIT IPI might still be
+	 * latched (P5 bug), in which case this 1st STARTUP would terminate
+	 * immediately and the previously started INIT IPI would continue; OR
+	 * the previous INIT IPI has already run, and this STARTUP IPI will
+	 * run; OR the previous INIT IPI was ignored, and this STARTUP IPI
+	 * will run.
+	 */
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+	    vector, apic_id);
+	if (!lapic_ipi_wait(100))
+		panic("Failed to deliver first STARTUP IPI to APIC %d",
+		    apic_id);
+	DELAY(200);		/* wait ~200uS */
+
+	/*
+	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
+	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
+	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
+	 * recognized after hardware RESET or INIT IPI.
+	 */
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+	    vector, apic_id);
+	if (!lapic_ipi_wait(100))
+		panic("Failed to deliver second STARTUP IPI to APIC %d",
+		    apic_id);
+
+	DELAY(200);		/* wait ~200uS */
+}
+
+/*
  * Send an IPI to specified CPU handling the bitmap logic.
  */
 static void
@@ -1205,7 +1222,7 @@
 		if (old_pending)
 			return;
 	}
-	lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
+	cpu_ops.ipi_vectored(ipi, cpu_apic_ids[cpu]);
 }
 
 /*
@@ -1256,7 +1273,7 @@
 		ipi_all_but_self(vector);
 	} else {
 		ncpu = 0;
-		while ((cpu = cpusetobj_ffs(&mask)) != 0) {
+		while ((cpu = CPU_FFS(&mask)) != 0) {
 			cpu--;
 			CPU_CLR(cpu, &mask);
 			CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
@@ -1405,7 +1422,7 @@
 	if (ipi == IPI_STOP_HARD)
 		CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
 
-	while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
+	while ((cpu = CPU_FFS(&cpus)) != 0) {
 		cpu--;
 		CPU_CLR(cpu, &cpus);
 		CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
@@ -1456,7 +1473,7 @@
 		CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
 
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
-	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
+	cpu_ops.ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 }
 
 int
@@ -1517,30 +1534,114 @@
 {
 	u_int cpu;
 
+	mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
+
 	cpu = PCPU_GET(cpuid);
-
-	if (suspendctx(susppcbs[cpu])) {
+	if (savectx(&susppcbs[cpu]->sp_pcb)) {
+		npxsuspend(susppcbs[cpu]->sp_fpususpend);
 		wbinvd();
-		CPU_SET_ATOMIC(cpu, &stopped_cpus);
+		CPU_SET_ATOMIC(cpu, &suspended_cpus);
+		/*
+		 * Hack for xen, which does not use resumectx() so never
+		 * uses the next clause: set resuming_cpus early so that
+		 * resume_cpus() can wait on the same bitmap for acpi and
+		 * xen.  resuming_cpus now means eventually_resumable_cpus.
+		 */
+		CPU_SET_ATOMIC(cpu, &resuming_cpus);
 	} else {
+		npxresume(susppcbs[cpu]->sp_fpususpend);
 		pmap_init_pat();
+		initializecpu();
 		PCPU_SET(switchtime, 0);
 		PCPU_SET(switchticks, ticks);
-		susppcbs[cpu]->pcb_eip = 0;
+
+		/* Indicate that we are resuming */
+		CPU_CLR_ATOMIC(cpu, &suspended_cpus);
 	}
 
-	/* Wait for resume */
-	while (!CPU_ISSET(cpu, &started_cpus))
+	/* Wait for resume directive */
+	while (!CPU_ISSET(cpu, &toresume_cpus))
 		ia32_pause();
 
+	if (cpu_ops.cpu_resume)
+		cpu_ops.cpu_resume();
+
 	/* Resume MCA and local APIC */
 	mca_resume();
 	lapic_setup(0);
 
-	CPU_CLR_ATOMIC(cpu, &started_cpus);
-	CPU_CLR_ATOMIC(cpu, &stopped_cpus);
+	/* Indicate that we are resumed */
+	CPU_CLR_ATOMIC(cpu, &resuming_cpus);
+	CPU_CLR_ATOMIC(cpu, &suspended_cpus);
+	CPU_CLR_ATOMIC(cpu, &toresume_cpus);
 }
+
 /*
+ * Handlers for TLB related IPIs
+ */
+void
+invltlb_handler(void)
+{
+	uint64_t cr3;
+#ifdef COUNT_XINVLTLB_HITS
+	xhits_gbl[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+	cr3 = rcr3();
+	load_cr3(cr3);
+	atomic_add_int(&smp_tlb_wait, 1);
+}
+
+void
+invlpg_handler(void)
+{
+#ifdef COUNT_XINVLTLB_HITS
+	xhits_pg[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+	invlpg(smp_tlb_addr1);
+
+	atomic_add_int(&smp_tlb_wait, 1);
+}
+
+void
+invlrng_handler(void)
+{
+	vm_offset_t addr;
+#ifdef COUNT_XINVLTLB_HITS
+	xhits_rng[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+	addr = smp_tlb_addr1;
+	do {
+		invlpg(addr);
+		addr += PAGE_SIZE;
+	} while (addr < smp_tlb_addr2);
+
+	atomic_add_int(&smp_tlb_wait, 1);
+}
+
+void
+invlcache_handler(void)
+{
+#ifdef COUNT_IPIS
+	(*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+	wbinvd();
+	atomic_add_int(&smp_tlb_wait, 1);
+}
+
+/*
  * This is called once the rest of the system is up and running and we're
  * ready to let the AP's out of the pen.
  */

Modified: trunk/sys/i386/i386/mp_watchdog.c
===================================================================
--- trunk/sys/i386/i386/mp_watchdog.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/mp_watchdog.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2004 Robert N. M. Watson
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/mp_watchdog.c 314667 2017-03-04 13:03:31Z avg $
  */
 
 #include "opt_mp_watchdog.h"
@@ -86,7 +87,7 @@
 watchdog_init(void *arg)
 {
 
-	callout_init(&watchdog_callout, CALLOUT_MPSAFE);
+	callout_init(&watchdog_callout, 1);
 	if (watchdog_cpu != -1)
 		watchdog_change(watchdog_cpu);
 }

Modified: trunk/sys/i386/i386/mpboot.s
===================================================================
--- trunk/sys/i386/i386/mpboot.s	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/mpboot.s	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1995 Jack F. Vogel
  * All rights reserved.
@@ -26,7 +27,7 @@
  * mpboot.s:	FreeBSD machine support for the Intel MP Spec
  *		multiprocessor systems.
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/mpboot.s 282065 2015-04-27 08:02:12Z kib $
  */
 
 #include "opt_pmap.h"
@@ -99,7 +100,7 @@
 	movl	%eax,%cr4
 
 	/* Now enable paging mode */
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	movl	R(IdlePDPT), %eax
 	movl	%eax, %cr3
 	movl	%cr4, %eax


Property changes on: trunk/sys/i386/i386/mpboot.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/perfmon.c
===================================================================
--- trunk/sys/i386/i386/perfmon.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/perfmon.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright 1996 Massachusetts Institute of Technology
  *
@@ -28,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/perfmon.c 220433 2011-04-07 23:28:28Z jkim $");
 
 #include <sys/param.h>
 #include <sys/systm.h>

Modified: trunk/sys/i386/i386/pmap.c
===================================================================
--- trunk/sys/i386/i386/pmap.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/pmap.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
@@ -75,18 +76,11 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/pmap.c 321363 2017-07-22 06:40:57Z alc $");
 
 /*
  *	Manages physical address maps.
  *
- *	In addition to hardware address maps, this
- *	module is called upon to provide software-use-only
- *	maps which may or may not be stored in the same
- *	form as hardware maps.  These pseudo-maps are
- *	used to store intermediate results from copy
- *	operations to and from address spaces.
- *
  *	Since the information managed by this module is
  *	also stored by the logical address mapping module,
  *	this module may throw away valid virtual-to-physical
@@ -140,6 +134,8 @@
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_radix.h>
 #include <vm/vm_reserv.h>
 #include <vm/uma.h>
 
@@ -219,7 +215,7 @@
 extern u_int32_t KERNend;
 extern u_int32_t KPTphys;
 
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 pt_entry_t pg_nx;
 static uma_zone_t pdptzone;
 #endif
@@ -238,15 +234,18 @@
 static int pat_index[PAT_INDEX_SIZE];	/* cache mode to PAT index conversion */
 
 /*
- * Isolate the global pv list lock from data and other locks to prevent false
- * sharing within the cache.
+ * pmap_mapdev support pre initialization (i.e. console)
  */
-static struct {
-	struct rwlock	lock;
-	char		padding[CACHE_LINE_SIZE - sizeof(struct rwlock)];
-} pvh_global __aligned(CACHE_LINE_SIZE);
+#define	PMAP_PREINIT_MAPPING_COUNT	8
+static struct pmap_preinit_mapping {
+	vm_paddr_t	pa;
+	vm_offset_t	va;
+	vm_size_t	sz;
+	int		mode;
+} pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
+static int pmap_initialized;
 
-#define	pvh_global_lock	pvh_global.lock
+static struct rwlock_padalign pvh_global_lock;
 
 /*
  * Data for the pv entry allocation mechanism
@@ -271,11 +270,10 @@
 	caddr_t	CADDR2;
 };
 static struct sysmaps sysmaps_pcpu[MAXCPU];
-pt_entry_t *CMAP1 = 0;
-static pt_entry_t *CMAP3;
+pt_entry_t *CMAP3;
 static pd_entry_t *KPTD;
-caddr_t CADDR1 = 0, ptvmmap = 0;
-static caddr_t CADDR3;
+caddr_t ptvmmap = 0;
+caddr_t CADDR3;
 struct msgbuf *msgbufp = 0;
 
 /*
@@ -319,7 +317,9 @@
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
 static void pmap_flush_page(vm_page_t m);
-static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
+		    pd_entry_t pde);
 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
 static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
 static boolean_t pmap_is_referenced_pvh(struct md_page *pvh);
@@ -332,12 +332,12 @@
     vm_prot_t prot);
 static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
 static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
-    vm_page_t *free);
+    struct spglist *free);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
-    vm_page_t *free);
+    struct spglist *free);
 static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
-    vm_page_t *free);
+    struct spglist *free);
 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
 					vm_offset_t va);
 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
@@ -347,15 +347,16 @@
     pd_entry_t newpde);
 static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
 
-static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
+static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags);
 
-static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags);
-static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free);
+static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags);
+static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free);
 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
 static void pmap_pte_release(pt_entry_t *pte);
-static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
-#ifdef PAE
-static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
+static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *);
+#if defined(PAE) || defined(PAE_TABLES)
+static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags,
+    int wait);
 #endif
 static void pmap_set_pg(void);
 
@@ -390,6 +391,15 @@
 	int i;
 
 	/*
+	 * Add a physical memory segment (vm_phys_seg) corresponding to the
+	 * preallocated kernel page table pages so that vm_page structures
+	 * representing these pages will be created.  The vm_page structures
+	 * are required for promotion of the corresponding kernel virtual
+	 * addresses to superpage mappings.
+	 */
+	vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt));
+
+	/*
 	 * Initialize the first available kernel virtual address.  However,
 	 * using "firstaddr" may waste a few pages of the kernel virtual
 	 * address space, because locore may not have mapped every physical
@@ -405,10 +415,9 @@
 	 */
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
 #endif
-	kernel_pmap->pm_root = NULL;
 	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 
@@ -450,7 +459,6 @@
 		SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
 		SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
 	}
-	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
 	SYSMAP(caddr_t, CMAP3, CADDR3, 1)
 
 	/*
@@ -512,7 +520,14 @@
 	for (i = 1; i < NKPT; i++)
 		PTD[i] = 0;
 
-	/* Initialize the PAT MSR if present. */
+	/*
+	 * Initialize the PAT MSR if present.
+	 * pmap_init_pat() clears and sets CR4_PGE, which, as a
+	 * side-effect, invalidates stale PG_G TLB entries that might
+	 * have been created in our pre-boot environment.  We assume
+	 * that PAT support implies PGE and in reverse, PGE presence
+	 * comes with PAT.  Both features were added for Pentium Pro.
+	 */
 	pmap_init_pat();
 
 	/* Turn on PG_G on kernel page(s) */
@@ -540,7 +555,10 @@
 	pat_table[PAT_WRITE_PROTECTED] = 3;
 	pat_table[PAT_UNCACHED] = 3;
 
-	/* Bail if this CPU doesn't implement PAT. */
+	/*
+	 * Bail if this CPU doesn't implement PAT.
+	 * We assume that PAT support implies PGE.
+	 */
 	if ((cpu_feature & CPUID_PAT) == 0) {
 		for (i = 0; i < PAT_INDEX_SIZE; i++)
 			pat_index[i] = pat_table[i];
@@ -664,20 +682,20 @@
 	m->md.pat_mode = PAT_WRITE_BACK;
 }
 
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 static void *
-pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
 {
 
 	/* Inform UMA that this allocator uses kernel_map/object. */
 	*flags = UMA_SLAB_KERNEL;
-	return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 0x0ULL,
+	return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, 0x0ULL,
 	    0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT));
 }
 #endif
 
 /*
- * ABuse the pte nodes for unmapped kva to thread a kva freelist through.
+ * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
  * Requirements:
  *  - Must deal with pages in order to ensure that none of the PG_* bits
  *    are ever set, PG_V in particular.
@@ -696,7 +714,7 @@
 
 	va = *head;
 	if (va == 0)
-		return (va);	/* Out of memory */
+		panic("pmap_ptelist_alloc: exhausted ptelist KVA");
 	pte = vtopte(va);
 	*head = *pte;
 	if (*head & PG_V)
@@ -739,6 +757,7 @@
 void
 pmap_init(void)
 {
+	struct pmap_preinit_mapping *ppim;
 	vm_page_t mpte;
 	vm_size_t s;
 	int i, pv_npg;
@@ -768,12 +787,18 @@
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
 	/*
-	 * If the kernel is running in a virtual machine on an AMD Family 10h
-	 * processor, then it must assume that MCA is enabled by the virtual
-	 * machine monitor.
+	 * If the kernel is running on a virtual machine, then it must assume
+	 * that MCA is enabled by the hypervisor.  Moreover, the kernel must
+	 * be prepared for the hypervisor changing the vendor and family that
+	 * are reported by CPUID.  Consequently, the workaround for AMD Family
+	 * 10h Erratum 383 is enabled if the processor's feature set does not
+	 * include at least one feature that is only supported by older Intel
+	 * or newer AMD processors.
 	 */
-	if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
-	    CPUID_TO_FAMILY(cpu_id) == 0x10)
+	if (vm_guest == VM_GUEST_VM && (cpu_feature & CPUID_SS) == 0 &&
+	    (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI |
+	    CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP |
+	    AMDID2_FMA4)) == 0)
 		workaround_erratum383 = 1;
 
 	/*
@@ -790,9 +815,10 @@
 
 	/*
 	 * Calculate the size of the pv head table for superpages.
+	 * Handle the possibility that "vm_phys_segs[...].end" is zero.
 	 */
-	for (i = 0; phys_avail[i + 1]; i += 2);
-	pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;
+	pv_npg = trunc_4mpage(vm_phys_segs[vm_phys_nsegs - 1].end -
+	    PAGE_SIZE) / NBPDR + 1;
 
 	/*
 	 * Allocate memory for the pv head table for superpages.
@@ -799,22 +825,33 @@
 	 */
 	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
 	s = round_page(s);
-	pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
+	pv_table = (struct md_page *)kmem_malloc(kernel_arena, s,
+	    M_WAITOK | M_ZERO);
 	for (i = 0; i < pv_npg; i++)
 		TAILQ_INIT(&pv_table[i].pv_list);
 
 	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
-	pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
-	    PAGE_SIZE * pv_maxchunks);
+	pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks);
 	if (pv_chunkbase == NULL)
 		panic("pmap_init: not enough kvm for pv chunks");
 	pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
 	    NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
 	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
 	uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
 #endif
+
+	pmap_initialized = 1;
+	if (!bootverbose)
+		return;
+	for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
+		ppim = pmap_preinit_mapping + i;
+		if (ppim->va == 0)
+			continue;
+		printf("PPIM %u: PA=%#jx, VA=%#x, size=%#x, mode=%#x\n", i,
+		    (uintmax_t)ppim->pa, ppim->va, ppim->sz, ppim->mode);
+	}
 }
 
 
@@ -1181,22 +1218,46 @@
 }
 #endif /* !SMP */
 
+static void
+pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde)
+{
+
+	/*
+	 * When the PDE has PG_PROMOTED set, the 2- or 4MB page mapping was
+	 * created by a promotion that did not invalidate the 512 or 1024 4KB
+	 * page mappings that might exist in the TLB.  Consequently, at this
+	 * point, the TLB may hold both 4KB and 2- or 4MB page mappings for
+	 * the address range [va, va + NBPDR).  Therefore, the entire range
+	 * must be invalidated here.  In contrast, when PG_PROMOTED is clear,
+	 * the TLB will not hold any 4KB page mappings for the address range
+	 * [va, va + NBPDR), and so a single INVLPG suffices to invalidate the
+	 * 2- or 4MB page mapping from the TLB.
+	 */
+	if ((pde & PG_PROMOTED) != 0)
+		pmap_invalidate_range(pmap, va, va + NBPDR - 1);
+	else
+		pmap_invalidate_page(pmap, va);
+}
+
 #define	PMAP_CLFLUSH_THRESHOLD	(2 * 1024 * 1024)
 
 void
-pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
+pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
 {
 
-	KASSERT((sva & PAGE_MASK) == 0,
-	    ("pmap_invalidate_cache_range: sva not page-aligned"));
-	KASSERT((eva & PAGE_MASK) == 0,
-	    ("pmap_invalidate_cache_range: eva not page-aligned"));
+	if (force) {
+		sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
+	} else {
+		KASSERT((sva & PAGE_MASK) == 0,
+		    ("pmap_invalidate_cache_range: sva not page-aligned"));
+		KASSERT((eva & PAGE_MASK) == 0,
+		    ("pmap_invalidate_cache_range: eva not page-aligned"));
+	}
 
-	if (cpu_feature & CPUID_SS)
-		; /* If "Self Snoop" is supported, do nothing. */
-	else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+	if ((cpu_feature & CPUID_SS) != 0 && !force)
+		; /* If "Self Snoop" is supported and allowed, do nothing. */
+	else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
 	    eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
 #ifdef DEV_APIC
 		/*
 		 * XXX: Some CPUs fault, hang, or trash the local APIC
@@ -1208,16 +1269,31 @@
 			return;
 #endif
 		/*
-		 * Otherwise, do per-cache line flush.  Use the mfence
+		 * Otherwise, do per-cache line flush.  Use the sfence
 		 * instruction to insure that previous stores are
 		 * included in the write-back.  The processor
 		 * propagates flush to other processors in the cache
 		 * coherence domain.
 		 */
-		mfence();
+		sfence();
 		for (; sva < eva; sva += cpu_clflush_line_size)
+			clflushopt(sva);
+		sfence();
+	} else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+	    eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+#ifdef DEV_APIC
+		if (pmap_kextract(sva) == lapic_paddr)
+			return;
+#endif
+		/*
+		 * Writes are ordered by CLFLUSH on Intel CPUs.
+		 */
+		if (cpu_vendor_id != CPU_VENDOR_INTEL)
+			mfence();
+		for (; sva < eva; sva += cpu_clflush_line_size)
 			clflush(sva);
-		mfence();
+		if (cpu_vendor_id != CPU_VENDOR_INTEL)
+			mfence();
 	} else {
 
 		/*
@@ -1298,6 +1374,13 @@
 		mtx_unlock(&PMAP2mutex);
 }
 
+/*
+ * NB:  The sequence of updating a page table followed by accesses to the
+ * corresponding pages is subject to the situation described in the "AMD64
+ * Architecture Programmer's Manual Volume 2: System Programming" rev. 3.23,
+ * "7.3.1 Special Coherency Considerations".  Therefore, issuing the INVLPG
+ * right after modifying the PTE bits is crucial.
+ */
 static __inline void
 invlcaddr(void *caddr)
 {
@@ -1584,13 +1667,12 @@
  * Page table page management routines.....
  ***************************************************/
 static __inline void
-pmap_free_zero_pages(vm_page_t free)
+pmap_free_zero_pages(struct spglist *free)
 {
 	vm_page_t m;
 
-	while (free != NULL) {
-		m = free;
-		free = m->right;
+	while ((m = SLIST_FIRST(free)) != NULL) {
+		SLIST_REMOVE_HEAD(free, plinks.s.ss);
 		/* Preserve the page's PG_ZERO setting. */
 		vm_page_free_toq(m);
 	}
@@ -1602,7 +1684,8 @@
  * physical memory manager after the TLB has been updated.
  */
 static __inline void
-pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
+pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
+    boolean_t set_PG_ZERO)
 {
 
 	if (set_PG_ZERO)
@@ -1609,8 +1692,7 @@
 		m->flags |= PG_ZERO;
 	else
 		m->flags &= ~PG_ZERO;
-	m->right = *free;
-	*free = m;
+	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
 }
 
 /*
@@ -1619,31 +1701,12 @@
  * for mapping a distinct range of virtual addresses.  The pmap's collection is
  * ordered by this virtual address range.
  */
-static void
+static __inline int
 pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
 {
-	vm_page_t root;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	root = pmap->pm_root;
-	if (root == NULL) {
-		mpte->left = NULL;
-		mpte->right = NULL;
-	} else {
-		root = vm_page_splay(mpte->pindex, root);
-		if (mpte->pindex < root->pindex) {
-			mpte->left = root->left;
-			mpte->right = root;
-			root->left = NULL;
-		} else if (mpte->pindex == root->pindex)
-			panic("pmap_insert_pt_page: pindex already inserted");
-		else {
-			mpte->right = root->right;
-			mpte->left = root;
-			root->right = NULL;
-		}
-	}
-	pmap->pm_root = mpte;
+	return (vm_radix_insert(&pmap->pm_root, mpte));
 }
 
 /*
@@ -1651,19 +1714,12 @@
  * specified pmap's collection of idle page table pages.  Returns NULL if there
  * is no page table page corresponding to the specified virtual address.
  */
-static vm_page_t
+static __inline vm_page_t
 pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
 {
-	vm_page_t mpte;
-	vm_pindex_t pindex = va >> PDRSHIFT;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) {
-		mpte = vm_page_splay(pindex, mpte);
-		if ((pmap->pm_root = mpte)->pindex != pindex)
-			mpte = NULL;
-	}
-	return (mpte);
+	return (vm_radix_lookup(&pmap->pm_root, va >> PDRSHIFT));
 }
 
 /*
@@ -1671,21 +1727,12 @@
  * of idle page table pages.  The specified page table page must be a member of
  * the pmap's collection.
  */
-static void
+static __inline void
 pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
 {
-	vm_page_t root;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	if (mpte != pmap->pm_root)
-		vm_page_splay(mpte->pindex, pmap->pm_root);
-	if (mpte->left == NULL)
-		root = mpte->right;
-	else {
-		root = vm_page_splay(mpte->pindex, mpte->left);
-		root->right = mpte->right;
-	}
-	pmap->pm_root = root;
+	vm_radix_remove(&pmap->pm_root, mpte->pindex);
 }
 
 /*
@@ -1695,7 +1742,7 @@
  * page table page was unmapped and FALSE otherwise.
  */
 static inline boolean_t
-pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
+pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free)
 {
 
 	--m->wire_count;
@@ -1707,7 +1754,7 @@
 }
 
 static void
-_pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
+_pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free)
 {
 	vm_offset_t pteva;
 
@@ -1743,7 +1790,7 @@
  * conditionally free the page, and manage the hold/wire counts.
  */
 static int
-pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
+pmap_unuse_pt(pmap_t pmap, vm_offset_t va, struct spglist *free)
 {
 	pd_entry_t ptepde;
 	vm_page_t mpte;
@@ -1769,10 +1816,10 @@
 	 * not need to be inserted into that list.
 	 */
 	pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
 #endif
-	pmap->pm_root = NULL;
+	pmap->pm_root.rt_root = 0;
 	CPU_ZERO(&pmap->pm_active);
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
@@ -1790,20 +1837,15 @@
 	vm_paddr_t pa;
 	int i;
 
-	PMAP_LOCK_INIT(pmap);
-
 	/*
 	 * No need to allocate page table space yet but we do need a valid
 	 * page directory table.
 	 */
 	if (pmap->pm_pdir == NULL) {
-		pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
-		    NBPTD);
-		if (pmap->pm_pdir == NULL) {
-			PMAP_LOCK_DESTROY(pmap);
+		pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD);
+		if (pmap->pm_pdir == NULL)
 			return (0);
-		}
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 		pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
 		KASSERT(((vm_offset_t)pmap->pm_pdpt &
 		    ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
@@ -1811,9 +1853,9 @@
 		KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
 		    ("pmap_pinit: pdpt above 4g"));
 #endif
-		pmap->pm_root = NULL;
+		pmap->pm_root.rt_root = 0;
 	}
-	KASSERT(pmap->pm_root == NULL,
+	KASSERT(vm_radix_is_empty(&pmap->pm_root),
 	    ("pmap_pinit: pmap has reserved page table page(s)"));
 
 	/*
@@ -1845,7 +1887,7 @@
 	for (i = 0; i < NPGPTD; i++) {
 		pa = VM_PAGE_TO_PHYS(ptdpg[i]);
 		pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 		pmap->pm_pdpt[i] = pa | PG_V;
 #endif
 	}
@@ -1862,21 +1904,17 @@
  * mapped correctly.
  */
 static vm_page_t
-_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags)
+_pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags)
 {
 	vm_paddr_t ptepa;
 	vm_page_t m;
 
-	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
-	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
-	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
-
 	/*
 	 * Allocate a page table page.
 	 */
 	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
-		if (flags & M_WAITOK) {
+		if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
 			PMAP_UNLOCK(pmap);
 			rw_wunlock(&pvh_global_lock);
 			VM_WAIT;
@@ -1908,16 +1946,12 @@
 }
 
 static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
+pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags)
 {
 	u_int ptepindex;
 	pd_entry_t ptepa;
 	vm_page_t m;
 
-	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
-	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
-	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
-
 	/*
 	 * Calculate pagetable page index
 	 */
@@ -1950,7 +1984,7 @@
 		 * been deallocated. 
 		 */
 		m = _pmap_allocpte(pmap, ptepindex, flags);
-		if (m == NULL && (flags & M_WAITOK))
+		if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
 			goto retry;
 	}
 	return (m);
@@ -2007,12 +2041,12 @@
 		spins = 50000000;
 
 		/* Find least significant set bit. */
-		lsb = cpusetobj_ffs(&mask);
+		lsb = CPU_FFS(&mask);
 		MPASS(lsb != 0);
 		lsb--;
 		CPU_SETOF(lsb, &mask);
 		mtx_lock_spin(&smp_ipi_mtx);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 		lazyptd = vtophys(pmap->pm_pdpt);
 #else
 		lazyptd = vtophys(pmap->pm_pdir);
@@ -2077,7 +2111,7 @@
 	KASSERT(pmap->pm_stats.resident_count == 0,
 	    ("pmap_release: pmap resident count %ld != 0",
 	    pmap->pm_stats.resident_count));
-	KASSERT(pmap->pm_root == NULL,
+	KASSERT(vm_radix_is_empty(&pmap->pm_root),
 	    ("pmap_release: pmap has reserved page table page(s)"));
 
 	pmap_lazyfix(pmap);
@@ -2096,7 +2130,7 @@
 
 	for (i = 0; i < NPGPTD; i++) {
 		m = ptdpg[i];
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 		KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
 		    ("pmap_release: got wrong ptd page"));
 #endif
@@ -2104,7 +2138,6 @@
 		atomic_subtract_int(&cnt.v_wire_count, 1);
 		vm_page_free_zero(m);
 	}
-	PMAP_LOCK_DESTROY(pmap);
 }
 

 static int
@@ -2244,16 +2277,18 @@
 	pt_entry_t *pte, tpte;
 	pv_entry_t pv;
 	vm_offset_t va;
-	vm_page_t free, m, m_pc;
+	vm_page_t m, m_pc;
+	struct spglist free;
 	uint32_t inuse;
 	int bit, field, freed;
 
 	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
 	pmap = NULL;
-	free = m_pc = NULL;
+	m_pc = NULL;
+	SLIST_INIT(&free);
 	TAILQ_INIT(&newtail);
 	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
-	    free == NULL)) {
+	    SLIST_EMPTY(&free))) {
 		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 		if (pmap != pc->pc_pmap) {
 			if (pmap != NULL) {
@@ -2302,7 +2337,7 @@
 					vm_page_dirty(m);
 				if ((tpte & PG_A) != 0)
 					vm_page_aflag_set(m, PGA_REFERENCED);
-				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 				if (TAILQ_EMPTY(&m->md.pv_list) &&
 				    (m->flags & PG_FICTITIOUS) == 0) {
 					pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -2358,14 +2393,14 @@
 		if (pmap != locked_pmap)
 			PMAP_UNLOCK(pmap);
 	}
-	if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
-		m_pc = free;
-		free = m_pc->right;
+	if (m_pc == NULL && pv_vafree != 0 && !SLIST_EMPTY(&free)) {
+		m_pc = SLIST_FIRST(&free);
+		SLIST_REMOVE_HEAD(&free, plinks.s.ss);
 		/* Recycle a freed page table page. */
 		m_pc->wire_count = 1;
 		atomic_add_int(&cnt.v_wire_count, 1);
 	}
-	pmap_free_zero_pages(free);
+	pmap_free_zero_pages(&free);
 	return (m_pc);
 }
 
@@ -2507,9 +2542,9 @@
 	pv_entry_t pv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
-	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
-			TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
+			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 			break;
 		}
 	}
@@ -2537,7 +2572,7 @@
 	pv = pmap_pvh_remove(pvh, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found"));
 	m = PHYS_TO_VM_PAGE(pa);
-	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 	/* Instantiate the remaining NPTEPG - 1 pv entries. */
 	va_last = va + NBPDR - PAGE_SIZE;
 	do {
@@ -2573,7 +2608,7 @@
 	pv = pmap_pvh_remove(&m->md, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found"));
 	pvh = pa_to_pvh(pa);
-	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
+	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 	/* Free the remaining NPTEPG - 1 pv entries. */
 	va_last = va + NBPDR - PAGE_SIZE;
 	do {
@@ -2620,7 +2655,7 @@
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	pv = get_pv_entry(pmap, FALSE);
 	pv->pv_va = va;
-	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 }
 
 /*
@@ -2636,7 +2671,7 @@
 	if (pv_entry_count < pv_entry_high_water && 
 	    (pv = get_pv_entry(pmap, TRUE)) != NULL) {
 		pv->pv_va = va;
-		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 		return (TRUE);
 	} else
 		return (FALSE);
@@ -2656,7 +2691,7 @@
 	    (pv = get_pv_entry(pmap, TRUE)) != NULL) {
 		pv->pv_va = va;
 		pvh = pa_to_pvh(pa);
-		TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
+		TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 		return (TRUE);
 	} else
 		return (FALSE);
@@ -2686,14 +2721,16 @@
 	pd_entry_t newpde, oldpde;
 	pt_entry_t *firstpte, newpte;
 	vm_paddr_t mptepa;
-	vm_page_t free, mpte;
+	vm_page_t mpte;
+	struct spglist free;
+	vm_offset_t sva;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	oldpde = *pde;
 	KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
 	    ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
-	mpte = pmap_lookup_pt_page(pmap, va);
-	if (mpte != NULL)
+	if ((oldpde & PG_A) != 0 && (mpte = pmap_lookup_pt_page(pmap, va)) !=
+	    NULL)
 		pmap_remove_pt_page(pmap, mpte);
 	else {
 		KASSERT((oldpde & PG_W) == 0,
@@ -2708,10 +2745,12 @@
 		if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
 		    va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL |
 		    VM_ALLOC_WIRED)) == NULL) {
-			free = NULL;
-			pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
-			pmap_invalidate_page(pmap, trunc_4mpage(va));
-			pmap_free_zero_pages(free);
+			SLIST_INIT(&free);
+			sva = trunc_4mpage(va);
+			pmap_remove_pde(pmap, pde, sva, &free);
+			if ((oldpde & PG_G) == 0)
+				pmap_invalidate_pde_page(pmap, sva, oldpde);
+			pmap_free_zero_pages(&free);
 			CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
 			    " in pmap %p", va, pmap);
 			return (FALSE);
@@ -2822,11 +2861,49 @@
 }
 
 /*
+ * Removes a 2- or 4MB page mapping from the kernel pmap.
+ */
+static void
+pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
+{
+	pd_entry_t newpde;
+	vm_paddr_t mptepa;
+	vm_page_t mpte;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	mpte = pmap_lookup_pt_page(pmap, va);
+	if (mpte == NULL)
+		panic("pmap_remove_kernel_pde: Missing pt page.");
+
+	pmap_remove_pt_page(pmap, mpte);
+	mptepa = VM_PAGE_TO_PHYS(mpte);
+	newpde = mptepa | PG_M | PG_A | PG_RW | PG_V;
+
+	/*
+	 * Initialize the page table page.
+	 */
+	pagezero((void *)&KPTmap[i386_btop(trunc_4mpage(va))]);
+
+	/*
+	 * Remove the mapping.
+	 */
+	if (workaround_erratum383)
+		pmap_update_pde(pmap, va, pde, newpde);
+	else 
+		pmap_kenter_pde(va, newpde);
+
+	/*
+	 * Invalidate the recursive mapping of the page table page.
+	 */
+	pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
+}
+
+/*
  * pmap_remove_pde: do the things to unmap a superpage in a process
  */
 static void
 pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
-    vm_page_t *free)
+    struct spglist *free)
 {
 	struct md_page *pvh;
 	pd_entry_t oldpde;
@@ -2844,8 +2921,9 @@
 	 * Machines that don't support invlpg, also don't support
 	 * PG_G.
 	 */
-	if (oldpde & PG_G)
-		pmap_invalidate_page(kernel_pmap, sva);
+	if ((oldpde & PG_G) != 0)
+		pmap_invalidate_pde_page(kernel_pmap, sva, oldpde);
+
 	pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 	if (oldpde & PG_MANAGED) {
 		pvh = pa_to_pvh(oldpde & PG_PS_FRAME);
@@ -2863,8 +2941,7 @@
 		}
 	}
 	if (pmap == kernel_pmap) {
-		if (!pmap_demote_pde(pmap, pdq, sva))
-			panic("pmap_remove_pde: failed demotion");
+		pmap_remove_kernel_pde(pmap, pdq, sva);
 	} else {
 		mpte = pmap_lookup_pt_page(pmap, sva);
 		if (mpte != NULL) {
@@ -2883,7 +2960,8 @@
  * pmap_remove_pte: do the things to unmap a page in a process
  */
 static int
-pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
+pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va,
+    struct spglist *free)
 {
 	pt_entry_t oldpte;
 	vm_page_t m;
@@ -2917,7 +2995,7 @@
  * Remove a single page from a process address space
  */
 static void
-pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
+pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free)
 {
 	pt_entry_t *pte;
 
@@ -2942,7 +3020,7 @@
 	vm_offset_t pdnxt;
 	pd_entry_t ptpaddr;
 	pt_entry_t *pte;
-	vm_page_t free = NULL;
+	struct spglist free;
 	int anyvalid;
 
 	/*
@@ -2952,6 +3030,7 @@
 		return;
 
 	anyvalid = 0;
+	SLIST_INIT(&free);
 
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
@@ -3044,7 +3123,7 @@
 		pmap_invalidate_all(pmap);
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
-	pmap_free_zero_pages(free);
+	pmap_free_zero_pages(&free);
 }
 
 /*
@@ -3069,11 +3148,11 @@
 	pt_entry_t *pte, tpte;
 	pd_entry_t *pde;
 	vm_offset_t va;
-	vm_page_t free;
+	struct spglist free;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_remove_all: page %p is not managed", m));
-	free = NULL;
+	SLIST_INIT(&free);
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
 	if ((m->flags & PG_FICTITIOUS) != 0)
@@ -3111,7 +3190,7 @@
 			vm_page_dirty(m);
 		pmap_unuse_pt(pmap, pv->pv_va, &free);
 		pmap_invalidate_page(pmap, pv->pv_va);
-		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 		free_pv_entry(pmap, pv);
 		PMAP_UNLOCK(pmap);
 	}
@@ -3118,7 +3197,7 @@
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
-	pmap_free_zero_pages(free);
+	pmap_free_zero_pages(&free);
 }
 
 /*
@@ -3147,15 +3226,20 @@
 	}
 	if ((prot & VM_PROT_WRITE) == 0)
 		newpde &= ~(PG_RW | PG_M);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		newpde |= pg_nx;
 #endif
 	if (newpde != oldpde) {
-		if (!pde_cmpset(pde, oldpde, newpde))
+		/*
+		 * As an optimization to future operations on this PDE, clear
+		 * PG_PROMOTED.  The impending invalidation will remove any
+		 * lingering 4KB page mappings from the TLB.
+		 */
+		if (!pde_cmpset(pde, oldpde, newpde & ~PG_PROMOTED))
 			goto retry;
-		if (oldpde & PG_G)
-			pmap_invalidate_page(pmap, sva);
+		if ((oldpde & PG_G) != 0)
+			pmap_invalidate_pde_page(kernel_pmap, sva, oldpde);
 		else
 			anychanged = TRUE;
 	}
@@ -3174,12 +3258,13 @@
 	pt_entry_t *pte;
 	boolean_t anychanged, pv_lists_locked;
 
-	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
+	KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
+	if (prot == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
 		return;
 	}
 
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
 	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
 		return;
@@ -3282,13 +3367,13 @@
 				}
 				pbits &= ~(PG_RW | PG_M);
 			}
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 			if ((prot & VM_PROT_EXECUTE) == 0)
 				pbits |= pg_nx;
 #endif
 
 			if (pbits != obits) {
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 				if (!atomic_cmpset_64(pte, obits, pbits))
 					goto retry;
 #else
@@ -3414,7 +3499,13 @@
 	    ("pmap_promote_pde: page table page is out of range"));
 	KASSERT(mpte->pindex == va >> PDRSHIFT,
 	    ("pmap_promote_pde: page table page's pindex is wrong"));
-	pmap_insert_pt_page(pmap, mpte);
+	if (pmap_insert_pt_page(pmap, mpte)) {
+		pmap_pde_p_failures++;
+		CTR2(KTR_PMAP,
+		    "pmap_promote_pde: failure for va %#x in pmap %p", va,
+		    pmap);
+		return;
+	}
 
 	/*
 	 * Promote the pv entries.
@@ -3434,9 +3525,9 @@
 	if (workaround_erratum383)
 		pmap_update_pde(pmap, va, pde, PG_PS | newpde);
 	else if (pmap == kernel_pmap)
-		pmap_kenter_pde(va, PG_PS | newpde);
+		pmap_kenter_pde(va, PG_PROMOTED | PG_PS | newpde);
 	else
-		pde_store(pde, PG_PS | newpde);
+		pde_store(pde, PG_PROMOTED | PG_PS | newpde);
 
 	pmap_pde_promotions++;
 	CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x"
@@ -3455,9 +3546,9 @@
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
  */
-void
-pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
-    vm_prot_t prot, boolean_t wired)
+int
+pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+    u_int flags, int8_t psind)
 {
 	pd_entry_t *pde;
 	pt_entry_t *pte;
@@ -3465,19 +3556,19 @@
 	pv_entry_t pv;
 	vm_paddr_t opa, pa;
 	vm_page_t mpte, om;
-	boolean_t invlva;
+	boolean_t invlva, wired;
 
 	va = trunc_page(va);
+	mpte = NULL;
+	wired = (flags & PMAP_ENTER_WIRED) != 0;
+
 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
 	KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
 	    ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
 	    va));
-	KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
-	    VM_OBJECT_LOCKED(m->object),
-	    ("pmap_enter: page %p is not busy", m));
+	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
+		VM_OBJECT_ASSERT_LOCKED(m->object);
 
-	mpte = NULL;
-
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	sched_pin();
@@ -3487,7 +3578,15 @@
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
-		mpte = pmap_allocpte(pmap, va, M_WAITOK);
+		mpte = pmap_allocpte(pmap, va, flags);
+		if (mpte == NULL) {
+			KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
+			    ("pmap_allocpte failed with sleep allowed"));
+			sched_unpin();
+			rw_wunlock(&pvh_global_lock);
+			PMAP_UNLOCK(pmap);
+			return (KERN_RESOURCE_SHORTAGE);
+		}
 	}
 
 	pde = pmap_pde(pmap, va);
@@ -3567,7 +3666,7 @@
 		if (pv == NULL)
 			pv = get_pv_entry(pmap, FALSE);
 		pv->pv_va = va;
-		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 		pa |= PG_MANAGED;
 	} else if (pv != NULL)
 		free_pv_entry(pmap, pv);
@@ -3588,7 +3687,7 @@
 		if ((newpte & PG_MANAGED) != 0)
 			vm_page_aflag_set(m, PGA_WRITEABLE);
 	}
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		newpte |= pg_nx;
 #endif
@@ -3605,7 +3704,7 @@
 	 */
 	if ((origpte & ~(PG_M|PG_A)) != newpte) {
 		newpte |= PG_A;
-		if ((access & VM_PROT_WRITE) != 0)
+		if ((flags & VM_PROT_WRITE) != 0)
 			newpte |= PG_M;
 		if (origpte & PG_V) {
 			invlva = FALSE;
@@ -3615,7 +3714,7 @@
 					vm_page_aflag_set(om, PGA_REFERENCED);
 				if (opa != VM_PAGE_TO_PHYS(m))
 					invlva = TRUE;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 				if ((origpte & PG_NX) == 0 &&
 				    (newpte & PG_NX) != 0)
 					invlva = TRUE;
@@ -3650,6 +3749,7 @@
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
+	return (KERN_SUCCESS);
 }
 
 /*
@@ -3685,7 +3785,7 @@
 			return (FALSE);
 		}
 	}
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		newpde |= pg_nx;
 #endif
@@ -3698,7 +3798,8 @@
 	pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
 
 	/*
-	 * Map the superpage.
+	 * Map the superpage.  (This is not a promoted mapping; there will not
+	 * be any lingering 4KB page mappings in the TLB.)
 	 */
 	pde_store(pde, newpde);
 
@@ -3728,7 +3829,8 @@
 	vm_page_t m, mpte;
 	vm_pindex_t diff, psize;
 
-	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
+	VM_OBJECT_ASSERT_LOCKED(m_start->object);
+
 	psize = atop(end - start);
 	mpte = NULL;
 	m = m_start;
@@ -3737,8 +3839,7 @@
 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
 		va = start + ptoa(diff);
 		if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
-		    (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 &&
-		    pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 &&
+		    m->psind == 1 && pg_ps_enabled &&
 		    pmap_enter_pde(pmap, va, m, prot))
 			m = &m[NBPDR / PAGE_SIZE - 1];
 		else
@@ -3776,7 +3877,7 @@
 {
 	pt_entry_t *pte;
 	vm_paddr_t pa;
-	vm_page_t free;
+	struct spglist free;
 
 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 	    (m->oflags & VPO_UNMANAGED) != 0,
@@ -3815,7 +3916,7 @@
 				mpte->wire_count++;
 			} else {
 				mpte = _pmap_allocpte(pmap, ptepindex,
-				    M_NOWAIT);
+				    PMAP_ENTER_NOSLEEP);
 				if (mpte == NULL)
 					return (mpte);
 			}
@@ -3845,10 +3946,10 @@
 	if ((m->oflags & VPO_UNMANAGED) == 0 &&
 	    !pmap_try_insert_pv_entry(pmap, va, m)) {
 		if (mpte != NULL) {
-			free = NULL;
+			SLIST_INIT(&free);
 			if (pmap_unwire_ptp(pmap, mpte, &free)) {
 				pmap_invalidate_page(pmap, va);
-				pmap_free_zero_pages(free);
+				pmap_free_zero_pages(&free);
 			}
 			
 			mpte = NULL;
@@ -3862,7 +3963,7 @@
 	pmap->pm_stats.resident_count++;
 
 	pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		pa |= pg_nx;
 #endif
@@ -3906,7 +4007,7 @@
 	vm_page_t p;
 	int pat_mode;
 
-	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
+	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 	    ("pmap_object_init_pt: non-device object"));
 	if (pseflag && 
@@ -3966,59 +4067,100 @@
 }
 
 /*
- *	Routine:	pmap_change_wiring
- *	Function:	Change the wiring attribute for a map/virtual-address
- *			pair.
- *	In/out conditions:
- *			The mapping must already exist in the pmap.
+ *	Clear the wired attribute from the mappings for the specified range of
+ *	addresses in the given pmap.  Every valid mapping within that range
+ *	must have the wired attribute set.  In contrast, invalid mappings
+ *	cannot have the wired attribute set, so they are ignored.
+ *
+ *	The wired attribute of the page table entry is not a hardware feature,
+ *	so there is no need to invalidate any TLB entries.
  */
 void
-pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
+pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
+	vm_offset_t pdnxt;
 	pd_entry_t *pde;
 	pt_entry_t *pte;
-	boolean_t are_queues_locked;
+	boolean_t pv_lists_locked;
 
-	are_queues_locked = FALSE;
-retry:
+	if (pmap_is_current(pmap))
+		pv_lists_locked = FALSE;	/* taken later, only if a demotion is needed */
+	else {
+		pv_lists_locked = TRUE;
+resume:	/* also re-entered after a failed try-lock below; sva is unchanged */
+		rw_wlock(&pvh_global_lock);
+		sched_pin();
+	}
 	PMAP_LOCK(pmap);
-	pde = pmap_pde(pmap, va);
-	if ((*pde & PG_PS) != 0) {
-		if (!wired != ((*pde & PG_W) == 0)) {
-			if (!are_queues_locked) {
-				are_queues_locked = TRUE;
-				if (!rw_try_wlock(&pvh_global_lock)) {
-					PMAP_UNLOCK(pmap);
-					rw_wlock(&pvh_global_lock);
-					goto retry;
+	for (; sva < eva; sva = pdnxt) {
+		pdnxt = (sva + NBPDR) & ~PDRMASK;
+		if (pdnxt < sva)	/* address arithmetic wrapped around */
+			pdnxt = eva;
+		pde = pmap_pde(pmap, sva);
+		if ((*pde & PG_V) == 0)
+			continue;	/* nothing mapped by this pde */
+		if ((*pde & PG_PS) != 0) {
+			if ((*pde & PG_W) == 0)
+				panic("pmap_unwire: pde %#jx is missing PG_W",
+				    (uintmax_t)*pde);
+
+			/*
+			 * Are we unwiring the entire large page?  If not,
+			 * demote the mapping and fall through.
+			 */
+			if (sva + NBPDR == pdnxt && eva >= pdnxt) {
+				/*
+				 * Regardless of whether a pde (or pte) is 32
+				 * or 64 bits in size, PG_W is among the least
+				 * significant 32 bits.
+				 */
+				atomic_clear_int((u_int *)pde, PG_W);
+				pmap->pm_stats.wired_count -= NBPDR /
+				    PAGE_SIZE;
+				continue;	/* whole large page unwired in place */
+			} else {
+				if (!pv_lists_locked) {
+					pv_lists_locked = TRUE;
+					if (!rw_try_wlock(&pvh_global_lock)) {
+						PMAP_UNLOCK(pmap);
+						/* Repeat sva. */
+						goto resume;
+					}
+					sched_pin();
 				}
+				if (!pmap_demote_pde(pmap, pde, sva))
+					panic("pmap_unwire: demotion failed");
 			}
-			if (!pmap_demote_pde(pmap, pde, va))
-				panic("pmap_change_wiring: demotion failed");
-		} else
-			goto out;
+		}
+		if (pdnxt > eva)	/* clamp the 4KB walk to the requested range */
+			pdnxt = eva;
+		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
+		    sva += PAGE_SIZE) {
+			if ((*pte & PG_V) == 0)
+				continue;
+			if ((*pte & PG_W) == 0)
+				panic("pmap_unwire: pte %#jx is missing PG_W",
+				    (uintmax_t)*pte);
+
+			/*
+			 * PG_W must be cleared atomically.  Although the pmap
+			 * lock synchronizes access to PG_W, another processor
+			 * could be setting PG_M and/or PG_A concurrently.
+			 *
+			 * PG_W is among the least significant 32 bits.
+			 */
+			atomic_clear_int((u_int *)pte, PG_W);
+			pmap->pm_stats.wired_count--;
+		}
 	}
-	pte = pmap_pte(pmap, va);
-
-	if (wired && !pmap_pte_w(pte))
-		pmap->pm_stats.wired_count++;
-	else if (!wired && pmap_pte_w(pte))
-		pmap->pm_stats.wired_count--;
-
-	/*
-	 * Wiring is not a hardware characteristic so there is no need to
-	 * invalidate TLB.
-	 */
-	pmap_pte_set_w(pte, wired);
-	pmap_pte_release(pte);
-out:
-	if (are_queues_locked)
+	if (pv_lists_locked) {
+		sched_unpin();	/* pairs with the sched_pin() done when the lock was taken */
 		rw_wunlock(&pvh_global_lock);
+	}
 	PMAP_UNLOCK(pmap);
 }
 
 
-
 /*
  *	Copy the range specified by src_addr/len
  *	from the source map to the range dst_addr/len
@@ -4031,7 +4173,7 @@
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
     vm_offset_t src_addr)
 {
-	vm_page_t   free;
+	struct spglist free;
 	vm_offset_t addr;
 	vm_offset_t end_addr = src_addr + len;
 	vm_offset_t pdnxt;
@@ -4070,6 +4212,8 @@
 			continue;
 			
 		if (srcptepaddr & PG_PS) {
+			if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr)
+				continue;
 			if (dst_pmap->pm_pdir[ptepindex] == 0 &&
 			    ((srcptepaddr & PG_MANAGED) == 0 ||
 			    pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
@@ -4098,7 +4242,7 @@
 			 */
 			if ((ptetemp & PG_MANAGED) != 0) {
 				dstmpte = pmap_allocpte(dst_pmap, addr,
-				    M_NOWAIT);
+				    PMAP_ENTER_NOSLEEP);
 				if (dstmpte == NULL)
 					goto out;
 				dst_pte = pmap_pte_quick(dst_pmap, addr);
@@ -4114,12 +4258,12 @@
 					    PG_A);
 					dst_pmap->pm_stats.resident_count++;
 	 			} else {
-					free = NULL;
+					SLIST_INIT(&free);
 					if (pmap_unwire_ptp(dst_pmap, dstmpte,
 					    &free)) {
 						pmap_invalidate_page(dst_pmap,
 						    addr);
-						pmap_free_zero_pages(free);
+						pmap_free_zero_pages(&free);
 					}
 					goto out;
 				}
@@ -4243,12 +4387,12 @@
 	if (*sysmaps->CMAP2)
 		panic("pmap_copy_page: CMAP2 busy");
 	sched_pin();
-	invlpg((u_int)sysmaps->CADDR1);
-	invlpg((u_int)sysmaps->CADDR2);
 	*sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A |
 	    pmap_cache_bits(src->md.pat_mode, 0);
+	invlcaddr(sysmaps->CADDR1);
 	*sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M |
 	    pmap_cache_bits(dst->md.pat_mode, 0);
+	invlcaddr(sysmaps->CADDR2);
 	bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
 	*sysmaps->CMAP1 = 0;
 	*sysmaps->CMAP2 = 0;
@@ -4256,6 +4400,8 @@
 	mtx_unlock(&sysmaps->lock);
 }
 
+int unmapped_buf_allowed = 1;
+
 void
 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)
@@ -4274,8 +4420,6 @@
 		panic("pmap_copy_pages: CMAP2 busy");
 	sched_pin();
 	while (xfersize > 0) {
-		invlpg((u_int)sysmaps->CADDR1);
-		invlpg((u_int)sysmaps->CADDR2);
 		a_pg = ma[a_offset >> PAGE_SHIFT];
 		a_pg_offset = a_offset & PAGE_MASK;
 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
@@ -4283,9 +4427,11 @@
 		b_pg_offset = b_offset & PAGE_MASK;
 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
 		*sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(a_pg) | PG_A |
-		    pmap_cache_bits(b_pg->md.pat_mode, 0);
+		    pmap_cache_bits(a_pg->md.pat_mode, 0);
+		invlcaddr(sysmaps->CADDR1);
 		*sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(b_pg) | PG_A |
 		    PG_M | pmap_cache_bits(b_pg->md.pat_mode, 0);
+		invlcaddr(sysmaps->CADDR2);
 		a_cp = sysmaps->CADDR1 + a_pg_offset;
 		b_cp = sysmaps->CADDR2 + b_pg_offset;
 		bcopy(a_cp, b_cp, cnt);
@@ -4318,7 +4464,7 @@
 	    ("pmap_page_exists_quick: page %p is not managed", m));
 	rv = FALSE;
 	rw_wlock(&pvh_global_lock);
-	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		if (PV_PMAP(pv) == pmap) {
 			rv = TRUE;
 			break;
@@ -4329,7 +4475,7 @@
 	}
 	if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
-		TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 			if (PV_PMAP(pv) == pmap) {
 				rv = TRUE;
 				break;
@@ -4381,7 +4527,7 @@
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	sched_pin();
-	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte = pmap_pte_quick(pmap, pv->pv_va);
@@ -4424,11 +4570,11 @@
 pmap_remove_pages(pmap_t pmap)
 {
 	pt_entry_t *pte, tpte;
-	vm_page_t free = NULL;
 	vm_page_t m, mpte, mt;
 	pv_entry_t pv;
 	struct md_page *pvh;
 	struct pv_chunk *pc, *npc;
+	struct spglist free;
 	int field, idx;
 	int32_t bit;
 	uint32_t inuse, bitmask;
@@ -4438,6 +4584,7 @@
 		printf("warning: pmap_remove_pages called with non-current pmap\n");
 		return;
 	}
+	SLIST_INIT(&free);
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	sched_pin();
@@ -4508,7 +4655,7 @@
 				if ((tpte & PG_PS) != 0) {
 					pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 					pvh = pa_to_pvh(tpte & PG_PS_FRAME);
-					TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
+					TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 					if (TAILQ_EMPTY(&pvh->pv_list)) {
 						for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
 							if (TAILQ_EMPTY(&mt->md.pv_list))
@@ -4526,7 +4673,7 @@
 					}
 				} else {
 					pmap->pm_stats.resident_count--;
-					TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+					TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 					if (TAILQ_EMPTY(&m->md.pv_list) &&
 					    (m->flags & PG_FICTITIOUS) == 0) {
 						pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -4546,7 +4693,7 @@
 	pmap_invalidate_all(pmap);
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
-	pmap_free_zero_pages(free);
+	pmap_free_zero_pages(&free);
 }
 
 /*
@@ -4564,13 +4711,12 @@
 	    ("pmap_is_modified: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no PTEs can have PG_M set.
 	 */
-	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	VM_OBJECT_ASSERT_WLOCKED(m->object);
+	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (FALSE);
 	rw_wlock(&pvh_global_lock);
 	rv = pmap_is_modified_pvh(&m->md) ||
@@ -4596,7 +4742,7 @@
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	rv = FALSE;
 	sched_pin();
-	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte = pmap_pte_quick(pmap, pv->pv_va);
@@ -4669,7 +4815,7 @@
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	rv = FALSE;
 	sched_pin();
-	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte = pmap_pte_quick(pmap, pv->pv_va);
@@ -4699,13 +4845,12 @@
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
-	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
-	 * is clear, no page table entries need updating.
+	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
+	 * set by another thread while the object is locked.  Thus,
+	 * if PGA_WRITEABLE is clear, no page table entries need updating.
 	 */
-	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	VM_OBJECT_ASSERT_WLOCKED(m->object);
+	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
@@ -4712,7 +4857,7 @@
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		goto small_mappings;
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
-	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
+	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
@@ -4722,7 +4867,7 @@
 		PMAP_UNLOCK(pmap);
 	}
 small_mappings:
-	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pde = pmap_pde(pmap, pv->pv_va);
@@ -4751,6 +4896,8 @@
 	rw_wunlock(&pvh_global_lock);
 }
 
+#define	PMAP_TS_REFERENCED_MAX	5
+
 /*
  *	pmap_ts_referenced:
  *
@@ -4767,73 +4914,88 @@
 pmap_ts_referenced(vm_page_t m)
 {
 	struct md_page *pvh;
-	pv_entry_t pv, pvf, pvn;
+	pv_entry_t pv, pvf;
 	pmap_t pmap;
-	pd_entry_t oldpde, *pde;
+	pd_entry_t *pde;
 	pt_entry_t *pte;
-	vm_offset_t va;
+	vm_paddr_t pa;
 	int rtval = 0;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_ts_referenced: page %p is not managed", m));
-	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+	pa = VM_PAGE_TO_PHYS(m);
+	pvh = pa_to_pvh(pa);
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
-	if ((m->flags & PG_FICTITIOUS) != 0)
+	if ((m->flags & PG_FICTITIOUS) != 0 ||
+	    (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
 		goto small_mappings;
-	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) {
-		va = pv->pv_va;
+	pv = pvf;
+	do {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
-		pde = pmap_pde(pmap, va);
-		oldpde = *pde;
-		if ((oldpde & PG_A) != 0) {
-			if (pmap_demote_pde(pmap, pde, va)) {
-				if ((oldpde & PG_W) == 0) {
-					/*
-					 * Remove the mapping to a single page
-					 * so that a subsequent access may
-					 * repromote.  Since the underlying
-					 * page table page is fully populated,
-					 * this removal never frees a page
-					 * table page.
-					 */
-					va += VM_PAGE_TO_PHYS(m) - (oldpde &
-					    PG_PS_FRAME);
-					pmap_remove_page(pmap, va, NULL);
-					rtval++;
-					if (rtval > 4) {
-						PMAP_UNLOCK(pmap);
-						goto out;
-					}
-				}
+		pde = pmap_pde(pmap, pv->pv_va);
+		if ((*pde & PG_A) != 0) {
+			/*
+			 * Since this reference bit is shared by either 1024
+			 * or 512 4KB pages, it should not be cleared every
+			 * time it is tested.  Apply a simple "hash" function
+			 * on the physical page number, the virtual superpage
+			 * number, and the pmap address to select one 4KB page
+			 * out of the 1024 or 512 on which testing the
+			 * reference bit will result in clearing that bit.
+			 * This function is designed to avoid the selection of
+			 * the same 4KB page for every 2- or 4MB page mapping.
+			 *
+			 * On demotion, a mapping that hasn't been referenced
+			 * is simply destroyed.  To avoid the possibility of a
+			 * subsequent page fault on a demoted wired mapping,
+			 * always leave its reference bit set.  Moreover,
+			 * since the superpage is wired, the current state of
+			 * its reference bit won't affect page replacement.
+			 */
+			if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PDRSHIFT) ^
+			    (uintptr_t)pmap) & (NPTEPG - 1)) == 0 &&
+			    (*pde & PG_W) == 0) {
+				atomic_clear_int((u_int *)pde, PG_A);
+				pmap_invalidate_page(pmap, pv->pv_va);
 			}
+			rtval++;
 		}
 		PMAP_UNLOCK(pmap);
-	}
+		/* Rotate the PV list if it has more than one entry. */
+		if (TAILQ_NEXT(pv, pv_next) != NULL) {
+			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
+			TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
+		}
+		if (rtval >= PMAP_TS_REFERENCED_MAX)
+			goto out;
+	} while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
 small_mappings:
-	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
-		pvf = pv;
-		do {
-			pvn = TAILQ_NEXT(pv, pv_list);
-			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
-			pmap = PV_PMAP(pv);
-			PMAP_LOCK(pmap);
-			pde = pmap_pde(pmap, pv->pv_va);
-			KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:"
-			    " found a 4mpage in page %p's pv list", m));
-			pte = pmap_pte_quick(pmap, pv->pv_va);
-			if ((*pte & PG_A) != 0) {
-				atomic_clear_int((u_int *)pte, PG_A);
-				pmap_invalidate_page(pmap, pv->pv_va);
-				rtval++;
-				if (rtval > 4)
-					pvn = NULL;
-			}
-			PMAP_UNLOCK(pmap);
-		} while ((pv = pvn) != NULL && pv != pvf);
-	}
+	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
+		goto out;
+	pv = pvf;
+	do {
+		pmap = PV_PMAP(pv);
+		PMAP_LOCK(pmap);
+		pde = pmap_pde(pmap, pv->pv_va);
+		KASSERT((*pde & PG_PS) == 0,
+		    ("pmap_ts_referenced: found a 4mpage in page %p's pv list",
+		    m));
+		pte = pmap_pte_quick(pmap, pv->pv_va);
+		if ((*pte & PG_A) != 0) {
+			atomic_clear_int((u_int *)pte, PG_A);
+			pmap_invalidate_page(pmap, pv->pv_va);
+			rtval++;
+		}
+		PMAP_UNLOCK(pmap);
+		/* Rotate the PV list if it has more than one entry. */
+		if (TAILQ_NEXT(pv, pv_next) != NULL) {
+			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
+			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+		}
+	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval <
+	    PMAP_TS_REFERENCED_MAX);
 out:
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
@@ -4841,6 +5003,112 @@
 }
 
 /*
+ *	Apply the given advice to the specified range of addresses within the
+ *	given pmap.  Depending on the advice, clear the referenced and/or
+ *	modified flags in each mapping and set the mapped page's dirty field.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+	pd_entry_t oldpde, *pde;
+	pt_entry_t *pte;
+	vm_offset_t pdnxt;
+	vm_page_t m;
+	boolean_t anychanged, pv_lists_locked;
+
+	if (advice != MADV_DONTNEED && advice != MADV_FREE)
+		return;	/* any other advice value is a no-op here */
+	if (pmap_is_current(pmap))
+		pv_lists_locked = FALSE;	/* taken later, only if a demotion is needed */
+	else {
+		pv_lists_locked = TRUE;
+resume:	/* also re-entered after a failed try-lock below; sva is unchanged */
+		rw_wlock(&pvh_global_lock);
+		sched_pin();
+	}
+	anychanged = FALSE;
+	PMAP_LOCK(pmap);
+	for (; sva < eva; sva = pdnxt) {
+		pdnxt = (sva + NBPDR) & ~PDRMASK;
+		if (pdnxt < sva)	/* address arithmetic wrapped around */
+			pdnxt = eva;
+		pde = pmap_pde(pmap, sva);
+		oldpde = *pde;
+		if ((oldpde & PG_V) == 0)
+			continue;	/* nothing mapped by this pde */
+		else if ((oldpde & PG_PS) != 0) {
+			if ((oldpde & PG_MANAGED) == 0)
+				continue;	/* unmanaged large mapping: left untouched */
+			if (!pv_lists_locked) {
+				pv_lists_locked = TRUE;
+				if (!rw_try_wlock(&pvh_global_lock)) {
+					if (anychanged)	/* flush pending changes before dropping the pmap lock */
+						pmap_invalidate_all(pmap);
+					PMAP_UNLOCK(pmap);
+					goto resume;
+				}
+				sched_pin();
+			}
+			if (!pmap_demote_pde(pmap, pde, sva)) {
+				/*
+				 * The large page mapping was destroyed.
+				 */
+				continue;
+			}
+
+			/*
+			 * Unless the page mappings are wired, remove the
+			 * mapping to a single page so that a subsequent
+			 * access may repromote.  Since the underlying page
+			 * table page is fully populated, this removal never
+			 * frees a page table page.
+			 */
+			if ((oldpde & PG_W) == 0) {
+				pte = pmap_pte_quick(pmap, sva);
+				KASSERT((*pte & PG_V) != 0,
+				    ("pmap_advise: invalid PTE"));
+				pmap_remove_pte(pmap, pte, sva, NULL);
+				anychanged = TRUE;
+			}
+		}
+		if (pdnxt > eva)	/* clamp the 4KB walk to the requested range */
+			pdnxt = eva;
+		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
+		    sva += PAGE_SIZE) {
+			if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
+			    PG_V))
+				continue;	/* only valid, managed mappings are affected */
+			else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+				if (advice == MADV_DONTNEED) {
+					/*
+					 * Future calls to pmap_is_modified()
+					 * can be avoided by making the page
+					 * dirty now.
+					 */
+					m = PHYS_TO_VM_PAGE(*pte & PG_FRAME);
+					vm_page_dirty(m);
+				}
+				atomic_clear_int((u_int *)pte, PG_M | PG_A);
+			} else if ((*pte & PG_A) != 0)
+				atomic_clear_int((u_int *)pte, PG_A);
+			else
+				continue;	/* neither flag needed clearing: no TLB work */
+			if ((*pte & PG_G) != 0)	/* PG_G mapping: invalidated per page, right away */
+				pmap_invalidate_page(pmap, sva);
+			else
+				anychanged = TRUE;
+		}
+	}
+	if (anychanged)	/* one deferred flush covers every non-PG_G change */
+		pmap_invalidate_all(pmap);
+	if (pv_lists_locked) {
+		sched_unpin();	/* pairs with the sched_pin() done when the lock was taken */
+		rw_wunlock(&pvh_global_lock);
+	}
+	PMAP_UNLOCK(pmap);
+}
+
+/*
  *	Clear the modify bits on the specified physical page.
  */
 void
@@ -4855,14 +5123,14 @@
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
-	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
-	KASSERT((m->oflags & VPO_BUSY) == 0,
-	    ("pmap_clear_modify: page %p is busy", m));
+	VM_OBJECT_ASSERT_WLOCKED(m->object);
+	KASSERT(!vm_page_xbusied(m),
+	    ("pmap_clear_modify: page %p is exclusive busied", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
 	 * If the object containing the page is locked and the page is not
-	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
@@ -4871,7 +5139,7 @@
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		goto small_mappings;
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
-	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
+	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
@@ -4908,7 +5176,7 @@
 		PMAP_UNLOCK(pmap);
 	}
 small_mappings:
-	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pde = pmap_pde(pmap, pv->pv_va);
@@ -4931,73 +5199,6 @@
 }
 
 /*
- *	pmap_clear_reference:
- *
- *	Clear the reference bit on the specified physical page.
- */
-void
-pmap_clear_reference(vm_page_t m)
-{
-	struct md_page *pvh;
-	pv_entry_t next_pv, pv;
-	pmap_t pmap;
-	pd_entry_t oldpde, *pde;
-	pt_entry_t *pte;
-	vm_offset_t va;
-
-	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
-	    ("pmap_clear_reference: page %p is not managed", m));
-	rw_wlock(&pvh_global_lock);
-	sched_pin();
-	if ((m->flags & PG_FICTITIOUS) != 0)
-		goto small_mappings;
-	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
-	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
-		va = pv->pv_va;
-		pmap = PV_PMAP(pv);
-		PMAP_LOCK(pmap);
-		pde = pmap_pde(pmap, va);
-		oldpde = *pde;
-		if ((oldpde & PG_A) != 0) {
-			if (pmap_demote_pde(pmap, pde, va)) {
-				/*
-				 * Remove the mapping to a single page so
-				 * that a subsequent access may repromote.
-				 * Since the underlying page table page is
-				 * fully populated, this removal never frees
-				 * a page table page.
-				 */
-				va += VM_PAGE_TO_PHYS(m) - (oldpde &
-				    PG_PS_FRAME);
-				pmap_remove_page(pmap, va, NULL);
-			}
-		}
-		PMAP_UNLOCK(pmap);
-	}
-small_mappings:
-	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
-		pmap = PV_PMAP(pv);
-		PMAP_LOCK(pmap);
-		pde = pmap_pde(pmap, pv->pv_va);
-		KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found"
-		    " a 4mpage in page %p's pv list", m));
-		pte = pmap_pte_quick(pmap, pv->pv_va);
-		if ((*pte & PG_A) != 0) {
-			/*
-			 * Regardless of whether a pte is 32 or 64 bits
-			 * in size, PG_A is among the least significant
-			 * 32 bits. 
-			 */
-			atomic_clear_int((u_int *)pte, PG_A);
-			pmap_invalidate_page(pmap, pv->pv_va);
-		}
-		PMAP_UNLOCK(pmap);
-	}
-	sched_unpin();
-	rw_wunlock(&pvh_global_lock);
-}
-
-/*
  * Miscellaneous support routines follow
  */
 
@@ -5044,24 +5245,51 @@
 void *
 pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
 {
+	struct pmap_preinit_mapping *ppim;
 	vm_offset_t va, offset;
 	vm_size_t tmpsize;
+	int i;
 
 	offset = pa & PAGE_MASK;
-	size = roundup(offset + size, PAGE_SIZE);
+	size = round_page(offset + size);
 	pa = pa & PG_FRAME;
 
 	if (pa < KERNLOAD && pa + size <= KERNLOAD)
 		va = KERNBASE + pa;
-	else
-		va = kmem_alloc_nofault(kernel_map, size);
-	if (!va)
-		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
-
+	else if (!pmap_initialized) {	/* record the request in a preinit slot */
+		va = 0;
+		for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
+			ppim = pmap_preinit_mapping + i;
+			if (ppim->va == 0) {	/* va == 0 marks a free slot */
+				ppim->pa = pa;
+				ppim->sz = size;
+				ppim->mode = mode;
+				ppim->va = virtual_avail;
+				virtual_avail += size;	/* carve the KVA off virtual_avail */
+				va = ppim->va;
+				break;
+			}
+		}
+		if (va == 0)	/* no free slot was found */
+			panic("%s: too many preinit mappings", __func__);
+	} else {
+		/*
+		 * If we have a preinit mapping, re-use it.
+		 */
+		for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
+			ppim = pmap_preinit_mapping + i;
+			if (ppim->pa == pa && ppim->sz == size &&
+			    ppim->mode == mode)
+				return ((void *)(ppim->va + offset));
+		}
+		va = kva_alloc(size);
+		if (va == 0)
+			panic("%s: Couldn't allocate KVA", __func__);
+	}
 	for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
 		pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
 	pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
-	pmap_invalidate_cache_range(va, va + size);
+	pmap_invalidate_cache_range(va, va + size, FALSE);
 	return ((void *)(va + offset));
 }
 
@@ -5082,14 +5310,31 @@
 void
 pmap_unmapdev(vm_offset_t va, vm_size_t size)
 {
-	vm_offset_t base, offset;
+	struct pmap_preinit_mapping *ppim;
+	vm_offset_t offset;
+	int i;
 
 	if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
 		return;
-	base = trunc_page(va);
 	offset = va & PAGE_MASK;
-	size = roundup(offset + size, PAGE_SIZE);
-	kmem_free(kernel_map, base, size);
+	size = round_page(offset + size);
+	va = trunc_page(va);
+	for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
+		ppim = pmap_preinit_mapping + i;
+		if (ppim->va == va && ppim->sz == size) {
+			if (pmap_initialized)	/* slot is left in place once pmap is initialized */
+				return;
+			ppim->pa = 0;
+			ppim->va = 0;
+			ppim->sz = 0;
+			ppim->mode = 0;
+			if (va + size == virtual_avail)	/* range ends at virtual_avail: give it back */
+				virtual_avail = va;
+			return;
+		}
+	}
+	if (pmap_initialized)	/* no preinit slot matched; nothing is freed otherwise */
+		kva_free(va, size);
 }
 
 /*
@@ -5129,8 +5374,10 @@
 {
 	struct sysmaps *sysmaps;
 	vm_offset_t sva, eva;
+	bool useclflushopt;
 
-	if ((cpu_feature & CPUID_CLFSH) != 0) {
+	useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0;
+	if (useclflushopt || (cpu_feature & CPUID_CLFSH) != 0) {
 		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 		mtx_lock(&sysmaps->lock);
 		if (*sysmaps->CMAP2)
@@ -5143,14 +5390,25 @@
 		eva = sva + PAGE_SIZE;
 
 		/*
-		 * Use mfence despite the ordering implied by
-		 * mtx_{un,}lock() because clflush is not guaranteed
-		 * to be ordered by any other instruction.
+		 * Use mfence or sfence despite the ordering implied by
+		 * mtx_{un,}lock() because clflush on non-Intel CPUs
+		 * and clflushopt are not guaranteed to be ordered by
+		 * any other instruction.
 		 */
-		mfence();
-		for (; sva < eva; sva += cpu_clflush_line_size)
-			clflush(sva);
-		mfence();
+		if (useclflushopt)
+			sfence();
+		else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+			mfence();
+		for (; sva < eva; sva += cpu_clflush_line_size) {
+			if (useclflushopt)
+				clflushopt(sva);
+			else
+				clflush(sva);
+		}
+		if (useclflushopt)
+			sfence();
+		else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+			mfence();
 		*sysmaps->CMAP2 = 0;
 		sched_unpin();
 		mtx_unlock(&sysmaps->lock);
@@ -5179,7 +5437,7 @@
 
 	base = trunc_page(va);
 	offset = va & PAGE_MASK;
-	size = roundup(offset + size, PAGE_SIZE);
+	size = round_page(offset + size);
 
 	/*
 	 * Only supported on kernel virtual addresses above the recursive map.
@@ -5267,7 +5525,7 @@
 	 */
 	if (changed) {
 		pmap_invalidate_range(kernel_pmap, base, tmpva);
-		pmap_invalidate_cache_range(base, tmpva);
+		pmap_invalidate_cache_range(base, tmpva, FALSE);
 	}
 	return (0);
 }
@@ -5342,7 +5600,7 @@
 	CPU_CLR(cpuid, &oldpmap->pm_active);
 	CPU_SET(cpuid, &pmap->pm_active);
 #endif
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	cr3 = vtophys(pmap->pm_pdpt);
 #else
 	cr3 = vtophys(pmap->pm_pdir);
@@ -5486,7 +5744,7 @@
 
 	printf("pa %x", pa);
 	m = PHYS_TO_VM_PAGE(pa);
-	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
 		pads(pmap);

Modified: trunk/sys/i386/i386/ptrace_machdep.c
===================================================================
--- trunk/sys/i386/i386/ptrace_machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/ptrace_machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2005 Doug Rabson
  * All rights reserved.
@@ -26,14 +27,16 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/ptrace_machdep.c 286311 2015-08-05 08:17:10Z kib $");
 
 #include "opt_cpu.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
+#include <machine/frame.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 
@@ -41,9 +44,82 @@
 #define CPU_ENABLE_SSE
 #endif
 
-int
-cpu_ptrace(struct thread *td, int req, void *addr, int data)
+#ifdef CPU_ENABLE_SSE
+static int
+cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data)
 {
+	struct ptrace_xstate_info info;
+	char *savefpu;
+	int error;
+
+	if (!use_xsave)
+		return (EOPNOTSUPP);
+
+	switch (req) {
+	case PT_GETXSTATE_OLD:
+		npxgetregs(td);
+		savefpu = (char *)(get_pcb_user_save_td(td) + 1);
+		error = copyout(savefpu, addr,
+		    cpu_max_ext_state_size - sizeof(union savefpu));
+		break;
+
+	case PT_SETXSTATE_OLD:
+		if (data > cpu_max_ext_state_size - sizeof(union savefpu)) {
+			error = EINVAL;
+			break;
+		}
+		savefpu = malloc(data, M_TEMP, M_WAITOK);
+		error = copyin(addr, savefpu, data);
+		if (error == 0) {
+			npxgetregs(td);
+			error = npxsetxstate(td, savefpu, data);
+		}
+		free(savefpu, M_TEMP);
+		break;
+
+	case PT_GETXSTATE_INFO:
+		if (data != sizeof(info)) {
+			error  = EINVAL;
+			break;
+		}
+		info.xsave_len = cpu_max_ext_state_size;
+		info.xsave_mask = xsave_mask;
+		error = copyout(&info, addr, data);
+		break;
+
+	case PT_GETXSTATE:
+		npxgetregs(td);
+		savefpu = (char *)(get_pcb_user_save_td(td));
+		error = copyout(savefpu, addr, cpu_max_ext_state_size);
+		break;
+
+	case PT_SETXSTATE:
+		if (data < sizeof(union savefpu) ||
+		    data > cpu_max_ext_state_size) {
+			error = EINVAL;
+			break;
+		}
+		savefpu = malloc(data, M_TEMP, M_WAITOK);
+		error = copyin(addr, savefpu, data);
+		if (error == 0)
+			error = npxsetregs(td, (union savefpu *)savefpu,
+			    savefpu + sizeof(union savefpu), data -
+			    sizeof(union savefpu));
+		free(savefpu, M_TEMP);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return (error);
+}
+#endif
+
+static int
+cpu_ptrace_xmm(struct thread *td, int req, void *addr, int data)
+{
 #ifdef CPU_ENABLE_SSE
 	struct savexmm *fpstate;
 	int error;
@@ -51,7 +127,7 @@
 	if (!cpu_fxsr)
 		return (EINVAL);
 
-	fpstate = &td->td_pcb->pcb_user_save.sv_xmm;
+	fpstate = &get_pcb_user_save_td(td)->sv_xmm;
 	switch (req) {
 	case PT_GETXMMREGS:
 		npxgetregs(td);
@@ -64,6 +140,14 @@
 		fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
 		break;
 
+	case PT_GETXSTATE_OLD:
+	case PT_SETXSTATE_OLD:
+	case PT_GETXSTATE_INFO:
+	case PT_GETXSTATE:
+	case PT_SETXSTATE:
+		error = cpu_ptrace_xstate(td, req, addr, data);
+		break;
+
 	default:
 		return (EINVAL);
 	}
@@ -73,3 +157,51 @@
 	return (EINVAL);
 #endif
 }
+
+int
+cpu_ptrace(struct thread *td, int req, void *addr, int data)
+{
+	struct segment_descriptor *sdp, sd;
+	register_t r;
+	int error;
+
+	switch (req) {
+	case PT_GETXMMREGS:
+	case PT_SETXMMREGS:
+	case PT_GETXSTATE_OLD:
+	case PT_SETXSTATE_OLD:
+	case PT_GETXSTATE_INFO:
+	case PT_GETXSTATE:
+	case PT_SETXSTATE:
+		error = cpu_ptrace_xmm(td, req, addr, data);
+		break;
+
+	case PT_GETFSBASE:
+	case PT_GETGSBASE:
+		sdp = req == PT_GETFSBASE ? &td->td_pcb->pcb_fsd :
+		    &td->td_pcb->pcb_gsd;
+		r = sdp->sd_hibase << 24 | sdp->sd_lobase;
+		error = copyout(&r, addr, sizeof(r));
+		break;
+
+	case PT_SETFSBASE:
+	case PT_SETGSBASE:
+		error = copyin(addr, &r, sizeof(r));
+		if (error != 0)
+			break;
+		fill_based_sd(&sd, r);
+		if (req == PT_SETFSBASE) {
+			td->td_pcb->pcb_fsd = sd;
+			td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
+		} else {
+			td->td_pcb->pcb_gsd = sd;
+			td->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL);
+		}
+		break;
+
+	default:
+		return (EINVAL);
+	}
+
+	return (error);
+}

Modified: trunk/sys/i386/i386/stack_machdep.c
===================================================================
--- trunk/sys/i386/i386/stack_machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/stack_machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2005 Antoine Brodin
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/stack_machdep.c 286396 2015-08-07 04:31:02Z kib $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -40,7 +41,7 @@
 #include <vm/pmap.h>
 
 static void
-stack_capture(struct stack *st, register_t ebp)
+stack_capture(struct thread *td, struct stack *st, register_t ebp)
 {
 	struct i386_frame *frame;
 	vm_offset_t callpc;
@@ -56,8 +57,8 @@
 		if (stack_put(st, callpc) == -1)
 			break;
 		if (frame->f_frame <= frame ||
-		    (vm_offset_t)frame->f_frame >=
-		    (vm_offset_t)ebp + KSTACK_PAGES * PAGE_SIZE)
+		    (vm_offset_t)frame->f_frame >= td->td_kstack +
+		    td->td_kstack_pages * PAGE_SIZE)
 			break;
 		frame = frame->f_frame;
 	}
@@ -74,7 +75,7 @@
 		panic("stack_save_td: running");
 
 	ebp = td->td_pcb->pcb_ebp;
-	stack_capture(st, ebp);
+	stack_capture(td, st, ebp);
 }
 
 void
@@ -83,5 +84,5 @@
 	register_t ebp;
 
 	__asm __volatile("movl %%ebp,%0" : "=r" (ebp));
-	stack_capture(st, ebp);
+	stack_capture(curthread, st, ebp);
 }

Modified: trunk/sys/i386/i386/support.s
===================================================================
--- trunk/sys/i386/i386/support.s	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/support.s	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1993 The Regents of the University of California.
  * All rights reserved.
@@ -26,7 +27,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/support.s 274648 2014-11-18 12:53:32Z kib $
  */
 
 #include "opt_npx.h"
@@ -62,8 +63,8 @@
 	stosb
 	popl	%edi
 	ret
-END(bzero)	
-	
+END(bzero)
+
 ENTRY(sse2_pagezero)
 	pushl	%ebx
 	movl	8(%esp),%ecx
@@ -181,11 +182,13 @@
  *  ws at tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
  */
 ENTRY(bcopy)
+	pushl	%ebp
+	movl	%esp,%ebp
 	pushl	%esi
 	pushl	%edi
-	movl	12(%esp),%esi
-	movl	16(%esp),%edi
-	movl	20(%esp),%ecx
+	movl	8(%ebp),%esi
+	movl	12(%ebp),%edi
+	movl	16(%ebp),%ecx
 
 	movl	%edi,%eax
 	subl	%esi,%eax
@@ -196,12 +199,13 @@
 	cld					/* nope, copy forwards */
 	rep
 	movsl
-	movl	20(%esp),%ecx
+	movl	16(%ebp),%ecx
 	andl	$3,%ecx				/* any bytes left? */
 	rep
 	movsb
 	popl	%edi
 	popl	%esi
+	popl	%ebp
 	ret
 
 	ALIGN_TEXT
@@ -214,7 +218,7 @@
 	std
 	rep
 	movsb
-	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
+	movl	16(%ebp),%ecx			/* copy remainder by 32-bit words */
 	shrl	$2,%ecx
 	subl	$3,%esi
 	subl	$3,%edi
@@ -223,6 +227,7 @@
 	popl	%edi
 	popl	%esi
 	cld
+	popl	%ebp
 	ret
 END(bcopy)
 
@@ -385,16 +390,16 @@
 	ret
 
 /*
- * casuword.  Compare and set user word.  Returns -1 or the current value.
+ * casueword.  Compare and set user word.  Returns -1 on fault,
+ * 0 on non-faulting access.  The current value is in *oldp.
  */
-
-ALTENTRY(casuword32)
-ENTRY(casuword)
+ALTENTRY(casueword32)
+ENTRY(casueword)
 	movl	PCPU(CURPCB),%ecx
 	movl	$fusufault,PCB_ONFAULT(%ecx)
 	movl	4(%esp),%edx			/* dst */
 	movl	8(%esp),%eax			/* old */
-	movl	12(%esp),%ecx			/* new */
+	movl	16(%esp),%ecx			/* new */
 
 	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
 	ja	fusufault
@@ -412,17 +417,20 @@
 
 	movl	PCPU(CURPCB),%ecx
 	movl	$0,PCB_ONFAULT(%ecx)
+	movl	12(%esp),%edx			/* oldp */
+	movl	%eax,(%edx)
+	xorl	%eax,%eax
 	ret
-END(casuword32)
-END(casuword)
+END(casueword32)
+END(casueword)
 
 /*
  * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
- * memory.  All these functions are MPSAFE.
+ * memory.
  */
 
-ALTENTRY(fuword32)
-ENTRY(fuword)
+ALTENTRY(fueword32)
+ENTRY(fueword)
 	movl	PCPU(CURPCB),%ecx
 	movl	$fusufault,PCB_ONFAULT(%ecx)
 	movl	4(%esp),%edx			/* from */
@@ -432,9 +440,12 @@
 
 	movl	(%edx),%eax
 	movl	$0,PCB_ONFAULT(%ecx)
+	movl	8(%esp),%edx
+	movl	%eax,(%edx)
+	xorl	%eax,%eax
 	ret
-END(fuword32)
-END(fuword)
+END(fueword32)
+END(fueword)
 
 /*
  * fuswintr() and suswintr() are specialized variants of fuword16() and
@@ -690,7 +701,7 @@
 	movl	4(%esp),%eax
 	lgdt	(%eax)
 #endif
-	
+
 	/* flush the prefetch q */
 	jmp	1f
 	nop
@@ -736,13 +747,13 @@
 
 /* void reset_dbregs() */
 ENTRY(reset_dbregs)
-	movl    $0,%eax
-	movl    %eax,%dr7     /* disable all breapoints first */
-	movl    %eax,%dr0
-	movl    %eax,%dr1
-	movl    %eax,%dr2
-	movl    %eax,%dr3
-	movl    %eax,%dr6
+	movl	$0,%eax
+	movl	%eax,%dr7	/* disable all breakpoints first */
+	movl	%eax,%dr0
+	movl	%eax,%dr1
+	movl	%eax,%dr2
+	movl	%eax,%dr3
+	movl	%eax,%dr6
 	ret
 END(reset_dbregs)
 


Property changes on: trunk/sys/i386/i386/support.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/swtch.s
===================================================================
--- trunk/sys/i386/i386/swtch.s	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/swtch.s	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/swtch.s 282065 2015-04-27 08:02:12Z kib $
  */
 
 #include "opt_npx.h"
@@ -174,7 +175,7 @@
 
 	/* switch address space */
 	movl	PCB_CR3(%edx),%eax
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	cmpl	%eax,IdlePDPT			/* Kernel address space? */
 #else
 	cmpl	%eax,IdlePTD			/* Kernel address space? */
@@ -386,70 +387,6 @@
 	pushfl
 	popl	PCB_PSL(%ecx)
 
-#ifdef DEV_NPX
-	/*
-	 * If fpcurthread == NULL, then the npx h/w state is irrelevant and the
-	 * state had better already be in the pcb.  This is true for forks
-	 * but not for dumps (the old book-keeping with FP flags in the pcb
-	 * always lost for dumps because the dump pcb has 0 flags).
-	 *
-	 * If fpcurthread != NULL, then we have to save the npx h/w state to
-	 * fpcurthread's pcb and copy it to the requested pcb, or save to the
-	 * requested pcb and reload.  Copying is easier because we would
-	 * have to handle h/w bugs for reloading.  We used to lose the
-	 * parent's npx state for forks by forgetting to reload.
-	 */
-	pushfl
-	CLI
-	movl	PCPU(FPCURTHREAD),%eax
-	testl	%eax,%eax
-	je	1f
-
-	pushl	%ecx
-	movl	TD_PCB(%eax),%eax
-	movl	PCB_SAVEFPU(%eax),%eax
-	pushl	%eax
-	pushl	%eax
-	call	npxsave
-	addl	$4,%esp
-	popl	%eax
-	popl	%ecx
-
-	pushl	$PCB_SAVEFPU_SIZE
-	leal	PCB_USERFPU(%ecx),%ecx
-	pushl	%ecx
-	pushl	%eax
-	call	bcopy
-	addl	$12,%esp
-1:
-	popfl
-#endif	/* DEV_NPX */
-
-	ret
-END(savectx)
-
-/*
- * suspendctx(pcb)
- * Update pcb, suspending current processor state.
- */
-ENTRY(suspendctx)
-	/* Fetch PCB. */
-	movl	4(%esp),%ecx
-
-	/* Save context by calling savectx(). */
-	pushl	%ecx
-	call	savectx
-	addl	$4,%esp
-
-	/* Fetch PCB again. */
-	movl	4(%esp),%ecx
-
-	/* Update caller's return address and stack pointer. */
-	movl	(%esp),%eax
-	movl	%eax,PCB_EIP(%ecx)
-	movl	%esp,PCB_ESP(%ecx)
-
-	/* Save other registers and descriptor tables. */
 	movl	%cr0,%eax
 	movl	%eax,PCB_CR0(%ecx)
 	movl	%cr2,%eax
@@ -482,16 +419,13 @@
 
 	movl	$1,%eax
 	ret
-END(suspendctx)
+END(savectx)
 
 /*
- * resumectx(pcb in %esi)
+ * resumectx(pcb) __fastcall
  * Resuming processor state from pcb.
  */
 ENTRY(resumectx)
-	/* Fetch PCB. */
-	movl	%esi,%ecx
-
 	/* Restore GDT. */
 	lgdt	PCB_GDT(%ecx)
 
@@ -547,10 +481,6 @@
 	movl	PCB_DR7(%ecx),%eax
 	movl	%eax,%dr7
 
-#ifdef DEV_NPX
-	/* XXX FIX ME */
-#endif
-
 	/* Restore other registers */
 	movl	PCB_EDI(%ecx),%edi
 	movl	PCB_ESI(%ecx),%esi


Property changes on: trunk/sys/i386/i386/swtch.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/symbols.raw
===================================================================
--- trunk/sys/i386/i386/symbols.raw	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/symbols.raw	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,7 +1,7 @@
 #	@(#)symbols.raw	7.6 (Berkeley) 5/8/91
 #
-# $FreeBSD$
-#
+# $FreeBSD: stable/10/sys/i386/i386/symbols.raw 253361 2013-07-15 12:18:36Z glebius $
+# $MidnightBSD$
 
 
 #gdb
@@ -43,7 +43,6 @@
 	_averunnable
 	_boottime
 #netstat
-	_mbstat
 	_ipstat
 	_tcb
 	_tcpstat


Property changes on: trunk/sys/i386/i386/symbols.raw
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/sys_machdep.c
===================================================================
--- trunk/sys/i386/i386/sys_machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/sys_machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
@@ -30,13 +31,13 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/sys_machdep.c 306961 2016-10-10 11:53:54Z tijl $");
 
 #include "opt_capsicum.h"
 #include "opt_kstack_pages.h"
 
 #include <sys/param.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -88,6 +89,27 @@
 	union descriptor *descs);
 static int i386_ldt_grow(struct thread *td, int len);
 
+void
+fill_based_sd(struct segment_descriptor *sdp, uint32_t base)
+{
+
+	sdp->sd_lobase = base & 0xffffff;
+	sdp->sd_hibase = (base >> 24) & 0xff;
+#ifdef XEN
+	/* need to do nosegneg like Linux */
+	sdp->sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
+#else			
+	sdp->sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
+#endif
+	sdp->sd_hilimit = 0xf;
+	sdp->sd_type = SDT_MEMRWA;
+	sdp->sd_dpl = SEL_UPL;
+	sdp->sd_p = 1;
+	sdp->sd_xx = 0;
+	sdp->sd_def32 = 1;
+	sdp->sd_gran = 1;
+}
+
 #ifndef _SYS_SYSPROTO_H_
 struct sysarch_args {
 	int op;
@@ -105,6 +127,7 @@
 	union {
 		struct i386_ldt_args largs;
 		struct i386_ioperm_args iargs;
+		struct i386_get_xfpustate xfpu;
 	} kargs;
 	uint32_t base;
 	struct segment_descriptor sd, *sdp;
@@ -126,10 +149,15 @@
 		case I386_SET_FSBASE:
 		case I386_GET_GSBASE:
 		case I386_SET_GSBASE:
+		case I386_GET_XFPUSTATE:
 			break;
 
 		case I386_SET_IOPERM:
 		default:
+#ifdef KTRACE
+			if (KTRPOINT(td, KTR_CAPFAIL))
+				ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL);
+#endif
 			return (ECAPMODE);
 		}
 	}
@@ -150,6 +178,11 @@
 		if (kargs.largs.num > MAX_LD || kargs.largs.num <= 0)
 			return (EINVAL);
 		break;
+	case I386_GET_XFPUSTATE:
+		if ((error = copyin(uap->parms, &kargs.xfpu,
+		    sizeof(struct i386_get_xfpustate))) != 0)
+			return (error);
+		break;
 	default:
 		break;
 	}
@@ -160,18 +193,14 @@
 		break;
 	case I386_SET_LDT:
 		if (kargs.largs.descs != NULL) {
-			lp = (union descriptor *)kmem_alloc(kernel_map,
-			    kargs.largs.num * sizeof(union descriptor));
-			if (lp == NULL) {
-				error = ENOMEM;
-				break;
-			}
+			lp = (union descriptor *)malloc(
+			    kargs.largs.num * sizeof(union descriptor),
+			    M_TEMP, M_WAITOK);
 			error = copyin(kargs.largs.descs, lp,
 			    kargs.largs.num * sizeof(union descriptor));
 			if (error == 0)
 				error = i386_set_ldt(td, &kargs.largs, lp);
-			kmem_free(kernel_map, (vm_offset_t)lp,
-			    kargs.largs.num * sizeof(union descriptor));
+			free(lp, M_TEMP);
 		} else {
 			error = i386_set_ldt(td, &kargs.largs, NULL);
 		}
@@ -195,7 +224,7 @@
 		break;
 	case I386_SET_FSBASE:
 		error = copyin(uap->parms, &base, sizeof(base));
-		if (!error) {
+		if (error == 0) {
 			/*
 			 * Construct a descriptor and store it in the pcb for
 			 * the next context switch.  Also store it in the gdt
@@ -202,21 +231,7 @@
 			 * so that the load of tf_fs into %fs will activate it
 			 * at return to userland.
 			 */
-			sd.sd_lobase = base & 0xffffff;
-			sd.sd_hibase = (base >> 24) & 0xff;
-#ifdef XEN
-			/* need to do nosegneg like Linux */
-			sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
-#else			
-			sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
-#endif
-			sd.sd_hilimit = 0xf;
-			sd.sd_type  = SDT_MEMRWA;
-			sd.sd_dpl   = SEL_UPL;
-			sd.sd_p     = 1;
-			sd.sd_xx    = 0;
-			sd.sd_def32 = 1;
-			sd.sd_gran  = 1;
+			fill_based_sd(&sd, base);
 			critical_enter();
 			td->td_pcb->pcb_fsd = sd;
 #ifdef XEN
@@ -236,28 +251,13 @@
 		break;
 	case I386_SET_GSBASE:
 		error = copyin(uap->parms, &base, sizeof(base));
-		if (!error) {
+		if (error == 0) {
 			/*
 			 * Construct a descriptor and store it in the pcb for
 			 * the next context switch.  Also store it in the gdt
 			 * because we have to do a load_gs() right now.
 			 */
-			sd.sd_lobase = base & 0xffffff;
-			sd.sd_hibase = (base >> 24) & 0xff;
-
-#ifdef XEN
-			/* need to do nosegneg like Linux */
-			sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
-#else	
-			sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
-#endif
-			sd.sd_hilimit = 0xf;
-			sd.sd_type  = SDT_MEMRWA;
-			sd.sd_dpl   = SEL_UPL;
-			sd.sd_p     = 1;
-			sd.sd_xx    = 0;
-			sd.sd_def32 = 1;
-			sd.sd_gran  = 1;
+			fill_based_sd(&sd, base);
 			critical_enter();
 			td->td_pcb->pcb_gsd = sd;
 #ifdef XEN
@@ -270,6 +270,14 @@
 			load_gs(GSEL(GUGS_SEL, SEL_UPL));
 		}
 		break;
+	case I386_GET_XFPUSTATE:
+		if (kargs.xfpu.len > cpu_max_ext_state_size -
+		    sizeof(union savefpu))
+			return (EINVAL);
+		npxgetregs(td);
+		error = copyout((char *)(get_pcb_user_save_td(td) + 1),
+		    kargs.xfpu.addr, kargs.xfpu.len);
+		break;
 	default:
 		error = EINVAL;
 		break;
@@ -294,13 +302,10 @@
 		0			/* granularity */
 	};
 
-	ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1));
-	if (ext == 0)
-		return (ENOMEM);
-	bzero(ext, sizeof(struct pcb_ext)); 
+	ext = (struct pcb_ext *)kmem_malloc(kernel_arena, ctob(IOPAGES+1),
+	    M_WAITOK | M_ZERO);
 	/* -16 is so we can convert a trapframe into vm86trapframe inplace */
-	ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) -
-	    sizeof(struct pcb) - 16;
+	ext->ext_tss.tss_esp0 = (vm_offset_t)td->td_pcb - 16;
 	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
 	/*
 	 * The last byte of the i/o map must be followed by an 0xff byte.
@@ -340,8 +345,9 @@
 	struct thread *td;
 	struct i386_ioperm_args *uap;
 {
-	int i, error;
 	char *iomap;
+	u_int i;
+	int error;
 
 	if ((error = priv_check(td, PRIV_IO)) != 0)
 		return (error);
@@ -359,7 +365,8 @@
 			return (error);
 	iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;
 
-	if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
+	if (uap->start > uap->start + uap->length ||
+	    uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
 		return (EINVAL);
 
 	for (i = uap->start; i < uap->start + uap->length; i++) {
@@ -467,13 +474,8 @@
                 M_SUBPROC, M_WAITOK); 
  
         new_ldt->ldt_len = len = NEW_MAX_LD(len); 
-        new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, 
-                round_page(len * sizeof(union descriptor))); 
-        if (new_ldt->ldt_base == NULL) { 
-                free(new_ldt, M_SUBPROC);
-		mtx_lock_spin(&dt_lock);
-                return (NULL);
-        } 
+        new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena, 
+	    round_page(len * sizeof(union descriptor)), M_WAITOK);
         new_ldt->ldt_refcnt = 1; 
         new_ldt->ldt_active = 0; 
  
@@ -507,13 +509,8 @@
 		M_SUBPROC, M_WAITOK);
 
 	new_ldt->ldt_len = len = NEW_MAX_LD(len);
-	new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
-		len * sizeof(union descriptor));
-	if (new_ldt->ldt_base == NULL) {
-		free(new_ldt, M_SUBPROC);
-		mtx_lock_spin(&dt_lock);
-		return (NULL);
-	}
+	new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
+	    len * sizeof(union descriptor), M_WAITOK | M_ZERO);
 	new_ldt->ldt_refcnt = 1;
 	new_ldt->ldt_active = 0;
 
@@ -570,7 +567,7 @@
 	mtx_assert(&dt_lock, MA_OWNED);
 	if (--pldt->ldt_refcnt == 0) {
 		mtx_unlock_spin(&dt_lock);
-		kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base,
+		kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base,
 			pldt->ldt_len * sizeof(union descriptor));
 		free(pldt, M_SUBPROC);
 	} else
@@ -849,7 +846,7 @@
 				 * free the new object and return.
 				 */
 				mtx_unlock_spin(&dt_lock);
-				kmem_free(kernel_map,
+				kmem_free(kernel_arena,
 				   (vm_offset_t)new_ldt->ldt_base,
 				   new_ldt->ldt_len * sizeof(union descriptor));
 				free(new_ldt, M_SUBPROC);
@@ -883,7 +880,7 @@
 		mtx_unlock_spin(&dt_lock);
 #endif
 		if (old_ldt_base != NULL_LDT_BASE) {
-			kmem_free(kernel_map, (vm_offset_t)old_ldt_base,
+			kmem_free(kernel_arena, (vm_offset_t)old_ldt_base,
 			    old_ldt_len * sizeof(union descriptor));
 			free(new_ldt, M_SUBPROC);
 		}

Modified: trunk/sys/i386/i386/trap.c
===================================================================
--- trunk/sys/i386/i386/trap.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/trap.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (C) 1994, David Greenman
  * Copyright (c) 1990, 1993
@@ -38,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/trap.c 333370 2018-05-08 17:05:39Z emaste $");
 
 /*
  * 386 Trap and System call handling
@@ -105,29 +106,6 @@
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
-
-/*
- * This is a hook which is initialised by the dtrace module
- * to handle traps which might occur during DTrace probe
- * execution.
- */
-dtrace_trap_func_t	dtrace_trap_func;
-
-dtrace_doubletrap_func_t	dtrace_doubletrap_func;
-
-/*
- * This is a hook which is initialised by the systrace module
- * when it is loaded. This keeps the DTrace syscall provider
- * implementation opaque. 
- */
-systrace_probe_func_t	systrace_probe_func;
-
-/*
- * These hooks are necessary for the pid, usdt and fasttrap providers.
- */
-dtrace_fasttrap_probe_ptr_t	dtrace_fasttrap_probe_ptr;
-dtrace_pid_probe_ptr_t		dtrace_pid_probe_ptr;
-dtrace_return_probe_ptr_t	dtrace_return_probe_ptr;
 #endif
 
 extern void trap(struct trapframe *frame);
@@ -139,7 +117,9 @@
 
 extern inthand_t IDTVEC(lcall_syscall);
 
-#define MAX_TRAP_MSG		33
+extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall);
+
+#define MAX_TRAP_MSG		32
 static char *trap_msg[] = {
 	"",					/*  0 unused */
 	"privileged instruction fault",		/*  1 T_PRIVINFLT */
@@ -174,12 +154,10 @@
 	"reserved (unknown) fault",		/* 30 T_RESERVED */
 	"",					/* 31 unused (reserved) */
 	"DTrace pid return trap",               /* 32 T_DTRACE_RET */
-	"DTrace fasttrap probe trap",           /* 33 T_DTRACE_PROBE */
-
 };
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
-extern int has_f00f_bug;
+int has_f00f_bug = 0;		/* Initialized so that it can be patched. */
 #endif
 
 #ifdef KDB
@@ -210,6 +188,9 @@
 void
 trap(struct trapframe *frame)
 {
+#ifdef KDTRACE_HOOKS
+	struct reg regs;
+#endif
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	int i = 0, ucode = 0, code;
@@ -266,33 +247,10 @@
 	/*
 	 * A trap can occur while DTrace executes a probe. Before
 	 * executing the probe, DTrace blocks re-scheduling and sets
-	 * a flag in it's per-cpu flags to indicate that it doesn't
+	 * a flag in its per-cpu flags to indicate that it doesn't
 	 * want to fault. On returning from the probe, the no-fault
 	 * flag is cleared and finally re-scheduling is enabled.
-	 *
-	 * If the DTrace kernel module has registered a trap handler,
-	 * call it and if it returns non-zero, assume that it has
-	 * handled the trap and modified the trap frame so that this
-	 * function can return normally.
 	 */
-	if (type == T_DTRACE_PROBE || type == T_DTRACE_RET ||
-	    type == T_BPTFLT) {
-		struct reg regs;
-
-		fill_frame_regs(frame, &regs);
-		if (type == T_DTRACE_PROBE &&
-		    dtrace_fasttrap_probe_ptr != NULL &&
-		    dtrace_fasttrap_probe_ptr(&regs) == 0)
-			goto out;
-		if (type == T_BPTFLT &&
-		    dtrace_pid_probe_ptr != NULL &&
-		    dtrace_pid_probe_ptr(&regs) == 0)
-			goto out;
-		if (type == T_DTRACE_RET &&
-		    dtrace_return_probe_ptr != NULL &&
-		    dtrace_return_probe_ptr(&regs) == 0)
-			goto out;
-	}
 	if ((type == T_PROTFLT || type == T_PAGEFLT) &&
 	    dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
 		goto out;
@@ -310,8 +268,9 @@
 			uprintf(
 			    "pid %ld (%s): trap %d with interrupts disabled\n",
 			    (long)curproc->p_pid, curthread->td_name, type);
-		else if (type != T_BPTFLT && type != T_TRCTRAP &&
-			 frame->tf_eip != (int)cpu_switch_load_gs) {
+		else if (type != T_NMI && type != T_BPTFLT &&
+		    type != T_TRCTRAP &&
+		    frame->tf_eip != (int)cpu_switch_load_gs) {
 			/*
 			 * XXX not quite right, since this may be for a
 			 * multiple fault in user mode.
@@ -321,9 +280,9 @@
 			/*
 			 * Page faults need interrupts disabled until later,
 			 * and we shouldn't enable interrupts while holding
-			 * a spin lock or if servicing an NMI.
+			 * a spin lock.
 			 */
-			if (type != T_NMI && type != T_PAGEFLT &&
+			if (type != T_PAGEFLT &&
 			    td->td_md.md_spinlock_count == 0)
 				enable_intr();
 		}
@@ -365,6 +324,14 @@
 		case T_BPTFLT:		/* bpt instruction fault */
 		case T_TRCTRAP:		/* trace trap */
 			enable_intr();
+#ifdef KDTRACE_HOOKS
+			if (type == T_BPTFLT) {
+				fill_frame_regs(frame, &regs);
+				if (dtrace_pid_probe_ptr != NULL &&
+				    dtrace_pid_probe_ptr(&regs) == 0)
+					goto out;
+			}
+#endif
 			frame->tf_eflags &= ~PSL_T;
 			i = SIGTRAP;
 			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
@@ -405,6 +372,10 @@
 			i = SIGBUS;
 			ucode = BUS_OBJERR;
 			break;
+		case T_ALIGNFLT:
+			i = SIGBUS;
+			ucode = BUS_ADRALN;
+			break;
 		case T_DOUBLEFLT:	/* double fault */
 		default:
 			i = SIGBUS;
@@ -501,7 +472,7 @@
 				goto userout;
 			} else if (panic_on_nmi)
 				panic("NMI indicates hardware failure");
-			break;
+			goto out;
 #endif /* POWERFAIL_NMI */
 #endif /* DEV_ISA */
 
@@ -544,6 +515,15 @@
 #endif
 			i = SIGFPE;
 			break;
+#ifdef KDTRACE_HOOKS
+		case T_DTRACE_RET:
+			enable_intr();
+			fill_frame_regs(frame, &regs);
+			if (dtrace_return_probe_ptr != NULL &&
+			    dtrace_return_probe_ptr(&regs) == 0)
+				goto out;
+			goto userout;
+#endif
 		}
 	} else {
 		/* kernel trap */
@@ -557,8 +537,8 @@
 
 		case T_DNA:
 #ifdef DEV_NPX
-			KASSERT(!PCB_USER_FPU(td->td_pcb),
-			    ("Unregistered use of FPU in kernel"));
+			if (PCB_USER_FPU(td->td_pcb))
+				panic("Unregistered use of FPU in kernel");
 			if (npxdna())
 				goto out;
 #endif
@@ -590,11 +570,7 @@
 					vm86_trap((struct vm86frame *)frame);
 				goto out;
 			}
-			if (type == T_STKFLT)
-				break;
-
 			/* FALL THROUGH */
-
 		case T_SEGNPFLT:	/* segment not present fault */
 			if (curpcb->pcb_flags & PCB_VM86CALL)
 				break;
@@ -635,6 +611,9 @@
 				frame->tf_eip = (int)doreti_iret_fault;
 				goto out;
 			}
+			if (type == T_STKFLT)
+				break;
+
 			if (frame->tf_eip == (int)doreti_popl_ds) {
 				frame->tf_eip = (int)doreti_popl_ds_fault;
 				goto out;
@@ -707,7 +686,35 @@
 				load_dr6(rdr6() & 0xfffffff0);
 				goto out;
 			}
+
 			/*
+			 * Malicious user code can configure a debug
+			 * register watchpoint to trap on data access
+			 * to the top of stack and then execute 'pop
+			 * %ss; int 3'.  Due to exception deferral for
+			 * 'pop %ss', the CPU will not interrupt 'int
+			 * 3' to raise the DB# exception for the debug
+			 * register but will postpone the DB# until
+			 * execution of the first instruction of the
+			 * BP# handler (in kernel mode).  Normally the
+			 * previous check would ignore DB# exceptions
+			 * for watchpoints on user addresses raised in
+			 * kernel mode.  However, some CPU errata
+			 * include cases where DB# exceptions do not
+			 * properly set bits in %dr6, e.g. Haswell
+			 * HSD23 and Skylake-X SKZ24.
+			 *
+			 * A deferred DB# can also be raised on the
+			 * first instructions of system call entry
+			 * points or single-step traps via similar use
+			 * of 'pop %ss' or 'mov xxx, %ss'.
+			 */
+			if (frame->tf_eip ==
+			    (uintptr_t)IDTVEC(int0x80_syscall) ||
+			    frame->tf_eip == (uintptr_t)IDTVEC(bpt) ||
+			    frame->tf_eip == (uintptr_t)IDTVEC(dbg))
+				return;
+			/*
 			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
 			 */
 		case T_BPTFLT:
@@ -794,7 +801,6 @@
 
 user:
 	userret(td, frame);
-	mtx_assert(&Giant, MA_NOTOWNED);
 	KASSERT(PCB_USER_FPU(td->td_pcb),
 	    ("Return from trap with kernel FPU ctx leaked"));
 userout:
@@ -809,7 +815,7 @@
 	vm_offset_t eva;
 {
 	vm_offset_t va;
-	struct vmspace *vm = NULL;
+	struct vmspace *vm;
 	vm_map_t map;
 	int rv = 0;
 	vm_prot_t ftype;
@@ -872,7 +878,7 @@
 		 */
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 		if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
-			return -2;
+			return (-2);
 #endif
 		if (usermode)
 			goto nogo;
@@ -880,17 +886,21 @@
 		map = kernel_map;
 	} else {
 		/*
-		 * This is a fault on non-kernel virtual memory.
-		 * vm is initialized above to NULL. If curproc is NULL
-		 * or curproc->p_vmspace is NULL the fault is fatal.
+		 * This is a fault on non-kernel virtual memory.  If either
+		 * p or p->p_vmspace is NULL, then the fault is fatal.
 		 */
-		if (p != NULL)
-			vm = p->p_vmspace;
-
-		if (vm == NULL)
+		if (p == NULL || (vm = p->p_vmspace) == NULL)
 			goto nogo;
 
 		map = &vm->vm_map;
+
+		/*
+		 * When accessing a user-space address, kernel must be
+		 * ready to accept the page fault, and provide a
+		 * handling routine.  Since accessing the address
+		 * without the handler is a bug, do not try to handle
+		 * it normally, and panic immediately.
+		 */
 		if (!usermode && (td->td_intr_nesting_level != 0 ||
 		    curpcb->pcb_onfault == NULL)) {
 			trap_fatal(frame, eva);
@@ -899,12 +909,20 @@
 	}
 
 	/*
+	 * If the trap was caused by errant bits in the PTE then panic.
+	 */
+	if (frame->tf_err & PGEX_RSV) {
+		trap_fatal(frame, eva);
+		return (-1);
+	}
+
+	/*
 	 * PGEX_I is defined only if the execute disable bit capability is
 	 * supported and enabled.
 	 */
 	if (frame->tf_err & PGEX_W)
 		ftype = VM_PROT_WRITE;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
 		ftype = VM_PROT_EXECUTE;
 #endif
@@ -957,8 +975,7 @@
 		trap_fatal(frame, eva);
 		return (-1);
 	}
-
-	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
 }
 
 static void
@@ -970,6 +987,9 @@
 	u_int type;
 	struct soft_segment_descriptor softseg;
 	char *msg;
+#ifdef KDB
+	bool handled;
+#endif
 
 	code = frame->tf_err;
 	type = frame->tf_trapno;
@@ -989,9 +1009,15 @@
 #endif
 	if (type == T_PAGEFLT) {
 		printf("fault virtual address	= 0x%x\n", eva);
-		printf("fault code		= %s %s, %s\n",
+		printf("fault code		= %s %s%s, %s\n",
 			code & PGEX_U ? "user" : "supervisor",
 			code & PGEX_W ? "write" : "read",
+#if defined(PAE) || defined(PAE_TABLES)
+			pg_nx != 0 ?
+			(code & PGEX_I ? " instruction" : " data") :
+#endif
+			"",
+			code & PGEX_RSV ? "reserved bits in PTE" :
 			code & PGEX_P ? "protection violation" : "page not present");
 	}
 	printf("instruction pointer	= 0x%x:0x%x\n",
@@ -1022,21 +1048,18 @@
 	if (frame->tf_eflags & PSL_VM)
 		printf("vm86, ");
 	printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
-	printf("current process		= ");
-	if (curproc) {
-		printf("%lu (%s)\n", (u_long)curproc->p_pid, curthread->td_name);
-	} else {
-		printf("Idle\n");
-	}
+	printf("current process		= %d (%s)\n",
+	    curproc->p_pid, curthread->td_name);
 
 #ifdef KDB
-	if (debugger_on_panic || kdb_active) {
+	if (debugger_on_panic) {
+		kdb_why = KDB_WHY_TRAP;
 		frame->tf_err = eva;	/* smuggle fault address to ddb */
-		if (kdb_trap(type, 0, frame)) {
-			frame->tf_err = code;	/* restore error code */
+		handled = kdb_trap(type, 0, frame);
+		frame->tf_err = code;	/* restore error code */
+		kdb_why = KDB_WHY_UNSET;
+		if (handled)
 			return;
-		}
-		frame->tf_err = code;		/* restore error code */
 	}
 #endif
 	printf("trap number		= %d\n", type);
@@ -1083,6 +1106,7 @@
 	struct proc *p;
 	struct trapframe *frame;
 	caddr_t params;
+	long tmp;
 	int error;
 
 	p = td->td_proc;
@@ -1098,7 +1122,10 @@
 		/*
 		 * Code is first argument, followed by actual args.
 		 */
-		sa->code = fuword(params);
+		error = fueword(params, &tmp);
+		if (error == -1)
+			return (EFAULT);
+		sa->code = tmp;
 		params += sizeof(int);
 	} else if (sa->code == SYS___syscall) {
 		/*
@@ -1105,7 +1132,10 @@
 		 * Like syscall, but code is a quad, so as to maintain
 		 * quad alignment for the rest of the arguments.
 		 */
-		sa->code = fuword(params);
+		error = fueword(params, &tmp);
+		if (error == -1)
+			return (EFAULT);
+		sa->code = tmp;
 		params += sizeof(quad_t);
 	}
 
@@ -1174,7 +1204,7 @@
 	KASSERT(PCB_USER_FPU(td->td_pcb),
 	    ("System call %s returning with kernel FPU ctx leaked",
 	     syscallname(td->td_proc, sa.code)));
-	KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save,
+	KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
 	    ("System call %s returning with mangled pcb_save",
 	     syscallname(td->td_proc, sa.code)));
 

Modified: trunk/sys/i386/i386/uio_machdep.c
===================================================================
--- trunk/sys/i386/i386/uio_machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/uio_machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2004 Alan L. Cox <alc at cs.rice.edu>
  * Copyright (c) 1982, 1986, 1991, 1993
@@ -16,7 +17,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -36,9 +37,10 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/uio_machdep.c 266312 2014-05-17 13:59:11Z ian $");
 
 #include <sys/param.h>
+#include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
@@ -45,7 +47,6 @@
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/sf_buf.h>
-#include <sys/systm.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>

Modified: trunk/sys/i386/i386/vm86.c
===================================================================
--- trunk/sys/i386/i386/vm86.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/vm86.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1997 Jonathan Lemon
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/vm86.c 332325 2018-04-09 13:01:43Z emaste $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -128,8 +129,7 @@
 }
 
 int
-vm86_emulate(vmf)
-	struct vm86frame *vmf;
+vm86_emulate(struct vm86frame *vmf)
 {
 	struct vm86_kernel *vm86;
 	caddr_t addr;
@@ -586,10 +586,7 @@
  * caller's cs:ip routine.  
  */
 int
-vm86_datacall(intnum, vmf, vmc)
-	int intnum;
-	struct vm86frame *vmf;
-	struct vm86context *vmc;
+vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
 {
 	pt_entry_t *pte = (pt_entry_t *)vm86paddr;
 	vm_paddr_t page;
@@ -634,11 +631,8 @@
 }
 
 int
-vm86_getptr(vmc, kva, sel, off)
-	struct vm86context *vmc;
-	vm_offset_t kva;
-	u_short *sel;
-	u_short *off;
+vm86_getptr(struct vm86context *vmc, vm_offset_t kva, u_short *sel,
+     u_short *off)
 {
 	int i;
 
@@ -650,13 +644,10 @@
 			return (1);
 		}
 	return (0);
-	panic("vm86_getptr: address not found");
 }
 	
 int
-vm86_sysarch(td, args)
-	struct thread *td;
-	char *args;
+vm86_sysarch(struct thread *td, char *args)
 {
 	int error = 0;
 	struct i386_vm86_args ua;

Modified: trunk/sys/i386/i386/vm86bios.s
===================================================================
--- trunk/sys/i386/i386/vm86bios.s	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/vm86bios.s	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998 Jonathan Lemon
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/vm86bios.s 282065 2015-04-27 08:02:12Z kib $
  */
 
 #include "opt_npx.h"
@@ -122,7 +123,7 @@
 	movl	SCR_NEWPTD(%edx),%eax	/* mapping for vm86 page table */
 	movl	%eax,0(%ebx)		/* ... install as PTD entry 0 */
 
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	movl	IdlePDPT,%ecx
 #endif
 	movl	%ecx,%cr3		/* new page tables */


Property changes on: trunk/sys/i386/i386/vm86bios.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/vm_machdep.c
===================================================================
--- trunk/sys/i386/i386/vm_machdep.c	2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/vm_machdep.c	2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1982, 1986 The Regents of the University of California.
  * Copyright (c) 1989, 1990 William Jolitz
@@ -41,7 +42,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/vm_machdep.c 332759 2018-04-19 06:20:53Z avg $");
 
 #include "opt_isa.h"
 #include "opt_npx.h"
@@ -106,10 +107,15 @@
 #define	NSFBUFS		(512 + maxusers * 16)
 #endif
 
-CTASSERT((struct thread **)OFFSETOF_CURTHREAD ==
-    &((struct pcpu *)NULL)->pc_curthread);
-CTASSERT((struct pcb **)OFFSETOF_CURPCB == &((struct pcpu *)NULL)->pc_curpcb);
+#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
+#define CPU_ENABLE_SSE
+#endif
 
+_Static_assert(OFFSETOF_CURTHREAD == offsetof(struct pcpu, pc_curthread),
+    "OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread.");
+_Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb),
+    "OFFSETOF_CURPCB does not correspond with offset of pc_curpcb.");
+
 static void	cpu_reset_real(void);
 #ifdef SMP
 static void	cpu_reset_proxy(void);
@@ -116,6 +122,18 @@
 static u_int	cpu_reset_proxyid;
 static volatile u_int	cpu_reset_proxy_active;
 #endif
+
+static int nsfbufs;
+static int nsfbufspeak;
+static int nsfbufsused;
+
+SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
+    "Maximum number of sendfile(2) sf_bufs available");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
+    "Number of sendfile(2) sf_bufs at peak usage");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
+    "Number of sendfile(2) sf_bufs in use");
+
 static void	sf_buf_init(void *arg);
 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
 
@@ -137,8 +155,55 @@
  */
 static struct mtx sf_buf_lock;
 
-extern int	_ucodesel, _udatasel;
+union savefpu *
+get_pcb_user_save_td(struct thread *td)
+{	/* Locate td's user FPU save area: the region that ends at the top of td's kernel stack. */
+	vm_offset_t p;	/* computed start address of the save area */
+	p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -	/* end address of the kernel stack ... */
+	    roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);	/* ... minus the save-area size rounded up to XSAVE_AREA_ALIGN */
+	KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area"));	/* result must be XSAVE_AREA_ALIGN-aligned */
+	return ((union savefpu *)p);
+}
 
+union savefpu *
+get_pcb_user_save_pcb(struct pcb *pcb)
+{	/* Return the address immediately following *pcb, viewed as a union savefpu. */
+	vm_offset_t p;	/* address of the first byte past *pcb */
+
+	p = (vm_offset_t)(pcb + 1);	/* pointer arithmetic: one whole struct pcb beyond pcb */
+	return ((union savefpu *)p);
+}
+
+struct pcb *
+get_pcb_td(struct thread *td)
+{	/* Locate td's pcb: it sits sizeof(struct pcb) bytes below the FPU save area at the top of the kernel stack. */
+	vm_offset_t p;	/* computed start address of the pcb */
+
+	p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -	/* end address of the kernel stack ... */
+	    roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) -	/* ... minus the rounded-up FPU save area ... */
+	    sizeof(struct pcb);	/* ... minus the pcb itself */
+	return ((struct pcb *)p);
+}
+
+void *
+alloc_fpusave(int flags)
+{	/* Allocate cpu_max_ext_state_size bytes for an FPU save area; when use_xsave is set, also initialise its xstate header. flags is passed through to malloc(). */
+	void *res;	/* the allocation, returned to the caller */
+#ifdef CPU_ENABLE_SSE
+	struct savefpu_ymm *sf;	/* typed view of res, used to reach the xstate header */
+#endif
+
+	res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags);	/* NOTE(review): res is dereferenced below without a NULL check; presumably callers pass M_WAITOK -- confirm */
+#ifdef CPU_ENABLE_SSE
+	if (use_xsave) {
+		sf = (struct savefpu_ymm *)res;
+		bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd));	/* clear the whole header first */
+		sf->sv_xstate.sx_hd.xstate_bv = xsave_mask;	/* then record xsave_mask in xstate_bv */
+	}
+#endif
+	return (res);
+}
+
 /*
  * Finish a fork operation, with process p2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child
@@ -145,11 +210,7 @@
  * ready to run and return to user mode.
  */
 void
-cpu_fork(td1, p2, td2, flags)
-	register struct thread *td1;
-	register struct proc *p2;
-	struct thread *td2;
-	int flags;
+cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
 {
 	register struct proc *p1;
 	struct pcb *pcb2;
@@ -188,8 +249,7 @@
 #endif
 
 	/* Point the pcb to the top of the stack */
-	pcb2 = (struct pcb *)(td2->td_kstack +
-	    td2->td_kstack_pages * PAGE_SIZE) - 1;
+	pcb2 = get_pcb_td(td2);
 	td2->td_pcb = pcb2;
 
 	/* Copy td1's pcb */
@@ -196,7 +256,9 @@
 	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
 
 	/* Properly initialize pcb_save */
-	pcb2->pcb_save = &pcb2->pcb_user_save;
+	pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
+	bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2),
+	    cpu_max_ext_state_size);
 
 	/* Point mdproc and then copy over td1's contents */
 	mdp2 = &p2->p_md;
@@ -230,7 +292,7 @@
 	 * Set registers for trampoline to user mode.  Leave space for the
 	 * return address on stack.  These are the kernel mode register values.
 	 */
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
 	pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt);
 #else
 	pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir);
@@ -354,7 +416,7 @@
 		 * XXX do we need to move the TSS off the allocated pages
 		 * before freeing them?  (not done here)
 		 */
-		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
+		kmem_free(kernel_arena, (vm_offset_t)pcb->pcb_ext,
 		    ctob(IOPAGES + 1));
 		pcb->pcb_ext = NULL;
 	}
@@ -373,12 +435,22 @@
 void
 cpu_thread_alloc(struct thread *td)
 {
+	struct pcb *pcb;
+#ifdef CPU_ENABLE_SSE
+	struct xstate_hdr *xhdr;
+#endif
 
-	td->td_pcb = (struct pcb *)(td->td_kstack +
-	    td->td_kstack_pages * PAGE_SIZE) - 1;
-	td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1;
-	td->td_pcb->pcb_ext = NULL; 
-	td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save;
+	td->td_pcb = pcb = get_pcb_td(td);
+	td->td_frame = (struct trapframe *)((caddr_t)pcb - 16) - 1;
+	pcb->pcb_ext = NULL; 
+	pcb->pcb_save = get_pcb_user_save_pcb(pcb);
+#ifdef CPU_ENABLE_SSE
+	if (use_xsave) {
+		xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
+		bzero(xhdr, sizeof(*xhdr));
+		xhdr->xstate_bv = xsave_mask;
+	}
+#endif
 }
 
 void
@@ -411,13 +483,7 @@
 		break;
 
 	default:
-		if (td->td_proc->p_sysent->sv_errsize) {
-			if (error >= td->td_proc->p_sysent->sv_errsize)
-				error = -1;	/* XXX */
-			else
-				error = td->td_proc->p_sysent->sv_errtbl[error];
-		}
-		td->td_frame->tf_eax = error;
+		td->td_frame->tf_eax = SV_ABI_ERRNO(td->td_proc, error);
 		td->td_frame->tf_eflags |= PSL_C;
 		break;
 	}
@@ -444,8 +510,11 @@
 	 * values here.
 	 */
 	bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
-	pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE);
-	pcb2->pcb_save = &pcb2->pcb_user_save;
+	pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE |
+	    PCB_KERNNPX);
+	pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
+	bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save,
+	    cpu_max_ext_state_size);
 
 	/*
 	 * Create a new fresh stack for the new thread.
@@ -577,13 +646,11 @@
 static void
 cpu_reset_proxy()
 {
-	cpuset_t tcrp;
 
 	cpu_reset_proxy_active = 1;
 	while (cpu_reset_proxy_active == 1)
-		;	/* Wait for other cpu to see that we've started */
-	CPU_SETOF(cpu_reset_proxyid, &tcrp);
-	stop_cpus(tcrp);
+		ia32_pause(); /* Wait for other cpu to see that we've started */
+
 	printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
 	DELAY(1000000);
 	cpu_reset_real();
@@ -605,7 +672,7 @@
 	cpuset_t map;
 	u_int cnt;
 
-	if (smp_active) {
+	if (smp_started) {
 		map = all_cpus;
 		CPU_CLR(PCPU_GET(cpuid), &map);
 		CPU_NAND(&map, &stopped_cpus);
@@ -621,20 +688,22 @@
 			printf("cpu_reset: Restarting BSP\n");
 
 			/* Restart CPU #0. */
-			/* XXX: restart_cpus(1 << 0); */
 			CPU_SETOF(0, &started_cpus);
 			wmb();
 
 			cnt = 0;
-			while (cpu_reset_proxy_active == 0 && cnt < 10000000)
+			while (cpu_reset_proxy_active == 0 && cnt < 10000000) {
+				ia32_pause();
 				cnt++;	/* Wait for BSP to announce restart */
-			if (cpu_reset_proxy_active == 0)
+			}
+			if (cpu_reset_proxy_active == 0) {
 				printf("cpu_reset: Failed to restart BSP\n");
-			enable_intr();
-			cpu_reset_proxy_active = 2;
-
-			while (1);
-			/* NOTREACHED */
+			} else {
+				cpu_reset_proxy_active = 2;
+				while (1)
+					ia32_pause();
+				/* NOTREACHED */
+			}
 		}
 
 		DELAY(1000000);
@@ -750,7 +819,7 @@
 
 	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
 	TAILQ_INIT(&sf_buf_freelist);
-	sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
+	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
 	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
 	    M_NOWAIT | M_ZERO);
 	for (i = 0; i < nsfbufs; i++) {
@@ -785,7 +854,7 @@
 			 */
 			pmap_qenter(sf->kva, &m, 1);
 			pmap_invalidate_cache_range(sf->kva, sf->kva +
-			    PAGE_SIZE);
+			    PAGE_SIZE, FALSE);
 			ret = TRUE;
 			break;
 		}
@@ -832,7 +901,7 @@
 		if (flags & SFB_NOWAIT)
 			goto done;
 		sf_buf_alloc_want++;
-		mbstat.sf_allocwait++;
+		SFSTAT_INC(sf_allocwait);
 		error = msleep(&sf_buf_freelist, &sf_buf_lock,
 		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
 		sf_buf_alloc_want--;

Added: trunk/sys/i386/include/counter.h
===================================================================
--- trunk/sys/i386/include/counter.h	                        (rev 0)
+++ trunk/sys/i386/include/counter.h	2018-05-27 16:35:41 UTC (rev 9998)
@@ -0,0 +1,179 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 Konstantin Belousov <kib at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/i386/include/counter.h 252434 2013-07-01 02:48:27Z kib $
+ */
+
+#ifndef __MACHINE_COUNTER_H__
+#define __MACHINE_COUNTER_H__
+
+#include <sys/pcpu.h>
+#ifdef INVARIANTS
+#include <sys/proc.h>
+#endif
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+
+#define	counter_enter()	do {	/* begin a counter update */	\
+	if ((cpu_feature & CPUID_CX8) == 0)	/* no cmpxchg8b: protect the update with a critical section */	\
+		critical_enter();			\
+} while (0)
+
+#define	counter_exit()	do {	/* end a counter update */	\
+	if ((cpu_feature & CPUID_CX8) == 0)	/* same test as counter_enter(), so the two stay paired */	\
+		critical_exit();			\
+} while (0)
+
+extern struct pcpu __pcpu[MAXCPU];
+
+static inline void
+counter_64_inc_8b(uint64_t *p, int64_t inc)
+{
+	/* Add inc to a 64-bit counter with a cmpxchg8b retry loop; the counter is addressed %fs-relative by p's offset from __pcpu[0]. */
+	__asm __volatile(
+	"movl	%%fs:(%%esi),%%eax\n\t"	/* edx:eax = current value (low half) ... */
+	"movl	%%fs:4(%%esi),%%edx\n"	/* ... and high half */
+"1:\n\t"
+	"movl	%%eax,%%ebx\n\t"	/* ecx:ebx = copy of the value just observed */
+	"movl	%%edx,%%ecx\n\t"
+	"addl	(%%edi),%%ebx\n\t"	/* ecx:ebx += inc, low half first ... */
+	"adcl	4(%%edi),%%ecx\n\t"	/* ... then high half with the carry */
+	"cmpxchg8b %%fs:(%%esi)\n\t"	/* store ecx:ebx only if the counter still equals edx:eax */
+	"jnz	1b"	/* mismatch: edx:eax now holds the fresh value, so retry */
+	:
+	: "S" ((char *)p - (char *)&__pcpu[0]), "D" (&inc)	/* esi = offset of p from __pcpu[0] (presumably %fs is based at this CPU's pcpu -- confirm); edi = &inc */
+	: "memory", "cc", "eax", "edx", "ebx", "ecx");
+}
+
+#ifdef IN_SUBR_COUNTER_C
+static inline uint64_t
+counter_u64_read_one_8b(uint64_t *p)
+{	/* Read the 64-bit value at p in one cmpxchg8b instruction (no lock prefix is used). */
+	uint32_t res_lo, res_high;	/* low and high halves, returned in eax and edx */
+
+	__asm __volatile(
+	"movl	%%eax,%%ebx\n\t"	/* ecx:ebx = edx:eax, so a successful compare stores back the value already there */
+	"movl	%%edx,%%ecx\n\t"
+	"cmpxchg8b	(%2)"	/* on match *p is rewritten unchanged; on mismatch edx:eax = *p; either way edx:eax == *p */
+	: "=a" (res_lo), "=d"(res_high)
+	: "SD" (p)	/* p may be placed in esi or edi */
+	: "cc", "ebx", "ecx");
+	return (res_lo + ((uint64_t)res_high << 32));	/* recombine the halves */
+}
+
+static inline uint64_t
+counter_u64_fetch_inline(uint64_t *p)
+{	/* Return the sum of mp_ncpus copies of the counter, which lie sizeof(struct pcpu) bytes apart starting at p. */
+	uint64_t res;	/* running sum over the copies */
+	int i;	/* index of the copy being read */
+
+	res = 0;
+	if ((cpu_feature & CPUID_CX8) == 0) {
+		/*
+		 * The machines without cmpxchg8b are not SMP.
+		 * Disabling the preemption provides atomicity of the
+		 * counter reading, since update is done in the
+		 * critical section as well.
+		 */
+		critical_enter();
+		for (i = 0; i < mp_ncpus; i++) {
+			res += *(uint64_t *)((char *)p +
+			    sizeof(struct pcpu) * i);	/* plain 64-bit load of copy i */
+		}
+		critical_exit();
+	} else {	/* cmpxchg8b available: read each copy with counter_u64_read_one_8b() */
+		for (i = 0; i < mp_ncpus; i++)
+			res += counter_u64_read_one_8b((uint64_t *)((char *)p +
+			    sizeof(struct pcpu) * i));	/* copies are read one at a time, not as a single snapshot */
+	}
+	return (res);
+}
+
+static inline void
+counter_u64_zero_one_8b(uint64_t *p)
+{
+	/* Store zero to the 64-bit value at p using a cmpxchg8b retry loop. */
+	__asm __volatile(
+	"movl	(%0),%%eax\n\t"	/* edx:eax = initial guess of *p (two separate 32-bit loads) */
+	"movl	4(%0),%%edx\n"
+	"xorl	%%ebx,%%ebx\n\t"	/* ecx:ebx = 0, the value to store */
+	"xorl	%%ecx,%%ecx\n\t"
+"1:\n\t"
+	"cmpxchg8b	(%0)\n\t"	/* if *p == edx:eax store 0, otherwise edx:eax = *p */
+	"jnz	1b"	/* guess was stale: retry with the value just loaded */
+	:
+	: "SD" (p)	/* p may be placed in esi or edi */
+	: "memory", "cc", "eax", "edx", "ebx", "ecx");
+}
+
+static void
+counter_u64_zero_one_cpu(void *arg)
+{	/* Zero the copy of the counter selected by PCPU_GET(cpuid); arg is the address of copy 0. */
+	uint64_t *p;	/* address of the selected copy */
+
+	p = (uint64_t *)((char *)arg + sizeof(struct pcpu) * PCPU_GET(cpuid));	/* copies are sizeof(struct pcpu) bytes apart */
+	counter_u64_zero_one_8b(p);
+}
+
+static inline void
+counter_u64_zero_inline(counter_u64_t c)
+{	/* Zero all copies of counter c, choosing the method by whether cmpxchg8b (CPUID_CX8) is available. */
+	int i;	/* index of the copy being cleared */
+
+	if ((cpu_feature & CPUID_CX8) == 0) {
+		critical_enter();
+		for (i = 0; i < mp_ncpus; i++)
+			*(uint64_t *)((char *)c + sizeof(struct pcpu) * i) = 0;	/* plain store to copy i */
+		critical_exit();
+	} else {
+		smp_rendezvous(smp_no_rendevous_barrier,
+		    counter_u64_zero_one_cpu, smp_no_rendevous_barrier, c);	/* presumably runs counter_u64_zero_one_cpu(c) on every CPU -- confirm against smp_rendezvous() */
+	}
+}
+#endif
+
+#define	counter_u64_add_protected(c, inc)	do {	/* add inc to counter c without entering a critical section here */	\
+	if ((cpu_feature & CPUID_CX8) == 0) {		\
+		CRITICAL_ASSERT(curthread);	/* presumably checks the caller is already in a critical section, e.g. via counter_enter() -- confirm */	\
+		*(uint64_t *)zpcpu_get(c) += (inc);	\
+	} else	/* cmpxchg8b available: use the retry-loop add */	\
+		counter_64_inc_8b((c), (inc));		\
+} while (0)
+
+static inline void
+counter_u64_add(counter_u64_t c, int64_t inc)
+{
+	/* Add inc to counter c; unlike counter_u64_add_protected(), this enters the critical section itself when one is needed. */
+	if ((cpu_feature & CPUID_CX8) == 0) {
+		critical_enter();
+		*(uint64_t *)zpcpu_get(c) += inc;	/* plain 64-bit add on the copy returned by zpcpu_get() */
+		critical_exit();
+	} else {
+		counter_64_inc_8b(c, inc);	/* cmpxchg8b available: no critical section taken on this path */
+	}
+}
+
+#endif	/* ! __MACHINE_COUNTER_H__ */


Property changes on: trunk/sys/i386/include/counter.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/i386/include/fdt.h
===================================================================
--- trunk/sys/i386/include/fdt.h	                        (rev 0)
+++ trunk/sys/i386/include/fdt.h	2018-05-27 16:35:41 UTC (rev 9998)
@@ -0,0 +1,7 @@
+/* $MidnightBSD$ */
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD: stable/10/sys/i386/include/fdt.h 250840 2013-05-21 03:05:49Z marcel $ */
+
+#include <x86/fdt.h>


Property changes on: trunk/sys/i386/include/fdt.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/i386/include/ofw_machdep.h
===================================================================
--- trunk/sys/i386/include/ofw_machdep.h	                        (rev 0)
+++ trunk/sys/i386/include/ofw_machdep.h	2018-05-27 16:35:41 UTC (rev 9998)
@@ -0,0 +1,7 @@
+/* $MidnightBSD$ */
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD: stable/10/sys/i386/include/ofw_machdep.h 250840 2013-05-21 03:05:49Z marcel $ */
+
+#include <x86/ofw_machdep.h>


Property changes on: trunk/sys/i386/include/ofw_machdep.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property


More information about the Midnightbsd-cvs mailing list