[Midnightbsd-cvs] src [9998] trunk/sys/i386: sync with freebsd
laffer1 at midnightbsd.org
Sun May 27 12:35:42 EDT 2018
Revision: 9998
http://svnweb.midnightbsd.org/src/?rev=9998
Author: laffer1
Date: 2018-05-27 12:35:41 -0400 (Sun, 27 May 2018)
Log Message:
-----------
sync with freebsd
Modified Paths:
--------------
trunk/sys/i386/Makefile
trunk/sys/i386/acpica/Makefile
trunk/sys/i386/acpica/acpi_machdep.c
trunk/sys/i386/acpica/acpi_wakecode.S
trunk/sys/i386/bios/apm.c
trunk/sys/i386/bios/apm.h
trunk/sys/i386/bios/mca_machdep.c
trunk/sys/i386/bios/mca_machdep.h
trunk/sys/i386/bios/smapi.c
trunk/sys/i386/bios/smapi_bios.S
trunk/sys/i386/i386/apic_vector.s
trunk/sys/i386/i386/atomic.c
trunk/sys/i386/i386/atpic_vector.s
trunk/sys/i386/i386/autoconf.c
trunk/sys/i386/i386/bios.c
trunk/sys/i386/i386/bioscall.s
trunk/sys/i386/i386/bpf_jit_machdep.c
trunk/sys/i386/i386/bpf_jit_machdep.h
trunk/sys/i386/i386/db_disasm.c
trunk/sys/i386/i386/db_interface.c
trunk/sys/i386/i386/db_trace.c
trunk/sys/i386/i386/elan-mmcr.c
trunk/sys/i386/i386/elf_machdep.c
trunk/sys/i386/i386/exception.s
trunk/sys/i386/i386/gdb_machdep.c
trunk/sys/i386/i386/genassym.c
trunk/sys/i386/i386/geode.c
trunk/sys/i386/i386/i686_mem.c
trunk/sys/i386/i386/identcpu.c
trunk/sys/i386/i386/in_cksum.c
trunk/sys/i386/i386/initcpu.c
trunk/sys/i386/i386/io.c
trunk/sys/i386/i386/k6_mem.c
trunk/sys/i386/i386/legacy.c
trunk/sys/i386/i386/locore.s
trunk/sys/i386/i386/longrun.c
trunk/sys/i386/i386/machdep.c
trunk/sys/i386/i386/mem.c
trunk/sys/i386/i386/minidump_machdep.c
trunk/sys/i386/i386/mp_clock.c
trunk/sys/i386/i386/mp_machdep.c
trunk/sys/i386/i386/mp_watchdog.c
trunk/sys/i386/i386/mpboot.s
trunk/sys/i386/i386/perfmon.c
trunk/sys/i386/i386/pmap.c
trunk/sys/i386/i386/ptrace_machdep.c
trunk/sys/i386/i386/stack_machdep.c
trunk/sys/i386/i386/support.s
trunk/sys/i386/i386/swtch.s
trunk/sys/i386/i386/symbols.raw
trunk/sys/i386/i386/sys_machdep.c
trunk/sys/i386/i386/trap.c
trunk/sys/i386/i386/uio_machdep.c
trunk/sys/i386/i386/vm86.c
trunk/sys/i386/i386/vm86bios.s
trunk/sys/i386/i386/vm_machdep.c
Added Paths:
-----------
trunk/sys/i386/include/counter.h
trunk/sys/i386/include/fdt.h
trunk/sys/i386/include/ofw_machdep.h
Property Changed:
----------------
trunk/sys/i386/acpica/acpi_wakecode.S
trunk/sys/i386/bios/smapi_bios.S
trunk/sys/i386/i386/apic_vector.s
trunk/sys/i386/i386/atpic_vector.s
trunk/sys/i386/i386/bioscall.s
trunk/sys/i386/i386/exception.s
trunk/sys/i386/i386/locore.s
trunk/sys/i386/i386/mpboot.s
trunk/sys/i386/i386/support.s
trunk/sys/i386/i386/swtch.s
trunk/sys/i386/i386/symbols.raw
trunk/sys/i386/i386/vm86bios.s
Modified: trunk/sys/i386/Makefile
===================================================================
--- trunk/sys/i386/Makefile 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/Makefile 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,4 +1,5 @@
-# $FreeBSD$
+# $MidnightBSD$
+# $FreeBSD: stable/10/sys/i386/Makefile 224063 2011-07-15 17:27:26Z mckusick $
# @(#)Makefile 8.1 (Berkeley) 6/11/93
# Makefile for i386 links, tags file
Modified: trunk/sys/i386/acpica/Makefile
===================================================================
--- trunk/sys/i386/acpica/Makefile 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/acpica/Makefile 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+# $MidnightBSD$
# $FreeBSD: src/sys/i386/acpica/Makefile,v 1.8 2006/10/19 05:55:09 ru Exp $
# Correct path for kernel builds
Modified: trunk/sys/i386/acpica/acpi_machdep.c
===================================================================
--- trunk/sys/i386/acpica/acpi_machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/acpica/acpi_machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/i386/acpica/acpi_machdep.c 237822 2012-06-29 21:24:56Z jhb $");
+__FBSDID("$FreeBSD: stable/10/sys/i386/acpica/acpi_machdep.c 246855 2013-02-15 22:43:08Z jkim $");
#include <sys/param.h>
#include <sys/bus.h>
@@ -140,7 +140,7 @@
void *data;
off = pa & PAGE_MASK;
- length = roundup(length + off, PAGE_SIZE);
+ length = round_page(length + off);
pa = pa & PG_FRAME;
va = (vm_offset_t)pmap_kenter_temporary(pa, offset) +
(offset * PAGE_SIZE);
@@ -164,7 +164,7 @@
va = (vm_offset_t)data;
off = va & PAGE_MASK;
- length = roundup(length + off, PAGE_SIZE);
+ length = round_page(length + off);
va &= ~PAGE_MASK;
while (length > 0) {
pmap_kremove(va);
Modified: trunk/sys/i386/acpica/acpi_wakecode.S
===================================================================
--- trunk/sys/i386/acpica/acpi_wakecode.S 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/acpica/acpi_wakecode.S 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Takanori Watanabe <takawata at jp.freebsd.org>
* Copyright (c) 2001-2012 Mitsuru IWASAKI <iwasaki at jp.freebsd.org>
@@ -26,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: release/9.2.0/sys/i386/acpica/acpi_wakecode.S 235796 2012-05-22 17:44:01Z iwasaki $
+ * $FreeBSD: stable/10/sys/i386/acpica/acpi_wakecode.S 237027 2012-06-13 21:03:01Z jkim $
*/
#include <machine/asmacros.h>
@@ -142,8 +143,8 @@
mov %ax, %ds
/* Get PCB and return address. */
- movl wakeup_pcb - wakeup_start(%ebx), %esi
- movl wakeup_ret - wakeup_start(%ebx), %edi
+ movl wakeup_pcb - wakeup_start(%ebx), %ecx
+ movl wakeup_ret - wakeup_start(%ebx), %edx
/* Restore CR4 and CR3. */
movl wakeup_cr4 - wakeup_start(%ebx), %eax
@@ -166,7 +167,7 @@
jmp 1f
1:
/* Jump to return address. */
- jmp *%edi
+ jmp *%edx
.data
@@ -202,4 +203,7 @@
.long 0
wakeup_ret:
.long 0
+wakeup_gdt: /* not used */
+ .word 0
+ .long 0
dummy:
Property changes on: trunk/sys/i386/acpica/acpi_wakecode.S
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/bios/apm.c
===================================================================
--- trunk/sys/i386/bios/apm.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/bios/apm.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* APM (Advanced Power Management) BIOS Device Driver
*
@@ -17,7 +18,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/bios/apm.c 241885 2012-10-22 13:06:09Z eadler $");
#include <sys/param.h>
#include <sys/systm.h>
Modified: trunk/sys/i386/bios/apm.h
===================================================================
--- trunk/sys/i386/bios/apm.h 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/bios/apm.h 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* APM (Advanced Power Management) BIOS Device Driver
*
@@ -15,7 +16,7 @@
*
* Sep, 1994 Implemented on FreeBSD 1.1.5.1R (Toshiba AVS001WD)
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/bios/apm.h 183328 2008-09-24 18:46:39Z jhb $
*/
#ifndef __APM_H__
Modified: trunk/sys/i386/bios/mca_machdep.c
===================================================================
--- trunk/sys/i386/bios/mca_machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/bios/mca_machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999 Matthew N. Dodd <winter at jurai.net>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/bios/mca_machdep.c 134582 2004-08-31 21:51:51Z mdodd $");
#include <sys/param.h>
#include <sys/systm.h>
Modified: trunk/sys/i386/bios/mca_machdep.h
===================================================================
--- trunk/sys/i386/bios/mca_machdep.h 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/bios/mca_machdep.h 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999 Matthew N. Dodd <winter at jurai.net>
* All rights reserved.
@@ -23,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/bios/mca_machdep.h 50823 1999-09-03 02:04:28Z mdodd $
*/
extern int MCA_system;
Modified: trunk/sys/i386/bios/smapi.c
===================================================================
--- trunk/sys/i386/bios/smapi.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/bios/smapi.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2003 Matthew N. Dodd <winter at jurai.net>
* All rights reserved.
@@ -25,11 +26,12 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/bios/smapi.c 299230 2016-05-08 09:02:51Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/bus.h>
@@ -79,7 +81,7 @@
.d_version = D_VERSION,
.d_ioctl = smapi_ioctl,
.d_name = "smapi",
- .d_flags = D_MEM | D_NEEDGIANT,
+ .d_flags = D_NEEDGIANT,
};
static void smapi_identify(driver_t *, device_t);
@@ -292,6 +294,7 @@
for (i = 0; i < count; i++) {
device_delete_child(device_get_parent(devs[i]), devs[i]);
}
+ free(devs, M_TEMP);
break;
default:
break;
Modified: trunk/sys/i386/bios/smapi_bios.S
===================================================================
--- trunk/sys/i386/bios/smapi_bios.S 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/bios/smapi_bios.S 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
#include <machine/asm.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/bios/smapi_bios.S 239869 2012-08-29 18:22:52Z dim $");
/*
* This is cribbed from the Linux thinkpad-4.1 driver by
Property changes on: trunk/sys/i386/bios/smapi_bios.S
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/apic_vector.s
===================================================================
--- trunk/sys/i386/i386/apic_vector.s 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/apic_vector.s 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
@@ -28,7 +29,7 @@
* SUCH DAMAGE.
*
* from: vector.s, 386BSD 0.1 unknown origin
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/apic_vector.s 302041 2016-06-21 04:51:55Z sephe $
*/
/*
@@ -138,6 +139,25 @@
MEXITCOUNT
jmp doreti
+#ifdef XENHVM
+/*
+ * Xen event channel upcall interrupt handler.
+ * Only used when the hypervisor supports direct vector callbacks.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(xen_intr_upcall)
+ PUSH_FRAME
+ SET_KERNEL_SREGS
+ cld
+ FAKE_MCOUNT(TF_EIP(%esp))
+ pushl %esp
+ call xen_intr_handle_upcall
+ add $4, %esp
+ MEXITCOUNT
+ jmp doreti
+#endif
+
#ifdef SMP
/*
* Global address space TLB shootdown.
@@ -144,40 +164,22 @@
*/
.text
SUPERALIGN_TEXT
-IDTVEC(invltlb)
- pushl %eax
- pushl %ds
- movl $KDSEL, %eax /* Kernel data selector */
- movl %eax, %ds
-
-#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
- pushl %fs
- movl $KPSEL, %eax /* Private space selector */
- movl %eax, %fs
- movl PCPU(CPUID), %eax
- popl %fs
-#ifdef COUNT_XINVLTLB_HITS
- incl xhits_gbl(,%eax,4)
-#endif
-#ifdef COUNT_IPIS
- movl ipi_invltlb_counts(,%eax,4),%eax
- incl (%eax)
-#endif
-#endif
-
- movl %cr3, %eax /* invalidate the TLB */
- movl %eax, %cr3
-
+invltlb_ret:
movl lapic, %eax
movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
+ POP_FRAME
+ iret
- lock
- incl smp_tlb_wait
+ SUPERALIGN_TEXT
+IDTVEC(invltlb)
+ PUSH_FRAME
+ SET_KERNEL_SREGS
+ cld
- popl %ds
- popl %eax
- iret
+ call invltlb_handler
+ jmp invltlb_ret
+
/*
* Single page TLB shootdown
*/
@@ -184,39 +186,14 @@
.text
SUPERALIGN_TEXT
IDTVEC(invlpg)
- pushl %eax
- pushl %ds
- movl $KDSEL, %eax /* Kernel data selector */
- movl %eax, %ds
+ PUSH_FRAME
+ SET_KERNEL_SREGS
+ cld
-#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
- pushl %fs
- movl $KPSEL, %eax /* Private space selector */
- movl %eax, %fs
- movl PCPU(CPUID), %eax
- popl %fs
-#ifdef COUNT_XINVLTLB_HITS
- incl xhits_pg(,%eax,4)
-#endif
-#ifdef COUNT_IPIS
- movl ipi_invlpg_counts(,%eax,4),%eax
- incl (%eax)
-#endif
-#endif
+ call invlpg_handler
- movl smp_tlb_addr1, %eax
- invlpg (%eax) /* invalidate single page */
+ jmp invltlb_ret
- movl lapic, %eax
- movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
-
- lock
- incl smp_tlb_wait
-
- popl %ds
- popl %eax
- iret
-
/*
* Page range TLB shootdown.
*/
@@ -223,45 +200,14 @@
.text
SUPERALIGN_TEXT
IDTVEC(invlrng)
- pushl %eax
- pushl %edx
- pushl %ds
- movl $KDSEL, %eax /* Kernel data selector */
- movl %eax, %ds
+ PUSH_FRAME
+ SET_KERNEL_SREGS
+ cld
-#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
- pushl %fs
- movl $KPSEL, %eax /* Private space selector */
- movl %eax, %fs
- movl PCPU(CPUID), %eax
- popl %fs
-#ifdef COUNT_XINVLTLB_HITS
- incl xhits_rng(,%eax,4)
-#endif
-#ifdef COUNT_IPIS
- movl ipi_invlrng_counts(,%eax,4),%eax
- incl (%eax)
-#endif
-#endif
+ call invlrng_handler
- movl smp_tlb_addr1, %edx
- movl smp_tlb_addr2, %eax
-1: invlpg (%edx) /* invalidate single page */
- addl $PAGE_SIZE, %edx
- cmpl %eax, %edx
- jb 1b
+ jmp invltlb_ret
- movl lapic, %eax
- movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
-
- lock
- incl smp_tlb_wait
-
- popl %ds
- popl %edx
- popl %eax
- iret
-
/*
* Invalidate cache.
*/
@@ -268,33 +214,14 @@
.text
SUPERALIGN_TEXT
IDTVEC(invlcache)
- pushl %eax
- pushl %ds
- movl $KDSEL, %eax /* Kernel data selector */
- movl %eax, %ds
+ PUSH_FRAME
+ SET_KERNEL_SREGS
+ cld
-#ifdef COUNT_IPIS
- pushl %fs
- movl $KPSEL, %eax /* Private space selector */
- movl %eax, %fs
- movl PCPU(CPUID), %eax
- popl %fs
- movl ipi_invlcache_counts(,%eax,4),%eax
- incl (%eax)
-#endif
+ call invlcache_handler
- wbinvd
+ jmp invltlb_ret
- movl lapic, %eax
- movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
-
- lock
- incl smp_tlb_wait
-
- popl %ds
- popl %eax
- iret
-
/*
* Handler for IPIs sent via the per-cpu IPI bitmap.
*/
Property changes on: trunk/sys/i386/i386/apic_vector.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/atomic.c
===================================================================
--- trunk/sys/i386/i386/atomic.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/atomic.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999 Peter Jeremy
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/atomic.c 119452 2003-08-25 09:48:48Z obrien $");
/* This file creates publically callable functions to perform various
* simple arithmetic on memory which is atomic in the presence of
Modified: trunk/sys/i386/i386/atpic_vector.s
===================================================================
--- trunk/sys/i386/i386/atpic_vector.s 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/atpic_vector.s 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
@@ -28,7 +29,7 @@
* SUCH DAMAGE.
*
* from: vector.s, 386BSD 0.1 unknown origin
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/atpic_vector.s 209483 2010-06-23 20:44:07Z kib $
*/
/*
Property changes on: trunk/sys/i386/i386/atpic_vector.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/autoconf.c
===================================================================
--- trunk/sys/i386/i386/autoconf.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/autoconf.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
@@ -33,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/autoconf.c 146794 2005-05-29 23:44:22Z marcel $");
/*
* Setup the system to run on the current machine.
Modified: trunk/sys/i386/i386/bios.c
===================================================================
--- trunk/sys/i386/i386/bios.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/bios.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1997 Michael Smith
* Copyright (c) 1998 Jonathan Lemon
@@ -26,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/bios.c 282065 2015-04-27 08:02:12Z kib $");
/*
* Code for dealing with the BIOS in x86 PC systems.
@@ -372,9 +373,11 @@
break;
default:
+ va_end(ap);
return (EINVAL);
}
}
+ va_end(ap);
if (flags & BIOSARGS_FLAG) {
if (arg_end - arg_start > ctob(16))
@@ -387,7 +390,7 @@
args->seg.code32.limit = 0xffff;
ptd = (pd_entry_t *)rcr3();
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
if (ptd == IdlePDPT)
#else
if (ptd == IdlePTD)
@@ -448,9 +451,11 @@
break;
default:
+ va_end(ap);
return (EINVAL);
}
}
+ va_end(ap);
set_bios_selectors(&args->seg, flags);
bioscall_vector.vec16.offset = (u_short)args->entry;
Modified: trunk/sys/i386/i386/bioscall.s
===================================================================
--- trunk/sys/i386/i386/bioscall.s 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/bioscall.s 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1997 Jonathan Lemon
* All rights reserved.
@@ -23,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/bioscall.s 103436 2002-09-17 01:49:00Z peter $
*/
/*
Property changes on: trunk/sys/i386/i386/bioscall.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/bpf_jit_machdep.c
===================================================================
--- trunk/sys/i386/i386/bpf_jit_machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/bpf_jit_machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (C) 2002-2003 NetGroup, Politecnico di Torino (Italy)
* Copyright (C) 2005-2009 Jung-uk Kim <jkim at FreeBSD.org>
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/bpf_jit_machdep.c 207081 2010-04-22 23:47:19Z jkim $");
#ifdef _KERNEL
#include "opt_bpf.h"
Modified: trunk/sys/i386/i386/bpf_jit_machdep.h
===================================================================
--- trunk/sys/i386/i386/bpf_jit_machdep.h 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/bpf_jit_machdep.h 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (C) 2002-2003 NetGroup, Politecnico di Torino (Italy)
* Copyright (C) 2005-2009 Jung-uk Kim <jkim at FreeBSD.org>
@@ -28,7 +29,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/bpf_jit_machdep.h 207081 2010-04-22 23:47:19Z jkim $
*/
#ifndef _BPF_JIT_MACHDEP_H_
Modified: trunk/sys/i386/i386/db_disasm.c
===================================================================
--- trunk/sys/i386/i386/db_disasm.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/db_disasm.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Mach Operating System
* Copyright (c) 1991,1990 Carnegie Mellon University
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/db_disasm.c 280272 2015-03-19 23:13:19Z markj $");
/*
* Instruction disassembler.
@@ -195,6 +196,26 @@
/*0f*/ { "", FALSE, NONE, 0, 0 },
};
+static const struct inst db_inst_0f1x[] = {
+/*10*/ { "", FALSE, NONE, 0, 0 },
+/*11*/ { "", FALSE, NONE, 0, 0 },
+/*12*/ { "", FALSE, NONE, 0, 0 },
+/*13*/ { "", FALSE, NONE, 0, 0 },
+/*14*/ { "", FALSE, NONE, 0, 0 },
+/*15*/ { "", FALSE, NONE, 0, 0 },
+/*16*/ { "", FALSE, NONE, 0, 0 },
+/*17*/ { "", FALSE, NONE, 0, 0 },
+
+/*18*/ { "", FALSE, NONE, 0, 0 },
+/*19*/ { "", FALSE, NONE, 0, 0 },
+/*1a*/ { "", FALSE, NONE, 0, 0 },
+/*1b*/ { "", FALSE, NONE, 0, 0 },
+/*1c*/ { "", FALSE, NONE, 0, 0 },
+/*1d*/ { "", FALSE, NONE, 0, 0 },
+/*1e*/ { "", FALSE, NONE, 0, 0 },
+/*1f*/ { "nopl", TRUE, SDEP, 0, "nopw" },
+};
+
static const struct inst db_inst_0f2x[] = {
/*20*/ { "mov", TRUE, LONG, op2(CR,El), 0 },
/*21*/ { "mov", TRUE, LONG, op2(DR,El), 0 },
@@ -356,7 +377,7 @@
static const struct inst * const db_inst_0f[] = {
db_inst_0f0x,
- 0,
+ db_inst_0f1x,
db_inst_0f2x,
db_inst_0f3x,
db_inst_0f4x,
@@ -782,7 +803,7 @@
/*c7*/ { "mov", TRUE, LONG, op2(I, E), 0 },
/*c8*/ { "enter", FALSE, NONE, op2(Iw, Ib), 0 },
-/*c9*/ { "leave", FALSE, NONE, 0, 0 },
+/*c9*/ { "leave", FALSE, NONE, 0, 0 },
/*ca*/ { "lret", FALSE, NONE, op1(Iw), 0 },
/*cb*/ { "lret", FALSE, NONE, 0, 0 },
/*cc*/ { "int", FALSE, NONE, op1(o3), 0 },
@@ -1266,7 +1287,7 @@
case 0xc8:
i_name = "monitor";
i_size = NONE;
- i_mode = 0;
+ i_mode = 0;
break;
case 0xc9:
i_name = "mwait";
Modified: trunk/sys/i386/i386/db_interface.c
===================================================================
--- trunk/sys/i386/i386/db_interface.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/db_interface.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Mach Operating System
* Copyright (c) 1991,1990 Carnegie Mellon University
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/db_interface.c 139724 2005-01-05 19:10:48Z imp $");
/*
* Interface to new debugger.
Modified: trunk/sys/i386/i386/db_trace.c
===================================================================
--- trunk/sys/i386/i386/db_trace.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/db_trace.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Mach Operating System
* Copyright (c) 1991,1990 Carnegie Mellon University
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/db_trace.c 290731 2015-11-12 23:49:47Z jhb $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -48,16 +49,10 @@
#include <ddb/db_sym.h>
#include <ddb/db_variables.h>
-static db_varfcn_t db_dr0;
-static db_varfcn_t db_dr1;
-static db_varfcn_t db_dr2;
-static db_varfcn_t db_dr3;
-static db_varfcn_t db_dr4;
-static db_varfcn_t db_dr5;
-static db_varfcn_t db_dr6;
-static db_varfcn_t db_dr7;
static db_varfcn_t db_esp;
static db_varfcn_t db_frame;
+static db_varfcn_t db_frame_seg;
+static db_varfcn_t db_gs;
static db_varfcn_t db_ss;
/*
@@ -65,10 +60,11 @@
*/
#define DB_OFFSET(x) (db_expr_t *)offsetof(struct trapframe, x)
struct db_variable db_regs[] = {
- { "cs", DB_OFFSET(tf_cs), db_frame },
- { "ds", DB_OFFSET(tf_ds), db_frame },
- { "es", DB_OFFSET(tf_es), db_frame },
- { "fs", DB_OFFSET(tf_fs), db_frame },
+ { "cs", DB_OFFSET(tf_cs), db_frame_seg },
+ { "ds", DB_OFFSET(tf_ds), db_frame_seg },
+ { "es", DB_OFFSET(tf_es), db_frame_seg },
+ { "fs", DB_OFFSET(tf_fs), db_frame_seg },
+ { "gs", NULL, db_gs },
{ "ss", NULL, db_ss },
{ "eax", DB_OFFSET(tf_eax), db_frame },
{ "ecx", DB_OFFSET(tf_ecx), db_frame },
@@ -80,41 +76,9 @@
{ "edi", DB_OFFSET(tf_edi), db_frame },
{ "eip", DB_OFFSET(tf_eip), db_frame },
{ "efl", DB_OFFSET(tf_eflags), db_frame },
-#define DB_N_SHOW_REGS 15 /* Don't show registers after here. */
- { "dr0", NULL, db_dr0 },
- { "dr1", NULL, db_dr1 },
- { "dr2", NULL, db_dr2 },
- { "dr3", NULL, db_dr3 },
- { "dr4", NULL, db_dr4 },
- { "dr5", NULL, db_dr5 },
- { "dr6", NULL, db_dr6 },
- { "dr7", NULL, db_dr7 },
};
-struct db_variable *db_eregs = db_regs + DB_N_SHOW_REGS;
+struct db_variable *db_eregs = db_regs + nitems(db_regs);
-#define DB_DRX_FUNC(reg) \
-static int \
-db_ ## reg (vp, valuep, op) \
- struct db_variable *vp; \
- db_expr_t * valuep; \
- int op; \
-{ \
- if (op == DB_VAR_GET) \
- *valuep = r ## reg (); \
- else \
- load_ ## reg (*valuep); \
- return (1); \
-}
-
-DB_DRX_FUNC(dr0)
-DB_DRX_FUNC(dr1)
-DB_DRX_FUNC(dr2)
-DB_DRX_FUNC(dr3)
-DB_DRX_FUNC(dr4)
-DB_DRX_FUNC(dr5)
-DB_DRX_FUNC(dr6)
-DB_DRX_FUNC(dr7)
-
static __inline int
get_esp(struct trapframe *tf)
{
@@ -139,6 +103,22 @@
}
static int
+db_frame_seg(struct db_variable *vp, db_expr_t *valuep, int op)
+{
+ uint16_t *reg;
+
+ if (kdb_frame == NULL)
+ return (0);
+
+ reg = (uint16_t *)((uintptr_t)kdb_frame + (db_expr_t)vp->valuep);
+ if (op == DB_VAR_GET)
+ *valuep = *reg;
+ else
+ *reg = *valuep;
+ return (1);
+}
+
+static int
db_esp(struct db_variable *vp, db_expr_t *valuep, int op)
{
@@ -153,6 +133,17 @@
}
static int
+db_gs(struct db_variable *vp, db_expr_t *valuep, int op)
+{
+
+ if (op == DB_VAR_GET)
+ *valuep = rgs();
+ else
+ load_gs(*valuep);
+ return (1);
+}
+
+static int
db_ss(struct db_variable *vp, db_expr_t *valuep, int op)
{
@@ -390,7 +381,7 @@
static int
db_backtrace(struct thread *td, struct trapframe *tf, struct i386_frame *frame,
- db_addr_t pc, int count)
+ db_addr_t pc, register_t sp, int count)
{
struct i386_frame *actframe;
#define MAXNARG 16
@@ -447,7 +438,21 @@
*/
actframe = frame;
if (first) {
- if (tf != NULL) {
+ first = FALSE;
+ if (sym == C_DB_SYM_NULL && sp != 0) {
+ /*
+ * If a symbol couldn't be found, we've probably
+ * jumped to a bogus location, so try and use
+ * the return address to find our caller.
+ */
+ db_print_stack_entry(name, 0, 0, 0, pc,
+ NULL);
+ pc = db_get_value(sp, 4, FALSE);
+ if (db_search_symbol(pc, DB_STGY_PROC,
+ &offset) == C_DB_SYM_NULL)
+ break;
+ continue;
+ } else if (tf != NULL) {
instr = db_get_value(pc, 4, FALSE);
if ((instr & 0xffffff) == 0x00e58955) {
/* pushl %ebp; movl %esp, %ebp */
@@ -475,7 +480,6 @@
actframe);
break;
}
- first = FALSE;
}
argp = &actframe->f_arg0;
@@ -522,7 +526,7 @@
frame = (struct i386_frame *)ebp;
callpc = (db_addr_t)db_get_value((int)&frame->f_retaddr, 4, FALSE);
frame = frame->f_frame;
- db_backtrace(curthread, NULL, frame, callpc, -1);
+ db_backtrace(curthread, NULL, frame, callpc, 0, -1);
}
int
@@ -529,10 +533,12 @@
db_trace_thread(struct thread *thr, int count)
{
struct pcb *ctx;
+ struct trapframe *tf;
ctx = kdb_thr_ctx(thr);
- return (db_backtrace(thr, NULL, (struct i386_frame *)ctx->pcb_ebp,
- ctx->pcb_eip, count));
+ tf = thr == kdb_thread ? kdb_frame : NULL;
+ return (db_backtrace(thr, tf, (struct i386_frame *)ctx->pcb_ebp,
+ ctx->pcb_eip, ctx->pcb_esp, count));
}
int
Modified: trunk/sys/i386/i386/elan-mmcr.c
===================================================================
--- trunk/sys/i386/i386/elan-mmcr.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/elan-mmcr.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* ----------------------------------------------------------------------------
* "THE BEER-WARE LICENSE" (Revision 42):
@@ -39,7 +40,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/elan-mmcr.c 214346 2010-10-25 15:28:03Z jhb $");
#include "opt_cpu.h"
#include <sys/param.h>
Modified: trunk/sys/i386/i386/elf_machdep.c
===================================================================
--- trunk/sys/i386/i386/elf_machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/elf_machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 1996-1998 John D. Polstra.
* All rights reserved.
@@ -24,8 +25,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/elf_machdep.c 294136 2016-01-16 07:56:49Z dchagin $");
+#include "opt_cpu.h"
+
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
@@ -45,7 +48,12 @@
#include <machine/elf.h>
#include <machine/md_var.h>
+#include <machine/npx.h>
+#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
+#define CPU_ENABLE_SSE
+#endif
+
struct sysentvec elf32_freebsd_sysvec = {
.sv_size = SYS_MAXSYSCALL,
.sv_table = sysent,
@@ -81,6 +89,8 @@
.sv_shared_page_base = SHAREDPAGE,
.sv_shared_page_len = PAGE_SIZE,
.sv_schedtail = NULL,
+ .sv_thread_detach = NULL,
+ .sv_trap = NULL,
};
INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec);
@@ -116,14 +126,49 @@
(sysinit_cfunc_t) elf32_insert_brand_entry,
&freebsd_brand_oinfo);
+static Elf32_Brandinfo kfreebsd_brand_info = {
+ .brand = ELFOSABI_FREEBSD,
+ .machine = EM_386,
+ .compat_3_brand = "FreeBSD",
+ .emul_path = NULL,
+ .interp_path = "/lib/ld.so.1",
+ .sysvec = &elf32_freebsd_sysvec,
+ .interp_newpath = NULL,
+ .brand_note = &elf32_kfreebsd_brandnote,
+ .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY
+};
+SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY,
+ (sysinit_cfunc_t) elf32_insert_brand_entry,
+ &kfreebsd_brand_info);
+
+
void
-elf32_dump_thread(struct thread *td __unused, void *dst __unused,
- size_t *off __unused)
+elf32_dump_thread(struct thread *td, void *dst, size_t *off)
{
+#ifdef CPU_ENABLE_SSE
+ void *buf;
+#endif
+ size_t len;
+
+ len = 0;
+#ifdef CPU_ENABLE_SSE
+ if (use_xsave) {
+ if (dst != NULL) {
+ npxgetregs(td);
+ len += elf32_populate_note(NT_X86_XSTATE,
+ get_pcb_user_save_td(td), dst,
+ cpu_max_ext_state_size, &buf);
+ *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) =
+ xsave_mask;
+ } else
+ len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL,
+ cpu_max_ext_state_size, NULL);
+ }
+#endif
+ *off = len;
}
-
/* Process one elf relocation with addend. */
static int
elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
@@ -135,6 +180,7 @@
Elf_Word rtype, symidx;
const Elf_Rel *rel;
const Elf_Rela *rela;
+ int error;
switch (type) {
case ELF_RELOC_REL:
@@ -170,8 +216,8 @@
break;
case R_386_32: /* S + A */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return -1;
addr += addend;
if (*where != addr)
@@ -179,8 +225,8 @@
break;
case R_386_PC32: /* S + A - P */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return -1;
addr += addend - (Elf_Addr)where;
if (*where != addr)
@@ -197,8 +243,8 @@
break;
case R_386_GLOB_DAT: /* S */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return -1;
if (*where != addr)
*where = addr;
Modified: trunk/sys/i386/i386/exception.s
===================================================================
--- trunk/sys/i386/i386/exception.s 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/exception.s 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
@@ -31,7 +32,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/exception.s 322755 2017-08-21 15:44:57Z kib $
*/
#include "opt_apic.h"
@@ -54,13 +55,13 @@
.globl dtrace_invop_jump_addr
.align 4
.type dtrace_invop_jump_addr, @object
- .size dtrace_invop_jump_addr, 4
+ .size dtrace_invop_jump_addr, 4
dtrace_invop_jump_addr:
.zero 4
.globl dtrace_invop_calltrap_addr
.align 4
.type dtrace_invop_calltrap_addr, @object
- .size dtrace_invop_calltrap_addr, 4
+ .size dtrace_invop_calltrap_addr, 4
dtrace_invop_calltrap_addr:
.zero 8
#endif
@@ -75,22 +76,22 @@
* Trap and fault vector routines.
*
* Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
- * the stack that mostly looks like an interrupt, but does not disable
- * interrupts. A few of the traps we are use are interrupt gates,
+ * the stack that mostly looks like an interrupt, but does not disable
+ * interrupts. A few of the traps we are use are interrupt gates,
* SDT_SYS386IGT, which are nearly the same thing except interrupts are
* disabled on entry.
*
* The cpu will push a certain amount of state onto the kernel stack for
- * the current process. The amount of state depends on the type of trap
- * and whether the trap crossed rings or not. See i386/include/frame.h.
- * At the very least the current EFLAGS (status register, which includes
+ * the current process. The amount of state depends on the type of trap
+ * and whether the trap crossed rings or not. See i386/include/frame.h.
+ * At the very least the current EFLAGS (status register, which includes
* the interrupt disable state prior to the trap), the code segment register,
- * and the return instruction pointer are pushed by the cpu. The cpu
- * will also push an 'error' code for certain traps. We push a dummy
- * error code for those traps where the cpu doesn't in order to maintain
+ * and the return instruction pointer are pushed by the cpu. The cpu
+ * will also push an 'error' code for certain traps. We push a dummy
+ * error code for those traps where the cpu doesn't in order to maintain
* a consistent frame. We also push a contrived 'trap number'.
*
- * The cpu does not push the general registers, we must do that, and we
+ * The cpu does not push the general registers, we must do that, and we
* must restore them prior to calling 'iret'. The cpu adjusts the %cs and
* %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
* must load them with appropriate values for supervisor mode operation.
@@ -145,21 +146,25 @@
pushl $0; TRAP(T_XMMFLT)
/*
- * alltraps entry point. Interrupts are enabled if this was a trap
- * gate (TGT), else disabled if this was an interrupt gate (IGT).
- * Note that int0x80_syscall is a trap gate. Interrupt gates are
- * used by page faults, non-maskable interrupts, debug and breakpoint
+ * All traps except ones for syscalls jump to alltraps. If
+ * interrupts were enabled when the trap occurred, then interrupts
+ * are enabled now if the trap was through a trap gate, else
+ * disabled if the trap was through an interrupt gate. Note that
+ * int0x80_syscall is a trap gate. Interrupt gates are used by
+ * page faults, non-maskable interrupts, debug and breakpoint
* exceptions.
*/
-
SUPERALIGN_TEXT
.globl alltraps
.type alltraps,@function
alltraps:
pushal
- pushl %ds
- pushl %es
- pushl %fs
+ pushl $0
+ movw %ds,(%esp)
+ pushl $0
+ movw %es,(%esp)
+ pushl $0
+ movw %fs,(%esp)
alltraps_with_regs_pushed:
SET_KERNEL_SREGS
cld
@@ -168,7 +173,7 @@
pushl %esp
call trap
add $4, %esp
-
+
/*
* Return via doreti to handle ASTs.
*/
@@ -181,21 +186,29 @@
#ifdef KDTRACE_HOOKS
SUPERALIGN_TEXT
IDTVEC(ill)
- /* Check if there is no DTrace hook registered. */
- cmpl $0,dtrace_invop_jump_addr
+ /*
+ * Check if a DTrace hook is registered. The default (data) segment
+ * cannot be used for this since %ds is not known good until we
+ * verify that the entry was from kernel mode.
+ */
+ cmpl $0,%ss:dtrace_invop_jump_addr
je norm_ill
- /* Check if this is a user fault. */
- cmpl $GSEL_KPL, 4(%esp) /* Check the code segment. */
-
- /* If so, just handle it as a normal trap. */
+ /*
+ * Check if this is a user fault. If so, just handle it as a normal
+ * trap.
+ */
+ cmpl $GSEL_KPL, 4(%esp) /* Check the code segment */
jne norm_ill
-
+ testl $PSL_VM, 8(%esp) /* and vm86 mode. */
+ jnz norm_ill
+
/*
* This is a kernel instruction fault that might have been caused
* by a DTrace provider.
*/
- pushal /* Push all registers onto the stack. */
+ pushal
+ cld
/*
* Set our jump address for the jump back in the event that
@@ -215,10 +228,10 @@
#endif
/*
- * SYSCALL CALL GATE (old entry point for a.out binaries)
+ * Call gate entry for syscalls (lcall 7,0).
+ * This is used by FreeBSD 1.x a.out executables and "old" NetBSD executables.
*
* The intersegment call has been set up to specify one dummy parameter.
- *
* This leaves a place to put eflags so that the call frame can be
* converted to a trap frame. Note that the eflags is (semi-)bogusly
* pushed into (what will be) tf_err and then copied later into the
@@ -231,11 +244,14 @@
pushfl /* save eflags */
popl 8(%esp) /* shuffle into tf_eflags */
pushl $7 /* sizeof "lcall 7,0" */
- subl $4,%esp /* skip over tf_trapno */
+ pushl $0 /* tf_trapno */
pushal
- pushl %ds
- pushl %es
- pushl %fs
+ pushl $0
+ movw %ds,(%esp)
+ pushl $0
+ movw %es,(%esp)
+ pushl $0
+ movw %fs,(%esp)
SET_KERNEL_SREGS
cld
FAKE_MCOUNT(TF_EIP(%esp))
@@ -246,20 +262,25 @@
jmp doreti
/*
- * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
+ * Trap gate entry for syscalls (int 0x80).
+ * This is used by FreeBSD ELF executables, "new" NetBSD executables, and all
+ * Linux executables.
*
- * Even though the name says 'int0x80', this is actually a TGT (trap gate)
- * rather then an IGT (interrupt gate). Thus interrupts are enabled on
- * entry just as they are for a normal syscall.
+ * Even though the name says 'int0x80', this is actually a trap gate, not an
+ * interrupt gate. Thus interrupts are enabled on entry just as they are for
+ * a normal syscall.
*/
SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
pushl $2 /* sizeof "int 0x80" */
- subl $4,%esp /* skip over tf_trapno */
+ pushl $0 /* tf_trapno */
pushal
- pushl %ds
- pushl %es
- pushl %fs
+ pushl $0
+ movw %ds,(%esp)
+ pushl $0
+ movw %es,(%esp)
+ pushl $0
+ movw %fs,(%esp)
SET_KERNEL_SREGS
cld
FAKE_MCOUNT(TF_EIP(%esp))
@@ -332,6 +353,7 @@
.text
SUPERALIGN_TEXT
.type doreti,@function
+ .globl doreti
doreti:
FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */
doreti_next:
@@ -348,13 +370,14 @@
/*
* PSL_VM must be checked first since segment registers only
* have an RPL in non-VM86 mode.
+ * ASTs can not be handled now if we are in a vm86 call.
*/
- testl $PSL_VM,TF_EFLAGS(%esp) /* are we in vm86 mode? */
+ testl $PSL_VM,TF_EFLAGS(%esp)
jz doreti_notvm86
movl PCPU(CURPCB),%ecx
- testl $PCB_VM86CALL,PCB_FLAGS(%ecx) /* are we in a vm86 call? */
- jz doreti_ast /* can handle ASTS now if not */
- jmp doreti_exit
+ testl $PCB_VM86CALL,PCB_FLAGS(%ecx)
+ jz doreti_ast
+ jmp doreti_exit
doreti_notvm86:
testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
@@ -401,7 +424,7 @@
doreti_iret:
iret
- /*
+ /*
* doreti_iret_fault and friends. Alternative return code for
* the case where we get a fault in the doreti_exit code
* above. trap() (i386/i386/trap.c) catches this specific
@@ -413,13 +436,16 @@
doreti_iret_fault:
subl $8,%esp
pushal
- pushl %ds
+ pushl $0
+ movw %ds,(%esp)
.globl doreti_popl_ds_fault
doreti_popl_ds_fault:
- pushl %es
+ pushl $0
+ movw %es,(%esp)
.globl doreti_popl_es_fault
doreti_popl_es_fault:
- pushl %fs
+ pushl $0
+ movw %fs,(%esp)
.globl doreti_popl_fs_fault
doreti_popl_fs_fault:
sti
Property changes on: trunk/sys/i386/i386/exception.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/gdb_machdep.c
===================================================================
--- trunk/sys/i386/i386/gdb_machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/gdb_machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2004 Marcel Moolenaar
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/gdb_machdep.c 290734 2015-11-13 00:50:34Z jhb $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -45,14 +46,22 @@
void *
gdb_cpu_getreg(int regnum, size_t *regsz)
{
+ static uint32_t _kcodesel = GSEL(GCODE_SEL, SEL_KPL);
+ static uint32_t _kdatasel = GSEL(GDATA_SEL, SEL_KPL);
+ static uint32_t _kprivsel = GSEL(GPRIV_SEL, SEL_KPL);
*regsz = gdb_cpu_regsz(regnum);
- if (kdb_thread == curthread) {
+ if (kdb_thread == curthread) {
switch (regnum) {
case 0: return (&kdb_frame->tf_eax);
case 1: return (&kdb_frame->tf_ecx);
case 2: return (&kdb_frame->tf_edx);
+ case 9: return (&kdb_frame->tf_eflags);
+ case 10: return (&kdb_frame->tf_cs);
+ case 12: return (&kdb_frame->tf_ds);
+ case 13: return (&kdb_frame->tf_es);
+ case 14: return (&kdb_frame->tf_fs);
}
}
switch (regnum) {
@@ -62,6 +71,12 @@
case 6: return (&kdb_thrctx->pcb_esi);
case 7: return (&kdb_thrctx->pcb_edi);
case 8: return (&kdb_thrctx->pcb_eip);
+ case 10: return (&_kcodesel);
+ case 11: return (&_kdatasel);
+ case 12: return (&_kdatasel);
+ case 13: return (&_kdatasel);
+ case 14: return (&_kprivsel);
+ case 15: return (&kdb_thrctx->pcb_gs);
}
return (NULL);
}
Modified: trunk/sys/i386/i386/genassym.c
===================================================================
--- trunk/sys/i386/i386/genassym.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/genassym.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1990 The Regents of the University of California.
* All rights reserved.
@@ -33,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/genassym.c 286878 2015-08-18 09:09:39Z kib $");
#include "opt_apic.h"
#include "opt_compat.h"
@@ -103,6 +104,7 @@
ASSYM(V_INTR, offsetof(struct vmmeter, v_intr));
/* ASSYM(UPAGES, UPAGES);*/
ASSYM(KSTACK_PAGES, KSTACK_PAGES);
+ASSYM(TD0_KSTACK_PAGES, TD0_KSTACK_PAGES);
ASSYM(PAGE_SIZE, PAGE_SIZE);
ASSYM(NPTEPG, NPTEPG);
ASSYM(NPDEPG, NPDEPG);
@@ -144,7 +146,6 @@
ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
-ASSYM(PCB_USERFPU, offsetof(struct pcb, pcb_user_save));
ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
@@ -154,7 +155,6 @@
ASSYM(PCB_VM86, offsetof(struct pcb, pcb_vm86));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
-ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
ASSYM(PCB_SIZE, sizeof(struct pcb));
@@ -246,9 +246,8 @@
#endif
#ifdef XEN
-#include <xen/hypervisor.h>
ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3));
-ASSYM(HYPERVISOR_VIRT_START, __HYPERVISOR_VIRT_START);
+ASSYM(XEN_HYPERVISOR_VIRT_START, HYPERVISOR_VIRT_START);
#endif
#ifdef HWPMC_HOOKS
Modified: trunk/sys/i386/i386/geode.c
===================================================================
--- trunk/sys/i386/i386/geode.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/geode.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2003-2004 Poul-Henning Kamp
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/geode.c 208111 2010-05-15 10:31:11Z phk $");
#include <sys/param.h>
#include <sys/systm.h>
Modified: trunk/sys/i386/i386/i686_mem.c
===================================================================
--- trunk/sys/i386/i386/i686_mem.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/i686_mem.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999 Michael Smith <msmith at freebsd.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/i686_mem.c 217506 2011-01-17 17:30:35Z jkim $");
#include <sys/param.h>
#include <sys/kernel.h>
Modified: trunk/sys/i386/i386/identcpu.c
===================================================================
--- trunk/sys/i386/i386/identcpu.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/identcpu.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1992 Terrence R. Lambert.
* Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
Modified: trunk/sys/i386/i386/in_cksum.c
===================================================================
--- trunk/sys/i386/i386/in_cksum.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/in_cksum.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
@@ -31,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/in_cksum.c 189572 2009-03-09 13:11:16Z rwatson $");
#include <sys/param.h>
#include <sys/systm.h>
Modified: trunk/sys/i386/i386/initcpu.c
===================================================================
--- trunk/sys/i386/i386/initcpu.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/initcpu.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) KATO Takenori, 1997, 1998.
*
@@ -28,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/initcpu.c 313150 2017-02-03 12:20:44Z kib $");
#include "opt_cpu.h"
@@ -48,12 +49,6 @@
#define CPU_ENABLE_SSE
#endif
-#if defined(I586_CPU) && defined(CPU_WT_ALLOC)
-void enable_K5_wt_alloc(void);
-void enable_K6_wt_alloc(void);
-void enable_K6_2_wt_alloc(void);
-#endif
-
#ifdef I486_CPU
static void init_5x86(void);
static void init_bluelightning(void);
@@ -65,6 +60,12 @@
static void init_6x86(void);
#endif /* I486_CPU */
+#if defined(I586_CPU) && defined(CPU_WT_ALLOC)
+static void enable_K5_wt_alloc(void);
+static void enable_K6_wt_alloc(void);
+static void enable_K6_2_wt_alloc(void);
+#endif
+
#ifdef I686_CPU
static void init_6x86MX(void);
static void init_ppro(void);
@@ -81,22 +82,34 @@
*/
static int hw_clflush_disable = -1;
-/* Must *NOT* be BSS or locore will bzero these after setting them */
-int cpu = 0; /* Are we 386, 386sx, 486, etc? */
-u_int cpu_feature = 0; /* Feature flags */
-u_int cpu_feature2 = 0; /* Feature flags */
-u_int amd_feature = 0; /* AMD feature flags */
-u_int amd_feature2 = 0; /* AMD feature flags */
-u_int amd_pminfo = 0; /* AMD advanced power management info */
-u_int via_feature_rng = 0; /* VIA RNG features */
-u_int via_feature_xcrypt = 0; /* VIA ACE features */
-u_int cpu_high = 0; /* Highest arg to CPUID */
-u_int cpu_id = 0; /* Stepping ID */
-u_int cpu_procinfo = 0; /* HyperThreading Info / Brand Index / CLFUSH */
-u_int cpu_procinfo2 = 0; /* Multicore info */
-char cpu_vendor[20] = ""; /* CPU Origin code */
-u_int cpu_vendor_id = 0; /* CPU vendor ID */
+int cpu; /* Are we 386, 386sx, 486, etc? */
+u_int cpu_feature; /* Feature flags */
+u_int cpu_feature2; /* Feature flags */
+u_int amd_feature; /* AMD feature flags */
+u_int amd_feature2; /* AMD feature flags */
+u_int amd_pminfo; /* AMD advanced power management info */
+u_int via_feature_rng; /* VIA RNG features */
+u_int via_feature_xcrypt; /* VIA ACE features */
+u_int cpu_high; /* Highest arg to CPUID */
+u_int cpu_exthigh; /* Highest arg to extended CPUID */
+u_int cpu_id; /* Stepping ID */
+u_int cpu_procinfo; /* HyperThreading Info / Brand Index / CLFUSH */
+u_int cpu_procinfo2; /* Multicore info */
+char cpu_vendor[20]; /* CPU Origin code */
+u_int cpu_vendor_id; /* CPU vendor ID */
+#ifdef CPU_ENABLE_SSE
+u_int cpu_fxsr; /* SSE enabled */
+u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */
+#endif
u_int cpu_clflush_line_size = 32;
+u_int cpu_stdext_feature;
+u_int cpu_stdext_feature2;
+u_int cpu_max_ext_state_size;
+u_int cpu_mon_mwait_flags; /* MONITOR/MWAIT flags (CPUID.05H.ECX) */
+u_int cpu_mon_min_size; /* MONITOR minimum range size, bytes */
+u_int cpu_mon_max_size; /* MONITOR minimum range size, bytes */
+u_int cyrix_did; /* Device ID of Cyrix CPU */
+u_int cpu_maxphyaddr; /* Max phys addr width in bits */
SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
&via_feature_rng, 0, "VIA RNG feature available in CPU");
@@ -103,11 +116,6 @@
SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD,
&via_feature_xcrypt, 0, "VIA xcrypt feature available in CPU");
-#ifdef CPU_ENABLE_SSE
-u_int cpu_fxsr; /* SSE enabled */
-u_int cpu_mxcsr_mask; /* valid bits in mxcsr */
-#endif
-
#ifdef I486_CPU
/*
* IBM Blue Lightning
@@ -421,6 +429,19 @@
#ifdef I586_CPU
/*
+ * Rise mP6
+ */
+static void
+init_rise(void)
+{
+
+ /*
+ * The CMPXCHG8B instruction is always available but hidden.
+ */
+ cpu_feature |= CPUID_CX8;
+}
+
+/*
* IDT WinChip C6/2/2A/2B/3
*
* http://www.centtech.com/winchip_bios_writers_guide_v4_0.pdf
@@ -440,7 +461,7 @@
fcr &= ~(1ULL << 11);
/*
- * Additioanlly, set EBRPRED, E2MMX and EAMD3D for WinChip 2 and 3.
+ * Additionally, set EBRPRED, E2MMX and EAMD3D for WinChip 2 and 3.
*/
if (CPUID_TO_MODEL(cpu_id) >= 8)
fcr |= (1 << 12) | (1 << 19) | (1 << 20);
@@ -516,6 +537,8 @@
intr_restore(saveintr);
}
+static int ppro_apic_used = -1;
+
static void
init_ppro(void)
{
@@ -524,12 +547,32 @@
/*
* Local APIC should be disabled if it is not going to be used.
*/
- apicbase = rdmsr(MSR_APICBASE);
- apicbase &= ~APICBASE_ENABLED;
- wrmsr(MSR_APICBASE, apicbase);
+ if (ppro_apic_used != 1) {
+ apicbase = rdmsr(MSR_APICBASE);
+ apicbase &= ~APICBASE_ENABLED;
+ wrmsr(MSR_APICBASE, apicbase);
+ ppro_apic_used = 0;
+ }
}
/*
+ * If the local APIC is going to be used after being disabled above,
+ * re-enable it and don't disable it in the future.
+ */
+void
+ppro_reenable_apic(void)
+{
+ u_int64_t apicbase;
+
+ if (ppro_apic_used == 0) {
+ apicbase = rdmsr(MSR_APICBASE);
+ apicbase |= APICBASE_ENABLED;
+ wrmsr(MSR_APICBASE, apicbase);
+ ppro_apic_used = 1;
+ }
+}
+
+/*
* Initialize BBL_CR_CTL3 (Control register 3: used to configure the
* L2 cache).
*/
@@ -635,20 +678,6 @@
}
#endif
-/*
- * Initialize CR4 (Control register 4) to enable SSE instructions.
- */
-void
-enable_sse(void)
-{
-#if defined(CPU_ENABLE_SSE)
- if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
- load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
- cpu_fxsr = hw_instruction_sse = 1;
- }
-#endif
-}
-
extern int elf32_nxstack;
void
@@ -681,6 +710,27 @@
#ifdef I586_CPU
case CPU_586:
switch (cpu_vendor_id) {
+ case CPU_VENDOR_AMD:
+#ifdef CPU_WT_ALLOC
+ if (((cpu_id & 0x0f0) > 0) &&
+ ((cpu_id & 0x0f0) < 0x60) &&
+ ((cpu_id & 0x00f) > 3))
+ enable_K5_wt_alloc();
+ else if (((cpu_id & 0x0f0) > 0x80) ||
+ (((cpu_id & 0x0f0) == 0x80) &&
+ (cpu_id & 0x00f) > 0x07))
+ enable_K6_2_wt_alloc();
+ else if ((cpu_id & 0x0f0) > 0x50)
+ enable_K6_wt_alloc();
+#endif
+ if ((cpu_id & 0xf0) == 0xa0)
+ /*
+ * Make sure the TSC runs through
+ * suspension, otherwise we can't use
+ * it as timecounter
+ */
+ wrmsr(0x1900, rdmsr(0x1900) | 0x20ULL);
+ break;
case CPU_VENDOR_CENTAUR:
init_winchip();
break;
@@ -687,6 +737,9 @@
case CPU_VENDOR_TRANSMETA:
init_transmeta();
break;
+ case CPU_VENDOR_RISE:
+ init_rise();
+ break;
}
break;
#endif
@@ -733,23 +786,33 @@
init_transmeta();
break;
}
-#ifdef PAE
- if ((amd_feature & AMDID_NX) != 0) {
- uint64_t msr;
-
- msr = rdmsr(MSR_EFER) | EFER_NXE;
- wrmsr(MSR_EFER, msr);
- pg_nx = PG_NX;
- elf32_nxstack = 1;
- }
-#endif
break;
#endif
default:
break;
}
- enable_sse();
+#if defined(CPU_ENABLE_SSE)
+ if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
+ load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
+ cpu_fxsr = hw_instruction_sse = 1;
+ }
+#endif
+#if defined(PAE) || defined(PAE_TABLES)
+ if ((amd_feature & AMDID_NX) != 0) {
+ uint64_t msr;
+ msr = rdmsr(MSR_EFER) | EFER_NXE;
+ wrmsr(MSR_EFER, msr);
+ pg_nx = PG_NX;
+ elf32_nxstack = 1;
+ }
+#endif
+}
+
+void
+initializecpucache(void)
+{
+
/*
* CPUID with %eax = 1, %ebx returns
* Bits 15-8: CLFLUSH line size
@@ -764,14 +827,18 @@
* CPUID_SS feature even though the native CPU supports it.
*/
TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
- if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1)
+ if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
cpu_feature &= ~CPUID_CLFSH;
+ cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+ }
/*
- * Allow to disable CLFLUSH feature manually by
- * hw.clflush_disable tunable.
+ * The kernel's use of CLFLUSH{,OPT} can be disabled manually
+ * by setting the hw.clflush_disable tunable.
*/
- if (hw_clflush_disable == 1)
+ if (hw_clflush_disable == 1) {
cpu_feature &= ~CPUID_CLFSH;
+ cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+ }
#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
/*
@@ -825,7 +892,7 @@
* Enable write allocate feature of AMD processors.
* Following two functions require the Maxmem variable being set.
*/
-void
+static void
enable_K5_wt_alloc(void)
{
u_int64_t msr;
@@ -871,7 +938,7 @@
}
}
-void
+static void
enable_K6_wt_alloc(void)
{
quad_t size;
@@ -931,7 +998,7 @@
intr_restore(saveintr);
}
-void
+static void
enable_K6_2_wt_alloc(void)
{
quad_t size;
Modified: trunk/sys/i386/i386/io.c
===================================================================
--- trunk/sys/i386/i386/io.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/io.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2004 Mark R V Murray
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/io.c 207329 2010-04-28 15:38:01Z attilio $");
#include <sys/param.h>
#include <sys/proc.h>
Modified: trunk/sys/i386/i386/k6_mem.c
===================================================================
--- trunk/sys/i386/i386/k6_mem.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/k6_mem.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999 Brian Fundakowski Feldman
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/k6_mem.c 189903 2009-03-17 00:48:11Z jkim $");
#include <sys/param.h>
#include <sys/kernel.h>
Modified: trunk/sys/i386/i386/legacy.c
===================================================================
--- trunk/sys/i386/i386/legacy.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/legacy.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 1998 Massachusetts Institute of Technology
*
Modified: trunk/sys/i386/i386/locore.s
===================================================================
--- trunk/sys/i386/i386/locore.s 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/locore.s 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
@@ -30,7 +31,7 @@
* SUCH DAMAGE.
*
* from: @(#)locore.s 7.3 (Berkeley) 5/13/91
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/locore.s 286878 2015-08-18 09:09:39Z kib $
*
* originally from: locore.s, by William F. Jolitz
*
@@ -99,7 +100,7 @@
.globl IdlePTD
IdlePTD: .long 0 /* phys addr of kernel PTD */
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
.globl IdlePDPT
IdlePDPT: .long 0 /* phys addr of kernel PDPT */
#endif
@@ -281,7 +282,7 @@
1:
/* Now enable paging */
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePDPT), %eax
movl %eax, %cr3
movl %cr4, %eax
@@ -302,18 +303,15 @@
begin:
/* set up bootstrap stack */
movl proc0kstack,%eax /* location of in-kernel stack */
- /* bootstrap stack end location */
- leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp
+ /*
+ * Only use bottom page for init386(). init386() calculates the
+ * PCB + FPU save area size and returns the true top of stack.
+ */
+ leal PAGE_SIZE(%eax),%esp
+
xorl %ebp,%ebp /* mark end of frames */
-#ifdef PAE
- movl IdlePDPT,%esi
-#else
- movl IdlePTD,%esi
-#endif
- movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
-
pushl physfree /* value of first for init386(first) */
call init386 /* wire 386 chip for unix operation */
@@ -324,6 +322,9 @@
*/
addl $4,%esp
+ /* Switch to true top of stack. */
+ movl %eax,%esp
+
call mi_startup /* autoconfiguration, mountroot etc */
/* NOTREACHED */
addl $0,%esp /* for db_numargs() again */
@@ -722,7 +723,7 @@
movl %esi,R(KPTmap)
/* Allocate Page Table Directory */
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
/* XXX only need 32 bytes (easier for now) */
ALLOCPAGES(1)
movl %esi,R(IdlePDPT)
@@ -731,7 +732,7 @@
movl %esi,R(IdlePTD)
/* Allocate KSTACK */
- ALLOCPAGES(KSTACK_PAGES)
+ ALLOCPAGES(TD0_KSTACK_PAGES)
movl %esi,R(p0kpa)
addl $KERNBASE, %esi
movl %esi, R(proc0kstack)
@@ -775,8 +776,7 @@
* if we've enabled PSE above, we'll just switch the corresponding kernel
* PDEs before we turn on paging.
*
- * XXX: We waste some pages here in the PSE case! DON'T BLINDLY REMOVE
- * THIS! SMP needs the page table to be there to map the kernel P==V.
+ * XXX: We waste some pages here in the PSE case!
*/
xorl %eax, %eax
movl R(KERNend),%ecx
@@ -789,7 +789,7 @@
fillkptphys($PG_RW)
/* Map page directory. */
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePDPT), %eax
movl $1, %ecx
fillkptphys($PG_RW)
@@ -801,7 +801,7 @@
/* Map proc0's KSTACK in the physical way ... */
movl R(p0kpa), %eax
- movl $(KSTACK_PAGES), %ecx
+ movl $(TD0_KSTACK_PAGES), %ecx
fillkptphys($PG_RW)
/* Map ISA hole */
@@ -891,7 +891,7 @@
movl $NPGPTD,%ecx
fillkpt(R(IdlePTD), $PG_RW)
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePTD), %eax
xorl %ebx, %ebx
movl $NPGPTD, %ecx
Property changes on: trunk/sys/i386/i386/locore.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/longrun.c
===================================================================
--- trunk/sys/i386/i386/longrun.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/longrun.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Tamotsu Hattori.
* Copyright (c) 2001 Mitsuru IWASAKI.
@@ -34,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/longrun.c 214346 2010-10-25 15:28:03Z jhb $");
#include "opt_cpu.h"
Modified: trunk/sys/i386/i386/machdep.c
===================================================================
--- trunk/sys/i386/i386/machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1992 Terrence R. Lambert.
* Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
@@ -38,7 +39,7 @@
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/machdep.c 325543 2017-11-08 11:39:42Z kib $");
#include "opt_apic.h"
#include "opt_atalk.h"
@@ -54,6 +55,7 @@
#include "opt_mp_watchdog.h"
#include "opt_npx.h"
#include "opt_perfmon.h"
+#include "opt_platform.h"
#include "opt_xbox.h"
#include "opt_kdtrace.h"
@@ -81,6 +83,7 @@
#include <sys/pcpu.h>
#include <sys/ptrace.h>
#include <sys/reboot.h>
+#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#ifdef SMP
@@ -110,7 +113,11 @@
#include <ddb/db_sym.h>
#endif
+#ifdef PC98
+#include <pc98/pc98/pc98_machdep.h>
+#else
#include <isa/rtc.h>
+#endif
#include <net/netisr.h>
@@ -137,6 +144,9 @@
#ifdef SMP
#include <machine/smp.h>
#endif
+#ifdef FDT
+#include <x86/fdt.h>
+#endif
#ifdef DEV_APIC
#include <machine/apicvar.h>
@@ -155,9 +165,8 @@
#ifdef XEN
/* XEN includes */
-#include <machine/xen/xen-os.h>
+#include <xen/xen-os.h>
#include <xen/hypervisor.h>
-#include <machine/xen/xen-os.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>
#include <xen/xen_intr.h>
@@ -175,13 +184,9 @@
/* Sanity check for __curthread() */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
-extern void init386(int first);
+extern register_t init386(int first);
extern void dblfault_handler(void);
-extern void printcpuinfo(void); /* XXX header file */
-extern void finishidentcpu(void);
-extern void panicifcpuunsupported(void);
-
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
@@ -191,8 +196,10 @@
static void cpu_startup(void *);
static void fpstate_drop(struct thread *td);
-static void get_fpcontext(struct thread *td, mcontext_t *mcp);
-static int set_fpcontext(struct thread *td, const mcontext_t *mcp);
+static void get_fpcontext(struct thread *td, mcontext_t *mcp,
+ char *xfpusave, size_t xfpusave_len);
+static int set_fpcontext(struct thread *td, mcontext_t *mcp,
+ char *xfpustate, size_t xfpustate_len);
#ifdef CPU_ENABLE_SSE
static void set_fpregs_xmm(struct save87 *, struct savexmm *);
static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
@@ -210,6 +217,14 @@
int _udatasel, _ucodesel;
u_int basemem;
+#ifdef PC98
+int	need_pre_dma_flush;	/* If 1, use wbinvd before DMA transfer. */
+int need_post_dma_flush; /* If 1, use invd after DMA transfer. */
+
+static int ispc98 = 1;
+SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, "");
+#endif
+
int cold = 1;
#ifdef COMPAT_43
@@ -256,7 +271,8 @@
{
uintmax_t memsize;
char *sysenv;
-
+
+#ifndef PC98
/*
* On MacBooks, we need to disallow the legacy USB circuit to
* generate an SMI# because this can cause several problems,
@@ -269,9 +285,11 @@
if (sysenv != NULL) {
if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
strncmp(sysenv, "MacBook3,1", 10) == 0 ||
+ strncmp(sysenv, "MacBook4,1", 10) == 0 ||
strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
+ strncmp(sysenv, "MacBookPro4,1", 13) == 0 ||
strncmp(sysenv, "Macmini1,1", 10) == 0) {
if (bootverbose)
printf("Disabling LEGACY_USB_EN bit on "
@@ -280,6 +298,7 @@
}
freeenv(sysenv);
}
+#endif /* !PC98 */
/*
* Good {morning,afternoon,evening,night}.
@@ -290,7 +309,6 @@
#ifdef PERFMON
perfmon_init();
#endif
- realmem = Maxmem;
/*
* Display physical memory if SMBIOS reports reasonable amount.
@@ -304,6 +322,7 @@
if (memsize < ptoa((uintmax_t)cnt.v_free_count))
memsize = ptoa((uintmax_t)Maxmem);
printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20);
+ realmem = atop(memsize);
/*
* Display any holes after the first chunk of extended memory.
@@ -338,11 +357,6 @@
#ifndef XEN
cpu_setregs();
#endif
-
- /*
- * Add BSP as an interrupt target.
- */
- intr_add_cpu(0);
}
/*
@@ -349,7 +363,7 @@
* Send an interrupt to process.
*
* Stack is set up to allow sigcode stored
- * at top to call routine, followed by kcall
+ * at top to call routine, followed by call
* to sigreturn routine below. After sigreturn
* resets the signal mask, the stack, and the
* frame pointer, it returns to the user
@@ -387,10 +401,6 @@
} else
fp = (struct osigframe *)regs->tf_esp - 1;
- /* Translate the signal if appropriate. */
- if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
- sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
-
/* Build the argument list for the signal handler. */
sf.sf_signum = sig;
sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
@@ -538,10 +548,6 @@
} else
sfp = (struct sigframe4 *)regs->tf_esp - 1;
- /* Translate the signal if appropriate. */
- if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
- sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
-
/* Build the argument list for the signal handler. */
sf.sf_signum = sig;
sf.sf_ucontext = (register_t)&sfp->sf_uc;
@@ -628,6 +634,8 @@
char *sp;
struct trapframe *regs;
struct segment_descriptor *sdp;
+ char *xfpusave;
+ size_t xfpusave_len;
int sig;
int oonstack;
@@ -652,6 +660,18 @@
regs = td->td_frame;
oonstack = sigonstack(regs->tf_esp);
+#ifdef CPU_ENABLE_SSE
+ if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) {
+ xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu);
+ xfpusave = __builtin_alloca(xfpusave_len);
+ } else {
+#else
+ {
+#endif
+ xfpusave_len = 0;
+ xfpusave = NULL;
+ }
+
/* Save user context. */
bzero(&sf, sizeof(sf));
sf.sf_uc.uc_sigmask = *mask;
@@ -662,7 +682,7 @@
sf.sf_uc.uc_mcontext.mc_gs = rgs();
bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
- get_fpcontext(td, &sf.sf_uc.uc_mcontext);
+ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
fpstate_drop(td);
/*
* Unconditionally fill the fsbase and gsbase into the mcontext.
@@ -673,7 +693,6 @@
sdp = &td->td_pcb->pcb_gsd;
sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 |
sdp->sd_lobase;
- sf.sf_uc.uc_mcontext.mc_flags = 0;
bzero(sf.sf_uc.uc_mcontext.mc_spare2,
sizeof(sf.sf_uc.uc_mcontext.mc_spare2));
bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
@@ -681,13 +700,19 @@
/* Allocate space for the signal handler context. */
if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
SIGISMEMBER(psp->ps_sigonstack, sig)) {
- sp = td->td_sigstk.ss_sp +
- td->td_sigstk.ss_size - sizeof(struct sigframe);
+ sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
#if defined(COMPAT_43)
td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
} else
- sp = (char *)regs->tf_esp - sizeof(struct sigframe);
+ sp = (char *)regs->tf_esp - 128;
+ if (xfpusave != NULL) {
+ sp -= xfpusave_len;
+ sp = (char *)((unsigned int)sp & ~0x3F);
+ sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
+ }
+ sp -= sizeof(struct sigframe);
+
/* Align to 16 bytes. */
sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
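
The rewritten placement above leaves a 128-byte gap below the interrupted %esp, reserves the variable-size extended FPU state on a 64-byte boundary (as XSAVE requires), and only then carves out the sigframe, aligned down to 16 bytes. A compilable sketch of that carving, with hypothetical parameters standing in for the kernel's sizes:

    #include <stddef.h>
    #include <stdint.h>

    /* Userland sketch of the sendsig() stack carving shown above. */
    static char *
    carve_sigframe(char *esp, size_t xfpusave_len, size_t sigframe_sz,
        char **xfpustate)
    {
    	char *sp = esp - 128;		/* gap below the interrupted esp */

    	if (xfpusave_len != 0) {
    		sp -= xfpusave_len;	/* extended FPU state */
    		sp = (char *)((uintptr_t)sp & ~(uintptr_t)0x3F); /* 64-byte align */
    		*xfpustate = sp;
    	}
    	sp -= sigframe_sz;		/* the sigframe proper */
    	return ((char *)((uintptr_t)sp & ~(uintptr_t)0xF)); /* 16-byte align */
    }
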
@@ -748,7 +773,10 @@
/*
* Copy the sigframe out to the user's stack.
*/
- if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
+ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
+ (xfpusave != NULL && copyout(xfpusave,
+ (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
+ != 0)) {
#ifdef DEBUG
printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
@@ -758,6 +786,8 @@
regs->tf_esp = (int)sfp;
regs->tf_eip = p->p_sysent->sv_sigcode_base;
+ if (regs->tf_eip == 0)
+ regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode;
regs->tf_eflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucodesel;
regs->tf_ds = _udatasel;
@@ -841,17 +871,7 @@
/*
* Don't allow users to change privileged or reserved flags.
*/
- /*
- * XXX do allow users to change the privileged flag PSL_RF.
- * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
- * should sometimes set it there too. tf_eflags is kept in
- * the signal context during signal handling and there is no
- * other place to remember it, so the PSL_RF bit may be
- * corrupted by the signal handler without us knowing.
- * Corruption of the PSL_RF bit at worst causes one more or
- * one less debugger trap, so allowing it is fairly harmless.
- */
- if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
+ if (!EFL_SECURE(eflags, regs->tf_eflags)) {
return (EINVAL);
}
@@ -967,17 +987,7 @@
/*
* Don't allow users to change privileged or reserved flags.
*/
- /*
- * XXX do allow users to change the privileged flag PSL_RF.
- * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
- * should sometimes set it there too. tf_eflags is kept in
- * the signal context during signal handling and there is no
- * other place to remember it, so the PSL_RF bit may be
- * corrupted by the signal handler without us knowing.
- * Corruption of the PSL_RF bit at worst causes one more or
- * one less debugger trap, so allowing it is fairly harmless.
- */
- if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
+ if (!EFL_SECURE(eflags, regs->tf_eflags)) {
uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n",
td->td_proc->p_pid, td->td_name, eflags);
return (EINVAL);
@@ -1026,15 +1036,25 @@
} */ *uap;
{
ucontext_t uc;
+ struct proc *p;
struct trapframe *regs;
ucontext_t *ucp;
+ char *xfpustate;
+ size_t xfpustate_len;
int cs, eflags, error, ret;
ksiginfo_t ksi;
+ p = td->td_proc;
+
error = copyin(uap->sigcntxp, &uc, sizeof(uc));
if (error != 0)
return (error);
ucp = &uc;
+ if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
+ uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
+ td->td_name, ucp->uc_mcontext.mc_flags);
+ return (EINVAL);
+ }
regs = td->td_frame;
eflags = ucp->uc_mcontext.mc_eflags;
if (eflags & PSL_VM) {
@@ -1081,17 +1101,7 @@
/*
* Don't allow users to change privileged or reserved flags.
*/
- /*
- * XXX do allow users to change the privileged flag PSL_RF.
- * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
- * should sometimes set it there too. tf_eflags is kept in
- * the signal context during signal handling and there is no
- * other place to remember it, so the PSL_RF bit may be
- * corrupted by the signal handler without us knowing.
- * Corruption of the PSL_RF bit at worst causes one more or
- * one less debugger trap, so allowing it is fairly harmless.
- */
- if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
+ if (!EFL_SECURE(eflags, regs->tf_eflags)) {
uprintf("pid %d (%s): sigreturn eflags = 0x%x\n",
td->td_proc->p_pid, td->td_name, eflags);
return (EINVAL);
@@ -1115,7 +1125,30 @@
return (EINVAL);
}
- ret = set_fpcontext(td, &ucp->uc_mcontext);
+ if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
+ xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
+ if (xfpustate_len > cpu_max_ext_state_size -
+ sizeof(union savefpu)) {
+ uprintf(
+ "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
+ p->p_pid, td->td_name, xfpustate_len);
+ return (EINVAL);
+ }
+ xfpustate = __builtin_alloca(xfpustate_len);
+ error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
+ xfpustate, xfpustate_len);
+ if (error != 0) {
+ uprintf(
+ "pid %d (%s): sigreturn copying xfpustate failed\n",
+ p->p_pid, td->td_name);
+ return (error);
+ }
+ } else {
+ xfpustate = NULL;
+ xfpustate_len = 0;
+ }
+ ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
+ xfpustate_len);
if (ret != 0)
return (ret);
bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
@@ -1216,6 +1249,13 @@
#ifdef XEN
+static void
+idle_block(void)
+{
+
+ HYPERVISOR_sched_op(SCHEDOP_block, 0);
+}
+
void
cpu_halt(void)
{
@@ -1225,7 +1265,7 @@
int scheduler_running;
static void
-cpu_idle_hlt(int busy)
+cpu_idle_hlt(sbintime_t sbt)
{
scheduler_running = 1;
@@ -1241,12 +1281,12 @@
cpu_halt(void)
{
for (;;)
- __asm__ ("hlt");
+ halt();
}
#endif
-void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */
+void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */
static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
@@ -1257,34 +1297,51 @@
#define STATE_MWAIT 0x1
#define STATE_SLEEPING 0x2
+#ifndef PC98
static void
-cpu_idle_acpi(int busy)
+cpu_idle_acpi(sbintime_t sbt)
{
int *state;
state = (int *)PCPU_PTR(monitorbuf);
*state = STATE_SLEEPING;
+
+ /* See comments in cpu_idle_hlt(). */
disable_intr();
if (sched_runnable())
enable_intr();
else if (cpu_idle_hook)
- cpu_idle_hook();
+ cpu_idle_hook(sbt);
else
__asm __volatile("sti; hlt");
*state = STATE_RUNNING;
}
+#endif /* !PC98 */
#ifndef XEN
static void
-cpu_idle_hlt(int busy)
+cpu_idle_hlt(sbintime_t sbt)
{
int *state;
state = (int *)PCPU_PTR(monitorbuf);
*state = STATE_SLEEPING;
+
/*
- * We must absolutely guarentee that hlt is the next instruction
- * after sti or we introduce a timing window.
+ * Since we may be in a critical section from cpu_idle(), if
+ * an interrupt fires during that critical section we may have
+ * a pending preemption. If the CPU halts, then that thread
+ * may not execute until a later interrupt awakens the CPU.
+ * To handle this race, check for a runnable thread after
+ * disabling interrupts and immediately return if one is
+	 * found.  Also, we must absolutely guarantee that hlt is
+	 * the next instruction after sti.  This ensures that any
+	 * interrupt that fires after the call to disable_intr() will
+	 * immediately awaken the CPU from hlt.  Finally, please note
+	 * that on x86 this works fine because interrupts are enabled
+	 * only after the instruction following sti executes, while IF
+	 * is set to 1 immediately, allowing the hlt instruction to
+	 * acknowledge the interrupt.
*/
disable_intr();
if (sched_runnable())
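
The enlarged comment above is the crux of the idle race; condensed to code, the pattern it describes looks like the sketch below (kernel primitives as named in the hunk, so this is not runnable outside the kernel):

    /* Sketch of the race-free halt described above. */
    static void
    idle_hlt_sketch(void)
    {
    	disable_intr();			/* cli: close the wakeup window */
    	if (sched_runnable())
    		enable_intr();		/* work already pending: skip the halt */
    	else
    		__asm __volatile("sti; hlt"); /* hlt must directly follow sti */
    }
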
@@ -1295,32 +1352,31 @@
}
#endif
-/*
- * MWAIT cpu power states. Lower 4 bits are sub-states.
- */
-#define MWAIT_C0 0xf0
-#define MWAIT_C1 0x00
-#define MWAIT_C2 0x10
-#define MWAIT_C3 0x20
-#define MWAIT_C4 0x30
-
static void
-cpu_idle_mwait(int busy)
+cpu_idle_mwait(sbintime_t sbt)
{
int *state;
state = (int *)PCPU_PTR(monitorbuf);
*state = STATE_MWAIT;
- if (!sched_runnable()) {
- cpu_monitor(state, 0, 0);
- if (*state == STATE_MWAIT)
- cpu_mwait(0, MWAIT_C1);
+
+ /* See comments in cpu_idle_hlt(). */
+ disable_intr();
+ if (sched_runnable()) {
+ enable_intr();
+ *state = STATE_RUNNING;
+ return;
}
+ cpu_monitor(state, 0, 0);
+ if (*state == STATE_MWAIT)
+ __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
+ else
+ enable_intr();
*state = STATE_RUNNING;
}
static void
-cpu_idle_spin(int busy)
+cpu_idle_spin(sbintime_t sbt)
{
int *state;
int i;
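
cpu_idle_mwait() is rewritten in the same defensive style: disable interrupts, re-check for runnable work, arm the monitored line, re-test the state word, and only then issue sti;mwait back to back so that a late interrupt or a write to the state word aborts the wait. A condensed sketch, again using kernel-only names from the hunk:

    static void
    idle_mwait_sketch(int *state)
    {
    	disable_intr();
    	if (sched_runnable()) {
    		enable_intr();		/* bail before arming the monitor */
    		return;
    	}
    	cpu_monitor(state, 0, 0);	/* arm the monitored cache line */
    	if (*state == STATE_MWAIT)	/* still idle: commit to mwait */
    		__asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
    	else
    		enable_intr();
    }
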
@@ -1327,6 +1383,12 @@
state = (int *)PCPU_PTR(monitorbuf);
*state = STATE_RUNNING;
+
+ /*
+	 * The sched_runnable() call is racy, but as long as we are
+	 * in a loop, missing it once will have little impact, if any
+	 * (and it is much better than not checking at all).
+ */
for (i = 0; i < 1000; i++) {
if (sched_runnable())
return;
@@ -1363,10 +1425,10 @@
}
}
-#ifdef XEN
-void (*cpu_idle_fn)(int) = cpu_idle_hlt;
+#if defined(PC98) || defined(XEN)
+void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt;
#else
-void (*cpu_idle_fn)(int) = cpu_idle_acpi;
+void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi;
#endif
void
@@ -1375,6 +1437,7 @@
#ifndef XEN
uint64_t msr;
#endif
+ sbintime_t sbt = -1;
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
busy, curcpu);
@@ -1394,12 +1457,12 @@
/* If we have time - switch timers into idle mode. */
if (!busy) {
critical_enter();
- cpu_idleclock();
+ sbt = cpu_idleclock();
}
#ifndef XEN
/* Apply AMD APIC timer C1E workaround. */
- if (cpu_ident_amdc1e && cpu_disable_deep_sleep) {
+ if (cpu_ident_amdc1e && cpu_disable_c3_sleep) {
msr = rdmsr(MSR_AMDK8_IPM);
if (msr & AMDK8_CMPHALT)
wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
@@ -1407,7 +1470,7 @@
#endif
/* Call main idle method. */
- cpu_idle_fn(busy);
+ cpu_idle_fn(sbt);
 	/* Switch timers back into active mode. */
if (!busy) {
@@ -1450,7 +1513,9 @@
{ cpu_idle_spin, "spin" },
{ cpu_idle_mwait, "mwait" },
{ cpu_idle_hlt, "hlt" },
+#ifndef PC98
{ cpu_idle_acpi, "acpi" },
+#endif
{ NULL, NULL }
};
@@ -1467,9 +1532,11 @@
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
+#ifndef PC98
if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
cpu_idle_hook == NULL)
continue;
+#endif
p += sprintf(p, "%s%s", p != avail ? ", " : "",
idle_tbl[i].id_name);
}
@@ -1504,9 +1571,11 @@
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
+#ifndef PC98
if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
cpu_idle_hook == NULL)
continue;
+#endif
if (strcmp(idle_tbl[i].id_name, buf))
continue;
cpu_idle_fn = idle_tbl[i].id_fn;
@@ -1518,22 +1587,6 @@
SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
idle_sysctl, "A", "currently selected idle function");
-uint64_t (*atomic_load_acq_64)(volatile uint64_t *) =
- atomic_load_acq_64_i386;
-void (*atomic_store_rel_64)(volatile uint64_t *, uint64_t) =
- atomic_store_rel_64_i386;
-
-static void
-cpu_probe_cmpxchg8b(void)
-{
-
- if ((cpu_feature & CPUID_CX8) != 0 ||
- cpu_vendor_id == CPU_VENDOR_RISE) {
- atomic_load_acq_64 = atomic_load_acq_64_i586;
- atomic_store_rel_64 = atomic_store_rel_64_i586;
- }
-}
-
/*
* Reset registers to default values on exec.
*/
@@ -1585,17 +1638,9 @@
*/
reset_dbregs();
}
- pcb->pcb_flags &= ~PCB_DBREGS;
+ pcb->pcb_flags &= ~PCB_DBREGS;
}
- /*
- * Initialize the math emulator (if any) for the current process.
- * Actually, just clear the bit that says that the emulator has
- * been initialized. Initialization is delayed until the process
- * traps to the emulator (if it is done at all) mainly because
- * emulators don't provide an entry point for initialization.
- */
- td->td_pcb->pcb_flags &= ~FP_SOFTFP;
pcb->pcb_initial_npxcw = __INITIAL_NPXCW__;
/*
@@ -1644,6 +1689,10 @@
SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)");
+static char bootmethod[16] = "BIOS";
+SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
+ "System firmware boot method");
+
/*
* Initialize 386 and configure to run kernel
*/
@@ -1666,10 +1715,6 @@
struct region_descriptor r_gdt, r_idt; /* table descriptors */
struct mtx dt_lock; /* lock for GDT and LDT */
-#if defined(I586_CPU) && !defined(NO_F00F_HACK)
-extern int has_f00f_bug;
-#endif
-
static struct i386tss dblfault_tss;
static char dblfault_stack[PAGE_SIZE];
@@ -1946,6 +1991,9 @@
#ifdef KDTRACE_HOOKS
IDTVEC(dtrace_ret),
#endif
+#ifdef XENHVM
+ IDTVEC(xen_intr_upcall),
+#endif
IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
#ifdef DDB
@@ -1988,7 +2036,30 @@
db_printf("cr2\t0x%08x\n", rcr2());
db_printf("cr3\t0x%08x\n", rcr3());
db_printf("cr4\t0x%08x\n", rcr4());
+ if (rcr4() & CR4_XSAVE)
+ db_printf("xcr0\t0x%016llx\n", rxcr(0));
+ if (amd_feature & (AMDID_NX | AMDID_LM))
+ db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER));
+ if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
+ db_printf("FEATURES_CTL\t0x%016llx\n",
+ rdmsr(MSR_IA32_FEATURE_CONTROL));
+ if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
+ cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6)
+ db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR));
+ if (cpu_feature & CPUID_PAT)
+ db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT));
}
+
+DB_SHOW_COMMAND(dbregs, db_show_dbregs)
+{
+
+ db_printf("dr0\t0x%08x\n", rdr0());
+ db_printf("dr1\t0x%08x\n", rdr1());
+ db_printf("dr2\t0x%08x\n", rdr2());
+ db_printf("dr3\t0x%08x\n", rdr3());
+ db_printf("dr6\t0x%08x\n", rdr6());
+ db_printf("dr7\t0x%08x\n", rdr7());
+}
#endif
void
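
The DB_SHOW_COMMAND(dbregs, ...) added above registers a new DDB command, invoked from the debugger prompt as shown below; the values here are illustrative (the architectural power-on contents of DR6 and DR7):

    db> show dbregs
    dr0	0x00000000
    dr1	0x00000000
    dr2	0x00000000
    dr3	0x00000000
    dr6	0xffff0ff0
    dr7	0x00000400
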
@@ -2005,7 +2076,7 @@
ssd->ssd_gran = sd->sd_gran;
}
-#ifndef XEN
+#if !defined(PC98) && !defined(XEN)
static int
add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp)
{
@@ -2084,7 +2155,9 @@
physmap[insert_idx + 1] = smap->base + smap->length;
return (1);
}
+#endif /* !PC98 && !XEN */
+#ifndef XEN
static void
basemem_setup(void)
{
@@ -2132,7 +2205,7 @@
for (i = basemem / 4; i < 160; i++)
pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
}
-#endif
+#endif /* !XEN */
/*
* Populate the (physmap) array with base/bound pairs describing the
@@ -2147,9 +2220,274 @@
*
* XXX first should be vm_paddr_t.
*/
+#ifdef PC98
static void
getmemsize(int first)
{
+ int off, physmap_idx, pa_indx, da_indx;
+ u_long physmem_tunable, memtest;
+ vm_paddr_t physmap[PHYSMAP_SIZE];
+ pt_entry_t *pte;
+ quad_t dcons_addr, dcons_size;
+ int i;
+ int pg_n;
+ u_int extmem;
+ u_int under16;
+ vm_paddr_t pa;
+
+ bzero(physmap, sizeof(physmap));
+
+ /* XXX - some of EPSON machines can't use PG_N */
+ pg_n = PG_N;
+ if (pc98_machine_type & M_EPSON_PC98) {
+ switch (epson_machine_id) {
+#ifdef WB_CACHE
+ default:
+#endif
+ case EPSON_PC486_HX:
+ case EPSON_PC486_HG:
+ case EPSON_PC486_HA:
+ pg_n = 0;
+ break;
+ }
+ }
+
+ under16 = pc98_getmemsize(&basemem, &extmem);
+ basemem_setup();
+
+ physmap[0] = 0;
+ physmap[1] = basemem * 1024;
+ physmap_idx = 2;
+ physmap[physmap_idx] = 0x100000;
+ physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
+
+ /*
+ * Now, physmap contains a map of physical memory.
+ */
+
+#ifdef SMP
+ /* make hole for AP bootstrap code */
+ physmap[1] = mp_bootaddress(physmap[1]);
+#endif
+
+ /*
+ * Maxmem isn't the "maximum memory", it's one larger than the
+ * highest page of the physical address space. It should be
+ * called something like "Maxphyspage". We may adjust this
+ * based on ``hw.physmem'' and the results of the memory test.
+ */
+ Maxmem = atop(physmap[physmap_idx + 1]);
+
+#ifdef MAXMEM
+ Maxmem = MAXMEM / 4;
+#endif
+
+ if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
+ Maxmem = atop(physmem_tunable);
+
+ /*
+ * By default keep the memtest enabled. Use a general name so that
+ * one could eventually do more with the code than just disable it.
+ */
+ memtest = 1;
+ TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);
+
+ if (atop(physmap[physmap_idx + 1]) != Maxmem &&
+ (boothowto & RB_VERBOSE))
+ printf("Physical memory use set to %ldK\n", Maxmem * 4);
+
+ /*
+ * If Maxmem has been increased beyond what the system has detected,
+ * extend the last memory segment to the new limit.
+ */
+ if (atop(physmap[physmap_idx + 1]) < Maxmem)
+ physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
+
+ /*
+	 * We need to divide the chunk if Maxmem is larger than 16MB and
+	 * the area under 16MB is not entirely memory.
+ * (1) system area (15-16MB region) is cut off
+ * (2) extended memory is only over 16MB area (ex. Melco "HYPERMEMORY")
+ */
+ if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) {
+ /* 15M - 16M region is cut off, so need to divide chunk */
+ physmap[physmap_idx + 1] = under16 * 1024;
+ physmap_idx += 2;
+ physmap[physmap_idx] = 0x1000000;
+ physmap[physmap_idx + 1] = physmap[2] + extmem * 1024;
+ }
+
+ /* call pmap initialization to make new kernel address space */
+ pmap_bootstrap(first);
+
+ /*
+ * Size up each available chunk of physical memory.
+ */
+ physmap[0] = PAGE_SIZE; /* mask off page 0 */
+ pa_indx = 0;
+ da_indx = 1;
+ phys_avail[pa_indx++] = physmap[0];
+ phys_avail[pa_indx] = physmap[0];
+ dump_avail[da_indx] = physmap[0];
+ pte = CMAP3;
+
+ /*
+ * Get dcons buffer address
+ */
+ if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
+ getenv_quad("dcons.size", &dcons_size) == 0)
+ dcons_addr = 0;
+
+ /*
+ * physmap is in bytes, so when converting to page boundaries,
+ * round up the start address and round down the end address.
+ */
+ for (i = 0; i <= physmap_idx; i += 2) {
+ vm_paddr_t end;
+
+ end = ptoa((vm_paddr_t)Maxmem);
+ if (physmap[i + 1] < end)
+ end = trunc_page(physmap[i + 1]);
+ for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
+ int tmp, page_bad, full;
+ int *ptr = (int *)CADDR3;
+
+ full = FALSE;
+ /*
+ * block out kernel memory as not available.
+ */
+ if (pa >= KERNLOAD && pa < first)
+ goto do_dump_avail;
+
+ /*
+ * block out dcons buffer
+ */
+ if (dcons_addr > 0
+ && pa >= trunc_page(dcons_addr)
+ && pa < dcons_addr + dcons_size)
+ goto do_dump_avail;
+
+ page_bad = FALSE;
+ if (memtest == 0)
+ goto skip_memtest;
+
+ /*
+	 * map page into kernel: valid, read/write, non-cacheable
+ */
+ *pte = pa | PG_V | PG_RW | pg_n;
+ invltlb();
+
+ tmp = *(int *)ptr;
+ /*
+ * Test for alternating 1's and 0's
+ */
+ *(volatile int *)ptr = 0xaaaaaaaa;
+ if (*(volatile int *)ptr != 0xaaaaaaaa)
+ page_bad = TRUE;
+ /*
+ * Test for alternating 0's and 1's
+ */
+ *(volatile int *)ptr = 0x55555555;
+ if (*(volatile int *)ptr != 0x55555555)
+ page_bad = TRUE;
+ /*
+ * Test for all 1's
+ */
+ *(volatile int *)ptr = 0xffffffff;
+ if (*(volatile int *)ptr != 0xffffffff)
+ page_bad = TRUE;
+ /*
+ * Test for all 0's
+ */
+ *(volatile int *)ptr = 0x0;
+ if (*(volatile int *)ptr != 0x0)
+ page_bad = TRUE;
+ /*
+ * Restore original value.
+ */
+ *(int *)ptr = tmp;
+
+skip_memtest:
+ /*
+ * Adjust array of valid/good pages.
+ */
+ if (page_bad == TRUE)
+ continue;
+ /*
+ * If this good page is a continuation of the
+ * previous set of good pages, then just increase
+ * the end pointer. Otherwise start a new chunk.
+ * Note that "end" points one higher than end,
+ * making the range >= start and < end.
+ * If we're also doing a speculative memory
+	 * test and we are at or past the end, bump up Maxmem
+ * so that we keep going. The first bad page
+ * will terminate the loop.
+ */
+ if (phys_avail[pa_indx] == pa) {
+ phys_avail[pa_indx] += PAGE_SIZE;
+ } else {
+ pa_indx++;
+ if (pa_indx == PHYS_AVAIL_ARRAY_END) {
+ printf(
+ "Too many holes in the physical address space, giving up\n");
+ pa_indx--;
+ full = TRUE;
+ goto do_dump_avail;
+ }
+ phys_avail[pa_indx++] = pa; /* start */
+ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
+ }
+ physmem++;
+do_dump_avail:
+ if (dump_avail[da_indx] == pa) {
+ dump_avail[da_indx] += PAGE_SIZE;
+ } else {
+ da_indx++;
+ if (da_indx == DUMP_AVAIL_ARRAY_END) {
+ da_indx--;
+ goto do_next;
+ }
+ dump_avail[da_indx++] = pa; /* start */
+ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
+ }
+do_next:
+ if (full)
+ break;
+ }
+ }
+ *pte = 0;
+ invltlb();
+
+ /*
+ * XXX
+ * The last chunk must contain at least one page plus the message
+ * buffer to avoid complicating other code (message buffer address
+ * calculation, etc.).
+ */
+ while (phys_avail[pa_indx - 1] + PAGE_SIZE +
+ round_page(msgbufsize) >= phys_avail[pa_indx]) {
+ physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
+ phys_avail[pa_indx--] = 0;
+ phys_avail[pa_indx--] = 0;
+ }
+
+ Maxmem = atop(phys_avail[pa_indx]);
+
+ /* Trim off space for the message buffer. */
+ phys_avail[pa_indx] -= round_page(msgbufsize);
+
+ /* Map the message buffer. */
+ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
+ pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
+ off);
+
+ PT_UPDATES_FLUSH();
+}
+#else /* PC98 */
+static void
+getmemsize(int first)
+{
int has_smap, off, physmap_idx, pa_indx, da_indx;
u_long physmem_tunable, memtest;
vm_paddr_t physmap[PHYSMAP_SIZE];
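
The PC98 getmemsize() added above mirrors the PC-AT variant that follows: physmap[] is filled with {base, bound} pairs, and each candidate page is optionally write/read-back tested with four bit patterns before being appended to phys_avail[] (the kernel maps the page non-cacheable first so the test actually hits RAM). A compilable userland sketch of the per-word test:

    #include <stdbool.h>
    #include <stdio.h>

    /* The four patterns used by the memtest above, applied to one word. */
    static bool
    page_word_good(volatile int *ptr)
    {
    	static const int patterns[] =
    	    { 0xaaaaaaaa, 0x55555555, 0xffffffff, 0x00000000 };
    	int saved = *ptr;
    	bool good = true;

    	for (unsigned i = 0; i < sizeof(patterns) / sizeof(patterns[0]); i++) {
    		*ptr = patterns[i];
    		if (*ptr != patterns[i])
    			good = false;	/* stuck or shorted bit */
    	}
    	*ptr = saved;			/* restore the original contents */
    	return (good);
    }
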
@@ -2156,7 +2494,7 @@
pt_entry_t *pte;
quad_t dcons_addr, dcons_size;
#ifndef XEN
- int hasbrokenint12, i;
+ int hasbrokenint12, i, res;
u_int extmem;
struct vm86frame vmf;
struct vm86context vmc;
@@ -2241,7 +2579,8 @@
pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT);
vmc.npages = 0;
smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
- vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
+ res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
+ KASSERT(res != 0, ("vm86_getptr() failed: address not found"));
vmf.vmf_ebx = 0;
do {
@@ -2391,7 +2730,7 @@
phys_avail[pa_indx++] = physmap[0];
phys_avail[pa_indx] = physmap[0];
dump_avail[da_indx] = physmap[0];
- pte = CMAP1;
+ pte = CMAP3;
/*
* Get dcons buffer address
@@ -2413,7 +2752,7 @@
end = trunc_page(physmap[i + 1]);
for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
int tmp, page_bad, full;
- int *ptr = (int *)CADDR1;
+ int *ptr = (int *)CADDR3;
full = FALSE;
/*
@@ -2554,18 +2893,21 @@
PT_UPDATES_FLUSH();
}
+#endif /* PC98 */
#ifdef XEN
#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
-void
+register_t
init386(first)
int first;
{
unsigned long gdtmachpfn;
int error, gsel_tss, metadata_missing, x, pa;
- size_t kstack0_sz;
struct pcpu *pc;
+#ifdef CPU_ENABLE_SSE
+ struct xstate_hdr *xhdr;
+#endif
struct callback_register event = {
.type = CALLBACKTYPE_event,
.address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback },
@@ -2577,8 +2919,6 @@
thread0.td_kstack = proc0kstack;
thread0.td_kstack_pages = KSTACK_PAGES;
- kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
- thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1;
/*
* This may be done better later if it gets more high level
@@ -2658,7 +2998,6 @@
PCPU_SET(prvspace, pc);
PCPU_SET(curthread, &thread0);
- PCPU_SET(curpcb, thread0.td_pcb);
/*
* Initialize mutexes.
@@ -2739,16 +3078,8 @@
setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
initializecpu(); /* Initialize CPU registers */
+ initializecpucache();
- /* make an initial tss so cpu can get interrupt stack on syscall! */
- /* Note: -16 is so we can grow the trapframe if we came from vm86 */
- PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
- kstack0_sz - sizeof(struct pcb) - 16);
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
- HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
- PCPU_GET(common_tss.tss_esp0));
-
/* pointer to selector slot for %fs/%gs */
PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
@@ -2756,7 +3087,7 @@
dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
dblfault_tss.tss_cr3 = (int)IdlePDPT;
#else
dblfault_tss.tss_cr3 = (int)IdlePTD;
@@ -2776,6 +3107,32 @@
 	/* now running on new page tables, configured, and u/iom is accessible */
msgbufinit(msgbufp, msgbufsize);
+#ifdef DEV_NPX
+ npxinit(true);
+#endif
+ /*
+ * Set up thread0 pcb after npxinit calculated pcb + fpu save
+ * area size. Zero out the extended state header in fpu save
+ * area.
+ */
+ thread0.td_pcb = get_pcb_td(&thread0);
+ bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
+#ifdef CPU_ENABLE_SSE
+ if (use_xsave) {
+ xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
+ 1);
+ xhdr->xstate_bv = xsave_mask;
+ }
+#endif
+ PCPU_SET(curpcb, thread0.td_pcb);
+ /* make an initial tss so cpu can get interrupt stack on syscall! */
+ /* Note: -16 is so we can grow the trapframe if we came from vm86 */
+ PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
+ PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
+ gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+ HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
+ PCPU_GET(common_tss.tss_esp0));
+
/* transfer to user mode */
_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
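
The block above defers thread0's PCB setup until npxinit() has measured the FPU save area: the user save block is zeroed, and under XSAVE the extended-state header gets its xstate_bv seeded with the enabled-feature mask so the first XRSTOR sees a coherent init state. Annotated excerpt, assuming (as the pointer arithmetic in the hunk implies) that the header sits directly after the legacy union savefpu:

    bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
    if (use_xsave) {
    	/* Header lives right after the legacy savefpu area. */
    	xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1);
    	xhdr->xstate_bv = xsave_mask;	/* mark enabled components present */
    }
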
@@ -2783,7 +3140,7 @@
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
#else
thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
@@ -2794,23 +3151,24 @@
thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
cpu_probe_amdc1e();
- cpu_probe_cmpxchg8b();
+
+ /* Location of kernel stack for locore */
+ return ((register_t)thread0.td_pcb);
}
#else
-void
-init386(first)
- int first;
+register_t
+init386(int first)
{
struct gate_descriptor *gdp;
int gsel_tss, metadata_missing, x, pa;
- size_t kstack0_sz;
struct pcpu *pc;
+#ifdef CPU_ENABLE_SSE
+ struct xstate_hdr *xhdr;
+#endif
thread0.td_kstack = proc0kstack;
- thread0.td_kstack_pages = KSTACK_PAGES;
- kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
- thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1;
+ thread0.td_kstack_pages = TD0_KSTACK_PAGES;
/*
* This may be done better later if it gets more high level
@@ -2818,6 +3176,13 @@
*/
proc_linkup0(&proc0, &thread0);
+#ifdef PC98
+ /*
+ * Initialize DMAC
+ */
+ pc98_init_dmac();
+#endif
+
metadata_missing = 0;
if (bootinfo.bi_modulep) {
preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
@@ -2825,11 +3190,16 @@
} else {
metadata_missing = 1;
}
- if (envmode == 1)
- kern_envp = static_env;
- else if (bootinfo.bi_envp)
- kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
+ if (bootinfo.bi_envp)
+ init_static_kenv((caddr_t)bootinfo.bi_envp + KERNBASE, 0);
+ else
+ init_static_kenv(NULL, 0);
+
+#ifndef XEN
+ identify_hypervisor();
+#endif
+
/* Init basic tunables, hz etc */
init_param1();
@@ -2864,7 +3234,6 @@
first += DPCPU_SIZE;
PCPU_SET(prvspace, pc);
PCPU_SET(curthread, &thread0);
- PCPU_SET(curpcb, thread0.td_pcb);
/*
* Initialize mutexes.
@@ -2934,6 +3303,10 @@
setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL,
GSEL(GCODE_SEL, SEL_KPL));
#endif
+#ifdef XENHVM
+ setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_UPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+#endif
r_idt.rd_limit = sizeof(idt0) - 1;
r_idt.rd_base = (int) idt;
@@ -2967,6 +3340,40 @@
*/
i8254_init();
+ finishidentcpu(); /* Final stage of CPU initialization */
+ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ initializecpu(); /* Initialize CPU registers */
+ initializecpucache();
+
+ /* pointer to selector slot for %fs/%gs */
+ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
+
+ dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
+ dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
+ dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
+ dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
+#if defined(PAE) || defined(PAE_TABLES)
+ dblfault_tss.tss_cr3 = (int)IdlePDPT;
+#else
+ dblfault_tss.tss_cr3 = (int)IdlePTD;
+#endif
+ dblfault_tss.tss_eip = (int)dblfault_handler;
+ dblfault_tss.tss_eflags = PSL_KERNEL;
+ dblfault_tss.tss_ds = dblfault_tss.tss_es =
+ dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
+ dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
+ dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
+ dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+
+ vm86_initialize();
+ getmemsize(first);
+ init_param2(physmem);
+
+	/* now running on new page tables, configured, and u/iom is accessible */
+
/*
* Initialize the console before we print anything out.
*/
@@ -2977,7 +3384,9 @@
#ifdef DEV_ISA
#ifdef DEV_ATPIC
+#ifndef PC98
elcr_probe();
+#endif
atpic_startup();
#else
/* Reset and mask the atpics and leave them shut down. */
@@ -3006,17 +3415,29 @@
kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
- finishidentcpu(); /* Final stage of CPU initialization */
- setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- initializecpu(); /* Initialize CPU registers */
-
+ msgbufinit(msgbufp, msgbufsize);
+#ifdef DEV_NPX
+ npxinit(true);
+#endif
+ /*
+ * Set up thread0 pcb after npxinit calculated pcb + fpu save
+ * area size. Zero out the extended state header in fpu save
+ * area.
+ */
+ thread0.td_pcb = get_pcb_td(&thread0);
+ thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
+ bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
+#ifdef CPU_ENABLE_SSE
+ if (use_xsave) {
+ xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
+ 1);
+ xhdr->xstate_bv = xsave_mask;
+ }
+#endif
+ PCPU_SET(curpcb, thread0.td_pcb);
/* make an initial tss so cpu can get interrupt stack on syscall! */
/* Note: -16 is so we can grow the trapframe if we came from vm86 */
- PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
- kstack0_sz - sizeof(struct pcb) - 16);
+ PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
@@ -3024,34 +3445,6 @@
PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
ltr(gsel_tss);
- /* pointer to selector slot for %fs/%gs */
- PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
-
- dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
- dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
- dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
- dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
-#ifdef PAE
- dblfault_tss.tss_cr3 = (int)IdlePDPT;
-#else
- dblfault_tss.tss_cr3 = (int)IdlePTD;
-#endif
- dblfault_tss.tss_eip = (int)dblfault_handler;
- dblfault_tss.tss_eflags = PSL_KERNEL;
- dblfault_tss.tss_ds = dblfault_tss.tss_es =
- dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
- dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
- dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
- dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
-
- vm86_initialize();
- getmemsize(first);
- init_param2(physmem);
-
- /* now running on new page tables, configured,and u/iom is accessible */
-
- msgbufinit(msgbufp, msgbufsize);
-
/* make a call gate to reenter kernel with */
gdp = &ldt[LSYS5CALLS_SEL].gd;
@@ -3076,7 +3469,7 @@
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
#else
thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
@@ -3085,7 +3478,13 @@
thread0.td_frame = &proc0_tf;
cpu_probe_amdc1e();
- cpu_probe_cmpxchg8b();
+
+#ifdef FDT
+ x86_init_fdt();
+#endif
+
+ /* Location of kernel stack for locore */
+ return ((register_t)thread0.td_pcb);
}
#endif
@@ -3096,6 +3495,46 @@
pcpu->pc_acpi_id = 0xffffffff;
}
+#ifndef PC98
+static int
+smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+ struct bios_smap *smapbase;
+ struct bios_smap_xattr smap;
+ caddr_t kmdp;
+ uint32_t *smapattr;
+ int count, error, i;
+
+ /* Retrieve the system memory map from the loader. */
+ kmdp = preload_search_by_type("elf kernel");
+ if (kmdp == NULL)
+ kmdp = preload_search_by_type("elf32 kernel");
+ if (kmdp == NULL)
+ return (0);
+ smapbase = (struct bios_smap *)preload_search_info(kmdp,
+ MODINFO_METADATA | MODINFOMD_SMAP);
+ if (smapbase == NULL)
+ return (0);
+ smapattr = (uint32_t *)preload_search_info(kmdp,
+ MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
+ count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase);
+ error = 0;
+ for (i = 0; i < count; i++) {
+ smap.base = smapbase[i].base;
+ smap.length = smapbase[i].length;
+ smap.type = smapbase[i].type;
+ if (smapattr != NULL)
+ smap.xattr = smapattr[i];
+ else
+ smap.xattr = 0;
+ error = SYSCTL_OUT(req, &smap, sizeof(smap));
+ }
+ return (error);
+}
+SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
+ smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data");
+#endif /* !PC98 */
+
void
spinlock_enter(void)
{
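
The machdep.smap sysctl added above exports the raw BIOS memory map as an array of struct bios_smap_xattr records. A small userland reader, assuming the packed 8+8+4+4-byte layout implied by the handler:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Mirrors struct bios_smap_xattr as emitted by the handler above. */
    struct smap_xattr {
    	uint64_t base;
    	uint64_t length;
    	uint32_t type;
    	uint32_t xattr;
    } __attribute__((__packed__));

    int
    main(void)
    {
    	struct smap_xattr *smap;
    	size_t len, i;

    	if (sysctlbyname("machdep.smap", NULL, &len, NULL, 0) == -1)
    		return (1);
    	if ((smap = malloc(len)) == NULL ||
    	    sysctlbyname("machdep.smap", smap, &len, NULL, 0) == -1)
    		return (1);
    	for (i = 0; i < len / sizeof(*smap); i++)
    		printf("%#018jx len %#jx type %u attr %#x\n",
    		    (uintmax_t)smap[i].base, (uintmax_t)smap[i].length,
    		    smap[i].type, smap[i].xattr);
    	free(smap);
    	return (0);
    }
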
@@ -3143,9 +3582,9 @@
printf("Intel Pentium detected, installing workaround for F00F bug\n");
- tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2);
+ tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO);
if (tmp == 0)
- panic("kmem_alloc returned 0");
+ panic("kmem_malloc returned 0");
/* Put the problematic entry (#6) at the end of the lower page. */
new_idt = (struct gate_descriptor*)
@@ -3154,9 +3593,7 @@
r_idt.rd_base = (u_int)new_idt;
lidt(&r_idt);
idt = new_idt;
- if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
- VM_PROT_READ, FALSE) != KERN_SUCCESS)
- panic("vm_map_protect failed");
+ pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ);
}
#endif /* defined(I586_CPU) && !NO_F00F_HACK */
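
For context: the F00F workaround relocates the IDT so that the problematic descriptor (#6, #UD) is the last one on the first of the two pages, which is then made read-only with pmap_protect() instead of the old vm_map_protect() call. The continuation of the new_idt assignment falls outside the hunk; it presumably computes something like the following (an assumption, not quoted from the diff):

    /* Park descriptors 0-6 at the end of the first (read-only) page. */
    new_idt = (struct gate_descriptor *)
        (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor));
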
@@ -3177,6 +3614,7 @@
pcb->pcb_ebx = tf->tf_ebx;
pcb->pcb_eip = tf->tf_eip;
pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8;
+ pcb->pcb_gs = rgs();
}
int
@@ -3330,11 +3768,11 @@
#endif
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
- fill_fpregs_xmm(&td->td_pcb->pcb_user_save.sv_xmm,
+ fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm,
(struct save87 *)fpregs);
else
#endif /* CPU_ENABLE_SSE */
- bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs,
+ bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs,
sizeof(*fpregs));
return (0);
}
@@ -3346,10 +3784,10 @@
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
set_fpregs_xmm((struct save87 *)fpregs,
- &td->td_pcb->pcb_user_save.sv_xmm);
+ &get_pcb_user_save_td(td)->sv_xmm);
else
#endif /* CPU_ENABLE_SSE */
- bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87,
+ bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87,
sizeof(*fpregs));
#ifdef DEV_NPX
npxuserinited(td);
@@ -3395,12 +3833,14 @@
mcp->mc_esp = tp->tf_esp;
mcp->mc_ss = tp->tf_ss;
mcp->mc_len = sizeof(*mcp);
- get_fpcontext(td, mcp);
+ get_fpcontext(td, mcp, NULL, 0);
sdp = &td->td_pcb->pcb_fsd;
mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
sdp = &td->td_pcb->pcb_gsd;
mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
mcp->mc_flags = 0;
+ mcp->mc_xfpustate = 0;
+ mcp->mc_xfpustate_len = 0;
bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
return (0);
}
@@ -3412,40 +3852,57 @@
* touch the cs selector.
*/
int
-set_mcontext(struct thread *td, const mcontext_t *mcp)
+set_mcontext(struct thread *td, mcontext_t *mcp)
{
struct trapframe *tp;
+ char *xfpustate;
int eflags, ret;
tp = td->td_frame;
- if (mcp->mc_len != sizeof(*mcp))
+ if (mcp->mc_len != sizeof(*mcp) ||
+ (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
return (EINVAL);
eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
(tp->tf_eflags & ~PSL_USERCHANGE);
- if ((ret = set_fpcontext(td, mcp)) == 0) {
- tp->tf_fs = mcp->mc_fs;
- tp->tf_es = mcp->mc_es;
- tp->tf_ds = mcp->mc_ds;
- tp->tf_edi = mcp->mc_edi;
- tp->tf_esi = mcp->mc_esi;
- tp->tf_ebp = mcp->mc_ebp;
- tp->tf_ebx = mcp->mc_ebx;
- tp->tf_edx = mcp->mc_edx;
- tp->tf_ecx = mcp->mc_ecx;
- tp->tf_eax = mcp->mc_eax;
- tp->tf_eip = mcp->mc_eip;
- tp->tf_eflags = eflags;
- tp->tf_esp = mcp->mc_esp;
- tp->tf_ss = mcp->mc_ss;
- td->td_pcb->pcb_gs = mcp->mc_gs;
- ret = 0;
- }
- return (ret);
+ if (mcp->mc_flags & _MC_HASFPXSTATE) {
+ if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
+ sizeof(union savefpu))
+ return (EINVAL);
+ xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+ ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
+ mcp->mc_xfpustate_len);
+ if (ret != 0)
+ return (ret);
+ } else
+ xfpustate = NULL;
+ ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
+ if (ret != 0)
+ return (ret);
+ tp->tf_fs = mcp->mc_fs;
+ tp->tf_es = mcp->mc_es;
+ tp->tf_ds = mcp->mc_ds;
+ tp->tf_edi = mcp->mc_edi;
+ tp->tf_esi = mcp->mc_esi;
+ tp->tf_ebp = mcp->mc_ebp;
+ tp->tf_ebx = mcp->mc_ebx;
+ tp->tf_edx = mcp->mc_edx;
+ tp->tf_ecx = mcp->mc_ecx;
+ tp->tf_eax = mcp->mc_eax;
+ tp->tf_eip = mcp->mc_eip;
+ tp->tf_eflags = eflags;
+ tp->tf_esp = mcp->mc_esp;
+ tp->tf_ss = mcp->mc_ss;
+ td->td_pcb->pcb_gs = mcp->mc_gs;
+ return (0);
}
static void
-get_fpcontext(struct thread *td, mcontext_t *mcp)
+get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
+ size_t xfpusave_len)
{
+#ifdef CPU_ENABLE_SSE
+ size_t max_len, len;
+#endif
#ifndef DEV_NPX
mcp->mc_fpformat = _MC_FPFMT_NODEV;
@@ -3453,15 +3910,30 @@
bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
#else
mcp->mc_ownedfp = npxgetregs(td);
- bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate[0],
+ bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
sizeof(mcp->mc_fpstate));
mcp->mc_fpformat = npxformat();
+#ifdef CPU_ENABLE_SSE
+ if (!use_xsave || xfpusave_len == 0)
+ return;
+ max_len = cpu_max_ext_state_size - sizeof(union savefpu);
+ len = xfpusave_len;
+ if (len > max_len) {
+ len = max_len;
+ bzero(xfpusave + max_len, len - max_len);
+ }
+ mcp->mc_flags |= _MC_HASFPXSTATE;
+ mcp->mc_xfpustate_len = len;
+ bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
#endif
+#endif
}
static int
-set_fpcontext(struct thread *td, const mcontext_t *mcp)
+set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
+ size_t xfpustate_len)
{
+ int error;
if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
return (0);
@@ -3468,22 +3940,21 @@
else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
mcp->mc_fpformat != _MC_FPFMT_XMM)
return (EINVAL);
- else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
+ else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
/* We don't care what state is left in the FPU or PCB. */
fpstate_drop(td);
- else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
+ error = 0;
+ } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
#ifdef DEV_NPX
-#ifdef CPU_ENABLE_SSE
- if (cpu_fxsr)
- ((union savefpu *)&mcp->mc_fpstate)->sv_xmm.sv_env.
- en_mxcsr &= cpu_mxcsr_mask;
+ error = npxsetregs(td, (union savefpu *)&mcp->mc_fpstate,
+ xfpustate, xfpustate_len);
+#else
+ error = EINVAL;
#endif
- npxsetregs(td, (union savefpu *)&mcp->mc_fpstate);
-#endif
} else
return (EINVAL);
- return (0);
+ return (error);
}
static void
Modified: trunk/sys/i386/i386/mem.c
===================================================================
--- trunk/sys/i386/i386/mem.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/mem.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1988 University of Utah.
* Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
@@ -37,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/mem.c 309426 2016-12-02 19:02:12Z jhb $");
/*
* Memory special file
@@ -86,10 +87,6 @@
int error = 0;
vm_offset_t addr;
- /* XXX UPS Why ? */
- GIANT_REQUIRED;
-
-
if (dev2unit(dev) != CDEV_MINOR_MEM && dev2unit(dev) != CDEV_MINOR_KMEM)
return EIO;
@@ -112,8 +109,11 @@
continue;
}
if (dev2unit(dev) == CDEV_MINOR_MEM) {
- pa = uio->uio_offset;
- pa &= ~PAGE_MASK;
+ if (uio->uio_offset > cpu_getmaxphyaddr()) {
+ error = EFAULT;
+ break;
+ }
+ pa = trunc_page(uio->uio_offset);
} else {
/*
* Extract the physical page since the mapping may
@@ -165,9 +165,11 @@
memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
int prot __unused, vm_memattr_t *memattr __unused)
{
- if (dev2unit(dev) == CDEV_MINOR_MEM)
+ if (dev2unit(dev) == CDEV_MINOR_MEM) {
+ if (offset > cpu_getmaxphyaddr())
+ return (-1);
*paddr = offset;
- else if (dev2unit(dev) == CDEV_MINOR_KMEM)
+ } else if (dev2unit(dev) == CDEV_MINOR_KMEM)
*paddr = vtophys(offset);
/* else panic! */
return (0);
Modified: trunk/sys/i386/i386/minidump_machdep.c
===================================================================
--- trunk/sys/i386/i386/minidump_machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/minidump_machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2006 Peter Wemm
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/minidump_machdep.c 282065 2015-04-27 08:02:12Z kib $");
#include "opt_watchdog.h"
@@ -265,7 +266,7 @@
mdhdr.bitmapsize = vm_page_dump_size;
mdhdr.ptesize = ptesize;
mdhdr.kernbase = KERNBASE;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
mdhdr.paemode = 1;
#endif
Modified: trunk/sys/i386/i386/mp_clock.c
===================================================================
--- trunk/sys/i386/i386/mp_clock.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/mp_clock.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* ----------------------------------------------------------------------------
* "THE BEER-WARE LICENSE" (Revision 42):
@@ -8,7 +9,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/mp_clock.c 170289 2007-06-04 18:25:08Z dwmalone $");
/*-
* Just when we thought life were beautiful, reality pops its grim face over
Modified: trunk/sys/i386/i386/mp_machdep.c
===================================================================
--- trunk/sys/i386/i386/mp_machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/mp_machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1996, by Steve Passe
* All rights reserved.
@@ -24,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/mp_machdep.c 331910 2018-04-03 07:52:06Z avg $");
#include "opt_apic.h"
#include "opt_cpu.h"
@@ -81,6 +82,7 @@
#include <machine/psl.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
+#include <machine/cpu.h>
#define WARMBOOT_TARGET 0
#define WARMBOOT_OFF (KERNBASE + 0x0467)
@@ -145,11 +147,8 @@
void *bootstacks[MAXCPU];
static void *dpcpu;
-/* Hotwire a 0->4MB V==P mapping */
-extern pt_entry_t *KPTphys;
-
struct pcb stoppcbs[MAXCPU];
-struct pcb **susppcbs = NULL;
+struct susppcb **susppcbs;
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
@@ -169,6 +168,11 @@
static u_long *ipi_hardclock_counts[MAXCPU];
#endif
+/* Default cpu_ops implementation. */
+struct cpu_ops cpu_ops = {
+ .ipi_vectored = lapic_ipi_vectored
+};
+
/*
* Local data and functions.
*/
@@ -175,6 +179,9 @@
static volatile cpuset_t ipi_nmi_pending;
+volatile cpuset_t resuming_cpus;
+volatile cpuset_t toresume_cpus;
+
/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;
@@ -195,7 +202,7 @@
int apic_cpuids[MAX_APIC_ID + 1];
/* Holds pending bitmap based IPIs per CPU */
-static volatile u_int cpu_ipi_pending[MAXCPU];
+volatile u_int cpu_ipi_pending[MAXCPU];
static u_int boot_address;
static int cpu_logical; /* logical cpus per core */
@@ -624,7 +631,7 @@
const char *hyperthread;
int i;
- printf("MidnightBSD/SMP: %d package(s) x %d core(s)",
+ printf("FreeBSD/SMP: %d package(s) x %d core(s)",
mp_ncpus / (cpu_cores * cpu_logical), cpu_cores);
if (hyperthreading_cpus > 1)
printf(" x %d HTT threads", cpu_logical);
@@ -681,6 +688,8 @@
pc->pc_prvspace = pc;
pc->pc_curthread = 0;
+ fix_cpuid();
+
gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
@@ -744,22 +753,15 @@
/* set up CPU registers and state */
cpu_setregs();
+ /* set up SSE/NX */
+ initializecpu();
+
/* set up FPU state on the AP */
- npxinit();
+ npxinit(false);
- /* set up SSE registers */
- enable_sse();
+ if (cpu_ops.cpu_init)
+ cpu_ops.cpu_init();
-#ifdef PAE
- /* Enable the PTE no-execute bit. */
- if ((amd_feature & AMDID_NX) != 0) {
- uint64_t msr;
-
- msr = rdmsr(MSR_EFER) | EFER_NXE;
- wrmsr(MSR_EFER, msr);
- }
-#endif
-
/* A quick check from sanity claus */
cpuid = PCPU_GET(cpuid);
if (PCPU_GET(apic_id) != lapic_id()) {
@@ -799,7 +801,6 @@
if (smp_cpus == mp_ncpus) {
/* enable IPI's, tlb shootdown, freezes etc */
atomic_store_rel_int(&smp_started, 1);
- smp_active = 1; /* historic */
}
mtx_unlock_spin(&ap_boot_mtx);
@@ -826,6 +827,8 @@
* We tell the I/O APIC code about all the CPUs we want to receive
* interrupts. If we don't want certain CPUs to receive IRQs we
* can simply not tell the I/O APIC code about them in this function.
+ * We also do not tell it about the BSP since it tells itself about
+ * the BSP internally to work with UP kernels and on UP machines.
*/
static void
set_interrupt_apic_ids(void)
@@ -836,6 +839,8 @@
apic_id = cpu_apic_ids[i];
if (apic_id == -1)
continue;
+ if (cpu_info[apic_id].cpu_bsp)
+ continue;
if (cpu_info[apic_id].cpu_disabled)
continue;
@@ -931,7 +936,6 @@
#ifndef PC98
u_char mpbiosreason;
#endif
- uintptr_t kptbase;
u_int32_t mpbioswarmvec;
int apic_id, cpu, i;
@@ -949,11 +953,8 @@
/* set up temporary P==V mapping for AP boot */
/* XXX this is a hack, we should boot the AP on its own stack/PTD */
-
- kptbase = (uintptr_t)(void *)KPTphys;
for (i = TMPMAP_START; i < NKPT; i++)
- PTD[i] = (pd_entry_t)(PG_V | PG_RW |
- ((kptbase + i * PAGE_SIZE) & PG_FRAME));
+ PTD[i] = PTD[KPTDI + i];
invltlb();
/* start each AP */
@@ -962,8 +963,10 @@
/* allocate and set up a boot stack data page */
bootstacks[cpu] =
- (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
- dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE);
+ (char *)kmem_malloc(kernel_arena, KSTACK_PAGES * PAGE_SIZE,
+ M_WAITOK | M_ZERO);
+ dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
+ M_WAITOK | M_ZERO);
/* setup a vector to our boot code */
*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
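
Both allocations above move from the old kmem_alloc(kernel_map, size) interface to the vmem-backed kmem_malloc(kernel_arena, size, flags) KPI, which takes malloc-style flags; the F00F hack in machdep.c makes the same substitution. The pattern, side by side:

    /* Before: map-based KPI, no flags argument. */
    dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE);

    /* After: arena-based KPI with explicit M_WAITOK | M_ZERO. */
    dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO);
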
@@ -1088,57 +1091,8 @@
/* used as a watchpoint to signal AP startup */
cpus = mp_naps;
- /*
- * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
- * and running the target CPU. OR this INIT IPI might be latched (P5
- * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
- * ignored.
- */
+ ipi_startup(apic_id, vector);
- /* do an INIT IPI: assert RESET */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
-
- /* wait for pending status end */
- lapic_ipi_wait(-1);
-
- /* do an INIT IPI: deassert RESET */
- lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
-
- /* wait for pending status end */
- DELAY(10000); /* wait ~10mS */
- lapic_ipi_wait(-1);
-
- /*
- * next we do a STARTUP IPI: the previous INIT IPI might still be
- * latched, (P5 bug) this 1st STARTUP would then terminate
- * immediately, and the previously started INIT IPI would continue. OR
- * the previous INIT IPI has already run. and this STARTUP IPI will
- * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
- * will run.
- */
-
- /* do a STARTUP IPI */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
- vector, apic_id);
- lapic_ipi_wait(-1);
- DELAY(200); /* wait ~200uS */
-
- /*
- * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
- * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
- * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
- * recognized after hardware RESET or INIT IPI.
- */
-
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
- vector, apic_id);
- lapic_ipi_wait(-1);
- DELAY(200); /* wait ~200uS */
-
/* Wait up to 5 seconds for it to start. */
for (ms = 0; ms < 5000; ms++) {
if (mp_naps > cpus)
@@ -1185,6 +1139,69 @@
#endif /* COUNT_XINVLTLB_HITS */
/*
+ * Init and startup IPI.
+ */
+void
+ipi_startup(int apic_id, int vector)
+{
+
+ /*
+ * This attempts to follow the algorithm described in the
+ * Intel Multiprocessor Specification v1.4 in section B.4.
+ * For each IPI, we allow the local APIC ~20us to deliver the
+ * IPI. If that times out, we panic.
+ */
+
+ /*
+ * first we do an INIT IPI: this INIT IPI might be run, resetting
+ * and running the target CPU. OR this INIT IPI might be latched (P5
+ * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
+ * ignored.
+ */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
+ lapic_ipi_wait(100);
+
+ /* Explicitly deassert the INIT IPI. */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
+ apic_id);
+
+ DELAY(10000); /* wait ~10mS */
+
+ /*
+ * next we do a STARTUP IPI: the previous INIT IPI might still be
+ * latched, (P5 bug) this 1st STARTUP would then terminate
+	 * latched (P5 bug); this 1st STARTUP would then terminate
+	 * immediately, and the previously started INIT IPI would continue.  OR
+	 * the previous INIT IPI has already run, and this STARTUP IPI will
+	 * run.  OR the previous INIT IPI was ignored, and this STARTUP IPI
+	 * will run.
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ vector, apic_id);
+ if (!lapic_ipi_wait(100))
+ panic("Failed to deliver first STARTUP IPI to APIC %d",
+ apic_id);
+ DELAY(200); /* wait ~200uS */
+
+ /*
+ * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
+ * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
+ * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
+ * recognized after hardware RESET or INIT IPI.
+ */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ vector, apic_id);
+ if (!lapic_ipi_wait(100))
+ panic("Failed to deliver second STARTUP IPI to APIC %d",
+ apic_id);
+
+ DELAY(200); /* wait ~200uS */
+}
+
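For context, a minimal sketch of how a caller of ipi_startup() derives its
arguments, assuming the conventional encoding of the STARTUP vector as the
4KB page number of the AP trampoline; the helper name start_ap_sketch and
the boot_address parameter are illustrative, pieced together from the
surrounding hunk, and are not part of this change:

    /*
     * Hedged sketch: the STARTUP IPI vector is the page number of the
     * real-mode trampoline, so a trampoline copied to boot_address is
     * advertised to the AP as (boot_address >> 12).
     */
    static int
    start_ap_sketch(int apic_id, u_int boot_address)
    {
            int cpus, vector;

            vector = boot_address >> 12;    /* trampoline page number */
            cpus = mp_naps;                 /* watchpoint for AP startup */
            ipi_startup(apic_id, vector);
            /* The real caller polls this for up to 5 seconds. */
            return (mp_naps > cpus);
    }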
+/*
* Send an IPI to specified CPU handling the bitmap logic.
*/
static void
@@ -1205,7 +1222,7 @@
if (old_pending)
return;
}
- lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
+ cpu_ops.ipi_vectored(ipi, cpu_apic_ids[cpu]);
}
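The cpu_ops indirection introduced in this hunk lets an alternative
platform (Xen, per the suspend hunk below) override native IPI delivery
and resume handling. A hedged sketch of the shape implied by the calls in
this diff; the struct name, field list, and native initializer are
assumptions, and only the two members actually used here are shown:

    /* Shape inferred from cpu_ops.ipi_vectored and cpu_ops.cpu_resume. */
    struct cpu_ops_sketch {
            void    (*ipi_vectored)(u_int vector, int dest);
            void    (*cpu_resume)(void);
    };

    /* Hypothetical native table: plain LAPIC IPIs, no resume hook. */
    static struct cpu_ops_sketch cpu_ops_native = {
            .ipi_vectored   = lapic_ipi_vectored,
            .cpu_resume     = NULL,
    };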
/*
@@ -1256,7 +1273,7 @@
ipi_all_but_self(vector);
} else {
ncpu = 0;
- while ((cpu = cpusetobj_ffs(&mask)) != 0) {
+ while ((cpu = CPU_FFS(&mask)) != 0) {
cpu--;
CPU_CLR(cpu, &mask);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
@@ -1405,7 +1422,7 @@
if (ipi == IPI_STOP_HARD)
CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
- while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
+ while ((cpu = CPU_FFS(&cpus)) != 0) {
cpu--;
CPU_CLR(cpu, &cpus);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
@@ -1456,7 +1473,7 @@
CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
+ cpu_ops.ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}
int
@@ -1517,30 +1534,114 @@
{
u_int cpu;
+ mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
+
cpu = PCPU_GET(cpuid);
-
- if (suspendctx(susppcbs[cpu])) {
+ if (savectx(&susppcbs[cpu]->sp_pcb)) {
+ npxsuspend(susppcbs[cpu]->sp_fpususpend);
wbinvd();
- CPU_SET_ATOMIC(cpu, &stopped_cpus);
+ CPU_SET_ATOMIC(cpu, &suspended_cpus);
+ /*
+ * Hack for xen, which does not use resumectx() so never
+ * uses the next clause: set resuming_cpus early so that
+ * resume_cpus() can wait on the same bitmap for acpi and
+ * xen. resuming_cpus now means eventually_resumable_cpus.
+ */
+ CPU_SET_ATOMIC(cpu, &resuming_cpus);
} else {
+ npxresume(susppcbs[cpu]->sp_fpususpend);
pmap_init_pat();
+ initializecpu();
PCPU_SET(switchtime, 0);
PCPU_SET(switchticks, ticks);
- susppcbs[cpu]->pcb_eip = 0;
+
+ /* Indicate that we are resuming */
+ CPU_CLR_ATOMIC(cpu, &suspended_cpus);
}
- /* Wait for resume */
- while (!CPU_ISSET(cpu, &started_cpus))
+ /* Wait for resume directive */
+ while (!CPU_ISSET(cpu, &toresume_cpus))
ia32_pause();
+ if (cpu_ops.cpu_resume)
+ cpu_ops.cpu_resume();
+
/* Resume MCA and local APIC */
mca_resume();
lapic_setup(0);
- CPU_CLR_ATOMIC(cpu, &started_cpus);
- CPU_CLR_ATOMIC(cpu, &stopped_cpus);
+ /* Indicate that we are resumed */
+ CPU_CLR_ATOMIC(cpu, &resuming_cpus);
+ CPU_CLR_ATOMIC(cpu, &suspended_cpus);
+ CPU_CLR_ATOMIC(cpu, &toresume_cpus);
}
+
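To make the three suspend/resume bitmaps easier to follow, here is a
hedged sketch of the BSP-side counterpart that the xen comment alludes
to; this is entirely an assumption, since resume_cpus() itself is not
part of this hunk:

    /*
     * Hypothetical waiter: release the APs via toresume_cpus, then wait
     * for each of them to clear its bit in resuming_cpus.
     */
    static void
    resume_cpus_sketch(cpuset_t cpus)
    {

            CPU_COPY(&cpus, &toresume_cpus);        /* release the APs */
            while (!CPU_EMPTY(&resuming_cpus))      /* wait until resumed */
                    ia32_pause();
            CPU_ZERO(&toresume_cpus);
    }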
/*
+ * Handlers for TLB related IPIs
+ */
+void
+invltlb_handler(void)
+{
+ uint64_t cr3;
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_gbl[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ cr3 = rcr3();
+ load_cr3(cr3);
+ atomic_add_int(&smp_tlb_wait, 1);
+}
+
+void
+invlpg_handler(void)
+{
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_pg[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ invlpg(smp_tlb_addr1);
+
+ atomic_add_int(&smp_tlb_wait, 1);
+}
+
+void
+invlrng_handler(void)
+{
+ vm_offset_t addr;
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_rng[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ addr = smp_tlb_addr1;
+ do {
+ invlpg(addr);
+ addr += PAGE_SIZE;
+ } while (addr < smp_tlb_addr2);
+
+ atomic_add_int(&smp_tlb_wait, 1);
+}
+
+void
+invlcache_handler(void)
+{
+#ifdef COUNT_IPIS
+ (*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ wbinvd();
+ atomic_add_int(&smp_tlb_wait, 1);
+}
+
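Each handler above finishes with atomic_add_int(&smp_tlb_wait, 1); the
initiating CPU presumably rendezvouses on that counter, roughly as in the
hedged sketch below. The initiator is not shown in this diff; the function
name and the ncpu bookkeeping are assumptions:

    /* Hypothetical initiator side of a TLB-shootdown rendezvous. */
    static void
    smp_tlb_shootdown_sketch(u_int vector, int ncpu)
    {

            atomic_store_rel_int(&smp_tlb_wait, 0);
            ipi_all_but_self(vector);       /* kick the remote handlers */
            while (smp_tlb_wait < ncpu)     /* each handler adds one */
                    ia32_pause();
    }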
+/*
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
Modified: trunk/sys/i386/i386/mp_watchdog.c
===================================================================
--- trunk/sys/i386/i386/mp_watchdog.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/mp_watchdog.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2004 Robert N. M. Watson
* All rights reserved.
@@ -23,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/mp_watchdog.c 314667 2017-03-04 13:03:31Z avg $
*/
#include "opt_mp_watchdog.h"
@@ -86,7 +87,7 @@
watchdog_init(void *arg)
{
- callout_init(&watchdog_callout, CALLOUT_MPSAFE);
+ callout_init(&watchdog_callout, 1);
if (watchdog_cpu != -1)
watchdog_change(watchdog_cpu);
}
Modified: trunk/sys/i386/i386/mpboot.s
===================================================================
--- trunk/sys/i386/i386/mpboot.s 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/mpboot.s 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1995 Jack F. Vogel
* All rights reserved.
@@ -26,7 +27,7 @@
* mpboot.s: FreeBSD machine support for the Intel MP Spec
* multiprocessor systems.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/mpboot.s 282065 2015-04-27 08:02:12Z kib $
*/
#include "opt_pmap.h"
@@ -99,7 +100,7 @@
movl %eax,%cr4
/* Now enable paging mode */
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePDPT), %eax
movl %eax, %cr3
movl %cr4, %eax
Property changes on: trunk/sys/i386/i386/mpboot.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/perfmon.c
===================================================================
--- trunk/sys/i386/i386/perfmon.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/perfmon.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 1996 Massachusetts Institute of Technology
*
@@ -28,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/perfmon.c 220433 2011-04-07 23:28:28Z jkim $");
#include <sys/param.h>
#include <sys/systm.h>
Modified: trunk/sys/i386/i386/pmap.c
===================================================================
--- trunk/sys/i386/i386/pmap.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/pmap.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1991 Regents of the University of California.
* All rights reserved.
@@ -75,18 +76,11 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/pmap.c 321363 2017-07-22 06:40:57Z alc $");
/*
* Manages physical address maps.
*
- * In addition to hardware address maps, this
- * module is called upon to provide software-use-only
- * maps which may or may not be stored in the same
- * form as hardware maps. These pseudo-maps are
- * used to store intermediate results from copy
- * operations to and from address spaces.
- *
* Since the information managed by this module is
* also stored by the logical address mapping module,
* this module may throw away valid virtual-to-physical
@@ -140,6 +134,8 @@
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>
@@ -219,7 +215,7 @@
extern u_int32_t KERNend;
extern u_int32_t KPTphys;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
pt_entry_t pg_nx;
static uma_zone_t pdptzone;
#endif
@@ -238,15 +234,18 @@
static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */
/*
- * Isolate the global pv list lock from data and other locks to prevent false
- * sharing within the cache.
+ * pmap_mapdev support for pre-initialization mappings (e.g., the console)
*/
-static struct {
- struct rwlock lock;
- char padding[CACHE_LINE_SIZE - sizeof(struct rwlock)];
-} pvh_global __aligned(CACHE_LINE_SIZE);
+#define PMAP_PREINIT_MAPPING_COUNT 8
+static struct pmap_preinit_mapping {
+ vm_paddr_t pa;
+ vm_offset_t va;
+ vm_size_t sz;
+ int mode;
+} pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
+static int pmap_initialized;
-#define pvh_global_lock pvh_global.lock
+static struct rwlock_padalign pvh_global_lock;
/*
* Data for the pv entry allocation mechanism
@@ -271,11 +270,10 @@
caddr_t CADDR2;
};
static struct sysmaps sysmaps_pcpu[MAXCPU];
-pt_entry_t *CMAP1 = 0;
-static pt_entry_t *CMAP3;
+pt_entry_t *CMAP3;
static pd_entry_t *KPTD;
-caddr_t CADDR1 = 0, ptvmmap = 0;
-static caddr_t CADDR3;
+caddr_t ptvmmap = 0;
+caddr_t CADDR3;
struct msgbuf *msgbufp = 0;
/*
@@ -319,7 +317,9 @@
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static void pmap_flush_page(vm_page_t m);
-static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
+ pd_entry_t pde);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
static boolean_t pmap_is_referenced_pvh(struct md_page *pvh);
@@ -332,12 +332,12 @@
vm_prot_t prot);
static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- vm_page_t *free);
+ struct spglist *free);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
- vm_page_t *free);
+ struct spglist *free);
static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
- vm_page_t *free);
+ struct spglist *free);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
vm_offset_t va);
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
@@ -347,15 +347,16 @@
pd_entry_t newpde);
static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
-static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
+static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags);
-static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags);
-static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free);
+static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags);
+static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free);
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
static void pmap_pte_release(pt_entry_t *pte);
-static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
-#ifdef PAE
-static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
+static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *);
+#if defined(PAE) || defined(PAE_TABLES)
+static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags,
+ int wait);
#endif
static void pmap_set_pg(void);
@@ -390,6 +391,15 @@
int i;
/*
+ * Add a physical memory segment (vm_phys_seg) corresponding to the
+ * preallocated kernel page table pages so that vm_page structures
+ * representing these pages will be created. The vm_page structures
+ * are required for promotion of the corresponding kernel virtual
+ * addresses to superpage mappings.
+ */
+ vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt));
+
+ /*
* Initialize the first available kernel virtual address. However,
* using "firstaddr" may waste a few pages of the kernel virtual
* address space, because locore may not have mapped every physical
@@ -405,10 +415,9 @@
*/
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
- kernel_pmap->pm_root = NULL;
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
@@ -450,7 +459,6 @@
SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
}
- SYSMAP(caddr_t, CMAP1, CADDR1, 1)
SYSMAP(caddr_t, CMAP3, CADDR3, 1)
/*
@@ -512,7 +520,14 @@
for (i = 1; i < NKPT; i++)
PTD[i] = 0;
- /* Initialize the PAT MSR if present. */
+ /*
+ * Initialize the PAT MSR if present.
+ * pmap_init_pat() clears and sets CR4_PGE, which, as a
+ * side-effect, invalidates stale PG_G TLB entries that might
+ * have been created in our pre-boot environment. We assume
+ * that PAT support implies PGE and, conversely, that PGE presence
+ * comes with PAT. Both features were added with the Pentium Pro.
+ */
pmap_init_pat();
/* Turn on PG_G on kernel page(s) */
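As background for the side effect the new comment describes, toggling
CR4.PGE is the canonical way to flush global (PG_G) TLB entries; a minimal
hedged sketch, assuming the standard rcr4()/load_cr4() accessors (the
helper name is illustrative):

    /* Flush all TLB entries, including PG_G ones, by toggling CR4.PGE. */
    static void
    invltlb_glob_sketch(void)
    {
            u_int cr4;

            cr4 = rcr4();
            load_cr4(cr4 & ~CR4_PGE);   /* clearing PGE flushes everything */
            load_cr4(cr4);              /* restore the previous setting */
    }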
@@ -540,7 +555,10 @@
pat_table[PAT_WRITE_PROTECTED] = 3;
pat_table[PAT_UNCACHED] = 3;
- /* Bail if this CPU doesn't implement PAT. */
+ /*
+ * Bail if this CPU doesn't implement PAT.
+ * We assume that PAT support implies PGE.
+ */
if ((cpu_feature & CPUID_PAT) == 0) {
for (i = 0; i < PAT_INDEX_SIZE; i++)
pat_index[i] = pat_table[i];
@@ -664,20 +682,20 @@
m->md.pat_mode = PAT_WRITE_BACK;
}
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
static void *
-pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
{
/* Inform UMA that this allocator uses kernel_map/object. */
*flags = UMA_SLAB_KERNEL;
- return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 0x0ULL,
+ return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, 0x0ULL,
0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT));
}
#endif
/*
- * ABuse the pte nodes for unmapped kva to thread a kva freelist through.
+ * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
* Requirements:
* - Must deal with pages in order to ensure that none of the PG_* bits
* are ever set, PG_V in particular.
@@ -696,7 +714,7 @@
va = *head;
if (va == 0)
- return (va); /* Out of memory */
+ panic("pmap_ptelist_alloc: exhausted ptelist KVA");
pte = vtopte(va);
*head = *pte;
if (*head & PG_V)
@@ -739,6 +757,7 @@
void
pmap_init(void)
{
+ struct pmap_preinit_mapping *ppim;
vm_page_t mpte;
vm_size_t s;
int i, pv_npg;
@@ -768,12 +787,18 @@
pv_entry_high_water = 9 * (pv_entry_max / 10);
/*
- * If the kernel is running in a virtual machine on an AMD Family 10h
- * processor, then it must assume that MCA is enabled by the virtual
- * machine monitor.
+ * If the kernel is running on a virtual machine, then it must assume
+ * that MCA is enabled by the hypervisor. Moreover, the kernel must
+ * be prepared for the hypervisor changing the vendor and family that
+ * are reported by CPUID. Consequently, the workaround for AMD Family
+ * 10h Erratum 383 is enabled if the processor's feature set does not
+ * include at least one feature that is only supported by older Intel
+ * or newer AMD processors.
*/
- if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
- CPUID_TO_FAMILY(cpu_id) == 0x10)
+ if (vm_guest == VM_GUEST_VM && (cpu_feature & CPUID_SS) == 0 &&
+ (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI |
+ CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP |
+ AMDID2_FMA4)) == 0)
workaround_erratum383 = 1;
/*
@@ -790,9 +815,10 @@
/*
* Calculate the size of the pv head table for superpages.
+ * Handle the possibility that "vm_phys_segs[...].end" is zero.
*/
- for (i = 0; phys_avail[i + 1]; i += 2);
- pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;
+ pv_npg = trunc_4mpage(vm_phys_segs[vm_phys_nsegs - 1].end -
+ PAGE_SIZE) / NBPDR + 1;
/*
* Allocate memory for the pv head table for superpages.
@@ -799,22 +825,33 @@
*/
s = (vm_size_t)(pv_npg * sizeof(struct md_page));
s = round_page(s);
- pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
+ pv_table = (struct md_page *)kmem_malloc(kernel_arena, s,
+ M_WAITOK | M_ZERO);
for (i = 0; i < pv_npg; i++)
TAILQ_INIT(&pv_table[i].pv_list);
pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
- pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
- PAGE_SIZE * pv_maxchunks);
+ pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks);
if (pv_chunkbase == NULL)
panic("pmap_init: not enough kvm for pv chunks");
pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
UMA_ZONE_VM | UMA_ZONE_NOFREE);
uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
#endif
+
+ pmap_initialized = 1;
+ if (!bootverbose)
+ return;
+ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
+ ppim = pmap_preinit_mapping + i;
+ if (ppim->va == 0)
+ continue;
+ printf("PPIM %u: PA=%#jx, VA=%#x, size=%#x, mode=%#x\n", i,
+ (uintmax_t)ppim->pa, ppim->va, ppim->sz, ppim->mode);
+ }
}
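For orientation, a hedged sketch of how the pmap_preinit_mapping table
printed above is presumably filled in before pmap_initialized is set; the
recording helper is an assumption inferred from the struct fields, and the
corresponding pmap_mapdev_attr() change is not among the hunks shown here:

    /* Hypothetical: record an early device mapping in a free PPIM slot. */
    static int
    ppim_record_sketch(vm_paddr_t pa, vm_offset_t va, vm_size_t sz, int mode)
    {
            struct pmap_preinit_mapping *ppim;
            int i;

            if (pmap_initialized)
                    return (0);             /* the normal path takes over */
            for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
                    ppim = pmap_preinit_mapping + i;
                    if (ppim->va == 0) {    /* free slot */
                            ppim->pa = pa;
                            ppim->va = va;
                            ppim->sz = sz;
                            ppim->mode = mode;
                            return (1);
                    }
            }
            return (0);                     /* table full */
    }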
@@ -1181,22 +1218,46 @@
}
#endif /* !SMP */
+static void
+pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde)
+{
+
+ /*
+ * When the PDE has PG_PROMOTED set, the 2- or 4MB page mapping was
+ * created by a promotion that did not invalidate the 512 or 1024 4KB
+ * page mappings that might exist in the TLB. Consequently, at this
+ * point, the TLB may hold both 4KB and 2- or 4MB page mappings for
+ * the address range [va, va + NBPDR). Therefore, the entire range
+ * must be invalidated here. In contrast, when PG_PROMOTED is clear,
+ * the TLB will not hold any 4KB page mappings for the address range
+ * [va, va + NBPDR), and so a single INVLPG suffices to invalidate the
+ * 2- or 4MB page mapping from the TLB.
+ */
+ if ((pde & PG_PROMOTED) != 0)
+ pmap_invalidate_range(pmap, va, va + NBPDR - 1);
+ else
+ pmap_invalidate_page(pmap, va);
+}
+
#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
void
-pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
+pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
{
- KASSERT((sva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: sva not page-aligned"));
- KASSERT((eva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: eva not page-aligned"));
+ if (force) {
+ sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
+ } else {
+ KASSERT((sva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: sva not page-aligned"));
+ KASSERT((eva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: eva not page-aligned"));
+ }
- if (cpu_feature & CPUID_SS)
- ; /* If "Self Snoop" is supported, do nothing. */
- else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+ if ((cpu_feature & CPUID_SS) != 0 && !force)
+ ; /* If "Self Snoop" is supported and allowed, do nothing. */
+ else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
#ifdef DEV_APIC
/*
* XXX: Some CPUs fault, hang, or trash the local APIC
@@ -1208,16 +1269,31 @@
return;
#endif
/*
- * Otherwise, do per-cache line flush. Use the mfence
+ * Otherwise, do per-cache line flush. Use the sfence
* instruction to insure that previous stores are
* included in the write-back. The processor
* propagates flush to other processors in the cache
* coherence domain.
*/
- mfence();
+ sfence();
for (; sva < eva; sva += cpu_clflush_line_size)
+ clflushopt(sva);
+ sfence();
+ } else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+ eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+#ifdef DEV_APIC
+ if (pmap_kextract(sva) == lapic_paddr)
+ return;
+#endif
+ /*
+ * Writes are ordered by CLFLUSH on Intel CPUs.
+ */
+ if (cpu_vendor_id != CPU_VENDOR_INTEL)
+ mfence();
+ for (; sva < eva; sva += cpu_clflush_line_size)
clflush(sva);
- mfence();
+ if (cpu_vendor_id != CPU_VENDOR_INTEL)
+ mfence();
} else {
/*
@@ -1298,6 +1374,13 @@
mtx_unlock(&PMAP2mutex);
}
+/*
+ * NB: The sequence of updating a page table followed by accesses to the
+ * corresponding pages is subject to the situation described in the "AMD64
+ * Architecture Programmer's Manual Volume 2: System Programming" rev. 3.23,
+ * "7.3.1 Special Coherency Considerations". Therefore, issuing the INVLPG
+ * right after modifying the PTE bits is crucial.
+ */
static __inline void
invlcaddr(void *caddr)
{
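The "special coherency considerations" note added above boils down to a
strict modify-then-invalidate ordering; a minimal hedged sketch of the
pattern (the wrapper name is illustrative, not part of the change):

    /* Modify a PTE and invalidate its TLB entry with no window between. */
    static __inline void
    pte_update_sketch(pt_entry_t *pte, pt_entry_t newpte, vm_offset_t va)
    {

            *pte = newpte;  /* update the PTE bits ... */
            invlpg(va);     /* ... and issue INVLPG immediately after */
    }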
@@ -1584,13 +1667,12 @@
* Page table page management routines.....
***************************************************/
static __inline void
-pmap_free_zero_pages(vm_page_t free)
+pmap_free_zero_pages(struct spglist *free)
{
vm_page_t m;
- while (free != NULL) {
- m = free;
- free = m->right;
+ while ((m = SLIST_FIRST(free)) != NULL) {
+ SLIST_REMOVE_HEAD(free, plinks.s.ss);
/* Preserve the page's PG_ZERO setting. */
vm_page_free_toq(m);
}
@@ -1602,7 +1684,8 @@
* physical memory manager after the TLB has been updated.
*/
static __inline void
-pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
+pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
+ boolean_t set_PG_ZERO)
{
if (set_PG_ZERO)
@@ -1609,8 +1692,7 @@
m->flags |= PG_ZERO;
else
m->flags &= ~PG_ZERO;
- m->right = *free;
- *free = m;
+ SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}
/*
@@ -1619,31 +1701,12 @@
* for mapping a distinct range of virtual addresses. The pmap's collection is
* ordered by this virtual address range.
*/
-static void
+static __inline int
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
{
- vm_page_t root;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- root = pmap->pm_root;
- if (root == NULL) {
- mpte->left = NULL;
- mpte->right = NULL;
- } else {
- root = vm_page_splay(mpte->pindex, root);
- if (mpte->pindex < root->pindex) {
- mpte->left = root->left;
- mpte->right = root;
- root->left = NULL;
- } else if (mpte->pindex == root->pindex)
- panic("pmap_insert_pt_page: pindex already inserted");
- else {
- mpte->right = root->right;
- mpte->left = root;
- root->right = NULL;
- }
- }
- pmap->pm_root = mpte;
+ return (vm_radix_insert(&pmap->pm_root, mpte));
}
/*
@@ -1651,19 +1714,12 @@
* specified pmap's collection of idle page table pages. Returns NULL if there
* is no page table page corresponding to the specified virtual address.
*/
-static vm_page_t
+static __inline vm_page_t
pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
{
- vm_page_t mpte;
- vm_pindex_t pindex = va >> PDRSHIFT;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) {
- mpte = vm_page_splay(pindex, mpte);
- if ((pmap->pm_root = mpte)->pindex != pindex)
- mpte = NULL;
- }
- return (mpte);
+ return (vm_radix_lookup(&pmap->pm_root, va >> PDRSHIFT));
}
/*
@@ -1671,21 +1727,12 @@
* of idle page table pages. The specified page table page must be a member of
* the pmap's collection.
*/
-static void
+static __inline void
pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
{
- vm_page_t root;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if (mpte != pmap->pm_root)
- vm_page_splay(mpte->pindex, pmap->pm_root);
- if (mpte->left == NULL)
- root = mpte->right;
- else {
- root = vm_page_splay(mpte->pindex, mpte->left);
- root->right = mpte->right;
- }
- pmap->pm_root = root;
+ vm_radix_remove(&pmap->pm_root, mpte->pindex);
}
/*
@@ -1695,7 +1742,7 @@
* page table page was unmapped and FALSE otherwise.
*/
static inline boolean_t
-pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
+pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free)
{
--m->wire_count;
@@ -1707,7 +1754,7 @@
}
static void
-_pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
+_pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free)
{
vm_offset_t pteva;
@@ -1743,7 +1790,7 @@
* conditionally free the page, and manage the hold/wire counts.
*/
static int
-pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
+pmap_unuse_pt(pmap_t pmap, vm_offset_t va, struct spglist *free)
{
pd_entry_t ptepde;
vm_page_t mpte;
@@ -1769,10 +1816,10 @@
* not need to be inserted into that list.
*/
pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
- pmap->pm_root = NULL;
+ pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
@@ -1790,20 +1837,15 @@
vm_paddr_t pa;
int i;
- PMAP_LOCK_INIT(pmap);
-
/*
* No need to allocate page table space yet but we do need a valid
* page directory table.
*/
if (pmap->pm_pdir == NULL) {
- pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
- NBPTD);
- if (pmap->pm_pdir == NULL) {
- PMAP_LOCK_DESTROY(pmap);
+ pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD);
+ if (pmap->pm_pdir == NULL)
return (0);
- }
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
KASSERT(((vm_offset_t)pmap->pm_pdpt &
((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
@@ -1811,9 +1853,9 @@
KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
("pmap_pinit: pdpt above 4g"));
#endif
- pmap->pm_root = NULL;
+ pmap->pm_root.rt_root = 0;
}
- KASSERT(pmap->pm_root == NULL,
+ KASSERT(vm_radix_is_empty(&pmap->pm_root),
("pmap_pinit: pmap has reserved page table page(s)"));
/*
@@ -1845,7 +1887,7 @@
for (i = 0; i < NPGPTD; i++) {
pa = VM_PAGE_TO_PHYS(ptdpg[i]);
pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
pmap->pm_pdpt[i] = pa | PG_V;
#endif
}
@@ -1862,21 +1904,17 @@
* mapped correctly.
*/
static vm_page_t
-_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags)
+_pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags)
{
vm_paddr_t ptepa;
vm_page_t m;
- KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
- (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
- ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
-
/*
* Allocate a page table page.
*/
if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
- if (flags & M_WAITOK) {
+ if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
PMAP_UNLOCK(pmap);
rw_wunlock(&pvh_global_lock);
VM_WAIT;
@@ -1908,16 +1946,12 @@
}
static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
+pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags)
{
u_int ptepindex;
pd_entry_t ptepa;
vm_page_t m;
- KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
- (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
- ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
-
/*
* Calculate pagetable page index
*/
@@ -1950,7 +1984,7 @@
* been deallocated.
*/
m = _pmap_allocpte(pmap, ptepindex, flags);
- if (m == NULL && (flags & M_WAITOK))
+ if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
goto retry;
}
return (m);
@@ -2007,12 +2041,12 @@
spins = 50000000;
/* Find least significant set bit. */
- lsb = cpusetobj_ffs(&mask);
+ lsb = CPU_FFS(&mask);
MPASS(lsb != 0);
lsb--;
CPU_SETOF(lsb, &mask);
mtx_lock_spin(&smp_ipi_mtx);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
lazyptd = vtophys(pmap->pm_pdpt);
#else
lazyptd = vtophys(pmap->pm_pdir);
@@ -2077,7 +2111,7 @@
KASSERT(pmap->pm_stats.resident_count == 0,
("pmap_release: pmap resident count %ld != 0",
pmap->pm_stats.resident_count));
- KASSERT(pmap->pm_root == NULL,
+ KASSERT(vm_radix_is_empty(&pmap->pm_root),
("pmap_release: pmap has reserved page table page(s)"));
pmap_lazyfix(pmap);
@@ -2096,7 +2130,7 @@
for (i = 0; i < NPGPTD; i++) {
m = ptdpg[i];
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
("pmap_release: got wrong ptd page"));
#endif
@@ -2104,7 +2138,6 @@
atomic_subtract_int(&cnt.v_wire_count, 1);
vm_page_free_zero(m);
}
- PMAP_LOCK_DESTROY(pmap);
}
static int
@@ -2244,16 +2277,18 @@
pt_entry_t *pte, tpte;
pv_entry_t pv;
vm_offset_t va;
- vm_page_t free, m, m_pc;
+ vm_page_t m, m_pc;
+ struct spglist free;
uint32_t inuse;
int bit, field, freed;
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
pmap = NULL;
- free = m_pc = NULL;
+ m_pc = NULL;
+ SLIST_INIT(&free);
TAILQ_INIT(&newtail);
while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
- free == NULL)) {
+ SLIST_EMPTY(&free))) {
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
if (pmap != pc->pc_pmap) {
if (pmap != NULL) {
@@ -2302,7 +2337,7 @@
vm_page_dirty(m);
if ((tpte & PG_A) != 0)
vm_page_aflag_set(m, PGA_REFERENCED);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
if (TAILQ_EMPTY(&m->md.pv_list) &&
(m->flags & PG_FICTITIOUS) == 0) {
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -2358,14 +2393,14 @@
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
}
- if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
- m_pc = free;
- free = m_pc->right;
+ if (m_pc == NULL && pv_vafree != 0 && SLIST_EMPTY(&free)) {
+ m_pc = SLIST_FIRST(&free);
+ SLIST_REMOVE_HEAD(&free, plinks.s.ss);
/* Recycle a freed page table page. */
m_pc->wire_count = 1;
atomic_add_int(&cnt.v_wire_count, 1);
}
- pmap_free_zero_pages(free);
+ pmap_free_zero_pages(&free);
return (m_pc);
}
@@ -2507,9 +2542,9 @@
pv_entry_t pv;
rw_assert(&pvh_global_lock, RA_WLOCKED);
- TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
- TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
+ TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
break;
}
}
@@ -2537,7 +2572,7 @@
pv = pmap_pvh_remove(pvh, pmap, va);
KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found"));
m = PHYS_TO_VM_PAGE(pa);
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
/* Instantiate the remaining NPTEPG - 1 pv entries. */
va_last = va + NBPDR - PAGE_SIZE;
do {
@@ -2573,7 +2608,7 @@
pv = pmap_pvh_remove(&m->md, pmap, va);
KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found"));
pvh = pa_to_pvh(pa);
- TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
+ TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
/* Free the remaining NPTEPG - 1 pv entries. */
va_last = va + NBPDR - PAGE_SIZE;
do {
@@ -2620,7 +2655,7 @@
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
pv = get_pv_entry(pmap, FALSE);
pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
}
/*
@@ -2636,7 +2671,7 @@
if (pv_entry_count < pv_entry_high_water &&
(pv = get_pv_entry(pmap, TRUE)) != NULL) {
pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
return (TRUE);
} else
return (FALSE);
@@ -2656,7 +2691,7 @@
(pv = get_pv_entry(pmap, TRUE)) != NULL) {
pv->pv_va = va;
pvh = pa_to_pvh(pa);
- TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
+ TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
return (TRUE);
} else
return (FALSE);
@@ -2686,14 +2721,16 @@
pd_entry_t newpde, oldpde;
pt_entry_t *firstpte, newpte;
vm_paddr_t mptepa;
- vm_page_t free, mpte;
+ vm_page_t mpte;
+ struct spglist free;
+ vm_offset_t sva;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
oldpde = *pde;
KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
- mpte = pmap_lookup_pt_page(pmap, va);
- if (mpte != NULL)
+ if ((oldpde & PG_A) != 0 && (mpte = pmap_lookup_pt_page(pmap, va)) !=
+ NULL)
pmap_remove_pt_page(pmap, mpte);
else {
KASSERT((oldpde & PG_W) == 0,
@@ -2708,10 +2745,12 @@
if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL |
VM_ALLOC_WIRED)) == NULL) {
- free = NULL;
- pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
- pmap_invalidate_page(pmap, trunc_4mpage(va));
- pmap_free_zero_pages(free);
+ SLIST_INIT(&free);
+ sva = trunc_4mpage(va);
+ pmap_remove_pde(pmap, pde, sva, &free);
+ if ((oldpde & PG_G) == 0)
+ pmap_invalidate_pde_page(pmap, sva, oldpde);
+ pmap_free_zero_pages(&free);
CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
" in pmap %p", va, pmap);
return (FALSE);
@@ -2822,11 +2861,49 @@
}
/*
+ * Removes a 2- or 4MB page mapping from the kernel pmap.
+ */
+static void
+pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
+{
+ pd_entry_t newpde;
+ vm_paddr_t mptepa;
+ vm_page_t mpte;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ mpte = pmap_lookup_pt_page(pmap, va);
+ if (mpte == NULL)
+ panic("pmap_remove_kernel_pde: Missing pt page.");
+
+ pmap_remove_pt_page(pmap, mpte);
+ mptepa = VM_PAGE_TO_PHYS(mpte);
+ newpde = mptepa | PG_M | PG_A | PG_RW | PG_V;
+
+ /*
+ * Initialize the page table page.
+ */
+ pagezero((void *)&KPTmap[i386_btop(trunc_4mpage(va))]);
+
+ /*
+ * Remove the mapping.
+ */
+ if (workaround_erratum383)
+ pmap_update_pde(pmap, va, pde, newpde);
+ else
+ pmap_kenter_pde(va, newpde);
+
+ /*
+ * Invalidate the recursive mapping of the page table page.
+ */
+ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
+}
+
+/*
* pmap_remove_pde: do the things to unmap a superpage in a process
*/
static void
pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- vm_page_t *free)
+ struct spglist *free)
{
struct md_page *pvh;
pd_entry_t oldpde;
@@ -2844,8 +2921,9 @@
* Machines that don't support invlpg, also don't support
* PG_G.
*/
- if (oldpde & PG_G)
- pmap_invalidate_page(kernel_pmap, sva);
+ if ((oldpde & PG_G) != 0)
+ pmap_invalidate_pde_page(kernel_pmap, sva, oldpde);
+
pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
if (oldpde & PG_MANAGED) {
pvh = pa_to_pvh(oldpde & PG_PS_FRAME);
@@ -2863,8 +2941,7 @@
}
}
if (pmap == kernel_pmap) {
- if (!pmap_demote_pde(pmap, pdq, sva))
- panic("pmap_remove_pde: failed demotion");
+ pmap_remove_kernel_pde(pmap, pdq, sva);
} else {
mpte = pmap_lookup_pt_page(pmap, sva);
if (mpte != NULL) {
@@ -2883,7 +2960,8 @@
* pmap_remove_pte: do the things to unmap a page in a process
*/
static int
-pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
+pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va,
+ struct spglist *free)
{
pt_entry_t oldpte;
vm_page_t m;
@@ -2917,7 +2995,7 @@
* Remove a single page from a process address space
*/
static void
-pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
+pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free)
{
pt_entry_t *pte;
@@ -2942,7 +3020,7 @@
vm_offset_t pdnxt;
pd_entry_t ptpaddr;
pt_entry_t *pte;
- vm_page_t free = NULL;
+ struct spglist free;
int anyvalid;
/*
@@ -2952,6 +3030,7 @@
return;
anyvalid = 0;
+ SLIST_INIT(&free);
rw_wlock(&pvh_global_lock);
sched_pin();
@@ -3044,7 +3123,7 @@
pmap_invalidate_all(pmap);
rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
- pmap_free_zero_pages(free);
+ pmap_free_zero_pages(&free);
}
/*
@@ -3069,11 +3148,11 @@
pt_entry_t *pte, tpte;
pd_entry_t *pde;
vm_offset_t va;
- vm_page_t free;
+ struct spglist free;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
- free = NULL;
+ SLIST_INIT(&free);
rw_wlock(&pvh_global_lock);
sched_pin();
if ((m->flags & PG_FICTITIOUS) != 0)
@@ -3111,7 +3190,7 @@
vm_page_dirty(m);
pmap_unuse_pt(pmap, pv->pv_va, &free);
pmap_invalidate_page(pmap, pv->pv_va);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
free_pv_entry(pmap, pv);
PMAP_UNLOCK(pmap);
}
@@ -3118,7 +3197,7 @@
vm_page_aflag_clear(m, PGA_WRITEABLE);
sched_unpin();
rw_wunlock(&pvh_global_lock);
- pmap_free_zero_pages(free);
+ pmap_free_zero_pages(&free);
}
/*
@@ -3147,15 +3226,20 @@
}
if ((prot & VM_PROT_WRITE) == 0)
newpde &= ~(PG_RW | PG_M);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
if ((prot & VM_PROT_EXECUTE) == 0)
newpde |= pg_nx;
#endif
if (newpde != oldpde) {
- if (!pde_cmpset(pde, oldpde, newpde))
+ /*
+ * As an optimization to future operations on this PDE, clear
+ * PG_PROMOTED. The impending invalidation will remove any
+ * lingering 4KB page mappings from the TLB.
+ */
+ if (!pde_cmpset(pde, oldpde, newpde & ~PG_PROMOTED))
goto retry;
- if (oldpde & PG_G)
- pmap_invalidate_page(pmap, sva);
+ if ((oldpde & PG_G) != 0)
+ pmap_invalidate_pde_page(kernel_pmap, sva, oldpde);
else
anychanged = TRUE;
}
@@ -3174,12 +3258,13 @@
pt_entry_t *pte;
boolean_t anychanged, pv_lists_locked;
- if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
+ KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
+ if (prot == VM_PROT_NONE) {
pmap_remove(pmap, sva, eva);
return;
}
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
(VM_PROT_WRITE|VM_PROT_EXECUTE))
return;
@@ -3282,13 +3367,13 @@
}
pbits &= ~(PG_RW | PG_M);
}
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
if ((prot & VM_PROT_EXECUTE) == 0)
pbits |= pg_nx;
#endif
if (pbits != obits) {
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
if (!atomic_cmpset_64(pte, obits, pbits))
goto retry;
#else
@@ -3414,7 +3499,13 @@
("pmap_promote_pde: page table page is out of range"));
KASSERT(mpte->pindex == va >> PDRSHIFT,
("pmap_promote_pde: page table page's pindex is wrong"));
- pmap_insert_pt_page(pmap, mpte);
+ if (pmap_insert_pt_page(pmap, mpte)) {
+ pmap_pde_p_failures++;
+ CTR2(KTR_PMAP,
+ "pmap_promote_pde: failure for va %#x in pmap %p", va,
+ pmap);
+ return;
+ }
/*
* Promote the pv entries.
@@ -3434,9 +3525,9 @@
if (workaround_erratum383)
pmap_update_pde(pmap, va, pde, PG_PS | newpde);
else if (pmap == kernel_pmap)
- pmap_kenter_pde(va, PG_PS | newpde);
+ pmap_kenter_pde(va, PG_PROMOTED | PG_PS | newpde);
else
- pde_store(pde, PG_PS | newpde);
+ pde_store(pde, PG_PROMOTED | PG_PS | newpde);
pmap_pde_promotions++;
CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x"
@@ -3455,9 +3546,9 @@
* or lose information. That is, this routine must actually
* insert this page into the given map NOW.
*/
-void
-pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
- vm_prot_t prot, boolean_t wired)
+int
+pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+ u_int flags, int8_t psind)
{
pd_entry_t *pde;
pt_entry_t *pte;
@@ -3465,19 +3556,19 @@
pv_entry_t pv;
vm_paddr_t opa, pa;
vm_page_t mpte, om;
- boolean_t invlva;
+ boolean_t invlva, wired;
va = trunc_page(va);
+ mpte = NULL;
+ wired = (flags & PMAP_ENTER_WIRED) != 0;
+
KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
va));
- KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
- VM_OBJECT_LOCKED(m->object),
- ("pmap_enter: page %p is not busy", m));
+ if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
+ VM_OBJECT_ASSERT_LOCKED(m->object);
- mpte = NULL;
-
rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
sched_pin();
@@ -3487,7 +3578,15 @@
* resident, we are creating it here.
*/
if (va < VM_MAXUSER_ADDRESS) {
- mpte = pmap_allocpte(pmap, va, M_WAITOK);
+ mpte = pmap_allocpte(pmap, va, flags);
+ if (mpte == NULL) {
+ KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
+ ("pmap_allocpte failed with sleep allowed"));
+ sched_unpin();
+ rw_wunlock(&pvh_global_lock);
+ PMAP_UNLOCK(pmap);
+ return (KERN_RESOURCE_SHORTAGE);
+ }
}
pde = pmap_pde(pmap, va);
@@ -3567,7 +3666,7 @@
if (pv == NULL)
pv = get_pv_entry(pmap, FALSE);
pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
pa |= PG_MANAGED;
} else if (pv != NULL)
free_pv_entry(pmap, pv);
@@ -3588,7 +3687,7 @@
if ((newpte & PG_MANAGED) != 0)
vm_page_aflag_set(m, PGA_WRITEABLE);
}
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
if ((prot & VM_PROT_EXECUTE) == 0)
newpte |= pg_nx;
#endif
@@ -3605,7 +3704,7 @@
*/
if ((origpte & ~(PG_M|PG_A)) != newpte) {
newpte |= PG_A;
- if ((access & VM_PROT_WRITE) != 0)
+ if ((flags & VM_PROT_WRITE) != 0)
newpte |= PG_M;
if (origpte & PG_V) {
invlva = FALSE;
@@ -3615,7 +3714,7 @@
vm_page_aflag_set(om, PGA_REFERENCED);
if (opa != VM_PAGE_TO_PHYS(m))
invlva = TRUE;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
if ((origpte & PG_NX) == 0 &&
(newpte & PG_NX) != 0)
invlva = TRUE;
@@ -3650,6 +3749,7 @@
sched_unpin();
rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
+ return (KERN_SUCCESS);
}
/*
@@ -3685,7 +3785,7 @@
return (FALSE);
}
}
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
if ((prot & VM_PROT_EXECUTE) == 0)
newpde |= pg_nx;
#endif
@@ -3698,7 +3798,8 @@
pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
/*
- * Map the superpage.
+ * Map the superpage. (This is not a promoted mapping; there will not
+ * be any lingering 4KB page mappings in the TLB.)
*/
pde_store(pde, newpde);
@@ -3728,7 +3829,8 @@
vm_page_t m, mpte;
vm_pindex_t diff, psize;
- VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
+ VM_OBJECT_ASSERT_LOCKED(m_start->object);
+
psize = atop(end - start);
mpte = NULL;
m = m_start;
@@ -3737,8 +3839,7 @@
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
va = start + ptoa(diff);
if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
- (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 &&
- pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 &&
+ m->psind == 1 && pg_ps_enabled &&
pmap_enter_pde(pmap, va, m, prot))
m = &m[NBPDR / PAGE_SIZE - 1];
else
@@ -3776,7 +3877,7 @@
{
pt_entry_t *pte;
vm_paddr_t pa;
- vm_page_t free;
+ struct spglist free;
KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
(m->oflags & VPO_UNMANAGED) != 0,
@@ -3815,7 +3916,7 @@
mpte->wire_count++;
} else {
mpte = _pmap_allocpte(pmap, ptepindex,
- M_NOWAIT);
+ PMAP_ENTER_NOSLEEP);
if (mpte == NULL)
return (mpte);
}
@@ -3845,10 +3946,10 @@
if ((m->oflags & VPO_UNMANAGED) == 0 &&
!pmap_try_insert_pv_entry(pmap, va, m)) {
if (mpte != NULL) {
- free = NULL;
+ SLIST_INIT(&free);
if (pmap_unwire_ptp(pmap, mpte, &free)) {
pmap_invalidate_page(pmap, va);
- pmap_free_zero_pages(free);
+ pmap_free_zero_pages(&free);
}
mpte = NULL;
@@ -3862,7 +3963,7 @@
pmap->pm_stats.resident_count++;
pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
if ((prot & VM_PROT_EXECUTE) == 0)
pa |= pg_nx;
#endif
@@ -3906,7 +4007,7 @@
vm_page_t p;
int pat_mode;
- VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
+ VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
("pmap_object_init_pt: non-device object"));
if (pseflag &&
@@ -3966,59 +4067,100 @@
}
/*
- * Routine: pmap_change_wiring
- * Function: Change the wiring attribute for a map/virtual-address
- * pair.
- * In/out conditions:
- * The mapping must already exist in the pmap.
+ * Clear the wired attribute from the mappings for the specified range of
+ * addresses in the given pmap. Every valid mapping within that range
+ * must have the wired attribute set. In contrast, invalid mappings
+ * cannot have the wired attribute set, so they are ignored.
+ *
+ * The wired attribute of the page table entry is not a hardware feature,
+ * so there is no need to invalidate any TLB entries.
*/
void
-pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
+pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
+ vm_offset_t pdnxt;
pd_entry_t *pde;
pt_entry_t *pte;
- boolean_t are_queues_locked;
+ boolean_t pv_lists_locked;
- are_queues_locked = FALSE;
-retry:
+ if (pmap_is_current(pmap))
+ pv_lists_locked = FALSE;
+ else {
+ pv_lists_locked = TRUE;
+resume:
+ rw_wlock(&pvh_global_lock);
+ sched_pin();
+ }
PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, va);
- if ((*pde & PG_PS) != 0) {
- if (!wired != ((*pde & PG_W) == 0)) {
- if (!are_queues_locked) {
- are_queues_locked = TRUE;
- if (!rw_try_wlock(&pvh_global_lock)) {
- PMAP_UNLOCK(pmap);
- rw_wlock(&pvh_global_lock);
- goto retry;
+ for (; sva < eva; sva = pdnxt) {
+ pdnxt = (sva + NBPDR) & ~PDRMASK;
+ if (pdnxt < sva)
+ pdnxt = eva;
+ pde = pmap_pde(pmap, sva);
+ if ((*pde & PG_V) == 0)
+ continue;
+ if ((*pde & PG_PS) != 0) {
+ if ((*pde & PG_W) == 0)
+ panic("pmap_unwire: pde %#jx is missing PG_W",
+ (uintmax_t)*pde);
+
+ /*
+ * Are we unwiring the entire large page? If not,
+ * demote the mapping and fall through.
+ */
+ if (sva + NBPDR == pdnxt && eva >= pdnxt) {
+ /*
+ * Regardless of whether a pde (or pte) is 32
+ * or 64 bits in size, PG_W is among the least
+ * significant 32 bits.
+ */
+ atomic_clear_int((u_int *)pde, PG_W);
+ pmap->pm_stats.wired_count -= NBPDR /
+ PAGE_SIZE;
+ continue;
+ } else {
+ if (!pv_lists_locked) {
+ pv_lists_locked = TRUE;
+ if (!rw_try_wlock(&pvh_global_lock)) {
+ PMAP_UNLOCK(pmap);
+ /* Repeat sva. */
+ goto resume;
+ }
+ sched_pin();
}
+ if (!pmap_demote_pde(pmap, pde, sva))
+ panic("pmap_unwire: demotion failed");
}
- if (!pmap_demote_pde(pmap, pde, va))
- panic("pmap_change_wiring: demotion failed");
- } else
- goto out;
+ }
+ if (pdnxt > eva)
+ pdnxt = eva;
+ for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
+ sva += PAGE_SIZE) {
+ if ((*pte & PG_V) == 0)
+ continue;
+ if ((*pte & PG_W) == 0)
+ panic("pmap_unwire: pte %#jx is missing PG_W",
+ (uintmax_t)*pte);
+
+ /*
+ * PG_W must be cleared atomically. Although the pmap
+ * lock synchronizes access to PG_W, another processor
+ * could be setting PG_M and/or PG_A concurrently.
+ *
+ * PG_W is among the least significant 32 bits.
+ */
+ atomic_clear_int((u_int *)pte, PG_W);
+ pmap->pm_stats.wired_count--;
+ }
}
- pte = pmap_pte(pmap, va);
-
- if (wired && !pmap_pte_w(pte))
- pmap->pm_stats.wired_count++;
- else if (!wired && pmap_pte_w(pte))
- pmap->pm_stats.wired_count--;
-
- /*
- * Wiring is not a hardware characteristic so there is no need to
- * invalidate TLB.
- */
- pmap_pte_set_w(pte, wired);
- pmap_pte_release(pte);
-out:
- if (are_queues_locked)
+ if (pv_lists_locked) {
+ sched_unpin();
rw_wunlock(&pvh_global_lock);
+ }
PMAP_UNLOCK(pmap);
}
-
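A hedged usage sketch of the replacement interface; the wrapper is
illustrative, and per the header comment above, every valid mapping in the
range must already be wired:

    /* Hypothetical caller: drop the wired attribute for a whole region. */
    static void
    unwire_region_sketch(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
    {

            pmap_unwire(pmap, trunc_page(sva), round_page(eva));
    }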
/*
* Copy the range specified by src_addr/len
* from the source map to the range dst_addr/len
@@ -4031,7 +4173,7 @@
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t src_addr)
{
- vm_page_t free;
+ struct spglist free;
vm_offset_t addr;
vm_offset_t end_addr = src_addr + len;
vm_offset_t pdnxt;
@@ -4070,6 +4212,8 @@
continue;
if (srcptepaddr & PG_PS) {
+ if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr)
+ continue;
if (dst_pmap->pm_pdir[ptepindex] == 0 &&
((srcptepaddr & PG_MANAGED) == 0 ||
pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
@@ -4098,7 +4242,7 @@
*/
if ((ptetemp & PG_MANAGED) != 0) {
dstmpte = pmap_allocpte(dst_pmap, addr,
- M_NOWAIT);
+ PMAP_ENTER_NOSLEEP);
if (dstmpte == NULL)
goto out;
dst_pte = pmap_pte_quick(dst_pmap, addr);
@@ -4114,12 +4258,12 @@
PG_A);
dst_pmap->pm_stats.resident_count++;
} else {
- free = NULL;
+ SLIST_INIT(&free);
if (pmap_unwire_ptp(dst_pmap, dstmpte,
&free)) {
pmap_invalidate_page(dst_pmap,
addr);
- pmap_free_zero_pages(free);
+ pmap_free_zero_pages(&free);
}
goto out;
}
@@ -4243,12 +4387,12 @@
if (*sysmaps->CMAP2)
panic("pmap_copy_page: CMAP2 busy");
sched_pin();
- invlpg((u_int)sysmaps->CADDR1);
- invlpg((u_int)sysmaps->CADDR2);
*sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A |
pmap_cache_bits(src->md.pat_mode, 0);
+ invlcaddr(sysmaps->CADDR1);
*sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M |
pmap_cache_bits(dst->md.pat_mode, 0);
+ invlcaddr(sysmaps->CADDR2);
bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
*sysmaps->CMAP1 = 0;
*sysmaps->CMAP2 = 0;
@@ -4256,6 +4400,8 @@
mtx_unlock(&sysmaps->lock);
}
+int unmapped_buf_allowed = 1;
+
void
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
vm_offset_t b_offset, int xfersize)
@@ -4274,8 +4420,6 @@
panic("pmap_copy_pages: CMAP2 busy");
sched_pin();
while (xfersize > 0) {
- invlpg((u_int)sysmaps->CADDR1);
- invlpg((u_int)sysmaps->CADDR2);
a_pg = ma[a_offset >> PAGE_SHIFT];
a_pg_offset = a_offset & PAGE_MASK;
cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
@@ -4283,9 +4427,11 @@
b_pg_offset = b_offset & PAGE_MASK;
cnt = min(cnt, PAGE_SIZE - b_pg_offset);
*sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(a_pg) | PG_A |
- pmap_cache_bits(b_pg->md.pat_mode, 0);
+ pmap_cache_bits(a_pg->md.pat_mode, 0);
+ invlcaddr(sysmaps->CADDR1);
*sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(b_pg) | PG_A |
PG_M | pmap_cache_bits(b_pg->md.pat_mode, 0);
+ invlcaddr(sysmaps->CADDR2);
a_cp = sysmaps->CADDR1 + a_pg_offset;
b_cp = sysmaps->CADDR2 + b_pg_offset;
bcopy(a_cp, b_cp, cnt);
@@ -4318,7 +4464,7 @@
("pmap_page_exists_quick: page %p is not managed", m));
rv = FALSE;
rw_wlock(&pvh_global_lock);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
if (PV_PMAP(pv) == pmap) {
rv = TRUE;
break;
@@ -4329,7 +4475,7 @@
}
if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
if (PV_PMAP(pv) == pmap) {
rv = TRUE;
break;
@@ -4381,7 +4527,7 @@
rw_assert(&pvh_global_lock, RA_WLOCKED);
sched_pin();
- TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pte = pmap_pte_quick(pmap, pv->pv_va);
@@ -4424,11 +4570,11 @@
pmap_remove_pages(pmap_t pmap)
{
pt_entry_t *pte, tpte;
- vm_page_t free = NULL;
vm_page_t m, mpte, mt;
pv_entry_t pv;
struct md_page *pvh;
struct pv_chunk *pc, *npc;
+ struct spglist free;
int field, idx;
int32_t bit;
uint32_t inuse, bitmask;
@@ -4438,6 +4584,7 @@
printf("warning: pmap_remove_pages called with non-current pmap\n");
return;
}
+ SLIST_INIT(&free);
rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
sched_pin();
@@ -4508,7 +4655,7 @@
if ((tpte & PG_PS) != 0) {
pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
pvh = pa_to_pvh(tpte & PG_PS_FRAME);
- TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
+ TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
if (TAILQ_EMPTY(&pvh->pv_list)) {
for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
if (TAILQ_EMPTY(&mt->md.pv_list))
@@ -4526,7 +4673,7 @@
}
} else {
pmap->pm_stats.resident_count--;
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
if (TAILQ_EMPTY(&m->md.pv_list) &&
(m->flags & PG_FICTITIOUS) == 0) {
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -4546,7 +4693,7 @@
pmap_invalidate_all(pmap);
rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
- pmap_free_zero_pages(free);
+ pmap_free_zero_pages(&free);
}
/*
@@ -4564,13 +4711,12 @@
("pmap_is_modified: page %p is not managed", m));
/*
- * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+ * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* concurrently set while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no PTEs can have PG_M set.
*/
- VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
- if ((m->oflags & VPO_BUSY) == 0 &&
- (m->aflags & PGA_WRITEABLE) == 0)
+ VM_OBJECT_ASSERT_WLOCKED(m->object);
+ if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return (FALSE);
rw_wlock(&pvh_global_lock);
rv = pmap_is_modified_pvh(&m->md) ||
@@ -4596,7 +4742,7 @@
rw_assert(&pvh_global_lock, RA_WLOCKED);
rv = FALSE;
sched_pin();
- TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pte = pmap_pte_quick(pmap, pv->pv_va);
@@ -4669,7 +4815,7 @@
rw_assert(&pvh_global_lock, RA_WLOCKED);
rv = FALSE;
sched_pin();
- TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pte = pmap_pte_quick(pmap, pv->pv_va);
@@ -4699,13 +4845,12 @@
("pmap_remove_write: page %p is not managed", m));
/*
- * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
- * another thread while the object is locked. Thus, if PGA_WRITEABLE
- * is clear, no page table entries need updating.
+ * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
+ * set by another thread while the object is locked. Thus,
+ * if PGA_WRITEABLE is clear, no page table entries need updating.
*/
- VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
- if ((m->oflags & VPO_BUSY) == 0 &&
- (m->aflags & PGA_WRITEABLE) == 0)
+ VM_OBJECT_ASSERT_WLOCKED(m->object);
+ if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
sched_pin();
@@ -4712,7 +4857,7 @@
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
+ TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
va = pv->pv_va;
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
@@ -4722,7 +4867,7 @@
PMAP_UNLOCK(pmap);
}
small_mappings:
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pde = pmap_pde(pmap, pv->pv_va);
@@ -4751,6 +4896,8 @@
rw_wunlock(&pvh_global_lock);
}
+#define PMAP_TS_REFERENCED_MAX 5
+
/*
* pmap_ts_referenced:
*
@@ -4767,73 +4914,88 @@
pmap_ts_referenced(vm_page_t m)
{
struct md_page *pvh;
- pv_entry_t pv, pvf, pvn;
+ pv_entry_t pv, pvf;
pmap_t pmap;
- pd_entry_t oldpde, *pde;
+ pd_entry_t *pde;
pt_entry_t *pte;
- vm_offset_t va;
+ vm_paddr_t pa;
int rtval = 0;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_ts_referenced: page %p is not managed", m));
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ pa = VM_PAGE_TO_PHYS(m);
+ pvh = pa_to_pvh(pa);
rw_wlock(&pvh_global_lock);
sched_pin();
- if ((m->flags & PG_FICTITIOUS) != 0)
+ if ((m->flags & PG_FICTITIOUS) != 0 ||
+ (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
goto small_mappings;
- TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) {
- va = pv->pv_va;
+ pv = pvf;
+ do {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, va);
- oldpde = *pde;
- if ((oldpde & PG_A) != 0) {
- if (pmap_demote_pde(pmap, pde, va)) {
- if ((oldpde & PG_W) == 0) {
- /*
- * Remove the mapping to a single page
- * so that a subsequent access may
- * repromote. Since the underlying
- * page table page is fully populated,
- * this removal never frees a page
- * table page.
- */
- va += VM_PAGE_TO_PHYS(m) - (oldpde &
- PG_PS_FRAME);
- pmap_remove_page(pmap, va, NULL);
- rtval++;
- if (rtval > 4) {
- PMAP_UNLOCK(pmap);
- goto out;
- }
- }
+ pde = pmap_pde(pmap, pv->pv_va);
+ if ((*pde & PG_A) != 0) {
+ /*
+ * Since this reference bit is shared by either 1024
+ * or 512 4KB pages, it should not be cleared every
+ * time it is tested. Apply a simple "hash" function
+ * on the physical page number, the virtual superpage
+ * number, and the pmap address to select one 4KB page
+ * out of the 1024 or 512 on which testing the
+ * reference bit will result in clearing that bit.
+ * This function is designed to avoid the selection of
+ * the same 4KB page for every 2- or 4MB page mapping.
+ *
+ * On demotion, a mapping that hasn't been referenced
+ * is simply destroyed. To avoid the possibility of a
+ * subsequent page fault on a demoted wired mapping,
+ * always leave its reference bit set. Moreover,
+ * since the superpage is wired, the current state of
+ * its reference bit won't affect page replacement.
+ */
+ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PDRSHIFT) ^
+ (uintptr_t)pmap) & (NPTEPG - 1)) == 0 &&
+ (*pde & PG_W) == 0) {
+ atomic_clear_int((u_int *)pde, PG_A);
+ pmap_invalidate_page(pmap, pv->pv_va);
}
+ rtval++;
}
PMAP_UNLOCK(pmap);
- }
+ /* Rotate the PV list if it has more than one entry. */
+ if (TAILQ_NEXT(pv, pv_next) != NULL) {
+ TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
+ TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
+ }
+ if (rtval >= PMAP_TS_REFERENCED_MAX)
+ goto out;
+ } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
small_mappings:
- if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
- pvf = pv;
- do {
- pvn = TAILQ_NEXT(pv, pv_list);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, pv->pv_va);
- KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:"
- " found a 4mpage in page %p's pv list", m));
- pte = pmap_pte_quick(pmap, pv->pv_va);
- if ((*pte & PG_A) != 0) {
- atomic_clear_int((u_int *)pte, PG_A);
- pmap_invalidate_page(pmap, pv->pv_va);
- rtval++;
- if (rtval > 4)
- pvn = NULL;
- }
- PMAP_UNLOCK(pmap);
- } while ((pv = pvn) != NULL && pv != pvf);
- }
+ if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
+ goto out;
+ pv = pvf;
+ do {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pde = pmap_pde(pmap, pv->pv_va);
+ KASSERT((*pde & PG_PS) == 0,
+ ("pmap_ts_referenced: found a 4mpage in page %p's pv list",
+ m));
+ pte = pmap_pte_quick(pmap, pv->pv_va);
+ if ((*pte & PG_A) != 0) {
+ atomic_clear_int((u_int *)pte, PG_A);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ rtval++;
+ }
+ PMAP_UNLOCK(pmap);
+ /* Rotate the PV list if it has more than one entry. */
+ if (TAILQ_NEXT(pv, pv_next) != NULL) {
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ }
+ } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval <
+ PMAP_TS_REFERENCED_MAX);
out:
sched_unpin();
rw_wunlock(&pvh_global_lock);
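To make the superpage reference-bit selection above concrete, the test
reduces to the hedged helper below; the expression is copied from the loop,
but the function wrapper itself is illustrative and not part of the change:

    /*
     * Returns nonzero for roughly 1 of the NPTEPG (1024 non-PAE, 512 PAE)
     * (pa, va, pmap) combinations, so repeated pmap_ts_referenced() calls
     * clear the shared PG_A bit for only a small slice of the mappings.
     */
    static __inline int
    ts_ref_selected_sketch(vm_paddr_t pa, vm_offset_t va, pmap_t pmap)
    {

            return ((((pa >> PAGE_SHIFT) ^ (va >> PDRSHIFT) ^
                (uintptr_t)pmap) & (NPTEPG - 1)) == 0);
    }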
@@ -4841,6 +5003,112 @@
}
/*
+ * Apply the given advice to the specified range of addresses within the
+ * given pmap. Depending on the advice, clear the referenced and/or
+ * modified flags in each mapping and set the mapped page's dirty field.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+ pd_entry_t oldpde, *pde;
+ pt_entry_t *pte;
+ vm_offset_t pdnxt;
+ vm_page_t m;
+ boolean_t anychanged, pv_lists_locked;
+
+ if (advice != MADV_DONTNEED && advice != MADV_FREE)
+ return;
+ if (pmap_is_current(pmap))
+ pv_lists_locked = FALSE;
+ else {
+ pv_lists_locked = TRUE;
+resume:
+ rw_wlock(&pvh_global_lock);
+ sched_pin();
+ }
+ anychanged = FALSE;
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = pdnxt) {
+ pdnxt = (sva + NBPDR) & ~PDRMASK;
+ if (pdnxt < sva)
+ pdnxt = eva;
+ pde = pmap_pde(pmap, sva);
+ oldpde = *pde;
+ if ((oldpde & PG_V) == 0)
+ continue;
+ else if ((oldpde & PG_PS) != 0) {
+ if ((oldpde & PG_MANAGED) == 0)
+ continue;
+ if (!pv_lists_locked) {
+ pv_lists_locked = TRUE;
+ if (!rw_try_wlock(&pvh_global_lock)) {
+ if (anychanged)
+ pmap_invalidate_all(pmap);
+ PMAP_UNLOCK(pmap);
+ goto resume;
+ }
+ sched_pin();
+ }
+ if (!pmap_demote_pde(pmap, pde, sva)) {
+ /*
+ * The large page mapping was destroyed.
+ */
+ continue;
+ }
+
+ /*
+ * Unless the page mappings are wired, remove the
+ * mapping to a single page so that a subsequent
+ * access may repromote. Since the underlying page
+ * table page is fully populated, this removal never
+ * frees a page table page.
+ */
+ if ((oldpde & PG_W) == 0) {
+ pte = pmap_pte_quick(pmap, sva);
+ KASSERT((*pte & PG_V) != 0,
+ ("pmap_advise: invalid PTE"));
+ pmap_remove_pte(pmap, pte, sva, NULL);
+ anychanged = TRUE;
+ }
+ }
+ if (pdnxt > eva)
+ pdnxt = eva;
+ for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
+ sva += PAGE_SIZE) {
+ if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
+ PG_V))
+ continue;
+ else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+ if (advice == MADV_DONTNEED) {
+ /*
+ * Future calls to pmap_is_modified()
+ * can be avoided by making the page
+ * dirty now.
+ */
+ m = PHYS_TO_VM_PAGE(*pte & PG_FRAME);
+ vm_page_dirty(m);
+ }
+ atomic_clear_int((u_int *)pte, PG_M | PG_A);
+ } else if ((*pte & PG_A) != 0)
+ atomic_clear_int((u_int *)pte, PG_A);
+ else
+ continue;
+ if ((*pte & PG_G) != 0)
+ pmap_invalidate_page(pmap, sva);
+ else
+ anychanged = TRUE;
+ }
+ }
+ if (anychanged)
+ pmap_invalidate_all(pmap);
+ if (pv_lists_locked) {
+ sched_unpin();
+ rw_wunlock(&pvh_global_lock);
+ }
+ PMAP_UNLOCK(pmap);
+}
+
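pmap_advise() above is the machine-dependent backend of madvise(2). A hedged user-space example of the path that reaches it (standard FreeBSD API; the mapping size and advice value are illustrative):

#include <sys/mman.h>
#include <err.h>
#include <string.h>

int
main(void)
{
	size_t len = 4 * 4096;
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	memset(p, 0xa5, len);

	/*
	 * MADV_FREE (or MADV_DONTNEED) eventually reaches pmap_advise(),
	 * which clears PG_A (and PG_M for MADV_DONTNEED, dirtying the
	 * vm_page first) on each mapped 4KB page in the range.
	 */
	if (madvise(p, len, MADV_FREE) != 0)
		err(1, "madvise");
	return (0);
}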
+/*
* Clear the modify bits on the specified physical page.
*/
void
@@ -4855,14 +5123,14 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
- VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
- KASSERT((m->oflags & VPO_BUSY) == 0,
- ("pmap_clear_modify: page %p is busy", m));
+ VM_OBJECT_ASSERT_WLOCKED(m->object);
+ KASSERT(!vm_page_xbusied(m),
+ ("pmap_clear_modify: page %p is exclusive busied", m));
/*
* If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
* If the object containing the page is locked and the page is not
- * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+ * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
@@ -4871,7 +5139,7 @@
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
+ TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
va = pv->pv_va;
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
@@ -4908,7 +5176,7 @@
PMAP_UNLOCK(pmap);
}
small_mappings:
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pde = pmap_pde(pmap, pv->pv_va);
@@ -4931,73 +5199,6 @@
}
/*
- * pmap_clear_reference:
- *
- * Clear the reference bit on the specified physical page.
- */
-void
-pmap_clear_reference(vm_page_t m)
-{
- struct md_page *pvh;
- pv_entry_t next_pv, pv;
- pmap_t pmap;
- pd_entry_t oldpde, *pde;
- pt_entry_t *pte;
- vm_offset_t va;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_clear_reference: page %p is not managed", m));
- rw_wlock(&pvh_global_lock);
- sched_pin();
- if ((m->flags & PG_FICTITIOUS) != 0)
- goto small_mappings;
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
- va = pv->pv_va;
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, va);
- oldpde = *pde;
- if ((oldpde & PG_A) != 0) {
- if (pmap_demote_pde(pmap, pde, va)) {
- /*
- * Remove the mapping to a single page so
- * that a subsequent access may repromote.
- * Since the underlying page table page is
- * fully populated, this removal never frees
- * a page table page.
- */
- va += VM_PAGE_TO_PHYS(m) - (oldpde &
- PG_PS_FRAME);
- pmap_remove_page(pmap, va, NULL);
- }
- }
- PMAP_UNLOCK(pmap);
- }
-small_mappings:
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, pv->pv_va);
- KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found"
- " a 4mpage in page %p's pv list", m));
- pte = pmap_pte_quick(pmap, pv->pv_va);
- if ((*pte & PG_A) != 0) {
- /*
- * Regardless of whether a pte is 32 or 64 bits
- * in size, PG_A is among the least significant
- * 32 bits.
- */
- atomic_clear_int((u_int *)pte, PG_A);
- pmap_invalidate_page(pmap, pv->pv_va);
- }
- PMAP_UNLOCK(pmap);
- }
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
-}
-
-/*
* Miscellaneous support routines follow
*/
@@ -5044,24 +5245,51 @@
void *
pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
{
+ struct pmap_preinit_mapping *ppim;
vm_offset_t va, offset;
vm_size_t tmpsize;
+ int i;
offset = pa & PAGE_MASK;
- size = roundup(offset + size, PAGE_SIZE);
+ size = round_page(offset + size);
pa = pa & PG_FRAME;
if (pa < KERNLOAD && pa + size <= KERNLOAD)
va = KERNBASE + pa;
- else
- va = kmem_alloc_nofault(kernel_map, size);
- if (!va)
- panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
-
+ else if (!pmap_initialized) {
+ va = 0;
+ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
+ ppim = pmap_preinit_mapping + i;
+ if (ppim->va == 0) {
+ ppim->pa = pa;
+ ppim->sz = size;
+ ppim->mode = mode;
+ ppim->va = virtual_avail;
+ virtual_avail += size;
+ va = ppim->va;
+ break;
+ }
+ }
+ if (va == 0)
+ panic("%s: too many preinit mappings", __func__);
+ } else {
+ /*
+ * If we have a preinit mapping, re-use it.
+ */
+ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
+ ppim = pmap_preinit_mapping + i;
+ if (ppim->pa == pa && ppim->sz == size &&
+ ppim->mode == mode)
+ return ((void *)(ppim->va + offset));
+ }
+ va = kva_alloc(size);
+ if (va == 0)
+ panic("%s: Couldn't allocate KVA", __func__);
+ }
for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
- pmap_invalidate_cache_range(va, va + size);
+ pmap_invalidate_cache_range(va, va + size, FALSE);
return ((void *)(va + offset));
}
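The preinit table consulted above can be pictured as a small fixed array keyed by (pa, size, mode). A simplified stand-alone sketch of the lookup, assuming a hypothetical table size and flattened 32-bit types:

#include <stdint.h>

#define PMAP_PREINIT_MAPPING_COUNT 8	/* illustrative count */

struct ppim {
	uint32_t pa, va, sz;
	int	 mode;
};

static struct ppim table[PMAP_PREINIT_MAPPING_COUNT];

/* Return an existing preinit VA for (pa, sz, mode), or 0 if none. */
static uint32_t
ppim_lookup(uint32_t pa, uint32_t sz, int mode)
{
	int i;

	for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++)
		if (table[i].pa == pa && table[i].sz == sz &&
		    table[i].mode == mode)
			return (table[i].va);
	return (0);
}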
@@ -5082,14 +5310,31 @@
void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
- vm_offset_t base, offset;
+ struct pmap_preinit_mapping *ppim;
+ vm_offset_t offset;
+ int i;
if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
return;
- base = trunc_page(va);
offset = va & PAGE_MASK;
- size = roundup(offset + size, PAGE_SIZE);
- kmem_free(kernel_map, base, size);
+ size = round_page(offset + size);
+ va = trunc_page(va);
+ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
+ ppim = pmap_preinit_mapping + i;
+ if (ppim->va == va && ppim->sz == size) {
+ if (pmap_initialized)
+ return;
+ ppim->pa = 0;
+ ppim->va = 0;
+ ppim->sz = 0;
+ ppim->mode = 0;
+ if (va + size == virtual_avail)
+ virtual_avail = va;
+ return;
+ }
+ }
+ if (pmap_initialized)
+ kva_free(va, size);
}
/*
@@ -5129,8 +5374,10 @@
{
struct sysmaps *sysmaps;
vm_offset_t sva, eva;
+ bool useclflushopt;
- if ((cpu_feature & CPUID_CLFSH) != 0) {
+ useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0;
+ if (useclflushopt || (cpu_feature & CPUID_CLFSH) != 0) {
sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
mtx_lock(&sysmaps->lock);
if (*sysmaps->CMAP2)
@@ -5143,14 +5390,25 @@
eva = sva + PAGE_SIZE;
/*
- * Use mfence despite the ordering implied by
- * mtx_{un,}lock() because clflush is not guaranteed
- * to be ordered by any other instruction.
+ * Use mfence or sfence despite the ordering implied by
+ * mtx_{un,}lock() because clflush on non-Intel CPUs
+ * and clflushopt are not guaranteed to be ordered by
+ * any other instruction.
*/
- mfence();
- for (; sva < eva; sva += cpu_clflush_line_size)
- clflush(sva);
- mfence();
+ if (useclflushopt)
+ sfence();
+ else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+ mfence();
+ for (; sva < eva; sva += cpu_clflush_line_size) {
+ if (useclflushopt)
+ clflushopt(sva);
+ else
+ clflush(sva);
+ }
+ if (useclflushopt)
+ sfence();
+ else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+ mfence();
*sysmaps->CMAP2 = 0;
sched_unpin();
mtx_unlock(&sysmaps->lock);
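The fencing discipline above -- fence, flush each cache line, fence -- can be shown in isolation. A user-space sketch using GCC-style inline assembly (x86 only; the helper names and the fixed mfence choice are illustrative simplifications of the vendor/clflushopt selection logic above):

#include <stdint.h>

static inline void
mfence(void)
{
	__asm__ __volatile__("mfence" ::: "memory");
}

static inline void
clflush(const void *p)
{
	__asm__ __volatile__("clflush %0" : "+m" (*(volatile char *)p));
}

/*
 * Flush [sva, eva) from the caches, bracketing the clflush loop with
 * fences because clflush is not ordered with respect to other stores.
 * line_size would normally come from CPUID (cpu_clflush_line_size).
 */
static void
flush_range(uintptr_t sva, uintptr_t eva, unsigned line_size)
{
	uintptr_t va;

	mfence();
	for (va = sva & ~((uintptr_t)line_size - 1); va < eva;
	    va += line_size)
		clflush((void *)va);
	mfence();
}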
@@ -5179,7 +5437,7 @@
base = trunc_page(va);
offset = va & PAGE_MASK;
- size = roundup(offset + size, PAGE_SIZE);
+ size = round_page(offset + size);
/*
* Only supported on kernel virtual addresses above the recursive map.
@@ -5267,7 +5525,7 @@
*/
if (changed) {
pmap_invalidate_range(kernel_pmap, base, tmpva);
- pmap_invalidate_cache_range(base, tmpva);
+ pmap_invalidate_cache_range(base, tmpva, FALSE);
}
return (0);
}
@@ -5342,7 +5600,7 @@
CPU_CLR(cpuid, &oldpmap->pm_active);
CPU_SET(cpuid, &pmap->pm_active);
#endif
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
cr3 = vtophys(pmap->pm_pdpt);
#else
cr3 = vtophys(pmap->pm_pdir);
@@ -5486,7 +5744,7 @@
printf("pa %x", pa);
m = PHYS_TO_VM_PAGE(pa);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
pads(pmap);
Modified: trunk/sys/i386/i386/ptrace_machdep.c
===================================================================
--- trunk/sys/i386/i386/ptrace_machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/ptrace_machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2005 Doug Rabson
* All rights reserved.
@@ -26,14 +27,16 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/ptrace_machdep.c 286311 2015-08-05 08:17:10Z kib $");
#include "opt_cpu.h"
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
+#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
@@ -41,9 +44,82 @@
#define CPU_ENABLE_SSE
#endif
-int
-cpu_ptrace(struct thread *td, int req, void *addr, int data)
+#ifdef CPU_ENABLE_SSE
+static int
+cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data)
{
+ struct ptrace_xstate_info info;
+ char *savefpu;
+ int error;
+
+ if (!use_xsave)
+ return (EOPNOTSUPP);
+
+ switch (req) {
+ case PT_GETXSTATE_OLD:
+ npxgetregs(td);
+ savefpu = (char *)(get_pcb_user_save_td(td) + 1);
+ error = copyout(savefpu, addr,
+ cpu_max_ext_state_size - sizeof(union savefpu));
+ break;
+
+ case PT_SETXSTATE_OLD:
+ if (data > cpu_max_ext_state_size - sizeof(union savefpu)) {
+ error = EINVAL;
+ break;
+ }
+ savefpu = malloc(data, M_TEMP, M_WAITOK);
+ error = copyin(addr, savefpu, data);
+ if (error == 0) {
+ npxgetregs(td);
+ error = npxsetxstate(td, savefpu, data);
+ }
+ free(savefpu, M_TEMP);
+ break;
+
+ case PT_GETXSTATE_INFO:
+ if (data != sizeof(info)) {
+ error = EINVAL;
+ break;
+ }
+ info.xsave_len = cpu_max_ext_state_size;
+ info.xsave_mask = xsave_mask;
+ error = copyout(&info, addr, data);
+ break;
+
+ case PT_GETXSTATE:
+ npxgetregs(td);
+ savefpu = (char *)(get_pcb_user_save_td(td));
+ error = copyout(savefpu, addr, cpu_max_ext_state_size);
+ break;
+
+ case PT_SETXSTATE:
+ if (data < sizeof(union savefpu) ||
+ data > cpu_max_ext_state_size) {
+ error = EINVAL;
+ break;
+ }
+ savefpu = malloc(data, M_TEMP, M_WAITOK);
+ error = copyin(addr, savefpu, data);
+ if (error == 0)
+ error = npxsetregs(td, (union savefpu *)savefpu,
+ savefpu + sizeof(union savefpu), data -
+ sizeof(union savefpu));
+ free(savefpu, M_TEMP);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+#endif
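A hedged userland sketch of the new xstate requests, using PT_GETXSTATE_INFO to discover the XSAVE layout (attach and wait logic omitted; the target must already be stopped under ptrace):

#include <sys/types.h>
#include <sys/ptrace.h>
#include <err.h>
#include <stdio.h>

static void
show_xstate_info(pid_t pid)
{
	struct ptrace_xstate_info info;

	/* The kernel requires data == sizeof(info), as checked above. */
	if (ptrace(PT_GETXSTATE_INFO, pid, (caddr_t)&info,
	    sizeof(info)) == -1)
		err(1, "PT_GETXSTATE_INFO");
	printf("xsave_len=%u xsave_mask=0x%llx\n",
	    (unsigned)info.xsave_len,
	    (unsigned long long)info.xsave_mask);
}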
+
+static int
+cpu_ptrace_xmm(struct thread *td, int req, void *addr, int data)
+{
#ifdef CPU_ENABLE_SSE
struct savexmm *fpstate;
int error;
@@ -51,7 +127,7 @@
if (!cpu_fxsr)
return (EINVAL);
- fpstate = &td->td_pcb->pcb_user_save.sv_xmm;
+ fpstate = &get_pcb_user_save_td(td)->sv_xmm;
switch (req) {
case PT_GETXMMREGS:
npxgetregs(td);
@@ -64,6 +140,14 @@
fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
break;
+ case PT_GETXSTATE_OLD:
+ case PT_SETXSTATE_OLD:
+ case PT_GETXSTATE_INFO:
+ case PT_GETXSTATE:
+ case PT_SETXSTATE:
+ error = cpu_ptrace_xstate(td, req, addr, data);
+ break;
+
default:
return (EINVAL);
}
@@ -73,3 +157,51 @@
return (EINVAL);
#endif
}
+
+int
+cpu_ptrace(struct thread *td, int req, void *addr, int data)
+{
+ struct segment_descriptor *sdp, sd;
+ register_t r;
+ int error;
+
+ switch (req) {
+ case PT_GETXMMREGS:
+ case PT_SETXMMREGS:
+ case PT_GETXSTATE_OLD:
+ case PT_SETXSTATE_OLD:
+ case PT_GETXSTATE_INFO:
+ case PT_GETXSTATE:
+ case PT_SETXSTATE:
+ error = cpu_ptrace_xmm(td, req, addr, data);
+ break;
+
+ case PT_GETFSBASE:
+ case PT_GETGSBASE:
+ sdp = req == PT_GETFSBASE ? &td->td_pcb->pcb_fsd :
+ &td->td_pcb->pcb_gsd;
+ r = sdp->sd_hibase << 24 | sdp->sd_lobase;
+ error = copyout(&r, addr, sizeof(r));
+ break;
+
+ case PT_SETFSBASE:
+ case PT_SETGSBASE:
+ error = copyin(addr, &r, sizeof(r));
+ if (error != 0)
+ break;
+ fill_based_sd(&sd, r);
+ if (req == PT_SETFSBASE) {
+ td->td_pcb->pcb_fsd = sd;
+ td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
+ } else {
+ td->td_pcb->pcb_gsd = sd;
+ td->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL);
+ }
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ return (error);
+}
Modified: trunk/sys/i386/i386/stack_machdep.c
===================================================================
--- trunk/sys/i386/i386/stack_machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/stack_machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2005 Antoine Brodin
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/stack_machdep.c 286396 2015-08-07 04:31:02Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -40,7 +41,7 @@
#include <vm/pmap.h>
static void
-stack_capture(struct stack *st, register_t ebp)
+stack_capture(struct thread *td, struct stack *st, register_t ebp)
{
struct i386_frame *frame;
vm_offset_t callpc;
@@ -56,8 +57,8 @@
if (stack_put(st, callpc) == -1)
break;
if (frame->f_frame <= frame ||
- (vm_offset_t)frame->f_frame >=
- (vm_offset_t)ebp + KSTACK_PAGES * PAGE_SIZE)
+ (vm_offset_t)frame->f_frame >= td->td_kstack +
+ td->td_kstack_pages * PAGE_SIZE)
break;
frame = frame->f_frame;
}
@@ -74,7 +75,7 @@
panic("stack_save_td: running");
ebp = td->td_pcb->pcb_ebp;
- stack_capture(st, ebp);
+ stack_capture(td, st, ebp);
}
void
@@ -83,5 +84,5 @@
register_t ebp;
__asm __volatile("movl %%ebp,%0" : "=r" (ebp));
- stack_capture(st, ebp);
+ stack_capture(curthread, st, ebp);
}
Modified: trunk/sys/i386/i386/support.s
===================================================================
--- trunk/sys/i386/i386/support.s 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/support.s 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1993 The Regents of the University of California.
* All rights reserved.
@@ -26,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/support.s 274648 2014-11-18 12:53:32Z kib $
*/
#include "opt_npx.h"
@@ -62,8 +63,8 @@
stosb
popl %edi
ret
-END(bzero)
-
+END(bzero)
+
ENTRY(sse2_pagezero)
pushl %ebx
movl 8(%esp),%ecx
@@ -181,11 +182,13 @@
* ws at tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
*/
ENTRY(bcopy)
+ pushl %ebp
+ movl %esp,%ebp
pushl %esi
pushl %edi
- movl 12(%esp),%esi
- movl 16(%esp),%edi
- movl 20(%esp),%ecx
+ movl 8(%ebp),%esi
+ movl 12(%ebp),%edi
+ movl 16(%ebp),%ecx
movl %edi,%eax
subl %esi,%eax
@@ -196,12 +199,13 @@
cld /* nope, copy forwards */
rep
movsl
- movl 20(%esp),%ecx
+ movl 16(%ebp),%ecx
andl $3,%ecx /* any bytes left? */
rep
movsb
popl %edi
popl %esi
+ popl %ebp
ret
ALIGN_TEXT
@@ -214,7 +218,7 @@
std
rep
movsb
- movl 20(%esp),%ecx /* copy remainder by 32-bit words */
+ movl 16(%ebp),%ecx /* copy remainder by 32-bit words */
shrl $2,%ecx
subl $3,%esi
subl $3,%edi
@@ -223,6 +227,7 @@
popl %edi
popl %esi
cld
+ popl %ebp
ret
END(bcopy)
@@ -385,16 +390,16 @@
ret
/*
- * casuword. Compare and set user word. Returns -1 or the current value.
+ * casueword. Compare and set user word. Returns -1 on fault,
+ * 0 on non-faulting access. The current value is in *oldp.
*/
-
-ALTENTRY(casuword32)
-ENTRY(casuword)
+ALTENTRY(casueword32)
+ENTRY(casueword)
movl PCPU(CURPCB),%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
movl 4(%esp),%edx /* dst */
movl 8(%esp),%eax /* old */
- movl 12(%esp),%ecx /* new */
+ movl 16(%esp),%ecx /* new */
cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */
ja fusufault
@@ -412,17 +417,20 @@
movl PCPU(CURPCB),%ecx
movl $0,PCB_ONFAULT(%ecx)
+ movl 12(%esp),%edx /* oldp */
+ movl %eax,(%edx)
+ xorl %eax,%eax
ret
-END(casuword32)
-END(casuword)
+END(casueword32)
+END(casueword)
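The casueword() contract differs from the old casuword(): the return value now only signals fault or success, and the observed word travels through a pointer, so a legitimately stored value of -1 is no longer ambiguous. A kernel-style sketch of a caller (the helper itself is hypothetical):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>

/*
 * int casueword32(volatile uint32_t *base, uint32_t oldval,
 *     uint32_t *oldvalp, uint32_t newval);	-- from <sys/systm.h>
 */
static int
try_lock_word(volatile uint32_t *p, uint32_t tid)
{
	uint32_t old;

	if (casueword32(p, 0, &old, tid) == -1)
		return (EFAULT);	/* user address faulted */
	return (old == 0 ? 0 : EBUSY);	/* old holds the prior value */
}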
/*
* Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
- * memory. All these functions are MPSAFE.
+ * memory.
*/
-ALTENTRY(fuword32)
-ENTRY(fuword)
+ALTENTRY(fueword32)
+ENTRY(fueword)
movl PCPU(CURPCB),%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
movl 4(%esp),%edx /* from */
@@ -432,9 +440,12 @@
movl (%edx),%eax
movl $0,PCB_ONFAULT(%ecx)
+ movl 8(%esp),%edx
+ movl %eax,(%edx)
+ xorl %eax,%eax
ret
-END(fuword32)
-END(fuword)
+END(fueword32)
+END(fueword)
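fueword() follows the same convention: -1 always means a fault, and the fetched word arrives through the pointer. A kernel-style sketch mirroring the syscall-argument fetch that trap.c below switches to (the helper name is hypothetical):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>

static int
fetch_code(const void *params, u_int *codep)
{
	long tmp;

	if (fueword(params, &tmp) == -1)
		return (EFAULT);	/* a fault, not a data value */
	*codep = (u_int)tmp;		/* -1 is now a valid datum */
	return (0);
}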
/*
* fuswintr() and suswintr() are specialized variants of fuword16() and
@@ -690,7 +701,7 @@
movl 4(%esp),%eax
lgdt (%eax)
#endif
-
+
/* flush the prefetch q */
jmp 1f
nop
@@ -736,13 +747,13 @@
/* void reset_dbregs() */
ENTRY(reset_dbregs)
- movl $0,%eax
- movl %eax,%dr7 /* disable all breapoints first */
- movl %eax,%dr0
- movl %eax,%dr1
- movl %eax,%dr2
- movl %eax,%dr3
- movl %eax,%dr6
+ movl $0,%eax
+ movl %eax,%dr7 /* disable all breakpoints first */
+ movl %eax,%dr0
+ movl %eax,%dr1
+ movl %eax,%dr2
+ movl %eax,%dr3
+ movl %eax,%dr6
ret
END(reset_dbregs)
Property changes on: trunk/sys/i386/i386/support.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/swtch.s
===================================================================
--- trunk/sys/i386/i386/swtch.s 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/swtch.s 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
@@ -29,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/swtch.s 282065 2015-04-27 08:02:12Z kib $
*/
#include "opt_npx.h"
@@ -174,7 +175,7 @@
/* switch address space */
movl PCB_CR3(%edx),%eax
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
cmpl %eax,IdlePDPT /* Kernel address space? */
#else
cmpl %eax,IdlePTD /* Kernel address space? */
@@ -386,70 +387,6 @@
pushfl
popl PCB_PSL(%ecx)
-#ifdef DEV_NPX
- /*
- * If fpcurthread == NULL, then the npx h/w state is irrelevant and the
- * state had better already be in the pcb. This is true for forks
- * but not for dumps (the old book-keeping with FP flags in the pcb
- * always lost for dumps because the dump pcb has 0 flags).
- *
- * If fpcurthread != NULL, then we have to save the npx h/w state to
- * fpcurthread's pcb and copy it to the requested pcb, or save to the
- * requested pcb and reload. Copying is easier because we would
- * have to handle h/w bugs for reloading. We used to lose the
- * parent's npx state for forks by forgetting to reload.
- */
- pushfl
- CLI
- movl PCPU(FPCURTHREAD),%eax
- testl %eax,%eax
- je 1f
-
- pushl %ecx
- movl TD_PCB(%eax),%eax
- movl PCB_SAVEFPU(%eax),%eax
- pushl %eax
- pushl %eax
- call npxsave
- addl $4,%esp
- popl %eax
- popl %ecx
-
- pushl $PCB_SAVEFPU_SIZE
- leal PCB_USERFPU(%ecx),%ecx
- pushl %ecx
- pushl %eax
- call bcopy
- addl $12,%esp
-1:
- popfl
-#endif /* DEV_NPX */
-
- ret
-END(savectx)
-
-/*
- * suspendctx(pcb)
- * Update pcb, suspending current processor state.
- */
-ENTRY(suspendctx)
- /* Fetch PCB. */
- movl 4(%esp),%ecx
-
- /* Save context by calling savectx(). */
- pushl %ecx
- call savectx
- addl $4,%esp
-
- /* Fetch PCB again. */
- movl 4(%esp),%ecx
-
- /* Update caller's return address and stack pointer. */
- movl (%esp),%eax
- movl %eax,PCB_EIP(%ecx)
- movl %esp,PCB_ESP(%ecx)
-
- /* Save other registers and descriptor tables. */
movl %cr0,%eax
movl %eax,PCB_CR0(%ecx)
movl %cr2,%eax
@@ -482,16 +419,13 @@
movl $1,%eax
ret
-END(suspendctx)
+END(savectx)
/*
- * resumectx(pcb in %esi)
+ * resumectx(pcb) __fastcall
* Resuming processor state from pcb.
*/
ENTRY(resumectx)
- /* Fetch PCB. */
- movl %esi,%ecx
-
/* Restore GDT. */
lgdt PCB_GDT(%ecx)
@@ -547,10 +481,6 @@
movl PCB_DR7(%ecx),%eax
movl %eax,%dr7
-#ifdef DEV_NPX
- /* XXX FIX ME */
-#endif
-
/* Restore other registers */
movl PCB_EDI(%ecx),%edi
movl PCB_ESI(%ecx),%esi
Property changes on: trunk/sys/i386/i386/swtch.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/symbols.raw
===================================================================
--- trunk/sys/i386/i386/symbols.raw 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/symbols.raw 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,7 +1,7 @@
# @(#)symbols.raw 7.6 (Berkeley) 5/8/91
#
-# $FreeBSD$
-#
+# $FreeBSD: stable/10/sys/i386/i386/symbols.raw 253361 2013-07-15 12:18:36Z glebius $
+# $MidnightBSD$
#gdb
@@ -43,7 +43,6 @@
_averunnable
_boottime
#netstat
- _mbstat
_ipstat
_tcb
_tcpstat
Property changes on: trunk/sys/i386/i386/symbols.raw
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/sys_machdep.c
===================================================================
--- trunk/sys/i386/i386/sys_machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/sys_machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
@@ -30,13 +31,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/sys_machdep.c 306961 2016-10-10 11:53:54Z tijl $");
#include "opt_capsicum.h"
#include "opt_kstack_pages.h"
#include <sys/param.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@@ -88,6 +89,27 @@
union descriptor *descs);
static int i386_ldt_grow(struct thread *td, int len);
+void
+fill_based_sd(struct segment_descriptor *sdp, uint32_t base)
+{
+
+ sdp->sd_lobase = base & 0xffffff;
+ sdp->sd_hibase = (base >> 24) & 0xff;
+#ifdef XEN
+ /* need to do nosegneg like Linux */
+ sdp->sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
+#else
+ sdp->sd_lolimit = 0xffff; /* 4GB limit, wraps around */
+#endif
+ sdp->sd_hilimit = 0xf;
+ sdp->sd_type = SDT_MEMRWA;
+ sdp->sd_dpl = SEL_UPL;
+ sdp->sd_p = 1;
+ sdp->sd_xx = 0;
+ sdp->sd_def32 = 1;
+ sdp->sd_gran = 1;
+}
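fill_based_sd() packs a 32-bit base into 24 low and 8 high bits, and the PT_GETFSBASE path in ptrace_machdep.c above reconstructs it with sd_hibase << 24 | sd_lobase. A minimal round-trip check (the bitfield struct is an illustrative stand-in for struct segment_descriptor):

#include <assert.h>
#include <stdint.h>

struct sd_bits {		/* just the base fields, illustrative */
	uint32_t sd_lobase : 24;
	uint32_t sd_hibase : 8;
};

int
main(void)
{
	struct sd_bits sd;
	uint32_t base = 0xbfbfe000, r;

	sd.sd_lobase = base & 0xffffff;		/* as fill_based_sd() */
	sd.sd_hibase = (base >> 24) & 0xff;
	r = (uint32_t)sd.sd_hibase << 24 | sd.sd_lobase; /* as PT_GETFSBASE */
	assert(r == base);
	return (0);
}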
+
#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
int op;
@@ -105,6 +127,7 @@
union {
struct i386_ldt_args largs;
struct i386_ioperm_args iargs;
+ struct i386_get_xfpustate xfpu;
} kargs;
uint32_t base;
struct segment_descriptor sd, *sdp;
@@ -126,10 +149,15 @@
case I386_SET_FSBASE:
case I386_GET_GSBASE:
case I386_SET_GSBASE:
+ case I386_GET_XFPUSTATE:
break;
case I386_SET_IOPERM:
default:
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_CAPFAIL))
+ ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL);
+#endif
return (ECAPMODE);
}
}
@@ -150,6 +178,11 @@
if (kargs.largs.num > MAX_LD || kargs.largs.num <= 0)
return (EINVAL);
break;
+ case I386_GET_XFPUSTATE:
+ if ((error = copyin(uap->parms, &kargs.xfpu,
+ sizeof(struct i386_get_xfpustate))) != 0)
+ return (error);
+ break;
default:
break;
}
@@ -160,18 +193,14 @@
break;
case I386_SET_LDT:
if (kargs.largs.descs != NULL) {
- lp = (union descriptor *)kmem_alloc(kernel_map,
- kargs.largs.num * sizeof(union descriptor));
- if (lp == NULL) {
- error = ENOMEM;
- break;
- }
+ lp = (union descriptor *)malloc(
+ kargs.largs.num * sizeof(union descriptor),
+ M_TEMP, M_WAITOK);
error = copyin(kargs.largs.descs, lp,
kargs.largs.num * sizeof(union descriptor));
if (error == 0)
error = i386_set_ldt(td, &kargs.largs, lp);
- kmem_free(kernel_map, (vm_offset_t)lp,
- kargs.largs.num * sizeof(union descriptor));
+ free(lp, M_TEMP);
} else {
error = i386_set_ldt(td, &kargs.largs, NULL);
}
@@ -195,7 +224,7 @@
break;
case I386_SET_FSBASE:
error = copyin(uap->parms, &base, sizeof(base));
- if (!error) {
+ if (error == 0) {
/*
* Construct a descriptor and store it in the pcb for
* the next context switch. Also store it in the gdt
@@ -202,21 +231,7 @@
* so that the load of tf_fs into %fs will activate it
* at return to userland.
*/
- sd.sd_lobase = base & 0xffffff;
- sd.sd_hibase = (base >> 24) & 0xff;
-#ifdef XEN
- /* need to do nosegneg like Linux */
- sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
-#else
- sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */
-#endif
- sd.sd_hilimit = 0xf;
- sd.sd_type = SDT_MEMRWA;
- sd.sd_dpl = SEL_UPL;
- sd.sd_p = 1;
- sd.sd_xx = 0;
- sd.sd_def32 = 1;
- sd.sd_gran = 1;
+ fill_based_sd(&sd, base);
critical_enter();
td->td_pcb->pcb_fsd = sd;
#ifdef XEN
@@ -236,28 +251,13 @@
break;
case I386_SET_GSBASE:
error = copyin(uap->parms, &base, sizeof(base));
- if (!error) {
+ if (error == 0) {
/*
* Construct a descriptor and store it in the pcb for
* the next context switch. Also store it in the gdt
* because we have to do a load_gs() right now.
*/
- sd.sd_lobase = base & 0xffffff;
- sd.sd_hibase = (base >> 24) & 0xff;
-
-#ifdef XEN
- /* need to do nosegneg like Linux */
- sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
-#else
- sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */
-#endif
- sd.sd_hilimit = 0xf;
- sd.sd_type = SDT_MEMRWA;
- sd.sd_dpl = SEL_UPL;
- sd.sd_p = 1;
- sd.sd_xx = 0;
- sd.sd_def32 = 1;
- sd.sd_gran = 1;
+ fill_based_sd(&sd, base);
critical_enter();
td->td_pcb->pcb_gsd = sd;
#ifdef XEN
@@ -270,6 +270,14 @@
load_gs(GSEL(GUGS_SEL, SEL_UPL));
}
break;
+ case I386_GET_XFPUSTATE:
+ if (kargs.xfpu.len > cpu_max_ext_state_size -
+ sizeof(union savefpu))
+ return (EINVAL);
+ npxgetregs(td);
+ error = copyout((char *)(get_pcb_user_save_td(td) + 1),
+ kargs.xfpu.addr, kargs.xfpu.len);
+ break;
default:
error = EINVAL;
break;
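A userland sketch of the new I386_GET_XFPUSTATE operation (struct fields as declared in machine/sysarch.h; the buffer size here is hypothetical and must not exceed the kernel's cpu_max_ext_state_size - sizeof(union savefpu) check above):

#include <sys/types.h>
#include <machine/sysarch.h>
#include <err.h>

int
main(void)
{
	struct i386_get_xfpustate r;
	char buf[832];			/* hypothetical XSAVE tail size */

	r.addr = buf;
	r.len = sizeof(buf);
	if (sysarch(I386_GET_XFPUSTATE, &r) == -1)
		err(1, "I386_GET_XFPUSTATE");
	return (0);
}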
@@ -294,13 +302,10 @@
0 /* granularity */
};
- ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1));
- if (ext == 0)
- return (ENOMEM);
- bzero(ext, sizeof(struct pcb_ext));
+ ext = (struct pcb_ext *)kmem_malloc(kernel_arena, ctob(IOPAGES+1),
+ M_WAITOK | M_ZERO);
/* -16 is so we can convert a trapframe into vm86trapframe inplace */
- ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) -
- sizeof(struct pcb) - 16;
+ ext->ext_tss.tss_esp0 = (vm_offset_t)td->td_pcb - 16;
ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
/*
* The last byte of the i/o map must be followed by an 0xff byte.
@@ -340,8 +345,9 @@
struct thread *td;
struct i386_ioperm_args *uap;
{
- int i, error;
char *iomap;
+ u_int i;
+ int error;
if ((error = priv_check(td, PRIV_IO)) != 0)
return (error);
@@ -359,7 +365,8 @@
return (error);
iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;
- if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
+ if (uap->start > uap->start + uap->length ||
+ uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
return (EINVAL);
for (i = uap->start; i < uap->start + uap->length; i++) {
@@ -467,13 +474,8 @@
M_SUBPROC, M_WAITOK);
new_ldt->ldt_len = len = NEW_MAX_LD(len);
- new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
- round_page(len * sizeof(union descriptor)));
- if (new_ldt->ldt_base == NULL) {
- free(new_ldt, M_SUBPROC);
- mtx_lock_spin(&dt_lock);
- return (NULL);
- }
+ new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
+ round_page(len * sizeof(union descriptor)), M_WAITOK);
new_ldt->ldt_refcnt = 1;
new_ldt->ldt_active = 0;
@@ -507,13 +509,8 @@
M_SUBPROC, M_WAITOK);
new_ldt->ldt_len = len = NEW_MAX_LD(len);
- new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
- len * sizeof(union descriptor));
- if (new_ldt->ldt_base == NULL) {
- free(new_ldt, M_SUBPROC);
- mtx_lock_spin(&dt_lock);
- return (NULL);
- }
+ new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
+ len * sizeof(union descriptor), M_WAITOK | M_ZERO);
new_ldt->ldt_refcnt = 1;
new_ldt->ldt_active = 0;
@@ -570,7 +567,7 @@
mtx_assert(&dt_lock, MA_OWNED);
if (--pldt->ldt_refcnt == 0) {
mtx_unlock_spin(&dt_lock);
- kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base,
+ kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base,
pldt->ldt_len * sizeof(union descriptor));
free(pldt, M_SUBPROC);
} else
@@ -849,7 +846,7 @@
* free the new object and return.
*/
mtx_unlock_spin(&dt_lock);
- kmem_free(kernel_map,
+ kmem_free(kernel_arena,
(vm_offset_t)new_ldt->ldt_base,
new_ldt->ldt_len * sizeof(union descriptor));
free(new_ldt, M_SUBPROC);
@@ -883,7 +880,7 @@
mtx_unlock_spin(&dt_lock);
#endif
if (old_ldt_base != NULL_LDT_BASE) {
- kmem_free(kernel_map, (vm_offset_t)old_ldt_base,
+ kmem_free(kernel_arena, (vm_offset_t)old_ldt_base,
old_ldt_len * sizeof(union descriptor));
free(new_ldt, M_SUBPROC);
}
Modified: trunk/sys/i386/i386/trap.c
===================================================================
--- trunk/sys/i386/i386/trap.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/trap.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (C) 1994, David Greenman
* Copyright (c) 1990, 1993
@@ -38,7 +39,7 @@
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/trap.c 333370 2018-05-08 17:05:39Z emaste $");
/*
* 386 Trap and System call handling
@@ -105,29 +106,6 @@
#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>
-
-/*
- * This is a hook which is initialised by the dtrace module
- * to handle traps which might occur during DTrace probe
- * execution.
- */
-dtrace_trap_func_t dtrace_trap_func;
-
-dtrace_doubletrap_func_t dtrace_doubletrap_func;
-
-/*
- * This is a hook which is initialised by the systrace module
- * when it is loaded. This keeps the DTrace syscall provider
- * implementation opaque.
- */
-systrace_probe_func_t systrace_probe_func;
-
-/*
- * These hooks are necessary for the pid, usdt and fasttrap providers.
- */
-dtrace_fasttrap_probe_ptr_t dtrace_fasttrap_probe_ptr;
-dtrace_pid_probe_ptr_t dtrace_pid_probe_ptr;
-dtrace_return_probe_ptr_t dtrace_return_probe_ptr;
#endif
extern void trap(struct trapframe *frame);
@@ -139,7 +117,9 @@
extern inthand_t IDTVEC(lcall_syscall);
-#define MAX_TRAP_MSG 33
+extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall);
+
+#define MAX_TRAP_MSG 32
static char *trap_msg[] = {
"", /* 0 unused */
"privileged instruction fault", /* 1 T_PRIVINFLT */
@@ -174,12 +154,10 @@
"reserved (unknown) fault", /* 30 T_RESERVED */
"", /* 31 unused (reserved) */
"DTrace pid return trap", /* 32 T_DTRACE_RET */
- "DTrace fasttrap probe trap", /* 33 T_DTRACE_PROBE */
-
};
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
-extern int has_f00f_bug;
+int has_f00f_bug = 0; /* Initialized so that it can be patched. */
#endif
#ifdef KDB
@@ -210,6 +188,9 @@
void
trap(struct trapframe *frame)
{
+#ifdef KDTRACE_HOOKS
+ struct reg regs;
+#endif
struct thread *td = curthread;
struct proc *p = td->td_proc;
int i = 0, ucode = 0, code;
@@ -266,33 +247,10 @@
/*
* A trap can occur while DTrace executes a probe. Before
* executing the probe, DTrace blocks re-scheduling and sets
- * a flag in it's per-cpu flags to indicate that it doesn't
+ * a flag in its per-cpu flags to indicate that it doesn't
* want to fault. On returning from the probe, the no-fault
* flag is cleared and finally re-scheduling is enabled.
- *
- * If the DTrace kernel module has registered a trap handler,
- * call it and if it returns non-zero, assume that it has
- * handled the trap and modified the trap frame so that this
- * function can return normally.
*/
- if (type == T_DTRACE_PROBE || type == T_DTRACE_RET ||
- type == T_BPTFLT) {
- struct reg regs;
-
- fill_frame_regs(frame, &regs);
- if (type == T_DTRACE_PROBE &&
- dtrace_fasttrap_probe_ptr != NULL &&
- dtrace_fasttrap_probe_ptr(&regs) == 0)
- goto out;
- if (type == T_BPTFLT &&
- dtrace_pid_probe_ptr != NULL &&
- dtrace_pid_probe_ptr(&regs) == 0)
- goto out;
- if (type == T_DTRACE_RET &&
- dtrace_return_probe_ptr != NULL &&
- dtrace_return_probe_ptr(&regs) == 0)
- goto out;
- }
if ((type == T_PROTFLT || type == T_PAGEFLT) &&
dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
goto out;
@@ -310,8 +268,9 @@
uprintf(
"pid %ld (%s): trap %d with interrupts disabled\n",
(long)curproc->p_pid, curthread->td_name, type);
- else if (type != T_BPTFLT && type != T_TRCTRAP &&
- frame->tf_eip != (int)cpu_switch_load_gs) {
+ else if (type != T_NMI && type != T_BPTFLT &&
+ type != T_TRCTRAP &&
+ frame->tf_eip != (int)cpu_switch_load_gs) {
/*
* XXX not quite right, since this may be for a
* multiple fault in user mode.
@@ -321,9 +280,9 @@
/*
* Page faults need interrupts disabled until later,
* and we shouldn't enable interrupts while holding
- * a spin lock or if servicing an NMI.
+ * a spin lock.
*/
- if (type != T_NMI && type != T_PAGEFLT &&
+ if (type != T_PAGEFLT &&
td->td_md.md_spinlock_count == 0)
enable_intr();
}
@@ -365,6 +324,14 @@
case T_BPTFLT: /* bpt instruction fault */
case T_TRCTRAP: /* trace trap */
enable_intr();
+#ifdef KDTRACE_HOOKS
+ if (type == T_BPTFLT) {
+ fill_frame_regs(frame, &regs);
+ if (dtrace_pid_probe_ptr != NULL &&
+ dtrace_pid_probe_ptr(&regs) == 0)
+ goto out;
+ }
+#endif
frame->tf_eflags &= ~PSL_T;
i = SIGTRAP;
ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
@@ -405,6 +372,10 @@
i = SIGBUS;
ucode = BUS_OBJERR;
break;
+ case T_ALIGNFLT:
+ i = SIGBUS;
+ ucode = BUS_ADRALN;
+ break;
case T_DOUBLEFLT: /* double fault */
default:
i = SIGBUS;
@@ -501,7 +472,7 @@
goto userout;
} else if (panic_on_nmi)
panic("NMI indicates hardware failure");
- break;
+ goto out;
#endif /* POWERFAIL_NMI */
#endif /* DEV_ISA */
@@ -544,6 +515,15 @@
#endif
i = SIGFPE;
break;
+#ifdef KDTRACE_HOOKS
+ case T_DTRACE_RET:
+ enable_intr();
+ fill_frame_regs(frame, &regs);
+ if (dtrace_return_probe_ptr != NULL &&
+ dtrace_return_probe_ptr(&regs) == 0)
+ goto out;
+ goto userout;
+#endif
}
} else {
/* kernel trap */
@@ -557,8 +537,8 @@
case T_DNA:
#ifdef DEV_NPX
- KASSERT(!PCB_USER_FPU(td->td_pcb),
- ("Unregistered use of FPU in kernel"));
+ if (PCB_USER_FPU(td->td_pcb))
+ panic("Unregistered use of FPU in kernel");
if (npxdna())
goto out;
#endif
@@ -590,11 +570,7 @@
vm86_trap((struct vm86frame *)frame);
goto out;
}
- if (type == T_STKFLT)
- break;
-
/* FALL THROUGH */
-
case T_SEGNPFLT: /* segment not present fault */
if (curpcb->pcb_flags & PCB_VM86CALL)
break;
@@ -635,6 +611,9 @@
frame->tf_eip = (int)doreti_iret_fault;
goto out;
}
+ if (type == T_STKFLT)
+ break;
+
if (frame->tf_eip == (int)doreti_popl_ds) {
frame->tf_eip = (int)doreti_popl_ds_fault;
goto out;
@@ -707,7 +686,35 @@
load_dr6(rdr6() & 0xfffffff0);
goto out;
}
+
/*
+ * Malicious user code can configure a debug
+ * register watchpoint to trap on data access
+ * to the top of stack and then execute 'pop
+ * %ss; int 3'. Due to exception deferral for
+ * 'pop %ss', the CPU will not interrupt 'int
+ * 3' to raise the DB# exception for the debug
+ * register but will postpone the DB# until
+ * execution of the first instruction of the
+ * BP# handler (in kernel mode). Normally the
+ * previous check would ignore DB# exceptions
+ * for watchpoints on user addresses raised in
+ * kernel mode. However, some CPU errata
+ * include cases where DB# exceptions do not
+ * properly set bits in %dr6, e.g. Haswell
+ * HSD23 and Skylake-X SKZ24.
+ *
+ * A deferred DB# can also be raised on the
+ * first instructions of system call entry
+ * points or single-step traps via similar use
+ * of 'pop %ss' or 'mov xxx, %ss'.
+ */
+ if (frame->tf_eip ==
+ (uintptr_t)IDTVEC(int0x80_syscall) ||
+ frame->tf_eip == (uintptr_t)IDTVEC(bpt) ||
+ frame->tf_eip == (uintptr_t)IDTVEC(dbg))
+ return;
+ /*
* FALLTHROUGH (TRCTRAP kernel mode, kernel address)
*/
case T_BPTFLT:
@@ -794,7 +801,6 @@
user:
userret(td, frame);
- mtx_assert(&Giant, MA_NOTOWNED);
KASSERT(PCB_USER_FPU(td->td_pcb),
("Return from trap with kernel FPU ctx leaked"));
userout:
@@ -809,7 +815,7 @@
vm_offset_t eva;
{
vm_offset_t va;
- struct vmspace *vm = NULL;
+ struct vmspace *vm;
vm_map_t map;
int rv = 0;
vm_prot_t ftype;
@@ -872,7 +878,7 @@
*/
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
- return -2;
+ return (-2);
#endif
if (usermode)
goto nogo;
@@ -880,17 +886,21 @@
map = kernel_map;
} else {
/*
- * This is a fault on non-kernel virtual memory.
- * vm is initialized above to NULL. If curproc is NULL
- * or curproc->p_vmspace is NULL the fault is fatal.
+ * This is a fault on non-kernel virtual memory. If either
+ * p or p->p_vmspace is NULL, then the fault is fatal.
*/
- if (p != NULL)
- vm = p->p_vmspace;
-
- if (vm == NULL)
+ if (p == NULL || (vm = p->p_vmspace) == NULL)
goto nogo;
map = &vm->vm_map;
+
+ /*
+ * When accessing a user-space address, kernel must be
+ * ready to accept the page fault, and provide a
+ * handling routine. Since accessing the address
+ * without the handler is a bug, do not try to handle
+ * it normally, and panic immediately.
+ */
if (!usermode && (td->td_intr_nesting_level != 0 ||
curpcb->pcb_onfault == NULL)) {
trap_fatal(frame, eva);
@@ -899,12 +909,20 @@
}
/*
+ * If the trap was caused by errant bits in the PTE then panic.
+ */
+ if (frame->tf_err & PGEX_RSV) {
+ trap_fatal(frame, eva);
+ return (-1);
+ }
+
+ /*
* PGEX_I is defined only if the execute disable bit capability is
* supported and enabled.
*/
if (frame->tf_err & PGEX_W)
ftype = VM_PROT_WRITE;
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
ftype = VM_PROT_EXECUTE;
#endif
@@ -957,8 +975,7 @@
trap_fatal(frame, eva);
return (-1);
}
-
- return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+ return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
static void
@@ -970,6 +987,9 @@
u_int type;
struct soft_segment_descriptor softseg;
char *msg;
+#ifdef KDB
+ bool handled;
+#endif
code = frame->tf_err;
type = frame->tf_trapno;
@@ -989,9 +1009,15 @@
#endif
if (type == T_PAGEFLT) {
printf("fault virtual address = 0x%x\n", eva);
- printf("fault code = %s %s, %s\n",
+ printf("fault code = %s %s%s, %s\n",
code & PGEX_U ? "user" : "supervisor",
code & PGEX_W ? "write" : "read",
+#if defined(PAE) || defined(PAE_TABLES)
+ pg_nx != 0 ?
+ (code & PGEX_I ? " instruction" : " data") :
+#endif
+ "",
+ code & PGEX_RSV ? "reserved bits in PTE" :
code & PGEX_P ? "protection violation" : "page not present");
}
printf("instruction pointer = 0x%x:0x%x\n",
@@ -1022,21 +1048,18 @@
if (frame->tf_eflags & PSL_VM)
printf("vm86, ");
printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
- printf("current process = ");
- if (curproc) {
- printf("%lu (%s)\n", (u_long)curproc->p_pid, curthread->td_name);
- } else {
- printf("Idle\n");
- }
+ printf("current process = %d (%s)\n",
+ curproc->p_pid, curthread->td_name);
#ifdef KDB
- if (debugger_on_panic || kdb_active) {
+ if (debugger_on_panic) {
+ kdb_why = KDB_WHY_TRAP;
frame->tf_err = eva; /* smuggle fault address to ddb */
- if (kdb_trap(type, 0, frame)) {
- frame->tf_err = code; /* restore error code */
+ handled = kdb_trap(type, 0, frame);
+ frame->tf_err = code; /* restore error code */
+ kdb_why = KDB_WHY_UNSET;
+ if (handled)
return;
- }
- frame->tf_err = code; /* restore error code */
}
#endif
printf("trap number = %d\n", type);
@@ -1083,6 +1106,7 @@
struct proc *p;
struct trapframe *frame;
caddr_t params;
+ long tmp;
int error;
p = td->td_proc;
@@ -1098,7 +1122,10 @@
/*
* Code is first argument, followed by actual args.
*/
- sa->code = fuword(params);
+ error = fueword(params, &tmp);
+ if (error == -1)
+ return (EFAULT);
+ sa->code = tmp;
params += sizeof(int);
} else if (sa->code == SYS___syscall) {
/*
@@ -1105,7 +1132,10 @@
* Like syscall, but code is a quad, so as to maintain
* quad alignment for the rest of the arguments.
*/
- sa->code = fuword(params);
+ error = fueword(params, &tmp);
+ if (error == -1)
+ return (EFAULT);
+ sa->code = tmp;
params += sizeof(quad_t);
}
@@ -1174,7 +1204,7 @@
KASSERT(PCB_USER_FPU(td->td_pcb),
("System call %s returning with kernel FPU ctx leaked",
syscallname(td->td_proc, sa.code)));
- KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save,
+ KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
("System call %s returning with mangled pcb_save",
syscallname(td->td_proc, sa.code)));
Modified: trunk/sys/i386/i386/uio_machdep.c
===================================================================
--- trunk/sys/i386/i386/uio_machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/uio_machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2004 Alan L. Cox <alc at cs.rice.edu>
* Copyright (c) 1982, 1986, 1991, 1993
@@ -16,7 +17,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
@@ -36,9 +37,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/uio_machdep.c 266312 2014-05-17 13:59:11Z ian $");
#include <sys/param.h>
+#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -45,7 +47,6 @@
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/sf_buf.h>
-#include <sys/systm.h>
#include <sys/uio.h>
#include <vm/vm.h>
Modified: trunk/sys/i386/i386/vm86.c
===================================================================
--- trunk/sys/i386/i386/vm86.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/vm86.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1997 Jonathan Lemon
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/vm86.c 332325 2018-04-09 13:01:43Z emaste $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -128,8 +129,7 @@
}
int
-vm86_emulate(vmf)
- struct vm86frame *vmf;
+vm86_emulate(struct vm86frame *vmf)
{
struct vm86_kernel *vm86;
caddr_t addr;
@@ -586,10 +586,7 @@
* caller's cs:ip routine.
*/
int
-vm86_datacall(intnum, vmf, vmc)
- int intnum;
- struct vm86frame *vmf;
- struct vm86context *vmc;
+vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
{
pt_entry_t *pte = (pt_entry_t *)vm86paddr;
vm_paddr_t page;
@@ -634,11 +631,8 @@
}
int
-vm86_getptr(vmc, kva, sel, off)
- struct vm86context *vmc;
- vm_offset_t kva;
- u_short *sel;
- u_short *off;
+vm86_getptr(struct vm86context *vmc, vm_offset_t kva, u_short *sel,
+ u_short *off)
{
int i;
@@ -650,13 +644,10 @@
return (1);
}
return (0);
- panic("vm86_getptr: address not found");
}
int
-vm86_sysarch(td, args)
- struct thread *td;
- char *args;
+vm86_sysarch(struct thread *td, char *args)
{
int error = 0;
struct i386_vm86_args ua;
Modified: trunk/sys/i386/i386/vm86bios.s
===================================================================
--- trunk/sys/i386/i386/vm86bios.s 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/vm86bios.s 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1998 Jonathan Lemon
* All rights reserved.
@@ -23,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/i386/i386/vm86bios.s 282065 2015-04-27 08:02:12Z kib $
*/
#include "opt_npx.h"
@@ -122,7 +123,7 @@
movl SCR_NEWPTD(%edx),%eax /* mapping for vm86 page table */
movl %eax,0(%ebx) /* ... install as PTD entry 0 */
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
movl IdlePDPT,%ecx
#endif
movl %ecx,%cr3 /* new page tables */
Property changes on: trunk/sys/i386/i386/vm86bios.s
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/i386/i386/vm_machdep.c
===================================================================
--- trunk/sys/i386/i386/vm_machdep.c 2018-05-27 16:34:57 UTC (rev 9997)
+++ trunk/sys/i386/i386/vm_machdep.c 2018-05-27 16:35:41 UTC (rev 9998)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986 The Regents of the University of California.
* Copyright (c) 1989, 1990 William Jolitz
@@ -41,7 +42,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/i386/i386/vm_machdep.c 332759 2018-04-19 06:20:53Z avg $");
#include "opt_isa.h"
#include "opt_npx.h"
@@ -106,10 +107,15 @@
#define NSFBUFS (512 + maxusers * 16)
#endif
-CTASSERT((struct thread **)OFFSETOF_CURTHREAD ==
- &((struct pcpu *)NULL)->pc_curthread);
-CTASSERT((struct pcb **)OFFSETOF_CURPCB == &((struct pcpu *)NULL)->pc_curpcb);
+#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
+#define CPU_ENABLE_SSE
+#endif
+_Static_assert(OFFSETOF_CURTHREAD == offsetof(struct pcpu, pc_curthread),
+ "OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread.");
+_Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb),
+ "OFFSETOF_CURPCB does not correspond with offset of pc_curpcb.");
+
static void cpu_reset_real(void);
#ifdef SMP
static void cpu_reset_proxy(void);
@@ -116,6 +122,18 @@
static u_int cpu_reset_proxyid;
static volatile u_int cpu_reset_proxy_active;
#endif
+
+static int nsfbufs;
+static int nsfbufspeak;
+static int nsfbufsused;
+
+SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
+ "Maximum number of sendfile(2) sf_bufs available");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
+ "Number of sendfile(2) sf_bufs at peak usage");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
+ "Number of sendfile(2) sf_bufs in use");
+
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
@@ -137,8 +155,55 @@
*/
static struct mtx sf_buf_lock;
-extern int _ucodesel, _udatasel;
+union savefpu *
+get_pcb_user_save_td(struct thread *td)
+{
+ vm_offset_t p;
+ p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
+ roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
+ KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area"));
+ return ((union savefpu *)p);
+}
+union savefpu *
+get_pcb_user_save_pcb(struct pcb *pcb)
+{
+ vm_offset_t p;
+
+ p = (vm_offset_t)(pcb + 1);
+ return ((union savefpu *)p);
+}
+
+struct pcb *
+get_pcb_td(struct thread *td)
+{
+ vm_offset_t p;
+
+ p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
+ roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) -
+ sizeof(struct pcb);
+ return ((struct pcb *)p);
+}
+
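The helpers above fix a new top-of-kstack layout: the XSAVE-aligned user FPU save area sits at the very top, with struct pcb immediately below it. A stand-alone sketch of the arithmetic, with hypothetical sizes:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	 4096
#define XSAVE_AREA_ALIGN 64
#define ROUNDUP2(x, a)	 (((x) + ((a) - 1)) & ~((uintptr_t)(a) - 1))

/*
 * Top-of-kstack layout after this change (sizes illustrative):
 *
 *   kstack + pages*PAGE_SIZE  <- top
 *   user FPU save area (cpu_max_ext_state_size, XSAVE-aligned)
 *   struct pcb
 *   ... trapframe, kernel stack grows down ...
 */
int
main(void)
{
	uintptr_t kstack = 0xc4000000;		/* hypothetical */
	unsigned pages = 4, pcb_size = 600;	/* hypothetical sizes */
	unsigned ext_size = 832;		/* e.g. XSAVE with AVX */
	uintptr_t fpusave, pcb;

	fpusave = kstack + pages * PAGE_SIZE -
	    ROUNDUP2(ext_size, XSAVE_AREA_ALIGN);	/* get_pcb_user_save_td() */
	pcb = fpusave - pcb_size;			/* get_pcb_td() */
	printf("fpusave=%#lx pcb=%#lx\n", (unsigned long)fpusave,
	    (unsigned long)pcb);
	return (0);
}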
+void *
+alloc_fpusave(int flags)
+{
+ void *res;
+#ifdef CPU_ENABLE_SSE
+ struct savefpu_ymm *sf;
+#endif
+
+ res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags);
+#ifdef CPU_ENABLE_SSE
+ if (use_xsave) {
+ sf = (struct savefpu_ymm *)res;
+ bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd));
+ sf->sv_xstate.sx_hd.xstate_bv = xsave_mask;
+ }
+#endif
+ return (res);
+}
+
/*
* Finish a fork operation, with process p2 nearly set up.
* Copy and update the pcb, set up the stack so that the child
@@ -145,11 +210,7 @@
* ready to run and return to user mode.
*/
void
-cpu_fork(td1, p2, td2, flags)
- register struct thread *td1;
- register struct proc *p2;
- struct thread *td2;
- int flags;
+cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
{
register struct proc *p1;
struct pcb *pcb2;
@@ -188,8 +249,7 @@
#endif
/* Point the pcb to the top of the stack */
- pcb2 = (struct pcb *)(td2->td_kstack +
- td2->td_kstack_pages * PAGE_SIZE) - 1;
+ pcb2 = get_pcb_td(td2);
td2->td_pcb = pcb2;
/* Copy td1's pcb */
@@ -196,7 +256,9 @@
bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
/* Properly initialize pcb_save */
- pcb2->pcb_save = &pcb2->pcb_user_save;
+ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
+ bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2),
+ cpu_max_ext_state_size);
/* Point mdproc and then copy over td1's contents */
mdp2 = &p2->p_md;
@@ -230,7 +292,7 @@
* Set registers for trampoline to user mode. Leave space for the
* return address on stack. These are the kernel mode register values.
*/
-#ifdef PAE
+#if defined(PAE) || defined(PAE_TABLES)
pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt);
#else
pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir);
@@ -354,7 +416,7 @@
* XXX do we need to move the TSS off the allocated pages
* before freeing them? (not done here)
*/
- kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
+ kmem_free(kernel_arena, (vm_offset_t)pcb->pcb_ext,
ctob(IOPAGES + 1));
pcb->pcb_ext = NULL;
}
@@ -373,12 +435,22 @@
void
cpu_thread_alloc(struct thread *td)
{
+ struct pcb *pcb;
+#ifdef CPU_ENABLE_SSE
+ struct xstate_hdr *xhdr;
+#endif
- td->td_pcb = (struct pcb *)(td->td_kstack +
- td->td_kstack_pages * PAGE_SIZE) - 1;
- td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1;
- td->td_pcb->pcb_ext = NULL;
- td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save;
+ td->td_pcb = pcb = get_pcb_td(td);
+ td->td_frame = (struct trapframe *)((caddr_t)pcb - 16) - 1;
+ pcb->pcb_ext = NULL;
+ pcb->pcb_save = get_pcb_user_save_pcb(pcb);
+#ifdef CPU_ENABLE_SSE
+ if (use_xsave) {
+ xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
+ bzero(xhdr, sizeof(*xhdr));
+ xhdr->xstate_bv = xsave_mask;
+ }
+#endif
}
void
@@ -411,13 +483,7 @@
break;
default:
- if (td->td_proc->p_sysent->sv_errsize) {
- if (error >= td->td_proc->p_sysent->sv_errsize)
- error = -1; /* XXX */
- else
- error = td->td_proc->p_sysent->sv_errtbl[error];
- }
- td->td_frame->tf_eax = error;
+ td->td_frame->tf_eax = SV_ABI_ERRNO(td->td_proc, error);
td->td_frame->tf_eflags |= PSL_C;
break;
}
@@ -444,8 +510,11 @@
* values here.
*/
bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
- pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE);
- pcb2->pcb_save = &pcb2->pcb_user_save;
+ pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE |
+ PCB_KERNNPX);
+ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
+ bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save,
+ cpu_max_ext_state_size);
/*
* Create a new fresh stack for the new thread.
@@ -577,13 +646,11 @@
static void
cpu_reset_proxy()
{
- cpuset_t tcrp;
cpu_reset_proxy_active = 1;
while (cpu_reset_proxy_active == 1)
- ; /* Wait for other cpu to see that we've started */
- CPU_SETOF(cpu_reset_proxyid, &tcrp);
- stop_cpus(tcrp);
+ ia32_pause(); /* Wait for other cpu to see that we've started */
+
printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
DELAY(1000000);
cpu_reset_real();
@@ -605,7 +672,7 @@
cpuset_t map;
u_int cnt;
- if (smp_active) {
+ if (smp_started) {
map = all_cpus;
CPU_CLR(PCPU_GET(cpuid), &map);
CPU_NAND(&map, &stopped_cpus);
@@ -621,20 +688,22 @@
printf("cpu_reset: Restarting BSP\n");
/* Restart CPU #0. */
- /* XXX: restart_cpus(1 << 0); */
CPU_SETOF(0, &started_cpus);
wmb();
cnt = 0;
- while (cpu_reset_proxy_active == 0 && cnt < 10000000)
+ while (cpu_reset_proxy_active == 0 && cnt < 10000000) {
+ ia32_pause();
cnt++; /* Wait for BSP to announce restart */
- if (cpu_reset_proxy_active == 0)
+ }
+ if (cpu_reset_proxy_active == 0) {
printf("cpu_reset: Failed to restart BSP\n");
- enable_intr();
- cpu_reset_proxy_active = 2;
-
- while (1);
- /* NOTREACHED */
+ } else {
+ cpu_reset_proxy_active = 2;
+ while (1)
+ ia32_pause();
+ /* NOTREACHED */
+ }
}
DELAY(1000000);
@@ -750,7 +819,7 @@
sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
TAILQ_INIT(&sf_buf_freelist);
- sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
+ sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
M_NOWAIT | M_ZERO);
for (i = 0; i < nsfbufs; i++) {
@@ -785,7 +854,7 @@
*/
pmap_qenter(sf->kva, &m, 1);
pmap_invalidate_cache_range(sf->kva, sf->kva +
- PAGE_SIZE);
+ PAGE_SIZE, FALSE);
ret = TRUE;
break;
}
@@ -832,7 +901,7 @@
if (flags & SFB_NOWAIT)
goto done;
sf_buf_alloc_want++;
- mbstat.sf_allocwait++;
+ SFSTAT_INC(sf_allocwait);
error = msleep(&sf_buf_freelist, &sf_buf_lock,
(flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
sf_buf_alloc_want--;
Added: trunk/sys/i386/include/counter.h
===================================================================
--- trunk/sys/i386/include/counter.h (rev 0)
+++ trunk/sys/i386/include/counter.h 2018-05-27 16:35:41 UTC (rev 9998)
@@ -0,0 +1,179 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 Konstantin Belousov <kib at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/i386/include/counter.h 252434 2013-07-01 02:48:27Z kib $
+ */
+
+#ifndef __MACHINE_COUNTER_H__
+#define __MACHINE_COUNTER_H__
+
+#include <sys/pcpu.h>
+#ifdef INVARIANTS
+#include <sys/proc.h>
+#endif
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+
+#define counter_enter() do { \
+ if ((cpu_feature & CPUID_CX8) == 0) \
+ critical_enter(); \
+} while (0)
+
+#define counter_exit() do { \
+ if ((cpu_feature & CPUID_CX8) == 0) \
+ critical_exit(); \
+} while (0)
+
+extern struct pcpu __pcpu[MAXCPU];
+
+static inline void
+counter_64_inc_8b(uint64_t *p, int64_t inc)
+{
+
+ __asm __volatile(
+ "movl %%fs:(%%esi),%%eax\n\t"
+ "movl %%fs:4(%%esi),%%edx\n"
+"1:\n\t"
+ "movl %%eax,%%ebx\n\t"
+ "movl %%edx,%%ecx\n\t"
+ "addl (%%edi),%%ebx\n\t"
+ "adcl 4(%%edi),%%ecx\n\t"
+ "cmpxchg8b %%fs:(%%esi)\n\t"
+ "jnz 1b"
+ :
+ : "S" ((char *)p - (char *)&__pcpu[0]), "D" (&inc)
+ : "memory", "cc", "eax", "edx", "ebx", "ecx");
+}
+
+#ifdef IN_SUBR_COUNTER_C
+static inline uint64_t
+counter_u64_read_one_8b(uint64_t *p)
+{
+ uint32_t res_lo, res_high;
+
+ __asm __volatile(
+ "movl %%eax,%%ebx\n\t"
+ "movl %%edx,%%ecx\n\t"
+ "cmpxchg8b (%2)"
+ : "=a" (res_lo), "=d"(res_high)
+ : "SD" (p)
+ : "cc", "ebx", "ecx");
+ return (res_lo + ((uint64_t)res_high << 32));
+}
+
+static inline uint64_t
+counter_u64_fetch_inline(uint64_t *p)
+{
+ uint64_t res;
+ int i;
+
+ res = 0;
+ if ((cpu_feature & CPUID_CX8) == 0) {
+ /*
+ * The machines without cmpxchg8b are not SMP.
+ * Disabling the preemption provides atomicity of the
+ * counter reading, since update is done in the
+ * critical section as well.
+ */
+ critical_enter();
+ for (i = 0; i < mp_ncpus; i++) {
+ res += *(uint64_t *)((char *)p +
+ sizeof(struct pcpu) * i);
+ }
+ critical_exit();
+ } else {
+ for (i = 0; i < mp_ncpus; i++)
+ res += counter_u64_read_one_8b((uint64_t *)((char *)p +
+ sizeof(struct pcpu) * i));
+ }
+ return (res);
+}
+
+static inline void
+counter_u64_zero_one_8b(uint64_t *p)
+{
+
+ __asm __volatile(
+ "movl (%0),%%eax\n\t"
+ "movl 4(%0),%%edx\n"
+ "xorl %%ebx,%%ebx\n\t"
+ "xorl %%ecx,%%ecx\n\t"
+"1:\n\t"
+ "cmpxchg8b (%0)\n\t"
+ "jnz 1b"
+ :
+ : "SD" (p)
+ : "memory", "cc", "eax", "edx", "ebx", "ecx");
+}
+
+static void
+counter_u64_zero_one_cpu(void *arg)
+{
+ uint64_t *p;
+
+ p = (uint64_t *)((char *)arg + sizeof(struct pcpu) * PCPU_GET(cpuid));
+ counter_u64_zero_one_8b(p);
+}
+
+static inline void
+counter_u64_zero_inline(counter_u64_t c)
+{
+ int i;
+
+ if ((cpu_feature & CPUID_CX8) == 0) {
+ critical_enter();
+ for (i = 0; i < mp_ncpus; i++)
+ *(uint64_t *)((char *)c + sizeof(struct pcpu) * i) = 0;
+ critical_exit();
+ } else {
+ smp_rendezvous(smp_no_rendevous_barrier,
+ counter_u64_zero_one_cpu, smp_no_rendevous_barrier, c);
+ }
+}
+#endif
+
+#define counter_u64_add_protected(c, inc) do { \
+ if ((cpu_feature & CPUID_CX8) == 0) { \
+ CRITICAL_ASSERT(curthread); \
+ *(uint64_t *)zpcpu_get(c) += (inc); \
+ } else \
+ counter_64_inc_8b((c), (inc)); \
+} while (0)
+
+static inline void
+counter_u64_add(counter_u64_t c, int64_t inc)
+{
+
+ if ((cpu_feature & CPUID_CX8) == 0) {
+ critical_enter();
+ *(uint64_t *)zpcpu_get(c) += inc;
+ critical_exit();
+ } else {
+ counter_64_inc_8b(c, inc);
+ }
+}
+
+#endif /* ! __MACHINE_COUNTER_H__ */
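In C terms, the counter_64_inc_8b() assembly above is a
compare-exchange loop on the current CPU's 64-bit slot.  A rough
portable restatement follows (counter_64_inc_8b_sketch is a
hypothetical name; the real routine has to stay in asm because the
slot address is computed %fs-relative, i.e. against whichever pcpu
region the executing CPU owns):

#include <stdint.h>

/*
 * Sketch of the cmpxchg8b loop.  'slot' stands for the current CPU's
 * copy of the counter.  The real code needs no lock prefix: only the
 * owning CPU writes its slot, so the CAS merely has to be atomic
 * against local interrupts and remote 8-byte readers.
 */
static inline void
counter_64_inc_8b_sketch(uint64_t *slot, int64_t inc)
{
	uint64_t old, new;

	do {
		old = *slot;		/* may tear; the CAS catches it */
		new = old + (uint64_t)inc;
	} while (!__sync_bool_compare_and_swap(slot, old, new));
}

On i386 the __sync builtin compiles to the same cmpxchg8b instruction
the hand-written asm uses.  The reader side, counter_u64_read_one_8b()
above, uses cmpxchg8b with equal old and new halves purely to obtain
an atomic 64-bit load on a 32-bit CPU.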
Property changes on: trunk/sys/i386/include/counter.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
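For reference, a minimal consumer-side sketch of the MI counter(9)
KPI that this MD header implements.  counter_u64_alloc(), _add() and
_fetch() are the real names from FreeBSD's sys/counter.h; the
pkt_drops statistic and its SYSINIT are hypothetical:

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/counter.h>
#include <sys/malloc.h>

static counter_u64_t pkt_drops;		/* hypothetical statistic */

static void
pkt_drops_init(void *arg __unused)
{

	/* Allocates one 64-bit slot per CPU. */
	pkt_drops = counter_u64_alloc(M_WAITOK);
}
SYSINIT(pkt_drops_counter, SI_SUB_KMEM, SI_ORDER_ANY, pkt_drops_init,
    NULL);

/* Hot path: lock-free increment of the executing CPU's slot only. */
static void
pkt_drop_record(void)
{

	counter_u64_add(pkt_drops, 1);
}

/* Slow path (sysctl handler etc.): fold all per-CPU slots together. */
static uint64_t
pkt_drops_read(void)
{

	return (counter_u64_fetch(pkt_drops));
}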
Added: trunk/sys/i386/include/fdt.h
===================================================================
--- trunk/sys/i386/include/fdt.h (rev 0)
+++ trunk/sys/i386/include/fdt.h 2018-05-27 16:35:41 UTC (rev 9998)
@@ -0,0 +1,7 @@
+/* $MidnightBSD$ */
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD: stable/10/sys/i386/include/fdt.h 250840 2013-05-21 03:05:49Z marcel $ */
+
+#include <x86/fdt.h>
Property changes on: trunk/sys/i386/include/fdt.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/i386/include/ofw_machdep.h
===================================================================
--- trunk/sys/i386/include/ofw_machdep.h (rev 0)
+++ trunk/sys/i386/include/ofw_machdep.h 2018-05-27 16:35:41 UTC (rev 9998)
@@ -0,0 +1,7 @@
+/* $MidnightBSD$ */
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD: stable/10/sys/i386/include/ofw_machdep.h 250840 2013-05-21 03:05:49Z marcel $ */
+
+#include <x86/ofw_machdep.h>
Property changes on: trunk/sys/i386/include/ofw_machdep.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property