[Midnightbsd-cvs] src: i386/i386:
laffer1 at midnightbsd.org
Sat Sep 27 22:12:57 EDT 2008
Log Message:
-----------
Modified Files:
--------------
src/sys/i386/i386:
apic_vector.s (r1.1.1.1 -> r1.2)
bios.c (r1.1.1.1 -> r1.2)
busdma_machdep.c (r1.2 -> r1.3)
db_trace.c (r1.2 -> r1.3)
dump_machdep.c (r1.1.1.1 -> r1.2)
elan-mmcr.c (r1.1.1.1 -> r1.2)
elf_machdep.c (r1.1.1.1 -> r1.2)
exception.s (r1.1.1.1 -> r1.2)
genassym.c (r1.1.1.1 -> r1.2)
geode.c (r1.1.1.1 -> r1.2)
identcpu.c (r1.4 -> r1.5)
in_cksum.c (r1.1.1.1 -> r1.2)
initcpu.c (r1.3 -> r1.4)
intr_machdep.c (r1.2 -> r1.3)
io.c (r1.1.1.1 -> r1.2)
io_apic.c (r1.2 -> r1.3)
legacy.c (r1.1.1.1 -> r1.2)
local_apic.c (r1.2 -> r1.3)
locore.s (r1.1.1.1 -> r1.2)
machdep.c (r1.4 -> r1.5)
mem.c (r1.2 -> r1.3)
mp_clock.c (r1.1.1.1 -> r1.2)
mp_machdep.c (r1.2 -> r1.3)
mp_watchdog.c (r1.1.1.1 -> r1.2)
mptable.c (r1.1.1.1 -> r1.2)
mptable_pci.c (r1.1.1.1 -> r1.2)
nexus.c (r1.1.1.1 -> r1.2)
pmap.c (r1.2 -> r1.3)
ptrace_machdep.c (r1.3 -> r1.4)
support.s (r1.1.1.1 -> r1.2)
swtch.s (r1.1.1.1 -> r1.2)
sys_machdep.c (r1.1.1.1 -> r1.2)
trap.c (r1.1.1.1 -> r1.2)
tsc.c (r1.1.1.1 -> r1.2)
vm86.c (r1.1.1.1 -> r1.2)
vm86bios.s (r1.1.1.1 -> r1.2)
vm_machdep.c (r1.2 -> r1.3)
Added Files:
-----------
src/sys/i386/i386:
bpf_jit_machdep.c (r1.1)
bpf_jit_machdep.h (r1.1)
minidump_machdep.c (r1.1)
msi.c (r1.1)
Index: intr_machdep.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/intr_machdep.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/i386/i386/intr_machdep.c -L sys/i386/i386/intr_machdep.c -u -r1.2 -r1.3
--- sys/i386/i386/intr_machdep.c
+++ sys/i386/i386/intr_machdep.c
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/i386/i386/intr_machdep.c,v 1.14.2.2 2006/03/10 19:37:33 jhb Exp $
+ * $FreeBSD: src/sys/i386/i386/intr_machdep.c,v 1.29.2.1 2007/11/26 15:06:49 scottl Exp $
*/
/*
@@ -42,15 +42,17 @@
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
-#include <sys/lock.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/syslog.h>
#include <sys/systm.h>
+#include <sys/sx.h>
#include <machine/clock.h>
#include <machine/intr_machdep.h>
+#include <machine/smp.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
@@ -61,7 +63,15 @@
static int intrcnt_index;
static struct intsrc *interrupt_sources[NUM_IO_INTS];
-static struct mtx intr_table_lock;
+static struct sx intr_table_lock;
+static struct mtx intrcnt_lock;
+static STAILQ_HEAD(, pic) pics;
+
+#ifdef INTR_FILTER
+static void intr_eoi_src(void *arg);
+static void intr_disab_eoi_src(void *arg);
+static void intr_event_stray(void *cookie);
+#endif
#ifdef SMP
static int assign_cpu;
@@ -70,10 +80,45 @@
#endif
static void intr_init(void *__dummy);
+static int intr_pic_registered(struct pic *pic);
static void intrcnt_setname(const char *name, int index);
static void intrcnt_updatename(struct intsrc *is);
static void intrcnt_register(struct intsrc *is);
+static int
+intr_pic_registered(struct pic *pic)
+{
+ struct pic *p;
+
+ STAILQ_FOREACH(p, &pics, pics) {
+ if (p == pic)
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * Register a new interrupt controller (PIC). This is to support suspend
+ * and resume where we suspend/resume controllers rather than individual
+ * sources. This also allows controllers with no active sources (such as
+ * 8259As in a system using the APICs) to participate in suspend and resume.
+ */
+int
+intr_register_pic(struct pic *pic)
+{
+ int error;
+
+ sx_xlock(&intr_table_lock);
+ if (intr_pic_registered(pic))
+ error = EBUSY;
+ else {
+ STAILQ_INSERT_TAIL(&pics, pic, pics);
+ error = 0;
+ }
+ sx_xunlock(&intr_table_lock);
+ return (error);
+}
+
/*
* Register a new interrupt source with the global interrupt system.
* The global interrupts need to be disabled when this function is
@@ -84,23 +129,30 @@
{
int error, vector;
+ KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC"));
vector = isrc->is_pic->pic_vector(isrc);
if (interrupt_sources[vector] != NULL)
return (EEXIST);
+#ifdef INTR_FILTER
+ error = intr_event_create(&isrc->is_event, isrc, 0,
+ (mask_fn)isrc->is_pic->pic_enable_source,
+ intr_eoi_src, intr_disab_eoi_src, "irq%d:", vector);
+#else
error = intr_event_create(&isrc->is_event, isrc, 0,
(mask_fn)isrc->is_pic->pic_enable_source, "irq%d:", vector);
+#endif
if (error)
return (error);
- mtx_lock_spin(&intr_table_lock);
+ sx_xlock(&intr_table_lock);
if (interrupt_sources[vector] != NULL) {
- mtx_unlock_spin(&intr_table_lock);
+ sx_xunlock(&intr_table_lock);
intr_event_destroy(isrc->is_event);
return (EEXIST);
}
intrcnt_register(isrc);
interrupt_sources[vector] = isrc;
- isrc->is_enabled = 0;
- mtx_unlock_spin(&intr_table_lock);
+ isrc->is_handlers = 0;
+ sx_xunlock(&intr_table_lock);
return (0);
}
@@ -112,8 +164,8 @@
}
int
-intr_add_handler(const char *name, int vector, driver_intr_t handler,
- void *arg, enum intr_type flags, void **cookiep)
+intr_add_handler(const char *name, int vector, driver_filter_t filter,
+ driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep)
{
struct intsrc *isrc;
int error;
@@ -121,22 +173,21 @@
isrc = intr_lookup_source(vector);
if (isrc == NULL)
return (EINVAL);
- error = intr_event_add_handler(isrc->is_event, name, handler, arg,
- intr_priority(flags), flags, cookiep);
+ error = intr_event_add_handler(isrc->is_event, name, filter, handler,
+ arg, intr_priority(flags), flags, cookiep);
if (error == 0) {
+ sx_xlock(&intr_table_lock);
intrcnt_updatename(isrc);
- mtx_lock_spin(&intr_table_lock);
- if (!isrc->is_enabled) {
- isrc->is_enabled = 1;
+ isrc->is_handlers++;
+ if (isrc->is_handlers == 1) {
#ifdef SMP
if (assign_cpu)
intr_assign_next_cpu(isrc);
#endif
- mtx_unlock_spin(&intr_table_lock);
isrc->is_pic->pic_enable_intr(isrc);
- } else
- mtx_unlock_spin(&intr_table_lock);
- isrc->is_pic->pic_enable_source(isrc);
+ isrc->is_pic->pic_enable_source(isrc);
+ }
+ sx_xunlock(&intr_table_lock);
}
return (error);
}
@@ -144,13 +195,21 @@
int
intr_remove_handler(void *cookie)
{
+ struct intsrc *isrc;
int error;
+ isrc = intr_handler_source(cookie);
error = intr_event_remove_handler(cookie);
-#ifdef XXX
- if (error == 0)
- intrcnt_updatename(/* XXX */);
-#endif
+ if (error == 0) {
+ sx_xlock(&intr_table_lock);
+ isrc->is_handlers--;
+ if (isrc->is_handlers == 0) {
+ isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI);
+ isrc->is_pic->pic_disable_intr(isrc);
+ }
+ intrcnt_updatename(isrc);
+ sx_xunlock(&intr_table_lock);
+ }
return (error);
}
@@ -165,13 +224,84 @@
return (isrc->is_pic->pic_config_intr(isrc, trig, pol));
}
+#ifdef INTR_FILTER
+void
+intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
+{
+ struct thread *td;
+ struct intr_event *ie;
+ int vector;
+
+ td = curthread;
+
+ /*
+ * We count software interrupts when we process them. The
+ * code here follows previous practice, but there's an
+ * argument for counting hardware interrupts when they're
+ * processed too.
+ */
+ (*isrc->is_count)++;
+ PCPU_INC(cnt.v_intr);
+
+ ie = isrc->is_event;
+
+ /*
+ * XXX: We assume that IRQ 0 is only used for the ISA timer
+ * device (clk).
+ */
+ vector = isrc->is_pic->pic_vector(isrc);
+ if (vector == 0)
+ clkintr_pending = 1;
+
+ if (intr_event_handle(ie, frame) != 0)
+ intr_event_stray(isrc);
+}
+
+static void
+intr_event_stray(void *cookie)
+{
+ struct intsrc *isrc;
+
+ isrc = cookie;
+ /*
+ * For stray interrupts, mask and EOI the source, bump the
+ * stray count, and log the condition.
+ */
+ isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
+ (*isrc->is_straycount)++;
+ if (*isrc->is_straycount < MAX_STRAY_LOG)
+ log(LOG_ERR, "stray irq%d\n", isrc->is_pic->pic_vector(isrc));
+ else if (*isrc->is_straycount == MAX_STRAY_LOG)
+ log(LOG_CRIT,
+ "too many stray irq %d's: not logging anymore\n",
+ isrc->is_pic->pic_vector(isrc));
+}
+
+static void
+intr_eoi_src(void *arg)
+{
+ struct intsrc *isrc;
+
+ isrc = arg;
+ isrc->is_pic->pic_eoi_source(isrc);
+}
+
+static void
+intr_disab_eoi_src(void *arg)
+{
+ struct intsrc *isrc;
+
+ isrc = arg;
+ isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
+}
+#else
void
-intr_execute_handlers(struct intsrc *isrc, struct intrframe *iframe)
+intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
{
struct thread *td;
struct intr_event *ie;
struct intr_handler *ih;
- int error, vector, thread;
+ int error, vector, thread, ret;
td = curthread;
@@ -182,7 +312,7 @@
* processed too.
*/
(*isrc->is_count)++;
- PCPU_LAZY_INC(cnt.v_intr);
+ PCPU_INC(cnt.v_intr);
ie = isrc->is_event;
@@ -214,23 +344,42 @@
* Execute fast interrupt handlers directly.
* To support clock handlers, if a handler registers
* with a NULL argument, then we pass it a pointer to
- * an intrframe as its argument.
+ * a trapframe as its argument.
*/
td->td_intr_nesting_level++;
+ ret = 0;
thread = 0;
critical_enter();
TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
- if (!(ih->ih_flags & IH_FAST)) {
+ if (ih->ih_filter == NULL) {
thread = 1;
continue;
}
CTR4(KTR_INTR, "%s: exec %p(%p) for %s", __func__,
- ih->ih_handler, ih->ih_argument == NULL ? iframe :
+ ih->ih_filter, ih->ih_argument == NULL ? frame :
ih->ih_argument, ih->ih_name);
if (ih->ih_argument == NULL)
- ih->ih_handler(iframe);
+ ret = ih->ih_filter(frame);
else
- ih->ih_handler(ih->ih_argument);
+ ret = ih->ih_filter(ih->ih_argument);
+ /*
+ * Wrapper handler special handling:
+ *
+ * in some particular cases (like pccard and pccbb),
+ * the _real_ device handler is wrapped in a couple of
+ * functions - a filter wrapper and an ithread wrapper.
+ * In this case (and just in this case), the filter wrapper
+ * could ask the system to schedule the ithread and mask
+ * the interrupt source if the wrapped handler is composed
+ * of just an ithread handler.
+ *
+ * TODO: write a generic wrapper to avoid people rolling
+ * their own
+ */
+ if (!thread) {
+ if (ret == FILTER_SCHEDULE_THREAD)
+ thread = 1;
+ }
}
/*
@@ -242,40 +391,41 @@
isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
else
isrc->is_pic->pic_eoi_source(isrc);
- critical_exit();
/* Schedule the ithread if needed. */
if (thread) {
error = intr_event_schedule_thread(ie);
KASSERT(error == 0, ("bad stray interrupt"));
}
+ critical_exit();
td->td_intr_nesting_level--;
}
+#endif
void
intr_resume(void)
{
- struct intsrc **isrc;
- int i;
+ struct pic *pic;
- mtx_lock_spin(&intr_table_lock);
- for (i = 0, isrc = interrupt_sources; i < NUM_IO_INTS; i++, isrc++)
- if (*isrc != NULL && (*isrc)->is_pic->pic_resume != NULL)
- (*isrc)->is_pic->pic_resume(*isrc);
- mtx_unlock_spin(&intr_table_lock);
+ sx_xlock(&intr_table_lock);
+ STAILQ_FOREACH(pic, &pics, pics) {
+ if (pic->pic_resume != NULL)
+ pic->pic_resume(pic);
+ }
+ sx_xunlock(&intr_table_lock);
}
void
intr_suspend(void)
{
- struct intsrc **isrc;
- int i;
+ struct pic *pic;
- mtx_lock_spin(&intr_table_lock);
- for (i = 0, isrc = interrupt_sources; i < NUM_IO_INTS; i++, isrc++)
- if (*isrc != NULL && (*isrc)->is_pic->pic_suspend != NULL)
- (*isrc)->is_pic->pic_suspend(*isrc);
- mtx_unlock_spin(&intr_table_lock);
+ sx_xlock(&intr_table_lock);
+ STAILQ_FOREACH(pic, &pics, pics) {
+ if (pic->pic_suspend != NULL)
+ pic->pic_suspend(pic);
+ }
+ sx_xunlock(&intr_table_lock);
}
static void
@@ -298,8 +448,8 @@
{
char straystr[MAXCOMLEN + 1];
- /* mtx_assert(&intr_table_lock, MA_OWNED); */
KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__));
+ mtx_lock_spin(&intrcnt_lock);
is->is_index = intrcnt_index;
intrcnt_index += 2;
snprintf(straystr, MAXCOMLEN + 1, "stray irq%d",
@@ -308,17 +458,18 @@
is->is_count = &intrcnt[is->is_index];
intrcnt_setname(straystr, is->is_index + 1);
is->is_straycount = &intrcnt[is->is_index + 1];
+ mtx_unlock_spin(&intrcnt_lock);
}
void
intrcnt_add(const char *name, u_long **countp)
{
- mtx_lock_spin(&intr_table_lock);
+ mtx_lock_spin(&intrcnt_lock);
*countp = &intrcnt[intrcnt_index];
intrcnt_setname(name, intrcnt_index);
intrcnt_index++;
- mtx_unlock_spin(&intr_table_lock);
+ mtx_unlock_spin(&intrcnt_lock);
}
static void
@@ -327,7 +478,9 @@
intrcnt_setname("???", 0);
intrcnt_index = 1;
- mtx_init(&intr_table_lock, "intr table", NULL, MTX_SPIN);
+ STAILQ_INIT(&pics);
+ sx_init(&intr_table_lock, "intr sources");
+ mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN);
}
SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL)
@@ -338,16 +491,14 @@
DB_SHOW_COMMAND(irqs, db_show_irqs)
{
struct intsrc **isrc;
- int i, quit, verbose;
+ int i, verbose;
- quit = 0;
if (strcmp(modif, "v") == 0)
verbose = 1;
else
verbose = 0;
isrc = interrupt_sources;
- db_setup_paging(db_simple_pager, &quit, db_lines_per_page);
- for (i = 0; i < NUM_IO_INTS && !quit; i++, isrc++)
+ for (i = 0; i < NUM_IO_INTS && !db_pager_quit; i++, isrc++)
if (*isrc != NULL)
db_dump_intr_event((*isrc)->is_event, verbose);
}
@@ -359,8 +510,9 @@
* allocate CPUs round-robin.
*/
-static u_int cpu_apic_ids[MAXCPU];
-static int current_cpu, num_cpus;
+/* The BSP is always a valid target. */
+static cpumask_t intr_cpus = (1 << 0);
+static int current_cpu, num_cpus = 1;
static void
intr_assign_next_cpu(struct intsrc *isrc)
@@ -373,29 +525,29 @@
*/
pic = isrc->is_pic;
apic_id = cpu_apic_ids[current_cpu];
- current_cpu++;
- if (current_cpu >= num_cpus)
- current_cpu = 0;
- if (bootverbose) {
- printf("INTR: Assigning IRQ %d", pic->pic_vector(isrc));
- printf(" to local APIC %u\n", apic_id);
- }
pic->pic_assign_cpu(isrc, apic_id);
+ do {
+ current_cpu++;
+ if (current_cpu >= num_cpus)
+ current_cpu = 0;
+ } while (!(intr_cpus & (1 << current_cpu)));
}
/*
- * Add a local APIC ID to our list of valid local APIC IDs that can
- * be destinations of interrupts.
+ * Add a CPU to our mask of valid CPUs that can be destinations of
+ * interrupts.
*/
void
-intr_add_cpu(u_int apic_id)
+intr_add_cpu(u_int cpu)
{
+ if (cpu >= MAXCPU)
+ panic("%s: Invalid CPU ID", __func__);
if (bootverbose)
- printf("INTR: Adding local APIC %d as a target\n", apic_id);
- if (num_cpus >= MAXCPU)
- panic("WARNING: Local APIC IDs exhausted!");
- cpu_apic_ids[num_cpus] = apic_id;
+ printf("INTR: Adding local APIC %d as a target\n",
+ cpu_apic_ids[cpu]);
+
+ intr_cpus |= (1 << cpu);
num_cpus++;
}
@@ -413,15 +565,15 @@
if (num_cpus <= 1)
return;
- /* Round-robin assign each enabled source a CPU. */
- mtx_lock_spin(&intr_table_lock);
+ /* Round-robin assign a CPU to each enabled source. */
+ sx_xlock(&intr_table_lock);
assign_cpu = 1;
for (i = 0; i < NUM_IO_INTS; i++) {
isrc = interrupt_sources[i];
- if (isrc != NULL && isrc->is_enabled)
+ if (isrc != NULL && isrc->is_handlers > 0)
intr_assign_next_cpu(isrc);
}
- mtx_unlock_spin(&intr_table_lock);
+ sx_xunlock(&intr_table_lock);
}
SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs, NULL)
#endif
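
The intr_machdep.c changes above swap the single spin mutex for an sx lock
on the interrupt-source table (so adding and removing handlers may sleep),
add a dedicated spin mutex for the intrcnt arrays, and keep a registry of
interrupt controllers so suspend/resume walks whole PICs instead of
individual sources. A minimal sketch of that registry pattern, assuming a
simplified struct pic (field and function names here are illustrative, not
the kernel's exact layout):

#include <errno.h>
#include <sys/queue.h>

struct pic {
    void (*pic_suspend)(struct pic *);
    void (*pic_resume)(struct pic *);
    STAILQ_ENTRY(pic) pics;             /* registry linkage */
};

static STAILQ_HEAD(, pic) pics = STAILQ_HEAD_INITIALIZER(pics);

/* Register a controller exactly once; duplicates get EBUSY. */
static int
pic_register(struct pic *p)
{
    struct pic *q;

    STAILQ_FOREACH(q, &pics, pics)
        if (q == p)
            return (EBUSY);
    STAILQ_INSERT_TAIL(&pics, p, pics);
    return (0);
}

/* Suspend walks controllers, not individual interrupt sources. */
static void
pic_suspend_all(void)
{
    struct pic *p;

    STAILQ_FOREACH(p, &pics, pics)
        if (p->pic_suspend != NULL)
            p->pic_suspend(p);
}
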
Index: in_cksum.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/in_cksum.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/in_cksum.c -L sys/i386/i386/in_cksum.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/in_cksum.c
+++ sys/i386/i386/in_cksum.c
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/in_cksum.c,v 1.28 2005/03/02 21:33:25 joerg Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/in_cksum.c,v 1.28.10.1 2007/10/26 07:15:04 bz Exp $");
/*
* MPsafe: alfred
@@ -260,17 +260,6 @@
* reorder operations, this will generally take place in parallel with
* other calculations.
*/
-#define ADD(n) __asm __volatile \
- ("addl %1, %0" : "+r" (sum) : \
- "g" (((const u_int32_t *)w)[n / 4]))
-#define ADDC(n) __asm __volatile \
- ("adcl %1, %0" : "+r" (sum) : \
- "g" (((const u_int32_t *)w)[n / 4]))
-#define LOAD(n) __asm __volatile \
- ("" : : "r" (((const u_int32_t *)w)[n / 4]))
-#define MOP __asm __volatile \
- ("adcl $0, %0" : "+r" (sum))
-
u_short
in_cksum_skip(m, len, skip)
struct mbuf *m;
@@ -341,15 +330,24 @@
* Advance to a 486 cache line boundary.
*/
if (4 & (int) w && mlen >= 4) {
- ADD(0);
- MOP;
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (((const u_int32_t *)w)[0])
+ );
w += 2;
mlen -= 4;
}
if (8 & (int) w && mlen >= 8) {
- ADD(0);
- ADDC(4);
- MOP;
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl %2, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (((const u_int32_t *)w)[0]),
+ "g" (((const u_int32_t *)w)[1])
+ );
w += 4;
mlen -= 8;
}
@@ -379,45 +377,81 @@
* is initially 33 (not 32) to guaranteed that
* the LOAD(32) is within bounds.
*/
- ADD(16);
- ADDC(0);
- ADDC(4);
- ADDC(8);
- ADDC(12);
- LOAD(32);
- ADDC(20);
- ADDC(24);
- ADDC(28);
- MOP;
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl %2, %0\n"
+ "adcl %3, %0\n"
+ "adcl %4, %0\n"
+ "adcl %5, %0\n"
+ "mov %6, %%eax\n"
+ "adcl %7, %0\n"
+ "adcl %8, %0\n"
+ "adcl %9, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (((const u_int32_t *)w)[4]),
+ "g" (((const u_int32_t *)w)[0]),
+ "g" (((const u_int32_t *)w)[1]),
+ "g" (((const u_int32_t *)w)[2]),
+ "g" (((const u_int32_t *)w)[3]),
+ "g" (((const u_int32_t *)w)[8]),
+ "g" (((const u_int32_t *)w)[5]),
+ "g" (((const u_int32_t *)w)[6]),
+ "g" (((const u_int32_t *)w)[7])
+ : "eax"
+ );
w += 16;
}
mlen += 32 + 1;
if (mlen >= 32) {
- ADD(16);
- ADDC(0);
- ADDC(4);
- ADDC(8);
- ADDC(12);
- ADDC(20);
- ADDC(24);
- ADDC(28);
- MOP;
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl %2, %0\n"
+ "adcl %3, %0\n"
+ "adcl %4, %0\n"
+ "adcl %5, %0\n"
+ "adcl %6, %0\n"
+ "adcl %7, %0\n"
+ "adcl %8, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (((const u_int32_t *)w)[4]),
+ "g" (((const u_int32_t *)w)[0]),
+ "g" (((const u_int32_t *)w)[1]),
+ "g" (((const u_int32_t *)w)[2]),
+ "g" (((const u_int32_t *)w)[3]),
+ "g" (((const u_int32_t *)w)[5]),
+ "g" (((const u_int32_t *)w)[6]),
+ "g" (((const u_int32_t *)w)[7])
+ );
w += 16;
mlen -= 32;
}
if (mlen >= 16) {
- ADD(0);
- ADDC(4);
- ADDC(8);
- ADDC(12);
- MOP;
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl %2, %0\n"
+ "adcl %3, %0\n"
+ "adcl %4, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (((const u_int32_t *)w)[0]),
+ "g" (((const u_int32_t *)w)[1]),
+ "g" (((const u_int32_t *)w)[2]),
+ "g" (((const u_int32_t *)w)[3])
+ );
w += 8;
mlen -= 16;
}
if (mlen >= 8) {
- ADD(0);
- ADDC(4);
- MOP;
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl %2, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (((const u_int32_t *)w)[0]),
+ "g" (((const u_int32_t *)w)[1])
+ );
w += 4;
mlen -= 8;
}
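
The in_cksum.c rewrite folds the old ADD/ADDC/LOAD/MOP macros into single
__asm blocks, so each add-with-carry chain is one asm statement and the
compiler can no longer schedule instructions between the addl and its
dependent adcls, which would clobber the carry flag. The idea in
miniature, as a sketch for a fixed four-word buffer (i386/amd64 only):

#include <stdint.h>

/*
 * Sketch: sum four 32-bit words with one add-with-carry chain.
 * The whole chain lives in a single asm block so nothing can be
 * scheduled in between and clobber the carry flag.
 */
static uint32_t
sum4(const uint32_t *w, uint32_t sum)
{
    __asm __volatile (
        "addl %1, %0\n\t"
        "adcl %2, %0\n\t"
        "adcl %3, %0\n\t"
        "adcl %4, %0\n\t"
        "adcl $0, %0"                   /* fold the final carry */
        : "+r" (sum)
        : "g" (w[0]), "g" (w[1]), "g" (w[2]), "g" (w[3]));
    return (sum);
}
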
Index: db_trace.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/db_trace.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/i386/i386/db_trace.c -L sys/i386/i386/db_trace.c -u -r1.2 -r1.3
--- sys/i386/i386/db_trace.c
+++ sys/i386/i386/db_trace.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/db_trace.c,v 1.66.2.1 2006/03/13 03:05:33 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/db_trace.c,v 1.79 2007/02/19 10:57:47 kib Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -80,6 +80,7 @@
{ "edi", DB_OFFSET(tf_edi), db_frame },
{ "eip", DB_OFFSET(tf_eip), db_frame },
{ "efl", DB_OFFSET(tf_eflags), db_frame },
+#define DB_N_SHOW_REGS 15 /* Don't show registers after here. */
{ "dr0", NULL, db_dr0 },
{ "dr1", NULL, db_dr1 },
{ "dr2", NULL, db_dr2 },
@@ -89,7 +90,7 @@
{ "dr6", NULL, db_dr6 },
{ "dr7", NULL, db_dr7 },
};
-struct db_variable *db_eregs = db_regs + sizeof(db_regs)/sizeof(db_regs[0]);
+struct db_variable *db_eregs = db_regs + DB_N_SHOW_REGS;
#define DB_DRX_FUNC(reg) \
static int \
@@ -182,19 +183,17 @@
#define INTERRUPT 2
#define SYSCALL 3
#define DOUBLE_FAULT 4
+#define TRAP_INTERRUPT 5
static void db_nextframe(struct i386_frame **, db_addr_t *, struct thread *);
static int db_numargs(struct i386_frame *);
static void db_print_stack_entry(const char *, int, char **, int *, db_addr_t);
static void decode_syscall(int, struct thread *);
-static char * watchtype_str(int type);
+static const char * watchtype_str(int type);
int i386_set_watch(int watchnum, unsigned int watchaddr, int size, int access,
- struct dbreg * d);
-int i386_clr_watch(int watchnum, struct dbreg * d);
-int db_md_set_watchpoint(db_expr_t addr, db_expr_t size);
-int db_md_clr_watchpoint(db_expr_t addr, db_expr_t size);
-void db_md_list_watchpoints(void);
+ struct dbreg *d);
+int i386_clr_watch(int watchnum, struct dbreg *d);
/*
* Figure out how many arguments were passed into the frame at "fp".
@@ -203,26 +202,30 @@
db_numargs(fp)
struct i386_frame *fp;
{
- int *argp;
+ char *argp;
int inst;
int args;
- argp = (int *)db_get_value((int)&fp->f_retaddr, 4, FALSE);
+ argp = (char *)db_get_value((int)&fp->f_retaddr, 4, FALSE);
/*
* XXX etext is wrong for LKMs. We should attempt to interpret
* the instruction at the return address in all cases. This
* may require better fault handling.
*/
- if (argp < (int *)btext || argp >= (int *)etext) {
- args = 5;
+ if (argp < btext || argp >= etext) {
+ args = -1;
} else {
+retry:
inst = db_get_value((int)argp, 4, FALSE);
if ((inst & 0xff) == 0x59) /* popl %ecx */
args = 1;
else if ((inst & 0xffff) == 0xc483) /* addl $Ibs, %esp */
args = ((inst >> 16) & 0xff) / 4;
- else
- args = 5;
+ else if ((inst & 0xf8ff) == 0xc089) { /* movl %eax, %Reg */
+ argp += 2;
+ goto retry;
+ } else
+ args = -1;
}
return (args);
}
@@ -235,15 +238,19 @@
int *argp;
db_addr_t callpc;
{
+ int n = narg >= 0 ? narg : 5;
+
db_printf("%s(", name);
- while (narg) {
+ while (n) {
if (argnp)
db_printf("%s=", *argnp++);
db_printf("%r", db_get_value((int)argp, 4, FALSE));
argp++;
- if (--narg != 0)
+ if (--n != 0)
db_printf(",");
}
+ if (narg < 0)
+ db_printf(",...");
db_printf(") at ");
db_printsym(callpc, DB_STGY_PROC);
db_printf("\n");
@@ -311,6 +318,13 @@
frame_type = SYSCALL;
else if (strcmp(name, "dblfault_handler") == 0)
frame_type = DOUBLE_FAULT;
+ /* XXX: These are interrupts with trap frames. */
+ else if (strcmp(name, "Xtimerint") == 0 ||
+ strcmp(name, "Xcpustop") == 0 ||
+ strcmp(name, "Xrendezvous") == 0 ||
+ strcmp(name, "Xipi_intr_bitmap_handler") == 0 ||
+ strcmp(name, "Xlazypmap") == 0)
+ frame_type = TRAP_INTERRUPT;
}
/*
@@ -346,9 +360,9 @@
* current frame.
*/
if (frame_type == INTERRUPT)
- tf = (struct trapframe *)((int)*fp + 12);
+ tf = (struct trapframe *)((int)*fp + 16);
else
- tf = (struct trapframe *)((int)*fp + 8);
+ tf = (struct trapframe *)((int)*fp + 12);
if (INKERNEL((int) tf)) {
esp = get_esp(tf);
@@ -362,6 +376,7 @@
db_printf("--- syscall");
decode_syscall(tf->tf_eax, td);
break;
+ case TRAP_INTERRUPT:
case INTERRUPT:
db_printf("--- interrupt");
break;
@@ -387,16 +402,38 @@
int *argp;
db_expr_t offset;
c_db_sym_t sym;
- int narg, quit;
+ int instr, narg;
boolean_t first;
+ /*
+ * If an indirect call via an invalid pointer caused a trap,
+ * %pc contains the invalid address while the return address
+ * of the unlucky caller has been saved by CPU on the stack
+ * just before the trap frame. In this case, try to recover
+ * the caller's address so that the first frame is assigned
+ * to the right spot in the right function, for that is where
+ * the failure actually happened.
+ *
+ * This trick depends on the fault address stashed in tf_err
+ * by trap_fatal() before entering KDB.
+ */
+ if (kdb_frame && pc == kdb_frame->tf_err) {
+ /*
+ * Find where the trap frame actually ends.
+ * It won't contain tf_esp or tf_ss unless crossing rings.
+ */
+ if (ISPL(kdb_frame->tf_cs))
+ instr = (int)(kdb_frame + 1);
+ else
+ instr = (int)&kdb_frame->tf_esp;
+ pc = db_get_value(instr, 4, FALSE);
+ }
+
if (count == -1)
count = 1024;
first = TRUE;
- quit = 0;
- db_setup_paging(db_simple_pager, &quit, db_lines_per_page);
- while (count-- && !quit) {
+ while (count-- && !db_pager_quit) {
sym = db_search_symbol(pc, DB_STGY_ANY, &offset);
db_symbol_values(sym, &name, NULL);
@@ -414,8 +451,6 @@
actframe = frame;
if (first) {
if (tf != NULL) {
- int instr;
-
instr = db_get_value(pc, 4, FALSE);
if ((instr & 0xffffff) == 0x00e58955) {
/* pushl %ebp; movl %esp, %ebp */
@@ -534,21 +569,20 @@
unsigned int watchaddr;
int size;
int access;
- struct dbreg * d;
+ struct dbreg *d;
{
- int i;
- unsigned int mask;
-
+ int i, len;
+
if (watchnum == -1) {
- for (i = 0, mask = 0x3; i < 4; i++, mask <<= 2)
- if ((d->dr[7] & mask) == 0)
+ for (i = 0; i < 4; i++)
+ if (!DBREG_DR7_ENABLED(d->dr[7], i))
break;
if (i < 4)
watchnum = i;
else
return (-1);
}
-
+
switch (access) {
case DBREG_DR7_EXEC:
size = 1; /* size must be 1 for an execution breakpoint */
@@ -556,29 +590,36 @@
case DBREG_DR7_WRONLY:
case DBREG_DR7_RDWR:
break;
- default : return (-1);
+ default:
+ return (-1);
}
-
+
/*
* we can watch a 1, 2, or 4 byte sized location
*/
switch (size) {
- case 1 : mask = 0x00; break;
- case 2 : mask = 0x01 << 2; break;
- case 4 : mask = 0x03 << 2; break;
- default : return (-1);
+ case 1:
+ len = DBREG_DR7_LEN_1;
+ break;
+ case 2:
+ len = DBREG_DR7_LEN_2;
+ break;
+ case 4:
+ len = DBREG_DR7_LEN_4;
+ break;
+ default:
+ return (-1);
}
- mask |= access;
-
/* clear the bits we are about to affect */
- d->dr[7] &= ~((0x3 << (watchnum*2)) | (0x0f << (watchnum*4+16)));
+ d->dr[7] &= ~DBREG_DR7_MASK(watchnum);
/* set drN register to the address, N=watchnum */
- DBREG_DRX(d,watchnum) = watchaddr;
+ DBREG_DRX(d, watchnum) = watchaddr;
/* enable the watchpoint */
- d->dr[7] |= (0x2 << (watchnum*2)) | (mask << (watchnum*4+16));
+ d->dr[7] |= DBREG_DR7_SET(watchnum, len, access,
+ DBREG_DR7_GLOBAL_ENABLE);
return (watchnum);
}
@@ -587,15 +628,15 @@
int
i386_clr_watch(watchnum, d)
int watchnum;
- struct dbreg * d;
+ struct dbreg *d;
{
if (watchnum < 0 || watchnum >= 4)
return (-1);
-
- d->dr[7] = d->dr[7] & ~((0x3 << (watchnum*2)) | (0x0f << (watchnum*4+16)));
- DBREG_DRX(d,watchnum) = 0;
-
+
+ d->dr[7] &= ~DBREG_DR7_MASK(watchnum);
+ DBREG_DRX(d, watchnum) = 0;
+
return (0);
}
@@ -605,38 +646,35 @@
db_expr_t addr;
db_expr_t size;
{
- int avail, wsize;
- int i;
struct dbreg d;
-
+ int avail, i, wsize;
+
fill_dbregs(NULL, &d);
-
+
avail = 0;
- for(i=0; i<4; i++) {
- if ((d.dr[7] & (3 << (i*2))) == 0)
+ for(i = 0; i < 4; i++) {
+ if (!DBREG_DR7_ENABLED(d.dr[7], i))
avail++;
}
-
- if (avail*4 < size)
+
+ if (avail * 4 < size)
return (-1);
-
- for (i=0; i<4 && (size != 0); i++) {
- if ((d.dr[7] & (3<<(i*2))) == 0) {
- if (size > 4)
+
+ for (i = 0; i < 4 && (size > 0); i++) {
+ if (!DBREG_DR7_ENABLED(d.dr[7], i)) {
+ if (size > 2)
wsize = 4;
else
wsize = size;
- if (wsize == 3)
- wsize++;
- i386_set_watch(i, addr, wsize,
+ i386_set_watch(i, addr, wsize,
DBREG_DR7_WRONLY, &d);
addr += wsize;
size -= wsize;
}
}
-
+
set_dbregs(NULL, &d);
-
+
return(0);
}
@@ -646,28 +684,27 @@
db_expr_t addr;
db_expr_t size;
{
- int i;
struct dbreg d;
+ int i;
fill_dbregs(NULL, &d);
- for(i=0; i<4; i++) {
- if (d.dr[7] & (3 << (i*2))) {
- if ((DBREG_DRX((&d), i) >= addr) &&
+ for(i = 0; i < 4; i++) {
+ if (DBREG_DR7_ENABLED(d.dr[7], i)) {
+ if ((DBREG_DRX((&d), i) >= addr) &&
(DBREG_DRX((&d), i) < addr+size))
i386_clr_watch(i, &d);
-
+
}
}
-
+
set_dbregs(NULL, &d);
-
+
return(0);
}
-static
-char *
+static const char *
watchtype_str(type)
int type;
{
@@ -683,31 +720,30 @@
void
db_md_list_watchpoints()
{
- int i;
struct dbreg d;
+ int i, len, type;
fill_dbregs(NULL, &d);
db_printf("\nhardware watchpoints:\n");
db_printf(" watch status type len address\n");
db_printf(" ----- -------- ---------- --- ----------\n");
- for (i=0; i<4; i++) {
- if (d.dr[7] & (0x03 << (i*2))) {
- unsigned type, len;
- type = (d.dr[7] >> (16+(i*4))) & 3;
- len = (d.dr[7] >> (16+(i*4)+2)) & 3;
- db_printf(" %-5d %-8s %10s %3d 0x%08x\n",
- i, "enabled", watchtype_str(type),
- len+1, DBREG_DRX((&d),i));
- }
- else {
+ for (i = 0; i < 4; i++) {
+ if (DBREG_DR7_ENABLED(d.dr[7], i)) {
+ type = DBREG_DR7_ACCESS(d.dr[7], i);
+ len = DBREG_DR7_LEN(d.dr[7], i);
+ db_printf(" %-5d %-8s %10s %3d ",
+ i, "enabled", watchtype_str(type), len + 1);
+ db_printsym((db_addr_t)DBREG_DRX((&d), i), DB_STGY_ANY);
+ db_printf("\n");
+ } else {
db_printf(" %-5d disabled\n", i);
}
}
-
+
db_printf("\ndebug register values:\n");
- for (i=0; i<8; i++) {
- db_printf(" dr%d 0x%08x\n", i, DBREG_DRX((&d),i));
+ for (i = 0; i < 8; i++) {
+ db_printf(" dr%d 0x%08x\n", i, DBREG_DRX((&d), i));
}
db_printf("\n");
}
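
The db_trace.c cleanup replaces open-coded debug-register bit twiddling
with DBREG_DR7_* accessors. The DR7 layout those macros encode: each
watchpoint n owns a local/global enable pair at bits 2n..2n+1 and a 4-bit
field at bit 16+4n (two access-type bits, then two length bits). A sketch
of equivalent helpers, derived from the old expressions visible in the
diff (the real definitions live in machine/reg.h; these names are
illustrative):

/* Sketch: DR7 field helpers matching the removed open-coded forms. */
#define DR7_ENABLED(d7, n)   (((d7) >> ((n) * 2)) & 0x3)      /* L/G pair */
#define DR7_ACCESS(d7, n)    (((d7) >> (16 + (n) * 4)) & 0x3) /* R/W type */
#define DR7_LEN(d7, n)       (((d7) >> (16 + (n) * 4 + 2)) & 0x3)
#define DR7_MASK(n)          ((0x3 << ((n) * 2)) | (0xf << (16 + (n) * 4)))
#define DR7_GLOBAL_ENABLE(n) (0x2 << ((n) * 2))
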
Index: support.s
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/support.s,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/support.s -L sys/i386/i386/support.s -u -r1.1.1.1 -r1.2
--- sys/i386/i386/support.s
+++ sys/i386/i386/support.s
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/i386/i386/support.s,v 1.107 2005/04/21 23:07:20 alc Exp $
+ * $FreeBSD: src/sys/i386/i386/support.s,v 1.119 2007/08/22 05:06:14 jkoshy Exp $
*/
#include "opt_npx.h"
@@ -80,6 +80,7 @@
ENTRY(bzero)
MEXITCOUNT
jmp *bzero_vector
+END(bzero)
ENTRY(generic_bzero)
pushl %edi
@@ -96,7 +97,8 @@
stosb
popl %edi
ret
-
+END(generic_bzero)
+
#ifdef I486_CPU
ENTRY(i486_bzero)
movl 4(%esp),%edx
@@ -197,6 +199,7 @@
SUPERALIGN_TEXT
do0:
ret
+END(i486_bzero)
#endif
#if defined(I586_CPU) && defined(DEV_NPX)
@@ -355,6 +358,7 @@
stosb
popl %edi
ret
+END(i586_bzero)
#endif /* I586_CPU && defined(DEV_NPX) */
ENTRY(sse2_pagezero)
@@ -371,18 +375,19 @@
sfence
popl %ebx
ret
+END(sse2_pagezero)
ENTRY(i686_pagezero)
pushl %edi
pushl %ebx
- movl 12(%esp), %edi
- movl $1024, %ecx
+ movl 12(%esp),%edi
+ movl $1024,%ecx
cld
ALIGN_TEXT
1:
- xorl %eax, %eax
+ xorl %eax,%eax
repe
scasl
jnz 2f
@@ -395,32 +400,33 @@
2:
incl %ecx
- subl $4, %edi
+ subl $4,%edi
- movl %ecx, %edx
- cmpl $16, %ecx
+ movl %ecx,%edx
+ cmpl $16,%ecx
jge 3f
- movl %edi, %ebx
- andl $0x3f, %ebx
+ movl %edi,%ebx
+ andl $0x3f,%ebx
shrl %ebx
shrl %ebx
- movl $16, %ecx
- subl %ebx, %ecx
+ movl $16,%ecx
+ subl %ebx,%ecx
3:
- subl %ecx, %edx
+ subl %ecx,%edx
rep
stosl
- movl %edx, %ecx
- testl %edx, %edx
+ movl %edx,%ecx
+ testl %edx,%edx
jnz 1b
popl %ebx
popl %edi
ret
+END(i686_pagezero)
/* fillw(pat, base, cnt) */
ENTRY(fillw)
@@ -433,6 +439,7 @@
stosw
popl %edi
ret
+END(fillw)
ENTRY(bcopyb)
pushl %esi
@@ -464,10 +471,12 @@
popl %esi
cld
ret
+END(bcopyb)
ENTRY(bcopy)
MEXITCOUNT
jmp *bcopy_vector
+END(bcopy)
/*
* generic_bcopy(src, dst, cnt)
@@ -517,6 +526,7 @@
popl %esi
cld
ret
+END(generic_bcopy)
#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_bcopy)
@@ -665,6 +675,7 @@
popl %esi
cld
ret
+END(i586_bcopy)
#endif /* I586_CPU && defined(DEV_NPX) */
/*
@@ -688,7 +699,7 @@
popl %esi
popl %edi
ret
-
+END(memcpy)
/*****************************************************************************/
/* copyout and fubyte family */
@@ -714,6 +725,7 @@
ENTRY(copyout)
MEXITCOUNT
jmp *copyout_vector
+END(copyout)
ENTRY(generic_copyout)
movl PCPU(CURPCB),%eax
@@ -773,6 +785,7 @@
movl PCPU(CURPCB),%edx
movl %eax,PCB_ONFAULT(%edx)
ret
+END(generic_copyout)
ALIGN_TEXT
copyout_fault:
@@ -836,6 +849,7 @@
call fastmove
addl $4,%esp
jmp done_copyout
+END(i586_copyout)
#endif /* I586_CPU && defined(DEV_NPX) */
/*
@@ -844,6 +858,7 @@
ENTRY(copyin)
MEXITCOUNT
jmp *copyin_vector
+END(copyin)
ENTRY(generic_copyin)
movl PCPU(CURPCB),%eax
@@ -887,6 +902,7 @@
movl PCPU(CURPCB),%edx
movl %eax,PCB_ONFAULT(%edx)
ret
+END(generic_copyin)
ALIGN_TEXT
copyin_fault:
@@ -930,6 +946,7 @@
call fastmove
addl $8,%esp
jmp done_copyin
+END(i586_copyin)
#endif /* I586_CPU && defined(DEV_NPX) */
#if defined(I586_CPU) && defined(DEV_NPX)
@@ -1137,12 +1154,15 @@
movl $0,PCB_ONFAULT(%edx)
movl $EFAULT,%eax
ret
+END(fastmove)
#endif /* I586_CPU && defined(DEV_NPX) */
/*
- * casuptr. Compare and set user pointer. Returns -1 or the current value.
+ * casuword. Compare and set user word. Returns -1 or the current value.
*/
-ENTRY(casuptr)
+
+ALTENTRY(casuword32)
+ENTRY(casuword)
movl PCPU(CURPCB),%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
movl 4(%esp),%edx /* dst */
@@ -1155,7 +1175,7 @@
#ifdef SMP
lock
#endif
- cmpxchgl %ecx, (%edx) /* Compare and set. */
+ cmpxchgl %ecx,(%edx) /* Compare and set. */
/*
* The old value is in %eax. If the store succeeded it will be the
@@ -1167,6 +1187,8 @@
movl $fusufault,PCB_ONFAULT(%ecx)
movl $0,PCB_ONFAULT(%ecx)
ret
+END(casuword32)
+END(casuword)
/*
* Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
@@ -1185,6 +1207,8 @@
movl (%edx),%eax
movl $0,PCB_ONFAULT(%ecx)
ret
+END(fuword32)
+END(fuword)
/*
* fuswintr() and suswintr() are specialized variants of fuword16() and
@@ -1197,6 +1221,8 @@
ENTRY(fuswintr)
movl $-1,%eax
ret
+END(suswintr)
+END(fuswintr)
ENTRY(fuword16)
movl PCPU(CURPCB),%ecx
@@ -1209,6 +1235,7 @@
movzwl (%edx),%eax
movl $0,PCB_ONFAULT(%ecx)
ret
+END(fuword16)
ENTRY(fubyte)
movl PCPU(CURPCB),%ecx
@@ -1221,6 +1248,7 @@
movzbl (%edx),%eax
movl $0,PCB_ONFAULT(%ecx)
ret
+END(fubyte)
ALIGN_TEXT
fusufault:
@@ -1250,6 +1278,8 @@
movl PCPU(CURPCB),%ecx
movl %eax,PCB_ONFAULT(%ecx)
ret
+END(suword32)
+END(suword)
ENTRY(suword16)
movl PCPU(CURPCB),%ecx
@@ -1265,6 +1295,7 @@
movl PCPU(CURPCB),%ecx /* restore trashed register */
movl %eax,PCB_ONFAULT(%ecx)
ret
+END(suword16)
ENTRY(subyte)
movl PCPU(CURPCB),%ecx
@@ -1280,6 +1311,7 @@
movl PCPU(CURPCB),%ecx /* restore trashed register */
movl %eax,PCB_ONFAULT(%ecx)
ret
+END(subyte)
/*
* copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
@@ -1352,7 +1384,7 @@
popl %edi
popl %esi
ret
-
+END(copyinstr)
/*
* copystr(from, to, maxlen, int *lencopied) - MP SAFE
@@ -1394,6 +1426,7 @@
popl %edi
popl %esi
ret
+END(copystr)
ENTRY(bcmp)
pushl %edi
@@ -1419,7 +1452,7 @@
popl %esi
popl %edi
ret
-
+END(bcmp)
/*
* Handling of special 386 registers and descriptor tables etc
@@ -1449,6 +1482,7 @@
movl $KCSEL,4(%esp)
MEXITCOUNT
lret
+END(lgdt)
/* ssdtosd(*ssdp,*sdp) */
ENTRY(ssdtosd)
@@ -1470,6 +1504,7 @@
movl %ebx,4(%ecx)
popl %ebx
ret
+END(ssdtosd)
/* void reset_dbregs() */
ENTRY(reset_dbregs)
@@ -1481,6 +1516,7 @@
movl %eax,%dr3
movl %eax,%dr6
ret
+END(reset_dbregs)
/*****************************************************************************/
/* setjump, longjump */
@@ -1497,6 +1533,7 @@
movl %edx,20(%eax) /* save eip */
xorl %eax,%eax /* return(0); */
ret
+END(setjmp)
ENTRY(longjmp)
movl 4(%esp),%eax
@@ -1510,6 +1547,7 @@
xorl %eax,%eax /* return(1); */
incl %eax
ret
+END(longjmp)
/*
* Support for BB-profiling (gcc -a). The kernbb program will extract
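
Besides bracketing every ENTRY() with END() so each symbol gets an ELF
size, support.s renames casuptr to casuword/casuword32: a compare-and-set
on a user-space word that returns the value observed. A userland sketch of
those semantics using a compiler builtin; note the real routine also arms
a PCB_ONFAULT handler to survive bad user addresses, which this omits:

#include <stdint.h>

/*
 * Sketch: compare-and-set semantics behind casuword32. Like the
 * lock cmpxchgl in the diff, this returns the value observed at
 * *p (equal to expect iff the store happened).
 */
static uint32_t
cas32(volatile uint32_t *p, uint32_t expect, uint32_t newv)
{
    uint32_t old = expect;

    __atomic_compare_exchange_n(p, &old, newv, 0,
        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    return (old);
}
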
Index: mp_watchdog.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/mp_watchdog.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/mp_watchdog.c -L sys/i386/i386/mp_watchdog.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/mp_watchdog.c
+++ sys/i386/i386/mp_watchdog.c
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/i386/i386/mp_watchdog.c,v 1.4 2005/02/27 22:34:07 pjd Exp $
+ * $FreeBSD: src/sys/i386/i386/mp_watchdog.c,v 1.5 2007/06/04 23:56:33 jeff Exp $
*/
#include "opt_mp_watchdog.h"
@@ -105,9 +105,7 @@
* locks to make sure. Then reset the timer.
*/
mtx_lock(&Giant);
- mtx_lock_spin(&sched_lock);
watchdog_timer = WATCHDOG_THRESHOLD;
- mtx_unlock_spin(&sched_lock);
mtx_unlock(&Giant);
callout_reset(&watchdog_callout, 1 * hz, watchdog_function, NULL);
}
@@ -156,34 +154,6 @@
sysctl_watchdog, "I", "");
/*
- * A badly behaved sysctl that leaks the sched lock when written to. Then
- * spin holding it just to make matters worse. This can be used to test the
- * effectiveness of the watchdog by generating a fairly hard and nast hang.
- * Note that Giant is also held in the current world order when we get here.
- */
-static int
-sysctl_leak_schedlock(SYSCTL_HANDLER_ARGS)
-{
- int error, temp;
-
- temp = 0;
- error = sysctl_handle_int(oidp, &temp, 0, req);
- if (error)
- return (error);
-
- if (req->newptr != NULL) {
- if (temp) {
- printf("Leaking the sched lock...\n");
- mtx_lock_spin(&sched_lock);
- while (1);
- }
- }
- return (0);
-}
-SYSCTL_PROC(_debug, OID_AUTO, leak_schedlock, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
- sysctl_leak_schedlock, "IU", "");
-
-/*
* Drop into the debugger by sending an IPI NMI to the boot processor.
*/
static void
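
With sched_lock gone from the timer reset, mp_watchdog.c is back to the
plain self-rearming callout pattern. A simplified kernel-side sketch (the
WATCHDOG_THRESHOLD value is illustrative):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>

#define WATCHDOG_THRESHOLD  10          /* illustrative value */

static struct callout watchdog_callout;
static int watchdog_timer;

/*
 * Re-arm once per second; a wedged system stops getting here, the
 * timer runs down, and the NMI-side check notices.
 */
static void
watchdog_function(void *arg)
{
    watchdog_timer = WATCHDOG_THRESHOLD;
    callout_reset(&watchdog_callout, 1 * hz, watchdog_function, NULL);
}
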
Index: mp_clock.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/mp_clock.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/mp_clock.c -L sys/i386/i386/mp_clock.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/mp_clock.c
+++ sys/i386/i386/mp_clock.c
@@ -8,7 +8,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/mp_clock.c,v 1.19 2004/05/30 20:34:57 phk Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/mp_clock.c,v 1.20 2007/06/04 18:25:06 dwmalone Exp $");
/*-
* Just when we thought life were beautiful, reality pops its grim face over
@@ -71,7 +71,7 @@
if (piix_timecounter.tc_frequency == 0)
return (EOPNOTSUPP);
freq = piix_freq;
- error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
+ error = sysctl_handle_int(oidp, &freq, 0, req);
if (error == 0 && req->newptr != NULL) {
piix_freq = freq;
piix_timecounter.tc_frequency = piix_freq;
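
The mp_clock.c fix is to sysctl_handle_int()'s third argument, which is an
arg2 value, not a byte count; passing sizeof(freq) there was wrong even if
harmless. The corrected handler shape, sketched (piix_freq stands in for
the real timecounter state):

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static u_int piix_freq;

static int
sysctl_piix_freq(SYSCTL_HANDLER_ARGS)
{
    u_int freq;
    int error;

    freq = piix_freq;
    /* Third argument is arg2 (unused here), not a size: pass 0. */
    error = sysctl_handle_int(oidp, &freq, 0, req);
    if (error == 0 && req->newptr != NULL)
        piix_freq = freq;
    return (error);
}
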
Index: mp_machdep.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/mp_machdep.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/i386/i386/mp_machdep.c -L sys/i386/i386/mp_machdep.c -u -r1.2 -r1.3
--- sys/i386/i386/mp_machdep.c
+++ sys/i386/i386/mp_machdep.c
@@ -24,14 +24,14 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.252.2.5.2.1 2006/04/28 06:54:34 cperciva Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.281.2.2 2007/11/28 23:24:07 cperciva Exp $");
#include "opt_apic.h"
#include "opt_cpu.h"
-#include "opt_kdb.h"
#include "opt_kstack_pages.h"
#include "opt_mp_watchdog.h"
#include "opt_sched.h"
+#include "opt_smp.h"
#if !defined(lint)
#if !defined(SMP)
@@ -61,6 +61,7 @@
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
+#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
@@ -71,12 +72,11 @@
#include <vm/vm_extern.h>
#include <machine/apicreg.h>
-#include <machine/clock.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
+#include <machine/psl.h>
#include <machine/smp.h>
-#include <machine/smptests.h> /** COUNT_XINVLTLB_HITS */
#include <machine/specialreg.h>
#include <machine/privatespace.h>
@@ -127,29 +127,9 @@
#endif /* CHECK_POINTS */
-/*
- * Values to send to the POST hardware.
- */
-#define MP_BOOTADDRESS_POST 0x10
-#define MP_PROBE_POST 0x11
-#define MPTABLE_PASS1_POST 0x12
-
-#define MP_START_POST 0x13
-#define MP_ENABLE_POST 0x14
-#define MPTABLE_PASS2_POST 0x15
-
-#define START_ALL_APS_POST 0x16
-#define INSTALL_AP_TRAMP_POST 0x17
-#define START_AP_POST 0x18
-
-#define MP_ANNOUNCE_POST 0x19
-
/* lock region used by kernel profiling */
int mcount_lock;
-/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
-int current_postcode;
-
int mp_naps; /* # of Applications processors */
int boot_cpu_id = -1; /* designated BSP */
extern int nkpt;
@@ -177,19 +157,20 @@
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;
-#ifdef KDB_STOP_NMI
+#ifdef STOP_NMI
volatile cpumask_t ipi_nmi_pending;
+
+static void ipi_nmi_selected(u_int32_t cpus);
#endif
#ifdef COUNT_IPIS
/* Interrupt counts. */
-#ifdef IPI_PREEMPTION
static u_long *ipi_preempt_counts[MAXCPU];
-#endif
static u_long *ipi_ast_counts[MAXCPU];
u_long *ipi_invltlb_counts[MAXCPU];
u_long *ipi_invlrng_counts[MAXCPU];
u_long *ipi_invlpg_counts[MAXCPU];
+u_long *ipi_invlcache_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
u_long *ipi_lazypmap_counts[MAXCPU];
#endif
@@ -198,6 +179,20 @@
* Local data and functions.
*/
+#ifdef STOP_NMI
+/*
+ * Provide an alternate method of stopping other CPUs. If another CPU has
+ * disabled interrupts the conventional STOP IPI will be blocked. This
+ * NMI-based stop should get through in that case.
+ */
+static int stop_cpus_with_nmi = 1;
+SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW,
+ &stop_cpus_with_nmi, 0, "");
+TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi);
+#else
+#define stop_cpus_with_nmi 0
+#endif
+
static u_int logical_cpus;
/* used to hold the AP's until we are ready to release them */
@@ -214,24 +209,25 @@
int cpu_present:1;
int cpu_bsp:1;
int cpu_disabled:1;
-} static cpu_info[MAXCPU];
-static int cpu_apic_ids[MAXCPU];
+} static cpu_info[MAX_APIC_ID + 1];
+int cpu_apic_ids[MAXCPU];
/* Holds pending bitmap based IPIs per CPU */
static volatile u_int cpu_ipi_pending[MAXCPU];
static u_int boot_address;
+static void assign_cpu_ids(void);
+static void install_ap_tramp(void);
static void set_interrupt_apic_ids(void);
static int start_all_aps(void);
-static void install_ap_tramp(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);
static int hlt_logical_cpus;
static u_int hyperthreading_cpus;
static cpumask_t hyperthreading_cpus_mask;
-static int hyperthreading_allowed;
+static int hyperthreading_allowed = 1;
static struct sysctl_ctx_list logical_cpu_clist;
static void
@@ -245,28 +241,25 @@
mp_topology(void)
{
struct cpu_group *group;
- int logical_cpus;
int apic_id;
int groups;
int cpu;
/* Build the smp_topology map. */
/* Nothing to do if there is no HTT support. */
- if ((cpu_feature & CPUID_HTT) == 0)
- return;
- logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
- if (logical_cpus <= 1)
+ if (hyperthreading_cpus <= 1)
return;
group = &mp_groups[0];
groups = 1;
- for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
+ for (cpu = 0, apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) {
if (!cpu_info[apic_id].cpu_present)
continue;
/*
* If the current group has members and we're not a logical
* cpu, create a new group.
*/
- if (group->cg_count != 0 && (apic_id % logical_cpus) == 0) {
+ if (group->cg_count != 0 &&
+ (apic_id % hyperthreading_cpus) == 0) {
group++;
groups++;
}
@@ -287,7 +280,6 @@
u_int
mp_bootaddress(u_int basemem)
{
- POSTCODE(MP_BOOTADDRESS_POST);
boot_address = trunc_page(basemem); /* round down to 4k boundary */
if ((basemem - boot_address) < bootMP_size)
@@ -300,9 +292,8 @@
cpu_add(u_int apic_id, char boot_cpu)
{
- if (apic_id >= MAXCPU) {
- printf("SMP: CPU %d exceeds maximum CPU %d, ignoring\n",
- apic_id, MAXCPU - 1);
+ if (apic_id > MAX_APIC_ID) {
+ panic("SMP: APIC ID %d too high", apic_id);
return;
}
KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
@@ -315,11 +306,11 @@
boot_cpu_id = apic_id;
cpu_info[apic_id].cpu_bsp = 1;
}
- mp_ncpus++;
+ if (mp_ncpus < MAXCPU)
+ mp_ncpus++;
if (bootverbose)
printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
"AP");
-
}
void
@@ -370,8 +361,6 @@
int i;
u_int threads_per_cache, p[4];
- POSTCODE(MP_START_POST);
-
/* Initialize the logical ID to APIC ID table. */
for (i = 0; i < MAXCPU; i++) {
cpu_apic_ids[i] = -1;
@@ -385,7 +374,11 @@
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(IPI_INVLRNG, IDTVEC(invlrng),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-
+
+ /* Install an inter-CPU IPI for cache invalidation. */
+ setidt(IPI_INVLCACHE, IDTVEC(invlcache),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
/* Install an inter-CPU IPI for lazy pmap release */
setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
@@ -412,6 +405,8 @@
("BSP's APIC ID doesn't match boot_cpu_id"));
cpu_apic_ids[0] = boot_cpu_id;
+ assign_cpu_ids();
+
/* Start each Application Processor */
start_all_aps();
@@ -463,6 +458,9 @@
}
set_interrupt_apic_ids();
+
+ /* Last, setup the cpu topology now that we have probed CPUs */
+ mp_topology();
}
@@ -474,11 +472,9 @@
{
int i, x;
- POSTCODE(MP_ANNOUNCE_POST);
-
/* List CPUs */
printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
- for (i = 1, x = 0; x < MAXCPU; x++) {
+ for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
continue;
if (cpu_info[x].cpu_disabled)
@@ -564,6 +560,9 @@
lidt(&r_idt);
#endif
+ /* Initialize the PAT MSR if present. */
+ pmap_init_pat();
+
/* set up CPU registers and state */
cpu_setregs();
@@ -573,6 +572,16 @@
/* set up SSE registers */
enable_sse();
+#ifdef PAE
+ /* Enable the PTE no-execute bit. */
+ if ((amd_feature & AMDID_NX) != 0) {
+ uint64_t msr;
+
+ msr = rdmsr(MSR_EFER) | EFER_NXE;
+ wrmsr(MSR_EFER, msr);
+ }
+#endif
+
/* A quick check from sanity claus */
if (PCPU_GET(apic_id) != lapic_id()) {
printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
@@ -589,7 +598,7 @@
mtx_lock_spin(&ap_boot_mtx);
/* Init local apic for irq's */
- lapic_setup();
+ lapic_setup(1);
/* Set memory range attributes for this CPU to match the BSP */
mem_range_AP_init();
@@ -626,25 +635,8 @@
while (smp_started == 0)
ia32_pause();
- /* ok, now grab sched_lock and enter the scheduler */
- mtx_lock_spin(&sched_lock);
-
- /*
- * Correct spinlock nesting. The idle thread context that we are
- * borrowing was created so that it would start out with a single
- * spin lock (sched_lock) held in fork_trampoline(). Since we've
- * explicitly acquired locks in this function, the nesting count
- * is now 2 rather than 1. Since we are nested, calling
- * spinlock_exit() will simply adjust the counts without allowing
- * spin lock using code to interrupt us.
- */
- spinlock_exit();
- KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
-
- binuptime(PCPU_PTR(switchtime));
- PCPU_SET(switchticks, ticks);
-
- cpu_throw(NULL, choosethread()); /* doesn't return */
+ /* enter the scheduler */
+ sched_throw(NULL);
panic("scheduler returned us to %s", __func__);
/* NOTREACHED */
@@ -664,24 +656,69 @@
static void
set_interrupt_apic_ids(void)
{
- u_int apic_id;
+ u_int i, apic_id;
- for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
- if (!cpu_info[apic_id].cpu_present)
+ for (i = 0; i < MAXCPU; i++) {
+ apic_id = cpu_apic_ids[i];
+ if (apic_id == -1)
continue;
if (cpu_info[apic_id].cpu_bsp)
continue;
+ if (cpu_info[apic_id].cpu_disabled)
+ continue;
/* Don't let hyperthreads service interrupts. */
if (hyperthreading_cpus > 1 &&
apic_id % hyperthreading_cpus != 0)
continue;
- intr_add_cpu(apic_id);
+ intr_add_cpu(i);
}
}
/*
+ * Assign logical CPU IDs to local APICs.
+ */
+static void
+assign_cpu_ids(void)
+{
+ u_int i;
+
+ /* Check for explicitly disabled CPUs. */
+ for (i = 0; i <= MAX_APIC_ID; i++) {
+ if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
+ continue;
+
+ /* Don't use this CPU if it has been disabled by a tunable. */
+ if (resource_disabled("lapic", i)) {
+ cpu_info[i].cpu_disabled = 1;
+ continue;
+ }
+ }
+
+ /*
+ * Assign CPU IDs to local APIC IDs and disable any CPUs
+ * beyond MAXCPU. CPU 0 has already been assigned to the BSP,
+ * so we only have to assign IDs for APs.
+ */
+ mp_ncpus = 1;
+ for (i = 0; i <= MAX_APIC_ID; i++) {
+ if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
+ cpu_info[i].cpu_disabled)
+ continue;
+
+ if (mp_ncpus < MAXCPU) {
+ cpu_apic_ids[mp_ncpus] = i;
+ mp_ncpus++;
+ } else
+ cpu_info[i].cpu_disabled = 1;
+ }
+ KASSERT(mp_maxid >= mp_ncpus - 1,
+ ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
+ mp_ncpus));
+}
+
+/*
* start each AP in our list
*/
static int
@@ -696,8 +733,6 @@
u_int32_t mpbioswarmvec;
int apic_id, cpu, i, pg;
- POSTCODE(START_ALL_APS_POST);
-
mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
/* install the AP 1st level boot code */
@@ -719,24 +754,8 @@
invltlb();
/* start each AP */
- for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
-
- /* Ignore non-existent CPUs and the BSP. */
- if (!cpu_info[apic_id].cpu_present ||
- cpu_info[apic_id].cpu_bsp)
- continue;
-
- /* Don't use this CPU if it has been disabled by a tunable. */
- if (resource_disabled("lapic", apic_id)) {
- cpu_info[apic_id].cpu_disabled = 1;
- mp_ncpus--;
- continue;
- }
-
- cpu++;
-
- /* save APIC ID for this logical ID */
- cpu_apic_ids[cpu] = apic_id;
+ for (cpu = 1; cpu < mp_ncpus; cpu++) {
+ apic_id = cpu_apic_ids[cpu];
/* first page of AP's private space */
pg = cpu * i386_btop(sizeof(struct privatespace));
@@ -841,8 +860,6 @@
u_int16_t *dst16;
u_int32_t *dst32;
- POSTCODE(INSTALL_AP_TRAMP_POST);
-
KASSERT (size <= PAGE_SIZE,
("'size' do not fit into PAGE_SIZE, as expected."));
pmap_kenter(va, boot_address);
@@ -893,8 +910,6 @@
int vector, ms;
int cpus;
- POSTCODE(START_AP_POST);
-
/* calculate the vector */
vector = (boot_address >> 12) & 0xff;
@@ -1008,13 +1023,16 @@
ncpu = mp_ncpus - 1; /* does not shootdown self */
if (ncpu < 1)
return; /* no other cpus */
- mtx_assert(&smp_ipi_mtx, MA_OWNED);
+ if (!(read_eflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+ mtx_lock_spin(&smp_ipi_mtx);
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
ipi_all_but_self(vector);
while (smp_tlb_wait < ncpu)
ia32_pause();
+ mtx_unlock_spin(&smp_ipi_mtx);
}
static void
@@ -1042,7 +1060,9 @@
if (ncpu < 1)
return;
}
- mtx_assert(&smp_ipi_mtx, MA_OWNED);
+ if (!(read_eflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+ mtx_lock_spin(&smp_ipi_mtx);
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
@@ -1052,6 +1072,15 @@
ipi_selected(mask, vector);
while (smp_tlb_wait < ncpu)
ia32_pause();
+ mtx_unlock_spin(&smp_ipi_mtx);
+}
+
+void
+smp_cache_flush(void)
+{
+
+ if (smp_started)
+ smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
}
void
@@ -1128,36 +1157,30 @@
}
}
-
void
-ipi_bitmap_handler(struct clockframe frame)
+ipi_bitmap_handler(struct trapframe frame)
{
int cpu = PCPU_GET(cpuid);
u_int ipi_bitmap;
ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
-#ifdef IPI_PREEMPTION
- if (ipi_bitmap & IPI_PREEMPT) {
+ if (ipi_bitmap & (1 << IPI_PREEMPT)) {
+ struct thread *running_thread = curthread;
#ifdef COUNT_IPIS
- *ipi_preempt_counts[cpu]++;
+ (*ipi_preempt_counts[cpu])++;
#endif
- mtx_lock_spin(&sched_lock);
- /* Don't preempt the idle thread */
- if (curthread->td_priority < PRI_MIN_IDLE) {
- struct thread *running_thread = curthread;
- if (running_thread->td_critnest > 1)
- running_thread->td_owepreempt = 1;
- else
- mi_switch(SW_INVOL | SW_PREEMPT, NULL);
- }
- mtx_unlock_spin(&sched_lock);
+ thread_lock(running_thread);
+ if (running_thread->td_critnest > 1)
+ running_thread->td_owepreempt = 1;
+ else
+ mi_switch(SW_INVOL | SW_PREEMPT, NULL);
+ thread_unlock(running_thread);
}
-#endif
- if (ipi_bitmap & IPI_AST) {
+ if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
- *ipi_ast_counts[cpu]++;
+ (*ipi_ast_counts[cpu])++;
#endif
/* Nothing to do for AST */
}
@@ -1179,6 +1202,12 @@
ipi = IPI_BITMAP_VECTOR;
}
+#ifdef STOP_NMI
+ if (ipi == IPI_STOP && stop_cpus_with_nmi) {
+ ipi_nmi_selected(cpus);
+ return;
+ }
+#endif
CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
while ((cpu = ffs(cpus)) != 0) {
cpu--;
@@ -1209,6 +1238,10 @@
ipi_all(u_int ipi)
{
+ if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
+ ipi_selected(all_cpus, ipi);
+ return;
+ }
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
}
@@ -1220,6 +1253,10 @@
ipi_all_but_self(u_int ipi)
{
+ if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
+ ipi_selected(PCPU_GET(other_cpus), ipi);
+ return;
+ }
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}
@@ -1231,11 +1268,15 @@
ipi_self(u_int ipi)
{
+ if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
+ ipi_selected(PCPU_GET(cpumask), ipi);
+ return;
+ }
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
}
-#ifdef KDB_STOP_NMI
+#ifdef STOP_NMI
/*
* send NMI IPI to selected CPUs
*/
@@ -1245,7 +1286,6 @@
void
ipi_nmi_selected(u_int32_t cpus)
{
-
int cpu;
register_t icrlo;
@@ -1254,10 +1294,8 @@
CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);
-
atomic_set_int(&ipi_nmi_pending, cpus);
-
while ((cpu = ffs(cpus)) != 0) {
cpu--;
cpus &= ~(1 << cpu);
@@ -1269,41 +1307,52 @@
if (!lapic_ipi_wait(BEFORE_SPIN))
panic("ipi_nmi_selected: previous IPI has not cleared");
- lapic_ipi_raw(icrlo,cpu_apic_ids[cpu]);
+ lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
}
}
-
int
-ipi_nmi_handler()
+ipi_nmi_handler(void)
{
- int cpu = PCPU_GET(cpuid);
+ int cpumask = PCPU_GET(cpumask);
- if(!(atomic_load_acq_int(&ipi_nmi_pending) & (1 << cpu)))
+ if (!(ipi_nmi_pending & cpumask))
return 1;
- atomic_clear_int(&ipi_nmi_pending,1 << cpu);
+ atomic_clear_int(&ipi_nmi_pending, cpumask);
+ cpustop_handler();
+ return 0;
+}
+
+#endif /* STOP_NMI */
+
+/*
+ * Handle an IPI_STOP by saving our current context and spinning until we
+ * are resumed.
+ */
+void
+cpustop_handler(void)
+{
+ int cpu = PCPU_GET(cpuid);
+ int cpumask = PCPU_GET(cpumask);
savectx(&stoppcbs[cpu]);
/* Indicate that we are stopped */
- atomic_set_int(&stopped_cpus,1 << cpu);
-
+ atomic_set_int(&stopped_cpus, cpumask);
/* Wait for restart */
- while(!(atomic_load_acq_int(&started_cpus) & (1 << cpu)))
+ while (!(started_cpus & cpumask))
ia32_pause();
- atomic_clear_int(&started_cpus,1 << cpu);
- atomic_clear_int(&stopped_cpus,1 << cpu);
+ atomic_clear_int(&started_cpus, cpumask);
+ atomic_clear_int(&stopped_cpus, cpumask);
- if(cpu == 0 && cpustop_restartfunc != NULL)
+ if (cpu == 0 && cpustop_restartfunc != NULL) {
cpustop_restartfunc();
-
- return 0;
+ cpustop_restartfunc = NULL;
+ }
}
-
-#endif /* KDB_STOP_NMI */
/*
* This is called once the rest of the system is up and running and we're
@@ -1315,11 +1364,9 @@
if (mp_ncpus == 1)
return;
- mtx_lock_spin(&sched_lock);
atomic_store_rel_int(&aps_ready, 1);
while (smp_started == 0)
ia32_pause();
- mtx_unlock_spin(&sched_lock);
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
@@ -1482,10 +1529,8 @@
intrcnt_add(buf, &ipi_invlrng_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d: invlpg", i);
intrcnt_add(buf, &ipi_invlpg_counts[i]);
-#ifdef IPI_PREEMPTION
snprintf(buf, sizeof(buf), "cpu%d: preempt", i);
intrcnt_add(buf, &ipi_preempt_counts[i]);
-#endif
snprintf(buf, sizeof(buf), "cpu%d: ast", i);
intrcnt_add(buf, &ipi_ast_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d: rendezvous", i);
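
Among the mp_machdep.c changes, the bitmap IPI handler now treats
IPI_PREEMPT and IPI_AST as bit indices, testing the pending word with
(1 << bit) rather than the raw constant, and the preempt path takes
thread_lock() instead of sched_lock. The dispatch pattern in isolation,
with illustrative bit values:

#include <stdint.h>

/* Illustrative bit indices; the kernel defines the real ones. */
#define IPI_PREEMPT 0
#define IPI_AST     1

/* Sketch: bitmap IPIs are bit numbers, so test with (1 << bit). */
static void
dispatch_bitmap_ipis(uint32_t pending)
{
    if (pending & (1u << IPI_PREEMPT)) {
        /* Preempt: switch threads unless in a critical section. */
    }
    if (pending & (1u << IPI_AST)) {
        /* AST: nothing to do; the trap return path handles it. */
    }
}
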
Index: io_apic.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/io_apic.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/i386/i386/io_apic.c -L sys/i386/i386/io_apic.c -u -r1.2 -r1.3
--- sys/i386/i386/io_apic.c
+++ sys/i386/i386/io_apic.c
@@ -28,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/io_apic.c,v 1.20.2.4 2006/03/07 18:33:21 jhb Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/io_apic.c,v 1.35 2007/06/05 18:57:48 jhb Exp $");
#include "opt_isa.h"
@@ -36,11 +36,15 @@
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
-#include <sys/malloc.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
#include <vm/vm.h>
#include <vm/pmap.h>
@@ -48,6 +52,7 @@
#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <machine/apicvar.h>
+#include <machine/resource.h>
#include <machine/segments.h>
#define IOAPIC_ISA_INTS 16
@@ -60,9 +65,7 @@
#define IRQ_SMI (NUM_IO_INTS + 3)
#define IRQ_DISABLED (NUM_IO_INTS + 4)
-#define TODO printf("%s: not implemented!\n", __func__)
-
-static MALLOC_DEFINE(M_IOAPIC, "I/O APIC", "I/O APIC structures");
+static MALLOC_DEFINE(M_IOAPIC, "io_apic", "I/O APIC structures");
/*
* I/O APIC interrupt source driver. Each pin is assigned an IRQ cookie
@@ -72,6 +75,10 @@
* IRQs behave as PCI IRQs by default. We also assume that the pin for
* IRQ 0 is actually an ExtINT pin. The apic enumerators override the
* configuration of individual pins as indicated by their tables.
+ *
+ * Documentation for the I/O APIC: "82093AA I/O Advanced Programmable
+ * Interrupt Controller (IOAPIC)", May 1996, Intel Corp.
+ * ftp://download.intel.com/design/chipsets/datashts/29056601.pdf
*/
struct ioapic_intsrc {
@@ -84,6 +91,7 @@
u_int io_edgetrigger:1;
u_int io_masked:1;
int io_bus:4;
+ uint32_t io_lowreg;
};
struct ioapic {
@@ -93,6 +101,7 @@
u_int io_intbase:8; /* System Interrupt base */
u_int io_numintr:8;
volatile ioapic_t *io_addr; /* XXX: should use bus_space */
+ vm_paddr_t io_paddr;
STAILQ_ENTRY(ioapic) io_next;
struct ioapic_intsrc io_pins[0];
};
@@ -105,20 +114,20 @@
static void ioapic_disable_source(struct intsrc *isrc, int eoi);
static void ioapic_eoi_source(struct intsrc *isrc);
static void ioapic_enable_intr(struct intsrc *isrc);
+static void ioapic_disable_intr(struct intsrc *isrc);
static int ioapic_vector(struct intsrc *isrc);
static int ioapic_source_pending(struct intsrc *isrc);
static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
enum intr_polarity pol);
-static void ioapic_suspend(struct intsrc *isrc);
-static void ioapic_resume(struct intsrc *isrc);
+static void ioapic_resume(struct pic *pic);
static void ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
static void ioapic_program_intpin(struct ioapic_intsrc *intpin);
static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list);
struct pic ioapic_template = { ioapic_enable_source, ioapic_disable_source,
ioapic_eoi_source, ioapic_enable_intr,
- ioapic_vector, ioapic_source_pending,
- ioapic_suspend, ioapic_resume,
+ ioapic_disable_intr, ioapic_vector,
+ ioapic_source_pending, NULL, ioapic_resume,
ioapic_config_intr, ioapic_assign_cpu };
static int next_ioapic_base;
@@ -202,9 +211,7 @@
mtx_lock_spin(&icu_lock);
if (intpin->io_masked) {
- flags = ioapic_read(io->io_addr,
- IOAPIC_REDTBL_LO(intpin->io_intpin));
- flags &= ~(IOART_INTMASK);
+ flags = intpin->io_lowreg & ~IOART_INTMASK;
ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin),
flags);
intpin->io_masked = 0;
@@ -221,9 +228,7 @@
mtx_lock_spin(&icu_lock);
if (!intpin->io_masked && !intpin->io_edgetrigger) {
- flags = ioapic_read(io->io_addr,
- IOAPIC_REDTBL_LO(intpin->io_intpin));
- flags |= IOART_INTMSET;
+ flags = intpin->io_lowreg | IOART_INTMSET;
ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin),
flags);
intpin->io_masked = 1;
@@ -308,6 +313,7 @@
/* Write the values to the APIC. */
mtx_lock_spin(&icu_lock);
+ intpin->io_lowreg = low;
ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low);
value = ioapic_read(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin));
value &= ~IOART_DEST;
@@ -354,6 +360,23 @@
}
}
+static void
+ioapic_disable_intr(struct intsrc *isrc)
+{
+ struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
+ u_int vector;
+
+ if (intpin->io_vector != 0) {
+ /* Mask this interrupt pin and free its APIC vector. */
+ vector = intpin->io_vector;
+ apic_disable_vector(vector);
+ intpin->io_masked = 1;
+ intpin->io_vector = 0;
+ ioapic_program_intpin(intpin);
+ apic_free_vector(vector, intpin->io_irq);
+ }
+}
+
static int
ioapic_vector(struct intsrc *isrc)
{
@@ -416,24 +439,20 @@
}
static void
-ioapic_suspend(struct intsrc *isrc)
-{
-
- TODO;
-}
-
-static void
-ioapic_resume(struct intsrc *isrc)
+ioapic_resume(struct pic *pic)
{
+ struct ioapic *io = (struct ioapic *)pic;
+ int i;
- ioapic_program_intpin((struct ioapic_intsrc *)isrc);
+ for (i = 0; i < io->io_numintr; i++)
+ ioapic_program_intpin(&io->io_pins[i]);
}
/*
* Create a plain I/O APIC object.
*/
void *
-ioapic_create(uintptr_t addr, int32_t apic_id, int intbase)
+ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase)
{
struct ioapic *io;
struct ioapic_intsrc *intpin;
@@ -442,7 +461,7 @@
uint32_t value;
/* Map the register window so we can access the device. */
- apic = (ioapic_t *)pmap_mapdev(addr, IOAPIC_MEM_REGION);
+ apic = pmap_mapdev(addr, IOAPIC_MEM_REGION);
mtx_lock_spin(&icu_lock);
value = ioapic_read(apic, IOAPIC_VER);
mtx_unlock_spin(&icu_lock);
@@ -473,13 +492,14 @@
intbase = next_ioapic_base;
printf("ioapic%u: Assuming intbase of %d\n", io->io_id,
intbase);
- } else if (intbase != next_ioapic_base)
+ } else if (intbase != next_ioapic_base && bootverbose)
printf("ioapic%u: WARNING: intbase %d != expected base %d\n",
io->io_id, intbase, next_ioapic_base);
io->io_intbase = intbase;
next_ioapic_base = intbase + numintr;
io->io_numintr = numintr;
io->io_addr = apic;
+ io->io_paddr = addr;
/*
* Initialize pins. Start off with interrupts disabled. Default
@@ -517,13 +537,6 @@
* be routed to other CPUs later after they are enabled.
*/
intpin->io_cpu = PCPU_GET(apic_id);
- if (bootverbose && intpin->io_irq != IRQ_DISABLED) {
- printf("ioapic%u: intpin %d -> ", io->io_id, i);
- ioapic_print_irq(intpin);
- printf(" (%s, %s)\n", intpin->io_edgetrigger ?
- "edge" : "level", intpin->io_activehi ? "high" :
- "low");
- }
value = ioapic_read(apic, IOAPIC_REDTBL_LO(i));
ioapic_write(apic, IOAPIC_REDTBL_LO(i), value | IOART_INTMSET);
}
@@ -588,6 +601,8 @@
return (EINVAL);
if (io->io_pins[pin].io_irq >= NUM_IO_INTS)
return (EINVAL);
+ if (io->io_pins[pin].io_bus == bus_type)
+ return (0);
io->io_pins[pin].io_bus = bus_type;
if (bootverbose)
printf("ioapic%u: intpin %d bus %s\n", io->io_id, pin,
@@ -671,13 +686,17 @@
ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol)
{
struct ioapic *io;
+ int activehi;
io = (struct ioapic *)cookie;
if (pin >= io->io_numintr || pol == INTR_POLARITY_CONFORM)
return (EINVAL);
if (io->io_pins[pin].io_irq >= NUM_IO_INTS)
return (EINVAL);
- io->io_pins[pin].io_activehi = (pol == INTR_POLARITY_HIGH);
+ activehi = (pol == INTR_POLARITY_HIGH);
+ if (io->io_pins[pin].io_activehi == activehi)
+ return (0);
+ io->io_pins[pin].io_activehi = activehi;
if (bootverbose)
printf("ioapic%u: intpin %d polarity: %s\n", io->io_id, pin,
pol == INTR_POLARITY_HIGH ? "high" : "low");
@@ -688,13 +707,17 @@
ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger)
{
struct ioapic *io;
+ int edgetrigger;
io = (struct ioapic *)cookie;
if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM)
return (EINVAL);
if (io->io_pins[pin].io_irq >= NUM_IO_INTS)
return (EINVAL);
- io->io_pins[pin].io_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
+ edgetrigger = (trigger == INTR_TRIGGER_EDGE);
+ if (io->io_pins[pin].io_edgetrigger == edgetrigger)
+ return (0);
+ io->io_pins[pin].io_edgetrigger = edgetrigger;
if (bootverbose)
printf("ioapic%u: intpin %d trigger: %s\n", io->io_id, pin,
trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
@@ -724,7 +747,129 @@
io->io_intbase + io->io_numintr - 1);
/* Register valid pins as interrupt sources. */
+ intr_register_pic(&io->io_pic);
for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++)
if (pin->io_irq < NUM_IO_INTS)
intr_register_source(&pin->io_intsrc);
}
+
+/* A simple new-bus driver to consume PCI I/O APIC devices. */
+static int
+ioapic_pci_probe(device_t dev)
+{
+
+ if (pci_get_class(dev) == PCIC_BASEPERIPH &&
+ pci_get_subclass(dev) == PCIS_BASEPERIPH_PIC) {
+ switch (pci_get_progif(dev)) {
+ case PCIP_BASEPERIPH_PIC_IO_APIC:
+ device_set_desc(dev, "IO APIC");
+ break;
+ case PCIP_BASEPERIPH_PIC_IOX_APIC:
+ device_set_desc(dev, "IO(x) APIC");
+ break;
+ default:
+ return (ENXIO);
+ }
+ device_quiet(dev);
+ return (-10000);
+ }
+ return (ENXIO);
+}
+
+static int
+ioapic_pci_attach(device_t dev)
+{
+
+ return (0);
+}
+
+static device_method_t ioapic_pci_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, ioapic_pci_probe),
+ DEVMETHOD(device_attach, ioapic_pci_attach),
+
+ { 0, 0 }
+};
+
+DEFINE_CLASS_0(ioapic, ioapic_pci_driver, ioapic_pci_methods, 0);
+
+static devclass_t ioapic_devclass;
+DRIVER_MODULE(ioapic, pci, ioapic_pci_driver, ioapic_devclass, 0, 0);
+
+/*
+ * A new-bus driver to consume the memory resources associated with
+ * the APICs in the system. On some systems ACPI or PnPBIOS system
+ * resource devices may already claim these resources. To keep from
+ * breaking those devices, we attach ourselves to the nexus device after
+ * legacy0 and acpi0 and ignore any allocation failures.
+ */
+static void
+apic_identify(driver_t *driver, device_t parent)
+{
+
+ /*
+ * Add at order 12. acpi0 is probed at order 10 and legacy0
+ * is probed at order 11.
+ */
+ if (lapic_paddr != 0)
+ BUS_ADD_CHILD(parent, 12, "apic", 0);
+}
+
+static int
+apic_probe(device_t dev)
+{
+
+ device_set_desc(dev, "APIC resources");
+ device_quiet(dev);
+ return (0);
+}
+
+static void
+apic_add_resource(device_t dev, int rid, vm_paddr_t base, size_t length)
+{
+ int error;
+
+#ifdef PAE
+ /*
+ * The resource code uses longs to track ranges, so we can't
+ * include memory regions above 4GB.
+ */
+ if (base >= ~0ul)
+ return;
+#endif
+ error = bus_set_resource(dev, SYS_RES_MEMORY, rid, base, length);
+ if (error)
+ panic("apic_add_resource: resource %d failed set with %d", rid,
+ error);
+ bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, 0);
+}
+
+static int
+apic_attach(device_t dev)
+{
+ struct ioapic *io;
+ int i;
+
+ /* Reserve the local APIC. */
+ apic_add_resource(dev, 0, lapic_paddr, sizeof(lapic_t));
+ i = 1;
+ STAILQ_FOREACH(io, &ioapic_list, io_next) {
+ apic_add_resource(dev, i, io->io_paddr, IOAPIC_MEM_REGION);
+ i++;
+ }
+ return (0);
+}
+
+static device_method_t apic_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_identify, apic_identify),
+ DEVMETHOD(device_probe, apic_probe),
+ DEVMETHOD(device_attach, apic_attach),
+
+ { 0, 0 }
+};
+
+DEFINE_CLASS_0(apic, apic_driver, apic_methods, 0);
+
+static devclass_t apic_devclass;
+DRIVER_MODULE(apic, nexus, apic_driver, apic_devclass, 0, 0);
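
The apic driver above is essentially the minimal new-bus identify/probe/attach
pattern. Returning 0 from probe claims the device at the highest priority,
while the ioapic driver's -10000 is a deliberately low bid, leaving room for a
more specific driver to win the device. For reference, a stripped-down sketch
of the same machinery (the "mydev" name and its nexus attachment are
hypothetical, not part of this commit):

	#include <sys/param.h>
	#include <sys/bus.h>
	#include <sys/kernel.h>
	#include <sys/module.h>

	static void
	mydev_identify(driver_t *driver, device_t parent)
	{
		/* Self-enumerated: add ourselves as a child of the parent bus. */
		if (device_find_child(parent, "mydev", -1) == NULL)
			BUS_ADD_CHILD(parent, 0, "mydev", 0);
	}

	static int
	mydev_probe(device_t dev)
	{
		device_set_desc(dev, "example device");
		return (0);		/* 0 == success, highest priority */
	}

	static int
	mydev_attach(device_t dev)
	{
		return (0);		/* allocate resources here */
	}

	static device_method_t mydev_methods[] = {
		DEVMETHOD(device_identify, mydev_identify),
		DEVMETHOD(device_probe, mydev_probe),
		DEVMETHOD(device_attach, mydev_attach),
		{ 0, 0 }
	};

	DEFINE_CLASS_0(mydev, mydev_driver, mydev_methods, 0);
	static devclass_t mydev_devclass;
	DRIVER_MODULE(mydev, nexus, mydev_driver, mydev_devclass, 0, 0);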
Index: trap.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/trap.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/trap.c -L sys/i386/i386/trap.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/trap.c
+++ sys/i386/i386/trap.c
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/trap.c,v 1.277.2.3 2005/11/28 20:03:04 jhb Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/trap.c,v 1.307.2.1 2007/12/06 14:20:24 kib Exp $");
/*
* 386 Trap and System call handling
@@ -77,6 +77,7 @@
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif
+#include <security/audit/audit.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -101,8 +102,8 @@
#include <machine/clock.h>
#endif
-extern void trap(struct trapframe frame);
-extern void syscall(struct trapframe frame);
+extern void trap(struct trapframe *frame);
+extern void syscall(struct trapframe *frame);
static int trap_pfault(struct trapframe *, int, vm_offset_t);
static void trap_fatal(struct trapframe *, vm_offset_t);
@@ -157,10 +158,11 @@
static int panic_on_nmi = 1;
SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
&panic_on_nmi, 0, "Panic on NMI");
+static int prot_fault_translation = 0;
+SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
+ &prot_fault_translation, 0, "Select signal to deliver on protection fault");
-#ifdef WITNESS
extern char *syscallnames[];
-#endif
/*
* Exception, fault, and trap interface to the FreeBSD kernel.
@@ -170,28 +172,31 @@
*/
void
-trap(frame)
- struct trapframe frame;
+trap(struct trapframe *frame)
{
struct thread *td = curthread;
struct proc *p = td->td_proc;
- u_int sticks = 0;
- int i = 0, ucode = 0, type, code;
+ int i = 0, ucode = 0, code;
+ u_int type;
+ register_t addr = 0;
vm_offset_t eva;
+ ksiginfo_t ksi;
#ifdef POWERFAIL_NMI
static int lastalert = 0;
#endif
- PCPU_LAZY_INC(cnt.v_trap);
- type = frame.tf_trapno;
+ PCPU_INC(cnt.v_trap);
+ type = frame->tf_trapno;
-#ifdef KDB_STOP_NMI
- /* Handler for NMI IPIs used for debugging */
+#ifdef SMP
+#ifdef STOP_NMI
+ /* Handler for NMI IPIs used for stopping CPUs. */
if (type == T_NMI) {
if (ipi_nmi_handler() == 0)
goto out;
}
-#endif /* KDB_STOP_NMI */
+#endif /* STOP_NMI */
+#endif /* SMP */
#ifdef KDB
if (kdb_active) {
@@ -209,12 +214,12 @@
* return immediately.
*/
if (type == T_NMI && pmc_intr &&
- (*pmc_intr)(PCPU_GET(cpuid), (uintptr_t) frame.tf_eip,
- TRAPF_USERMODE(&frame)))
+ (*pmc_intr)(PCPU_GET(cpuid), (uintptr_t) frame->tf_eip,
+ TRAPF_USERMODE(frame)))
goto out;
#endif
- if ((frame.tf_eflags & PSL_I) == 0) {
+ if ((frame->tf_eflags & PSL_I) == 0) {
/*
* Buggy application or kernel code has disabled
* interrupts and then trapped. Enabling interrupts
@@ -222,12 +227,12 @@
* interrupts disabled until they are accidentally
* enabled later.
*/
- if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
+ if (ISPL(frame->tf_cs) == SEL_UPL || (frame->tf_eflags & PSL_VM))
printf(
"pid %ld (%s): trap %d with interrupts disabled\n",
(long)curproc->p_pid, curproc->p_comm, type);
else if (type != T_BPTFLT && type != T_TRCTRAP &&
- frame.tf_eip != (int)cpu_switch_load_gs) {
+ frame->tf_eip != (int)cpu_switch_load_gs) {
/*
* XXX not quite right, since this may be for a
* multiple fault in user mode.
@@ -236,17 +241,17 @@
type);
/*
* Page faults need interrupts disabled until later,
- * and we shouldn't enable interrupts while in a
- * critical section or if servicing an NMI.
+ * and we shouldn't enable interrupts while holding
+ * a spin lock or if servicing an NMI.
*/
if (type != T_NMI && type != T_PAGEFLT &&
- td->td_critnest == 0)
+ td->td_md.md_spinlock_count == 0)
enable_intr();
}
}
eva = 0;
- code = frame.tf_err;
+ code = frame->tf_err;
if (type == T_PAGEFLT) {
/*
* For some Cyrix CPUs, %cr2 is clobbered by
@@ -261,35 +266,45 @@
* do the VM lookup, so just consider it a fatal trap so the
* kernel can print out a useful trap message and even get
* to the debugger.
+ *
+ * If we get a page fault while holding a non-sleepable
+ * lock, then it is most likely a fatal kernel page fault.
+ * If WITNESS is enabled, then it's going to whine about
+ * bogus LORs with various VM locks, so just skip to the
+ * fatal trap handling directly.
*/
eva = rcr2();
- if (td->td_critnest == 0)
- enable_intr();
+ if (td->td_critnest != 0 ||
+ WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
+ "Kernel page fault") != 0)
+ trap_fatal(frame, eva);
else
- trap_fatal(&frame, eva);
+ enable_intr();
}
- if ((ISPL(frame.tf_cs) == SEL_UPL) ||
- ((frame.tf_eflags & PSL_VM) &&
+ if ((ISPL(frame->tf_cs) == SEL_UPL) ||
+ ((frame->tf_eflags & PSL_VM) &&
!(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL))) {
/* user trap */
- sticks = td->td_sticks;
- td->td_frame = &frame;
+ td->td_pticks = 0;
+ td->td_frame = frame;
+ addr = frame->tf_eip;
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
switch (type) {
case T_PRIVINFLT: /* privileged instruction fault */
- ucode = type;
i = SIGILL;
+ ucode = ILL_PRVOPC;
break;
case T_BPTFLT: /* bpt instruction fault */
case T_TRCTRAP: /* trace trap */
enable_intr();
- frame.tf_eflags &= ~PSL_T;
+ frame->tf_eflags &= ~PSL_T;
i = SIGTRAP;
+ ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
break;
case T_ARITHTRAP: /* arithmetic trap */
@@ -298,7 +313,7 @@
if (ucode == -1)
goto userout;
#else
- ucode = code;
+ ucode = 0;
#endif
i = SIGFPE;
break;
@@ -310,27 +325,36 @@
*/
case T_PROTFLT: /* general protection fault */
case T_STKFLT: /* stack fault */
- if (frame.tf_eflags & PSL_VM) {
- i = vm86_emulate((struct vm86frame *)&frame);
+ if (frame->tf_eflags & PSL_VM) {
+ i = vm86_emulate((struct vm86frame *)frame);
if (i == 0)
goto user;
break;
}
- /* FALLTHROUGH */
-
+ i = SIGBUS;
+ ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
+ break;
case T_SEGNPFLT: /* segment not present fault */
+ i = SIGBUS;
+ ucode = BUS_ADRERR;
+ break;
case T_TSSFLT: /* invalid TSS fault */
+ i = SIGBUS;
+ ucode = BUS_OBJERR;
+ break;
case T_DOUBLEFLT: /* double fault */
default:
- ucode = code + BUS_SEGM_FAULT ;
i = SIGBUS;
+ ucode = BUS_OBJERR;
break;
case T_PAGEFLT: /* page fault */
+#ifdef KSE
if (td->td_pflags & TDP_SA)
thread_user_enter(td);
+#endif
- i = trap_pfault(&frame, TRUE, eva);
+ i = trap_pfault(frame, TRUE, eva);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
if (i == -2) {
/*
@@ -338,10 +362,10 @@
* treat the fault as an illegal instruction
* (T_PRIVINFLT) instead of a page fault.
*/
- type = frame.tf_trapno = T_PRIVINFLT;
+ type = frame->tf_trapno = T_PRIVINFLT;
/* Proceed as in that case. */
- ucode = type;
+ ucode = ILL_PRVOPC;
i = SIGILL;
break;
}
@@ -351,7 +375,37 @@
if (i == 0)
goto user;
- ucode = T_PAGEFLT;
+ if (i == SIGSEGV)
+ ucode = SEGV_MAPERR;
+ else {
+ if (prot_fault_translation == 0) {
+ /*
+ * Autodetect.
+ * This check also covers the images
+ * without the ABI-tag ELF note.
+ */
+ if (p->p_osrel >= 700004) {
+ i = SIGSEGV;
+ ucode = SEGV_ACCERR;
+ } else {
+ i = SIGBUS;
+ ucode = BUS_PAGE_FAULT;
+ }
+ } else if (prot_fault_translation == 1) {
+ /*
+ * Always compat mode.
+ */
+ i = SIGBUS;
+ ucode = BUS_PAGE_FAULT;
+ } else {
+ /*
+ * Always SIGSEGV mode.
+ */
+ i = SIGSEGV;
+ ucode = SEGV_ACCERR;
+ }
+ }
+ addr = eva;
break;
case T_DIVIDE: /* integer divide fault */
@@ -384,7 +438,7 @@
*/
if (kdb_on_nmi) {
printf ("NMI ... going to debugger\n");
- kdb_trap(type, 0, &frame);
+ kdb_trap(type, 0, frame);
}
#endif /* KDB */
goto userout;
@@ -410,12 +464,14 @@
if (npxdna())
goto userout;
#endif
- i = SIGFPE;
- ucode = FPE_FPU_NP_TRAP;
+ printf("pid %d killed due to lack of floating point\n",
+ p->p_pid);
+ i = SIGKILL;
+ ucode = 0;
break;
case T_FPOPFLT: /* FPU operand fetch fault */
- ucode = T_FPOPFLT;
+ ucode = ILL_COPROC;
i = SIGILL;
break;
@@ -431,7 +487,7 @@
("kernel trap doesn't have ucred"));
switch (type) {
case T_PAGEFLT: /* page fault */
- (void) trap_pfault(&frame, FALSE, eva);
+ (void) trap_pfault(frame, FALSE, eva);
goto out;
case T_DNA:
@@ -453,13 +509,13 @@
*/
case T_PROTFLT: /* general protection fault */
case T_STKFLT: /* stack fault */
- if (frame.tf_eflags & PSL_VM) {
- i = vm86_emulate((struct vm86frame *)&frame);
+ if (frame->tf_eflags & PSL_VM) {
+ i = vm86_emulate((struct vm86frame *)frame);
if (i != 0)
/*
* returns to original process
*/
- vm86_trap((struct vm86frame *)&frame);
+ vm86_trap((struct vm86frame *)frame);
goto out;
}
if (type == T_STKFLT)
@@ -480,7 +536,7 @@
* (XXX) so that we can continue, and generate
* a signal.
*/
- if (frame.tf_eip == (int)cpu_switch_load_gs) {
+ if (frame->tf_eip == (int)cpu_switch_load_gs) {
PCPU_GET(curpcb)->pcb_gs = 0;
#if 0
PROC_LOCK(p);
@@ -503,24 +559,24 @@
* selectors and pointers when the user changes
* them.
*/
- if (frame.tf_eip == (int)doreti_iret) {
- frame.tf_eip = (int)doreti_iret_fault;
+ if (frame->tf_eip == (int)doreti_iret) {
+ frame->tf_eip = (int)doreti_iret_fault;
goto out;
}
- if (frame.tf_eip == (int)doreti_popl_ds) {
- frame.tf_eip = (int)doreti_popl_ds_fault;
+ if (frame->tf_eip == (int)doreti_popl_ds) {
+ frame->tf_eip = (int)doreti_popl_ds_fault;
goto out;
}
- if (frame.tf_eip == (int)doreti_popl_es) {
- frame.tf_eip = (int)doreti_popl_es_fault;
+ if (frame->tf_eip == (int)doreti_popl_es) {
+ frame->tf_eip = (int)doreti_popl_es_fault;
goto out;
}
- if (frame.tf_eip == (int)doreti_popl_fs) {
- frame.tf_eip = (int)doreti_popl_fs_fault;
+ if (frame->tf_eip == (int)doreti_popl_fs) {
+ frame->tf_eip = (int)doreti_popl_fs_fault;
goto out;
}
if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
- frame.tf_eip =
+ frame->tf_eip =
(int)PCPU_GET(curpcb)->pcb_onfault;
goto out;
}
@@ -536,14 +592,14 @@
* problem here and not every time the kernel is
* entered.
*/
- if (frame.tf_eflags & PSL_NT) {
- frame.tf_eflags &= ~PSL_NT;
+ if (frame->tf_eflags & PSL_NT) {
+ frame->tf_eflags &= ~PSL_NT;
goto out;
}
break;
case T_TRCTRAP: /* trace trap */
- if (frame.tf_eip == (int)IDTVEC(lcall_syscall)) {
+ if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) {
/*
* We've just entered system mode via the
* syscall lcall. Continue single stepping
@@ -552,12 +608,12 @@
*/
goto out;
}
- if (frame.tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
+ if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
/*
* The syscall handler has now saved the
* flags. Stop single stepping it.
*/
- frame.tf_eflags &= ~PSL_T;
+ frame->tf_eflags &= ~PSL_T;
goto out;
}
/*
@@ -589,8 +645,7 @@
* Otherwise, debugger traps "can't happen".
*/
#ifdef KDB
- /* XXX Giant */
- if (kdb_trap(type, 0, &frame))
+ if (kdb_trap(type, 0, frame))
goto out;
#endif
break;
@@ -617,7 +672,7 @@
*/
if (kdb_on_nmi) {
printf ("NMI ... going to debugger\n");
- kdb_trap(type, 0, &frame);
+ kdb_trap(type, 0, frame);
}
#endif /* KDB */
goto out;
@@ -628,7 +683,7 @@
#endif /* DEV_ISA */
}
- trap_fatal(&frame, eva);
+ trap_fatal(frame, eva);
goto out;
}
@@ -636,7 +691,12 @@
if (*p->p_sysent->sv_transtrap)
i = (*p->p_sysent->sv_transtrap)(i, type);
- trapsignal(td, i, ucode);
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = i;
+ ksi.ksi_code = ucode;
+ ksi.ksi_addr = (void *)addr;
+ ksi.ksi_trapno = type;
+ trapsignal(td, &ksi);
#ifdef DEBUG
if (type <= MAX_TRAP_MSG) {
@@ -649,7 +709,7 @@
#endif
user:
- userret(td, &frame, sticks);
+ userret(td, frame);
mtx_assert(&Giant, MA_NOTOWNED);
userout:
out:
@@ -664,7 +724,7 @@
{
vm_offset_t va;
struct vmspace *vm = NULL;
- vm_map_t map = 0;
+ vm_map_t map;
int rv = 0;
vm_prot_t ftype;
struct thread *td = curthread;
@@ -703,8 +763,16 @@
map = &vm->vm_map;
}
+ /*
+ * PGEX_I is defined only if the execute disable bit capability is
+ * supported and enabled.
+ */
if (frame->tf_err & PGEX_W)
ftype = VM_PROT_WRITE;
+#ifdef PAE
+ else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
+ ftype = VM_PROT_EXECUTE;
+#endif
else
ftype = VM_PROT_READ;
@@ -745,9 +813,6 @@
return (-1);
}
- /* kludge to pass faulting virtual address to sendsig */
- frame->tf_err = eva;
-
return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
@@ -756,7 +821,8 @@
struct trapframe *frame;
vm_offset_t eva;
{
- int code, type, ss, esp;
+ int code, ss, esp;
+ u_int type;
struct soft_segment_descriptor softseg;
char *msg;
@@ -813,22 +879,19 @@
printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
printf("current process = ");
if (curproc) {
- printf("%lu (%s)\n",
- (u_long)curproc->p_pid, curproc->p_comm ?
- curproc->p_comm : "");
+ printf("%lu (%s)\n", (u_long)curproc->p_pid, curproc->p_comm);
} else {
printf("Idle\n");
}
#ifdef KDB
if (debugger_on_panic || kdb_active) {
- register_t eflags;
- eflags = intr_disable();
+ frame->tf_err = eva; /* smuggle fault address to ddb */
if (kdb_trap(type, 0, frame)) {
- intr_restore(eflags);
+ frame->tf_err = code; /* restore error code */
return;
}
- intr_restore(eflags);
+ frame->tf_err = code; /* restore error code */
}
#endif
printf("trap number = %d\n", type);
@@ -871,50 +934,45 @@
* A system call is essentially treated as a trap.
*/
void
-syscall(frame)
- struct trapframe frame;
+syscall(struct trapframe *frame)
{
caddr_t params;
struct sysent *callp;
struct thread *td = curthread;
struct proc *p = td->td_proc;
register_t orig_tf_eflags;
- u_int sticks;
int error;
int narg;
int args[8];
u_int code;
+ ksiginfo_t ksi;
- /*
- * note: PCPU_LAZY_INC() can only be used if we can afford
- * occassional inaccuracy in the count.
- */
- PCPU_LAZY_INC(cnt.v_syscall);
+ PCPU_INC(cnt.v_syscall);
#ifdef DIAGNOSTIC
- if (ISPL(frame.tf_cs) != SEL_UPL) {
- mtx_lock(&Giant); /* try to stabilize the system XXX */
+ if (ISPL(frame->tf_cs) != SEL_UPL) {
panic("syscall");
/* NOT REACHED */
- mtx_unlock(&Giant);
}
#endif
- sticks = td->td_sticks;
- td->td_frame = &frame;
+ td->td_pticks = 0;
+ td->td_frame = frame;
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
+#ifdef KSE
if (p->p_flag & P_SA)
thread_user_enter(td);
- params = (caddr_t)frame.tf_esp + sizeof(int);
- code = frame.tf_eax;
- orig_tf_eflags = frame.tf_eflags;
+#endif
+ params = (caddr_t)frame->tf_esp + sizeof(int);
+ code = frame->tf_eax;
+ orig_tf_eflags = frame->tf_eflags;
if (p->p_sysent->sv_prepsyscall) {
/*
* The prep code is MP aware.
*/
- (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
+ (*p->p_sysent->sv_prepsyscall)(frame, args, &code, &params);
} else {
/*
* Need to check if this is a 32 bit or 64 bit syscall.
@@ -944,7 +1002,7 @@
else
callp = &p->p_sysent->sv_table[code];
- narg = callp->sy_narg & SYF_ARGMASK;
+ narg = callp->sy_narg;
/*
* copyin and the ktrsyscall()/ktrsysret() code is MP-aware
@@ -963,29 +1021,26 @@
CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
td->td_proc->p_pid, td->td_proc->p_comm, code);
- /*
- * Try to run the syscall without Giant if the syscall
- * is MP safe.
- */
- if ((callp->sy_narg & SYF_MPSAFE) == 0)
- mtx_lock(&Giant);
+ td->td_syscalls++;
if (error == 0) {
td->td_retval[0] = 0;
- td->td_retval[1] = frame.tf_edx;
+ td->td_retval[1] = frame->tf_edx;
STOPEVENT(p, S_SCE, narg);
PTRACESTOP_SC(p, td, S_PT_SCE);
+ AUDIT_SYSCALL_ENTER(code, td);
error = (*callp->sy_call)(td, args);
+ AUDIT_SYSCALL_EXIT(error, td);
}
switch (error) {
case 0:
- frame.tf_eax = td->td_retval[0];
- frame.tf_edx = td->td_retval[1];
- frame.tf_eflags &= ~PSL_C;
+ frame->tf_eax = td->td_retval[0];
+ frame->tf_edx = td->td_retval[1];
+ frame->tf_eflags &= ~PSL_C;
break;
case ERESTART:
@@ -993,7 +1048,7 @@
* Reconstruct pc, assuming lcall $X,y is 7 bytes,
* int 0x80 is 2 bytes. We saved this in tf_err.
*/
- frame.tf_eip -= frame.tf_err;
+ frame->tf_eip -= frame->tf_err;
break;
case EJUSTRETURN:
@@ -1006,29 +1061,40 @@
else
error = p->p_sysent->sv_errtbl[error];
}
- frame.tf_eax = error;
- frame.tf_eflags |= PSL_C;
+ frame->tf_eax = error;
+ frame->tf_eflags |= PSL_C;
break;
}
/*
- * Release Giant if we previously set it.
- */
- if ((callp->sy_narg & SYF_MPSAFE) == 0)
- mtx_unlock(&Giant);
-
- /*
* Traced syscall.
*/
if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
- frame.tf_eflags &= ~PSL_T;
- trapsignal(td, SIGTRAP, 0);
+ frame->tf_eflags &= ~PSL_T;
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGTRAP;
+ ksi.ksi_code = TRAP_TRACE;
+ ksi.ksi_addr = (void *)frame->tf_eip;
+ trapsignal(td, &ksi);
}
/*
+ * Check for misbehavior.
+ */
+ WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
+ (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
+ KASSERT(td->td_critnest == 0,
+ ("System call %s returning in a critical section",
+ (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"));
+ KASSERT(td->td_locks == 0,
+ ("System call %s returning with %d locks held",
+ (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???",
+ td->td_locks));
+
+ /*
* Handle reschedule and other end-of-syscall issues
*/
- userret(td, &frame, sticks);
+ userret(td, frame);
CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td,
td->td_proc->p_pid, td->td_proc->p_comm, code);
@@ -1046,10 +1112,5 @@
STOPEVENT(p, S_SCX, code);
PTRACESTOP_SC(p, td, S_PT_SCX);
-
- WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
- (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
- mtx_assert(&sched_lock, MA_NOTOWNED);
- mtx_assert(&Giant, MA_NOTOWNED);
}
Index: elf_machdep.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/elf_machdep.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/elf_machdep.c -L sys/i386/i386/elf_machdep.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/elf_machdep.c
+++ sys/i386/i386/elf_machdep.c
@@ -24,7 +24,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/elf_machdep.c,v 1.20 2004/08/11 02:35:05 marcel Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/elf_machdep.c,v 1.22 2007/05/22 02:22:58 kan Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -82,6 +82,7 @@
"/libexec/ld-elf.so.1",
&elf32_freebsd_sysvec,
NULL,
+ BI_CAN_EXEC_DYN,
};
SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_ANY,
@@ -96,6 +97,7 @@
"/usr/libexec/ld-elf.so.1",
&elf32_freebsd_sysvec,
NULL,
+ BI_CAN_EXEC_DYN,
};
SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY,
Index: mem.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/mem.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/i386/i386/mem.c -L sys/i386/i386/mem.c -u -r1.2 -r1.3
--- sys/i386/i386/mem.c
+++ sys/i386/i386/mem.c
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/mem.c,v 1.116.8.1 2006/04/04 19:46:44 ups Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/mem.c,v 1.117 2006/01/23 15:46:09 ups Exp $");
/*
* Memory special file
Index: busdma_machdep.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/busdma_machdep.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/i386/i386/busdma_machdep.c -L sys/i386/i386/busdma_machdep.c -u -r1.2 -r1.3
--- sys/i386/i386/busdma_machdep.c
+++ sys/i386/i386/busdma_machdep.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/busdma_machdep.c,v 1.74.2.2 2006/03/28 06:28:37 delphij Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/busdma_machdep.c,v 1.89 2007/06/11 17:57:24 mjacob Exp $");
#include <sys/param.h>
#include <sys/kdb.h>
@@ -51,8 +51,11 @@
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
+#include <machine/specialreg.h>
#define MAX_BPAGES 512
+#define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3
+#define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4
struct bounce_zone;
@@ -137,7 +140,9 @@
static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
vm_offset_t vaddr, bus_size_t size);
static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
-static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr);
+int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr);
+int _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
+ bus_size_t buflen, int flags, int *nb);
/*
* Return true if a match is made.
@@ -147,7 +152,7 @@
* If paddr is within the bounds of the dma tag then call the filter callback
* to check for a match, if there is no filter callback then assume a match.
*/
-static __inline int
+int
run_filter(bus_dma_tag_t dmat, bus_addr_t paddr)
{
int retval;
@@ -202,8 +207,6 @@
panic("driver error: busdma dflt_lock called");
}
-#define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3
-#define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4
/*
* Allocate a device specific dma_tag.
*/
@@ -222,6 +225,10 @@
if (boundary != 0 && boundary < maxsegsz)
maxsegsz = boundary;
+ if (maxsegsz == 0) {
+ return (EINVAL);
+ }
+
/* Return a NULL tag on failure */
*dmat = NULL;
@@ -265,6 +272,9 @@
else if (parent->boundary != 0)
newtag->boundary = MIN(parent->boundary,
newtag->boundary);
+ if ((newtag->filter != NULL) ||
+ ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0))
+ newtag->flags |= BUS_DMA_COULD_BOUNCE;
if (newtag->filter == NULL) {
/*
* Short circuit looking at our parent directly
@@ -495,7 +505,16 @@
}
}
+ /*
+ * XXX:
+ * (dmat->alignment < dmat->maxsize) is just a quick hack; the exact
+ * alignment guarantees of malloc need to be nailed down, and the
+ * code below should be rewritten to take that into account.
+ *
+ * In the meantime, we'll warn the user if malloc gets it wrong.
+ */
if ((dmat->maxsize <= PAGE_SIZE) &&
+ (dmat->alignment < dmat->maxsize) &&
dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem)) {
*vaddr = malloc(dmat->maxsize, M_DEVBUF, mflags);
} else {
@@ -513,7 +532,12 @@
CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
__func__, dmat, dmat->flags, ENOMEM);
return (ENOMEM);
+ } else if ((uintptr_t)*vaddr & (dmat->alignment - 1)) {
+ printf("bus_dmamem_alloc failed to align memory properly.\n");
}
+ if (flags & BUS_DMA_NOCACHE)
+ pmap_change_attr((vm_offset_t)*vaddr, dmat->maxsize,
+ PAT_UNCACHEABLE);
CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
__func__, dmat, dmat->flags, ENOMEM);
return (0);
@@ -532,8 +556,10 @@
*/
if (map != NULL)
panic("bus_dmamem_free: Invalid map freed\n");
- if ((dmat->maxsize <= PAGE_SIZE)
- && dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem))
+ pmap_change_attr((vm_offset_t)vaddr, dmat->maxsize, PAT_WRITE_BACK);
+ if ((dmat->maxsize <= PAGE_SIZE) &&
+ (dmat->alignment < dmat->maxsize) &&
+ dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem))
free(vaddr, M_DEVBUF);
else {
contigfree(vaddr, dmat->maxsize, M_DEVBUF);
@@ -541,37 +567,16 @@
CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags);
}
-/*
- * Utility function to load a linear buffer. lastaddrp holds state
- * between invocations (for multiple-buffer loads). segp contains
- * the starting segment on entrace, and the ending segment on exit.
- * first indicates if this is the first invocation of this function.
- */
-static __inline int
-_bus_dmamap_load_buffer(bus_dma_tag_t dmat,
- bus_dmamap_t map,
- void *buf, bus_size_t buflen,
- pmap_t pmap,
- int flags,
- bus_addr_t *lastaddrp,
- bus_dma_segment_t *segs,
- int *segp,
- int first)
+int
+_bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
+ bus_size_t buflen, int flags, int *nb)
{
- bus_size_t sgsize;
- bus_addr_t curaddr, lastaddr, baddr, bmask;
vm_offset_t vaddr;
+ vm_offset_t vendaddr;
bus_addr_t paddr;
- int needbounce = 0;
- int seg;
-
- if (map == NULL)
- map = &nobounce_dmamap;
-
- if ((map != &nobounce_dmamap && map->pagesneeded == 0)
- && ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0)) {
- vm_offset_t vendaddr;
+ int needbounce = *nb;
+ if ((map != &nobounce_dmamap && map->pagesneeded == 0)) {
CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, "
"alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem),
dmat->boundary, dmat->alignment);
@@ -586,7 +591,8 @@
while (vaddr < vendaddr) {
paddr = pmap_kextract(vaddr);
- if (run_filter(dmat, paddr) != 0) {
+ if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
+ run_filter(dmat, paddr) != 0) {
needbounce = 1;
map->pagesneeded++;
}
@@ -618,6 +624,43 @@
mtx_unlock(&bounce_lock);
}
+ *nb = needbounce;
+ return (0);
+}
+
+/*
+ * Utility function to load a linear buffer. lastaddrp holds state
+ * between invocations (for multiple-buffer loads). segp contains
+ * the starting segment on entrance, and the ending segment on exit.
+ * first indicates if this is the first invocation of this function.
+ */
+static __inline int
+_bus_dmamap_load_buffer(bus_dma_tag_t dmat,
+ bus_dmamap_t map,
+ void *buf, bus_size_t buflen,
+ pmap_t pmap,
+ int flags,
+ bus_addr_t *lastaddrp,
+ bus_dma_segment_t *segs,
+ int *segp,
+ int first)
+{
+ bus_size_t sgsize;
+ bus_addr_t curaddr, lastaddr, baddr, bmask;
+ vm_offset_t vaddr;
+ int needbounce = 0;
+ int seg, error;
+
+ if (map == NULL)
+ map = &nobounce_dmamap;
+
+ if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) {
+ error = _bus_dmamap_count_pages(dmat, map, buf, buflen, flags,
+ &needbounce);
+ if (error)
+ return (error);
+ }
+
vaddr = (vm_offset_t)buf;
lastaddr = *lastaddrp;
bmask = ~(dmat->boundary - 1);
@@ -635,6 +678,8 @@
* Compute the segment size, and adjust counts.
*/
sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK);
+ if (sgsize > dmat->maxsegsz)
+ sgsize = dmat->maxsegsz;
if (buflen < sgsize)
sgsize = buflen;
@@ -647,7 +692,8 @@
sgsize = (baddr - curaddr);
}
- if (map->pagesneeded != 0 && run_filter(dmat, curaddr))
+ if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
+ map->pagesneeded != 0 && run_filter(dmat, curaddr))
curaddr = add_bounce_page(dmat, map, vaddr, sgsize);
/*
@@ -706,9 +752,10 @@
error = _bus_dmamap_load_buffer(dmat, map, buf, buflen, NULL, flags,
&lastaddr, dmat->segments, &nsegs, 1);
+ CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
+ __func__, dmat, dmat->flags, error, nsegs + 1);
+
if (error == EINPROGRESS) {
- CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
- __func__, dmat, dmat->flags, error);
return (error);
}
@@ -717,8 +764,13 @@
else
(*callback)(callback_arg, dmat->segments, nsegs + 1, 0);
- CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error 0 nsegs %d",
- __func__, dmat, dmat->flags, nsegs + 1);
+ /*
+ * Return ENOMEM to the caller so that it can pass it up the stack.
+ * This error only happens when NOWAIT is set, so deferral is disabled.
+ */
+ if (error == ENOMEM)
+ return (error);
+
return (0);
}
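
After this change the caller-visible contract of bus_dmamap_load() is: 0 when
the callback has already run, EINPROGRESS when the load was deferred until
bounce pages free up, or ENOMEM passed straight through (only possible with
BUS_DMA_NOWAIT, which disables deferral). A hedged sketch of a typical caller,
assuming a hypothetical callback name and a tag built for a single segment:

	static void
	mydev_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
	{
		bus_addr_t *physp = arg;

		if (error != 0)
			return;			/* load failed; no valid mappings */
		*physp = segs[0].ds_addr;	/* tag built for one segment */
	}

	...
		bus_addr_t phys;
		int error;

		error = bus_dmamap_load(dmat, map, buf, buflen, mydev_dma_cb,
		    &phys, 0);
		if (error == EINPROGRESS) {
			/* Callback fires later, once bounce pages are free. */
		} else if (error != 0) {
			/* With BUS_DMA_NOWAIT, ENOMEM now reaches us here. */
		}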
@@ -726,18 +778,17 @@
/*
* Like _bus_dmamap_load(), but for mbufs.
*/
-int
-bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map,
- struct mbuf *m0,
- bus_dmamap_callback2_t *callback, void *callback_arg,
- int flags)
+static __inline int
+_bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct mbuf *m0, bus_dma_segment_t *segs, int *nsegs,
+ int flags)
{
- int nsegs, error;
+ int error;
M_ASSERTPKTHDR(m0);
flags |= BUS_DMA_NOWAIT;
- nsegs = 0;
+ *nsegs = 0;
error = 0;
if (m0->m_pkthdr.len <= dmat->maxsize) {
int first = 1;
@@ -749,7 +800,7 @@
error = _bus_dmamap_load_buffer(dmat, map,
m->m_data, m->m_len,
NULL, flags, &lastaddr,
- dmat->segments, &nsegs, first);
+ segs, nsegs, first);
first = 0;
}
}
@@ -757,15 +808,33 @@
error = EINVAL;
}
+ /* XXX FIXME: Having to increment nsegs is really annoying */
+ ++*nsegs;
+ CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
+ __func__, dmat, dmat->flags, error, *nsegs);
+ return (error);
+}
+
+int
+bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct mbuf *m0,
+ bus_dmamap_callback2_t *callback, void *callback_arg,
+ int flags)
+{
+ int nsegs, error;
+
+ error = _bus_dmamap_load_mbuf_sg(dmat, map, m0, dmat->segments, &nsegs,
+ flags);
+
if (error) {
/* force "no valid mappings" in callback */
(*callback)(callback_arg, dmat->segments, 0, 0, error);
} else {
(*callback)(callback_arg, dmat->segments,
- nsegs+1, m0->m_pkthdr.len, error);
+ nsegs, m0->m_pkthdr.len, error);
}
CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
- __func__, dmat, dmat->flags, error, nsegs + 1);
+ __func__, dmat, dmat->flags, error, nsegs);
return (error);
}
@@ -774,36 +843,7 @@
struct mbuf *m0, bus_dma_segment_t *segs, int *nsegs,
int flags)
{
- int error;
-
- M_ASSERTPKTHDR(m0);
-
- flags |= BUS_DMA_NOWAIT;
- *nsegs = 0;
- error = 0;
- if (m0->m_pkthdr.len <= dmat->maxsize) {
- int first = 1;
- bus_addr_t lastaddr = 0;
- struct mbuf *m;
-
- for (m = m0; m != NULL && error == 0; m = m->m_next) {
- if (m->m_len > 0) {
- error = _bus_dmamap_load_buffer(dmat, map,
- m->m_data, m->m_len,
- NULL, flags, &lastaddr,
- segs, nsegs, first);
- first = 0;
- }
- }
- } else {
- error = EINVAL;
- }
-
- /* XXX FIXME: Having to increment nsegs is really annoying */
- ++*nsegs;
- CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
- __func__, dmat, dmat->flags, error, *nsegs);
- return (error);
+ return (_bus_dmamap_load_mbuf_sg(dmat, map, m0, segs, nsegs, flags));
}
/*
@@ -835,6 +875,7 @@
nsegs = 0;
error = 0;
first = 1;
+ lastaddr = (bus_addr_t) 0;
for (i = 0; i < uio->uio_iovcnt && resid != 0 && !error; i++) {
/*
* Now at the first iovec to load. Load each iovec
@@ -891,7 +932,6 @@
* want to add support for invalidating
* the caches on broken hardware
*/
- dmat->bounce_zone->total_bounced++;
CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
"performing bounce", __func__, op, dmat, dmat->flags);
@@ -902,6 +942,7 @@
bpage->datacount);
bpage = STAILQ_NEXT(bpage, links);
}
+ dmat->bounce_zone->total_bounced++;
}
if (op & BUS_DMASYNC_POSTREAD) {
@@ -911,6 +952,7 @@
bpage->datacount);
bpage = STAILQ_NEXT(bpage, links);
}
+ dmat->bounce_zone->total_bounced++;
}
}
}
Index: vm_machdep.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/vm_machdep.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/i386/i386/vm_machdep.c -L sys/i386/i386/vm_machdep.c -u -r1.2 -r1.3
--- sys/i386/i386/vm_machdep.c
+++ sys/i386/i386/vm_machdep.c
@@ -41,12 +41,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.259.2.3 2006/03/13 02:46:55 davidxu Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.283 2007/07/07 16:59:01 attilio Exp $");
#include "opt_isa.h"
#include "opt_npx.h"
#include "opt_reset.h"
#include "opt_cpu.h"
+#include "opt_xbox.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -94,6 +95,10 @@
#include <i386/isa/isa.h>
#endif
+#ifdef XBOX
+#include <machine/xbox.h>
+#endif
+
#ifndef NSFBUFS
#define NSFBUFS (512 + maxusers * 16)
#endif
@@ -153,23 +158,25 @@
struct mdproc *mdp1 = &p1->p_md;
struct proc_ldt *pldt;
- pldt = mdp1->md_ldt;
- if (pldt && pldt->ldt_refcnt > 1) {
+ mtx_lock_spin(&dt_lock);
+ if ((pldt = mdp1->md_ldt) != NULL &&
+ pldt->ldt_refcnt > 1) {
pldt = user_ldt_alloc(mdp1, pldt->ldt_len);
if (pldt == NULL)
panic("could not copy LDT");
mdp1->md_ldt = pldt;
set_user_ldt(mdp1);
user_ldt_free(td1);
- }
+ } else
+ mtx_unlock_spin(&dt_lock);
}
return;
}
/* Ensure that p1's pcb is up to date. */
-#ifdef DEV_NPX
if (td1 == curthread)
td1->td_pcb->pcb_gs = rgs();
+#ifdef DEV_NPX
savecrit = intr_disable();
if (PCPU_GET(fpcurthread) == td1)
npxsave(&td1->td_pcb->pcb_save);
@@ -228,7 +235,6 @@
pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */
pcb2->pcb_eip = (int)fork_trampoline;
pcb2->pcb_psl = PSL_KERNEL; /* ints disabled */
- pcb2->pcb_gs = rgs();
/*-
* pcb2->pcb_dr*: cloned above.
* pcb2->pcb_savefpu: cloned above.
@@ -244,7 +250,7 @@
pcb2->pcb_ext = 0;
/* Copy the LDT, if necessary. */
- mtx_lock_spin(&sched_lock);
+ mtx_lock_spin(&dt_lock);
if (mdp2->md_ldt != NULL) {
if (flags & RFMEM) {
mdp2->md_ldt->ldt_refcnt++;
@@ -255,9 +261,9 @@
panic("could not copy LDT");
}
}
- mtx_unlock_spin(&sched_lock);
+ mtx_unlock_spin(&dt_lock);
- /* Setup to release sched_lock in fork_exit(). */
+ /* Setup to release spin count in fork_exit(). */
td2->td_md.md_spinlock_count = 1;
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
@@ -300,11 +306,13 @@
* If this process has a custom LDT, release it. Reset pc->pcb_gs
* and %gs before we free it in case they refer to an LDT entry.
*/
+ mtx_lock_spin(&dt_lock);
if (td->td_proc->p_md.md_ldt) {
td->td_pcb->pcb_gs = _udatasel;
load_gs(_udatasel);
user_ldt_free(td);
- }
+ } else
+ mtx_unlock_spin(&dt_lock);
}
void
@@ -429,7 +437,7 @@
*/
pcb2->pcb_ext = NULL;
- /* Setup to release sched_lock in fork_exit(). */
+ /* Setup to release spin count in fork_exit(). */
td->td_md.md_spinlock_count = 1;
td->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
}
@@ -536,6 +544,14 @@
void
cpu_reset()
{
+#ifdef XBOX
+ if (arch_i386_is_xbox) {
+ /* Kick the PIC16L, it can reboot the box */
+ pic16l_reboot();
+ for (;;);
+ }
+#endif
+
#ifdef SMP
u_int cnt, map;
@@ -551,7 +567,10 @@
cpustop_restartfunc = cpu_reset_proxy;
cpu_reset_proxy_active = 0;
printf("cpu_reset: Restarting BSP\n");
- started_cpus = (1<<0); /* Restart CPU #0 */
+
+ /* Restart CPU #0. */
+ /* XXX: restart_cpus(1 << 0); */
+ atomic_store_rel_int(&started_cpus, (1 << 0));
cnt = 0;
while (cpu_reset_proxy_active == 0 && cnt < 10000000)
@@ -575,7 +594,12 @@
static void
cpu_reset_real()
{
+ struct region_descriptor null_idt;
+#ifndef PC98
+ int b;
+#endif
+ disable_intr();
#ifdef CPU_ELAN
if (elan_mmcr != NULL)
elan_mmcr->RESCFG = 1;
@@ -591,7 +615,6 @@
/*
* Attempt to do a CPU reset via CPU reset port.
*/
- disable_intr();
if ((inb(0x35) & 0xa0) != 0xa0) {
outb(0x37, 0x0f); /* SHUT0 = 0. */
outb(0x37, 0x0b); /* SHUT1 = 0. */
@@ -606,16 +629,46 @@
*/
outb(IO_KBD + 4, 0xFE);
DELAY(500000); /* wait 0.5 sec to see if that did it */
- printf("Keyboard reset did not work, attempting CPU shutdown\n");
- DELAY(1000000); /* wait 1 sec for printf to complete */
#endif
+
+ /*
+ * Attempt to force a reset via the Reset Control register at
+ * I/O port 0xcf9. Bit 2 forces a system reset when it is
+ * written as 1. Bit 1 selects the type of reset to attempt:
+ * 0 selects a "soft" reset, and 1 selects a "hard" reset. We
+ * try to do a "soft" reset first, and then a "hard" reset.
+ */
+ outb(0xcf9, 0x2);
+ outb(0xcf9, 0x6);
+ DELAY(500000); /* wait 0.5 sec to see if that did it */
+
+ /*
+ * Attempt to force a reset via the Fast A20 and Init register
+ * at I/O port 0x92. Bit 1 serves as an alternate A20 gate.
+ * Bit 0 asserts INIT# when set to 1. We are careful to only
+ * preserve bit 1 while setting bit 0. We also must clear bit
+ * 0 before setting it if it isn't already clear.
+ */
+ b = inb(0x92);
+ if (b != 0xff) {
+ if ((b & 0x1) != 0)
+ outb(0x92, b & 0xfe);
+ outb(0x92, b | 0x1);
+ DELAY(500000); /* wait 0.5 sec to see if that did it */
+ }
#endif /* PC98 */
- /* Force a shutdown by unmapping entire address space. */
- bzero((caddr_t)PTD, NBPTD);
+ printf("No known reset method worked, attempting CPU shutdown\n");
+ DELAY(1000000); /* wait 1 sec for printf to complete */
+
+ /* Wipe the IDT; the breakpoint below then triple-faults, resetting the CPU. */
+ null_idt.rd_limit = 0;
+ null_idt.rd_base = 0;
+ lidt(&null_idt);
/* "good night, sweet prince .... <THUNK!>" */
- invltlb();
+ breakpoint();
+
/* NOTREACHED */
while(1);
}
@@ -647,7 +700,7 @@
}
/*
- * Get an sf_buf from the freelist. Will block if none are available.
+ * Get an sf_buf from the freelist. May block if none are available.
*/
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int flags)
@@ -734,9 +787,7 @@
other_cpus = PCPU_GET(other_cpus) & ~sf->cpumask;
if (other_cpus != 0) {
sf->cpumask |= other_cpus;
- mtx_lock_spin(&smp_ipi_mtx);
smp_masked_invlpg(other_cpus, sf->kva);
- mtx_unlock_spin(&smp_ipi_mtx);
}
}
sched_unpin();
Index: mptable.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/mptable.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/mptable.c -L sys/i386/i386/mptable.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/mptable.c
+++ sys/i386/i386/mptable.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/mptable.c,v 1.241 2005/04/14 17:59:58 jhb Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/mptable.c,v 1.245 2007/05/08 22:01:03 jhb Exp $");
#include "opt_mptable_force_htt.h"
#include <sys/param.h>
@@ -51,7 +51,7 @@
/* string defined by the Intel MP Spec as identifying the MP table */
#define MP_SIG 0x5f504d5f /* _MP_ */
-#define NAPICID 32 /* Max number of APIC's */
+#define MAX_LAPIC_ID 31 /* Max local APIC ID for HTT fixup */
#ifdef PC98
#define BIOS_BASE (0xe8000)
@@ -142,12 +142,12 @@
static mpfps_t mpfps;
static mpcth_t mpct;
-static void *ioapics[NAPICID];
+static void *ioapics[MAX_APIC_ID + 1];
static bus_datum *busses;
static int mptable_nioapics, mptable_nbusses, mptable_maxbusid;
static int pci0 = -1;
-static MALLOC_DEFINE(M_MPTABLE, "MP Table", "MP Table Items");
+static MALLOC_DEFINE(M_MPTABLE, "mptable", "MP Table Items");
static enum intr_polarity conforming_polarity(u_char src_bus,
u_char src_bus_irq);
@@ -321,18 +321,20 @@
static int
mptable_setup_local(void)
{
+ vm_paddr_t addr;
/* Is this a pre-defined config? */
printf("MPTable: <");
if (mpfps->config_type != 0) {
- lapic_init(DEFAULT_APIC_BASE);
+ addr = DEFAULT_APIC_BASE;
printf("Default Configuration %d", mpfps->config_type);
} else {
- lapic_init((uintptr_t)mpct->apic_address);
+ addr = mpct->apic_address;
printf("%.*s %.*s", (int)sizeof(mpct->oem_id), mpct->oem_id,
(int)sizeof(mpct->product_id), mpct->product_id);
}
printf(">\n");
+ lapic_init(addr);
return (0);
}
@@ -359,7 +361,7 @@
mptable_parse_ints();
/* Fourth, we register all the I/O APIC's. */
- for (i = 0; i < NAPICID; i++)
+ for (i = 0; i <= MAX_APIC_ID; i++)
if (ioapics[i] != NULL)
ioapic_register(ioapics[i]);
@@ -425,8 +427,10 @@
if (proc->cpu_flags & PROCENTRY_FLAG_EN) {
lapic_create(proc->apic_id, proc->cpu_flags &
PROCENTRY_FLAG_BP);
- cpu_mask = (u_int *)arg;
- *cpu_mask |= (1 << proc->apic_id);
+ if (proc->apic_id < MAX_LAPIC_ID) {
+ cpu_mask = (u_int *)arg;
+ *cpu_mask |= (1ul << proc->apic_id);
+ }
}
break;
}
@@ -513,14 +517,14 @@
apic = (io_apic_entry_ptr)entry;
if (!(apic->apic_flags & IOAPICENTRY_FLAG_EN))
break;
- if (apic->apic_id >= NAPICID)
+ if (apic->apic_id > MAX_APIC_ID)
panic("%s: I/O APIC ID %d too high", __func__,
apic->apic_id);
if (ioapics[apic->apic_id] != NULL)
panic("%s: Double APIC ID %d", __func__,
apic->apic_id);
- ioapics[apic->apic_id] = ioapic_create(
- (uintptr_t)apic->apic_address, apic->apic_id, -1);
+ ioapics[apic->apic_id] = ioapic_create(apic->apic_address,
+ apic->apic_id, -1);
break;
default:
break;
@@ -662,7 +666,7 @@
return;
}
}
- if (apic_id >= NAPICID) {
+ if (apic_id > MAX_APIC_ID) {
printf("MPTable: Ignoring interrupt entry for ioapic%d\n",
intr->dst_apic_id);
return;
@@ -892,7 +896,7 @@
* physical processor. If any of those ID's are
* already in the table, then kill the fixup.
*/
- for (id = 0; id < NAPICID; id++) {
+ for (id = 0; id <= MAX_LAPIC_ID; id++) {
if ((id_mask & 1 << id) == 0)
continue;
/* First, make sure we are on a logical_cpus boundary. */
Index: local_apic.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/local_apic.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/i386/i386/local_apic.c -L sys/i386/i386/local_apic.c -u -r1.2 -r1.3
--- sys/i386/i386/local_apic.c
+++ sys/i386/i386/local_apic.c
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/local_apic.c,v 1.17.2.6 2006/03/10 19:37:33 jhb Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/local_apic.c,v 1.44 2007/09/11 22:54:09 attilio Exp $");
#include "opt_hwpmc_hooks.h"
@@ -51,6 +51,7 @@
#include <vm/pmap.h>
#include <machine/apicreg.h>
+#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/frame.h>
#include <machine/intr_machdep.h>
@@ -64,13 +65,6 @@
#include <ddb/ddb.h>
#endif
-/*
- * We can handle up to 60 APICs via our logical cluster IDs, but currently
- * the physical IDs on Intel processors up to the Pentium 4 are limited to
- * 16.
- */
-#define MAX_APICID 16
-
/* Sanity checks on IDT vectors. */
CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
@@ -113,7 +107,7 @@
u_long la_hard_ticks;
u_long la_stat_ticks;
u_long la_prof_ticks;
-} static lapics[MAX_APICID];
+} static lapics[MAX_APIC_ID + 1];
/* XXX: should thermal be an NMI? */
@@ -146,16 +140,22 @@
APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
};
+extern inthand_t IDTVEC(rsvd);
+
volatile lapic_t *lapic;
+vm_paddr_t lapic_paddr;
static u_long lapic_timer_divisor, lapic_timer_period, lapic_timer_hz;
static void lapic_enable(void);
+static void lapic_resume(struct pic *pic);
static void lapic_timer_enable_intr(void);
static void lapic_timer_oneshot(u_int count);
static void lapic_timer_periodic(u_int count);
static void lapic_timer_set_divisor(u_int divisor);
static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value);
+struct pic lapic_pic = { .pic_resume = lapic_resume };
+
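+/*
+ * Note that lapic_pic fills in only the pic_resume member and leaves every
+ * other handler NULL, relying on the interrupt code to skip hooks that a
+ * controller does not provide. A minimal sketch of a resume-only controller
+ * (the "mypic" names are hypothetical; the NULL-hook assumption follows from
+ * lapic_pic itself):
+ *
+ *	static void
+ *	mypic_resume(struct pic *pic)
+ *	{
+ *		// Reprogram hardware state lost across suspend.
+ *	}
+ *
+ *	static struct pic mypic = { .pic_resume = mypic_resume };
+ *	...
+ *	intr_register_pic(&mypic);	// once, during interrupt setup
+ */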
static uint32_t
lvt_mode(struct lapic *la, u_int pin, uint32_t value)
{
@@ -201,13 +201,14 @@
* Map the local APIC and setup necessary interrupt vectors.
*/
void
-lapic_init(uintptr_t addr)
+lapic_init(vm_paddr_t addr)
{
/* Map the local APIC and setup the spurious interrupt handler. */
KASSERT(trunc_page(addr) == addr,
("local APIC not aligned on a page boundary"));
- lapic = (lapic_t *)pmap_mapdev(addr, sizeof(lapic_t));
+ lapic = pmap_mapdev(addr, sizeof(lapic_t));
+ lapic_paddr = addr;
setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
@@ -217,7 +218,6 @@
/* Set BSP's per-CPU local APIC ID. */
PCPU_SET(apic_id, lapic_id());
- intr_add_cpu(PCPU_GET(apic_id));
/* Local APIC timer interrupt. */
setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_SYS386IGT, SEL_KPL,
@@ -235,7 +235,7 @@
{
int i;
- if (apic_id >= MAX_APICID) {
+ if (apic_id > MAX_APIC_ID) {
printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
if (boot_cpu)
panic("Can't ignore BSP");
@@ -278,7 +278,7 @@
}
void
-lapic_setup(void)
+lapic_setup(int boot)
{
struct lapic *la;
u_int32_t maxlvt;
@@ -307,9 +307,13 @@
/* Program timer LVT and setup handler. */
lapic->lvt_timer = lvt_mode(la, LVT_TIMER, lapic->lvt_timer);
- snprintf(buf, sizeof(buf), "cpu%d: timer", PCPU_GET(cpuid));
- intrcnt_add(buf, &la->la_timer_count);
- if (PCPU_GET(cpuid) != 0) {
+ if (boot) {
+ snprintf(buf, sizeof(buf), "cpu%d: timer", PCPU_GET(cpuid));
+ intrcnt_add(buf, &la->la_timer_count);
+ }
+
+ /* We don't setup the timer during boot on the BSP until later. */
+ if (!(boot && PCPU_GET(cpuid) == 0)) {
KASSERT(lapic_timer_period != 0, ("lapic%u: zero divisor",
lapic_id()));
lapic_timer_set_divisor(lapic_timer_divisor);
@@ -319,6 +323,29 @@
/* XXX: Error and thermal LVTs */
+ if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+ /*
+ * Detect the presence of C1E capability mostly on latest
+ * dual-cores (or future) k8 family. This feature renders
+ * the local APIC timer dead, so we disable it by reading
+ * the Interrupt Pending Message register and clearing both
+ * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
+ *
+ * Reference:
+ * "BIOS and Kernel Developer's Guide for AMD NPT
+ * Family 0Fh Processors"
+ * #32559 revision 3.00
+ */
+ if ((cpu_id & 0x00000f00) == 0x00000f00 &&
+ (cpu_id & 0x0fff0000) >= 0x00040000) {
+ uint64_t msr;
+
+ msr = rdmsr(0xc0010055);
+ if (msr & 0x18000000)
+ wrmsr(0xc0010055, msr & ~0x18000000ULL);
+ }
+ }
+
intr_restore(eflags);
}
@@ -399,6 +426,14 @@
lapic->svr = value;
}
+/* Reset the local APIC on the BSP during resume. */
+static void
+lapic_resume(struct pic *pic)
+{
+
+ lapic_setup(0);
+}
+
int
lapic_id(void)
{
@@ -596,21 +631,41 @@
}
void
-lapic_handle_intr(struct intrframe frame)
+lapic_handle_intr(int vector, struct trapframe *frame)
{
struct intsrc *isrc;
- if (frame.if_vec == -1)
+ if (vector == -1)
panic("Couldn't get vector from ISR!");
- isrc = intr_lookup_source(apic_idt_to_irq(frame.if_vec));
- intr_execute_handlers(isrc, &frame);
+ isrc = intr_lookup_source(apic_idt_to_irq(vector));
+ intr_execute_handlers(isrc, frame);
}
void
-lapic_handle_timer(struct clockframe frame)
+lapic_handle_timer(struct trapframe *frame)
{
struct lapic *la;
+ /* Send EOI first thing. */
+ lapic_eoi();
+
+#if defined(SMP) && !defined(SCHED_ULE)
+ /*
+ * Don't do any accounting for disabled HTT cores, since it
+ * would provide misleading numbers to userland.
+ *
+ * No locking is necessary here, since even if we lose the race
+ * when hlt_cpus_mask changes it is not a big deal, really.
+ *
+ * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
+ * and unlike other schedulers it actually schedules threads to
+ * those CPUs.
+ */
+ if ((hlt_cpus_mask & (1 << PCPU_GET(cpuid))) != 0)
+ return;
+#endif
+
+ /* Look up our local APIC structure for the tick counters. */
la = &lapics[PCPU_GET(apic_id)];
(*la->la_timer_count)++;
critical_enter();
@@ -620,16 +675,16 @@
if (la->la_hard_ticks >= lapic_timer_hz) {
la->la_hard_ticks -= lapic_timer_hz;
if (PCPU_GET(cpuid) == 0)
- hardclock(&frame);
+ hardclock(TRAPF_USERMODE(frame), TRAPF_PC(frame));
else
- hardclock_process(&frame);
+ hardclock_cpu(TRAPF_USERMODE(frame));
}
/* Fire statclock at stathz. */
la->la_stat_ticks += stathz;
if (la->la_stat_ticks >= lapic_timer_hz) {
la->la_stat_ticks -= lapic_timer_hz;
- statclock(&frame);
+ statclock(TRAPF_USERMODE(frame));
}
/* Fire profclock at profhz, but only when needed. */
@@ -637,7 +692,7 @@
if (la->la_prof_ticks >= lapic_timer_hz) {
la->la_prof_ticks -= lapic_timer_hz;
if (profprocs != 0)
- profclock(&frame);
+ profclock(TRAPF_USERMODE(frame), TRAPF_PC(frame));
}
critical_exit();
}
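
The hardclock/statclock/profclock dispatch above is a fixed-point rate
divider: every local APIC tick adds the target frequency to a per-clock
accumulator, and the clock fires whenever the accumulator reaches the timer
frequency, so the long-run rate is exact without any division. A standalone
illustration of the same pattern (hypothetical frequencies, plain userland C):

	#include <stdio.h>

	int
	main(void)
	{
		const int timer_hz = 2000;	/* stand-in for lapic_timer_hz */
		const int stat_hz = 128;	/* stand-in for stathz */
		int acc = 0, fired = 0, tick;

		for (tick = 0; tick < timer_hz; tick++) {	/* one second */
			acc += stat_hz;
			if (acc >= timer_hz) {
				acc -= timer_hz;
				fired++;	/* statclock() would run here */
			}
		}
		/* Prints "fired 128 times": the divider hits the target rate. */
		printf("fired %d times in one simulated second\n", fired);
		return (0);
	}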
@@ -710,6 +765,65 @@
panic("Couldn't find an APIC vector for IRQ %u", irq);
}
+/*
+ * Request 'count' free contiguous IDT vectors to be used by 'count'
+ * IRQs. 'count' must be a power of two and the vectors will be
+ * aligned on a boundary of 'align'. If the request cannot be
+ * satisfied, 0 is returned.
+ */
+u_int
+apic_alloc_vectors(u_int *irqs, u_int count, u_int align)
+{
+ u_int first, run, vector;
+
+ KASSERT(powerof2(count), ("bad count"));
+ KASSERT(powerof2(align), ("bad align"));
+ KASSERT(align >= count, ("align < count"));
+#ifdef INVARIANTS
+ for (run = 0; run < count; run++)
+ KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u",
+ irqs[run], run));
+#endif
+
+ /*
+ * Search for 'count' free vectors. As with apic_alloc_vector(),
+ * this just uses a simple first fit algorithm.
+ */
+ run = 0;
+ first = 0;
+ mtx_lock_spin(&icu_lock);
+ for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
+
+ /* Vector is in use, end run. */
+ if (ioint_irqs[vector] != 0) {
+ run = 0;
+ first = 0;
+ continue;
+ }
+
+ /* Start a new run if run == 0 and vector is aligned. */
+ if (run == 0) {
+ if ((vector & (align - 1)) != 0)
+ continue;
+ first = vector;
+ }
+ run++;
+
+ /* Keep looping if the run isn't long enough yet. */
+ if (run < count)
+ continue;
+
+ /* Found a run, assign IRQs and return the first vector. */
+ for (vector = 0; vector < count; vector++)
+ ioint_irqs[first + vector] = irqs[vector];
+ mtx_unlock_spin(&icu_lock);
+ return (first + APIC_IO_INTS);
+ }
+ mtx_unlock_spin(&icu_lock);
+ printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
+ return (0);
+}
+
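
apic_alloc_vectors() is a first-fit scan for count consecutive free IDT slots
whose first slot is align-aligned; any occupied slot abandons the current run.
The same search, extracted into a standalone function over a plain free/used
array (a hypothetical helper with no locking; align must be a power of two
for the mask test to work):

	#include <stddef.h>

	/* First index of an aligned run of `count` free slots, or -1. */
	static int
	find_aligned_run(const int *used, size_t nslots, size_t count,
	    size_t align)
	{
		size_t i, run = 0, first = 0;

		for (i = 0; i < nslots; i++) {
			if (used[i]) {		/* occupied: end any run */
				run = 0;
				continue;
			}
			if (run == 0) {		/* runs start on aligned slots */
				if ((i & (align - 1)) != 0)
					continue;
				first = i;
			}
			if (++run == count)
				return ((int)first);
		}
		return (-1);
	}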
void
apic_enable_vector(u_int vector)
{
@@ -721,6 +835,17 @@
GSEL(GCODE_SEL, SEL_KPL));
}
+void
+apic_disable_vector(u_int vector)
+{
+
+ KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
+ KASSERT(ioint_handlers[vector / 32] != NULL,
+ ("No ISR handler for vector %u", vector));
+ setidt(vector, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+}
+
/* Release an APIC vector when it's no longer in use. */
void
apic_free_vector(u_int vector, u_int irq)
@@ -753,18 +878,16 @@
DB_SHOW_COMMAND(apic, db_show_apic)
{
struct intsrc *isrc;
- int quit, i, verbose;
+ int i, verbose;
u_int irq;
- quit = 0;
if (strcmp(modif, "vv") == 0)
verbose = 2;
else if (strcmp(modif, "v") == 0)
verbose = 1;
else
verbose = 0;
- db_setup_paging(db_simple_pager, &quit, db_lines_per_page);
- for (i = 0; i < APIC_NUM_IOINTS + 1 && !quit; i++) {
+ for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
irq = ioint_irqs[i];
if (irq != 0 && irq != IRQ_SYSCALL) {
db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
@@ -782,6 +905,76 @@
}
}
}
+
+static void
+dump_mask(const char *prefix, uint32_t v, int base)
+{
+ int i, first;
+
+ first = 1;
+ for (i = 0; i < 32; i++)
+ if (v & (1 << i)) {
+ if (first) {
+ db_printf("%s:", prefix);
+ first = 0;
+ }
+ db_printf(" %02x", base + i);
+ }
+ if (!first)
+ db_printf("\n");
+}
+
+/* Show info from the lapic regs for this CPU. */
+DB_SHOW_COMMAND(lapic, db_show_lapic)
+{
+ uint32_t v;
+
+ db_printf("lapic ID = %d\n", lapic_id());
+ v = lapic->version;
+ db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
+ v & 0xf);
+ db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
+ v = lapic->svr;
+ db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR,
+ v & APIC_SVR_ENABLE ? "enabled" : "disabled");
+ db_printf("TPR = %02x\n", lapic->tpr);
+
+#define dump_field(prefix, index) \
+ dump_mask(__XSTRING(prefix ## index), lapic->prefix ## index, \
+ index * 32)
+
+ db_printf("In-service Interrupts:\n");
+ dump_field(isr, 0);
+ dump_field(isr, 1);
+ dump_field(isr, 2);
+ dump_field(isr, 3);
+ dump_field(isr, 4);
+ dump_field(isr, 5);
+ dump_field(isr, 6);
+ dump_field(isr, 7);
+
+ db_printf("TMR Interrupts:\n");
+ dump_field(tmr, 0);
+ dump_field(tmr, 1);
+ dump_field(tmr, 2);
+ dump_field(tmr, 3);
+ dump_field(tmr, 4);
+ dump_field(tmr, 5);
+ dump_field(tmr, 6);
+ dump_field(tmr, 7);
+
+ db_printf("IRR Interrupts:\n");
+ dump_field(irr, 0);
+ dump_field(irr, 1);
+ dump_field(irr, 2);
+ dump_field(irr, 3);
+ dump_field(irr, 4);
+ dump_field(irr, 5);
+ dump_field(irr, 6);
+ dump_field(irr, 7);
+
+#undef dump_field
+}
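
An expansion example for the dump_field() macro above (editorial): with prefix
'isr' and index 1, the __XSTRING token pasting turns

	dump_field(isr, 1);

into

	dump_mask("isr1", lapic->isr1, 32);

so each 32-bit slice of the 256-bit ISR/TMR/IRR register arrays is printed
with its name and its bit-number base.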
#endif
/*
@@ -871,12 +1064,8 @@
if (retval != 0)
printf("%s: Failed to setup the local APIC: returned %d\n",
best_enum->apic_name, retval);
-#ifdef SMP
- /* Last, setup the cpu topology now that we have probed CPUs */
- mp_topology();
-#endif
}
-SYSINIT(apic_init, SI_SUB_CPU, SI_ORDER_FIRST, apic_init, NULL)
+SYSINIT(apic_init, SI_SUB_CPU, SI_ORDER_SECOND, apic_init, NULL)
/*
* Setup the I/O APICs.
@@ -897,9 +1086,13 @@
* Finish setting up the local APIC on the BSP once we know how to
* properly program the LINT pins.
*/
- lapic_setup();
+ lapic_setup(1);
+ intr_register_pic(&lapic_pic);
if (bootverbose)
lapic_dump("BSP");
+
+ /* Enable the MSI "pic". */
+ msi_init();
}
SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL)
Index: vm86.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/vm86.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/vm86.c -L sys/i386/i386/vm86.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/vm86.c
+++ sys/i386/i386/vm86.c
@@ -25,10 +25,11 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/vm86.c,v 1.57 2004/11/27 06:51:36 das Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/vm86.c,v 1.62 2006/12/17 05:07:01 kmacy Exp $");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@@ -54,7 +55,7 @@
extern int vm86_bioscall(struct vm86frame *);
extern void vm86_biosret(struct vm86frame *);
-void vm86_prepcall(struct vm86frame);
+void vm86_prepcall(struct vm86frame *);
struct system_map {
int type;
@@ -505,46 +506,33 @@
panic("vm86_addpage: not enough room");
}
-static void
-vm86_initflags(struct vm86frame *vmf)
-{
- int eflags = vmf->vmf_eflags;
- struct vm86_kernel *vm86 = &PCPU_GET(curpcb)->pcb_ext->ext_vm86;
-
- if (vm86->vm86_has_vme) {
- eflags = (vmf->vmf_eflags & ~VME_USERCHANGE) |
- (eflags & VME_USERCHANGE) | PSL_VM;
- } else {
- vm86->vm86_eflags = eflags; /* save VIF, VIP */
- eflags = (vmf->vmf_eflags & ~VM_USERCHANGE) |
- (eflags & VM_USERCHANGE) | PSL_VM;
- }
- vmf->vmf_eflags = eflags | PSL_VM;
-}
-
/*
* called from vm86_bioscall, while in vm86 address space, to finalize setup.
*/
void
-vm86_prepcall(struct vm86frame vmf)
+vm86_prepcall(struct vm86frame *vmf)
{
uintptr_t addr[] = { 0xA00, 0x1000 }; /* code, stack */
u_char intcall[] = {
CLI, INTn, 0x00, STI, HLT
};
+ struct vm86_kernel *vm86;
- if ((vmf.vmf_trapno & PAGE_MASK) <= 0xff) {
+ if ((vmf->vmf_trapno & PAGE_MASK) <= 0xff) {
/* interrupt call requested */
- intcall[2] = (u_char)(vmf.vmf_trapno & 0xff);
+ intcall[2] = (u_char)(vmf->vmf_trapno & 0xff);
memcpy((void *)addr[0], (void *)intcall, sizeof(intcall));
- vmf.vmf_ip = addr[0];
- vmf.vmf_cs = 0;
+ vmf->vmf_ip = addr[0];
+ vmf->vmf_cs = 0;
}
- vmf.vmf_sp = addr[1] - 2; /* keep aligned */
- vmf.kernel_fs = vmf.kernel_es = vmf.kernel_ds = 0;
- vmf.vmf_ss = 0;
- vmf.vmf_eflags = PSL_VIF | PSL_VM | PSL_USER;
- vm86_initflags(&vmf);
+ vmf->vmf_sp = addr[1] - 2; /* keep aligned */
+ vmf->kernel_fs = vmf->kernel_es = vmf->kernel_ds = 0;
+ vmf->vmf_ss = 0;
+ vmf->vmf_eflags = PSL_VIF | PSL_VM | PSL_USER;
+
+ vm86 = &PCPU_GET(curpcb)->pcb_ext->ext_vm86;
+ if (!vm86->vm86_has_vme)
+ vm86->vm86_eflags = vmf->vmf_eflags; /* save VIF, VIP */
}
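
Editorial note: since vm86_prepcall() now takes a pointer instead of a
by-value frame, its assembly caller has to pass the frame's address. The
matching vm86bios.s hunk later in this commit does exactly that:

	pushl	%esp			/* address of the vm86frame just built */
	call	vm86_prepcall		/* finish setup */
	add	$4, %esp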
/*
@@ -724,7 +712,7 @@
case VM86_INTCALL: {
struct vm86_intcall_args sa;
- if ((error = suser(td)))
+ if ((error = priv_check(td, PRIV_VM86_INTCALL)))
return (error);
if ((error = copyin(ua.sub_args, &sa, sizeof(sa))))
return (error);
Index: swtch.s
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/swtch.s,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/swtch.s -L sys/i386/i386/swtch.s -u -r1.1.1.1 -r1.2
--- sys/i386/i386/swtch.s
+++ sys/i386/i386/swtch.s
@@ -29,15 +29,32 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.148 2005/04/13 22:57:17 peter Exp $
+ * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.156 2007/08/22 05:06:14 jkoshy Exp $
*/
#include "opt_npx.h"
+#include "opt_sched.h"
#include <machine/asmacros.h>
#include "assym.s"
+#if defined(SMP) && defined(SCHED_ULE)
+#define SETOP xchgl
+#define BLOCK_SPIN(reg) \
+ movl $blocked_lock,%eax ; \
+ 100: ; \
+ lock ; \
+ cmpxchgl %eax,TD_LOCK(reg) ; \
+ jne 101f ; \
+ pause ; \
+ jmp 100b ; \
+ 101:
+#else
+#define SETOP movl
+#define BLOCK_SPIN(reg)
+#endif
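
A rough C paraphrase of BLOCK_SPIN (editorial sketch; the macro itself uses a
locked cmpxchg rather than the plain load shown here):

	/*
	 * Spin until the scheduler releases the incoming thread's lock,
	 * i.e. until td_lock stops pointing at the global blocked_lock.
	 */
	while (td->td_lock == &blocked_lock)
		cpu_spinwait();		/* the "pause" instruction */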
+
/*****************************************************************************/
/* Scheduling */
/*****************************************************************************/
@@ -82,6 +99,7 @@
#endif
btsl %esi, PM_ACTIVE(%ebx) /* set new */
jmp sw1
+END(cpu_throw)
/*
* cpu_switch(old, new)
@@ -91,6 +109,7 @@
* 0(%esp) = ret
* 4(%esp) = oldtd
* 8(%esp) = newtd
+ * 12(%esp) = newlock
*/
ENTRY(cpu_switch)
@@ -114,12 +133,6 @@
movl %gs,PCB_GS(%edx)
pushfl /* PSL */
popl PCB_PSL(%edx)
- /* Check to see if we need to call a switchout function. */
- movl PCB_SWITCHOUT(%edx),%eax
- cmpl $0, %eax
- je 1f
- call *%eax
-1:
/* Test if debug registers should be saved. */
testl $PCB_DBREGS,PCB_FLAGS(%edx)
jz 1f /* no, skip over */
@@ -151,14 +164,14 @@
#endif
/* Save is done. Now fire up new thread. Leave old vmspace. */
- movl %ecx,%edi
+ movl 4(%esp),%edi
movl 8(%esp),%ecx /* New thread */
+ movl 12(%esp),%esi /* New lock */
#ifdef INVARIANTS
testl %ecx,%ecx /* no thread? */
jz badsw3 /* no, panic */
#endif
movl TD_PCB(%ecx),%edx
- movl PCPU(CPUID), %esi
/* switch address space */
movl PCB_CR3(%edx),%eax
@@ -167,11 +180,14 @@
#else
cmpl %eax,IdlePTD /* Kernel address space? */
#endif
- je sw1
+ je sw0
movl %cr3,%ebx /* The same address space? */
cmpl %ebx,%eax
- je sw1
+ je sw0
movl %eax,%cr3 /* new address space */
+ movl %esi,%eax
+ movl PCPU(CPUID),%esi
+ SETOP %eax,TD_LOCK(%edi) /* Switchout td_lock */
/* Release bit from old pmap->pm_active */
movl PCPU(CURPMAP), %ebx
@@ -189,15 +205,19 @@
lock
#endif
btsl %esi, PM_ACTIVE(%ebx) /* set new */
+ jmp sw1
+sw0:
+ SETOP %esi,TD_LOCK(%edi) /* Switchout td_lock */
sw1:
+ BLOCK_SPIN(%ecx)
/*
* At this point, we've switched address spaces and are ready
* to load up the rest of the next context.
*/
cmpl $0, PCB_EXT(%edx) /* has pcb extension? */
je 1f /* If not, use the default */
- btsl %esi, private_tss /* mark use of private tss */
+ movl $1, PCPU(PRIVATE_TSS) /* mark use of private tss */
movl PCB_EXT(%edx), %edi /* new tss descriptor */
jmp 2f /* Load it up */
@@ -213,8 +233,9 @@
* Test this CPU's bit in the bitmap to see if this
* CPU was using a private TSS.
*/
- btrl %esi, private_tss /* Already using the common? */
- jae 3f /* if so, skip reloading */
+ cmpl $0, PCPU(PRIVATE_TSS) /* Already using the common? */
+ je 3f /* if so, skip reloading */
+ movl $0, PCPU(PRIVATE_TSS)
PCPU_ADDR(COMMON_TSSD, %edi)
2:
/* Move correct tss descriptor into GDT slot, then reload tr. */
@@ -223,7 +244,7 @@
movl 4(%edi), %esi
movl %eax, 0(%ebx)
movl %esi, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
+ movl $GPROC0_SEL*8, %esi /* GSEL(GPROC0_SEL, SEL_KPL) */
ltr %si
3:
@@ -251,6 +272,7 @@
popfl
movl %edx, PCPU(CURPCB)
+ movl TD_TID(%ecx),%eax
movl %ecx, PCPU(CURTHREAD) /* into next thread */
/*
@@ -327,6 +349,7 @@
call panic
sw0_3: .asciz "cpu_switch: no newthread supplied"
#endif
+END(cpu_switch)
/*
* savectx(pcb)
@@ -392,3 +415,4 @@
#endif /* DEV_NPX */
ret
+END(savectx)
--- /dev/null
+++ sys/i386/i386/bpf_jit_machdep.c
@@ -0,0 +1,514 @@
+/*-
+ * Copyright (c) 2002 - 2003 NetGroup, Politecnico di Torino (Italy)
+ * Copyright (c) 2005 Jung-uk Kim <jkim at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Politecnico di Torino nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/bpf_jit_machdep.c,v 1.4 2006/01/03 20:26:02 jkim Exp $");
+
+#include "opt_bpf.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/malloc.h>
+
+#include <net/if.h>
+#include <net/bpf.h>
+#include <net/bpf_jitter.h>
+
+#include <i386/i386/bpf_jit_machdep.h>
+
+bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, int *);
+
+/*
+ * emit routine to update the jump table
+ */
+static void
+emit_length(bpf_bin_stream *stream, u_int value, u_int len)
+{
+
+ (stream->refs)[stream->bpf_pc] += len;
+ stream->cur_ip += len;
+}
+
+/*
+ * emit routine to output the actual binary code
+ */
+static void
+emit_code(bpf_bin_stream *stream, u_int value, u_int len)
+{
+
+ switch (len) {
+ case 1:
+ stream->ibuf[stream->cur_ip] = (u_char)value;
+ stream->cur_ip++;
+ break;
+
+ case 2:
+ *((u_short *)(stream->ibuf + stream->cur_ip)) = (u_short)value;
+ stream->cur_ip += 2;
+ break;
+
+ case 4:
+ *((u_int *)(stream->ibuf + stream->cur_ip)) = value;
+ stream->cur_ip += 4;
+ break;
+ }
+
+ return;
+}
+
+/*
+ * Function that does the real stuff
+ */
+bpf_filter_func
+bpf_jit_compile(struct bpf_insn *prog, u_int nins, int *mem)
+{
+ struct bpf_insn *ins;
+ u_int i, pass;
+ bpf_bin_stream stream;
+
+ /*
+ * NOTE: do not modify the name of this variable, as it's used by
+ * the macros to emit code.
+ */
+ emit_func emitm;
+
+ /* Do not compile an empty filter. */
+ if (nins == 0)
+ return NULL;
+
+ /* Allocate the reference table for the jumps */
+ stream.refs = (u_int *)malloc((nins + 1) * sizeof(u_int),
+ M_BPFJIT, M_NOWAIT);
+ if (stream.refs == NULL)
+ return NULL;
+
+ /* Reset the reference table */
+ for (i = 0; i < nins + 1; i++)
+ stream.refs[i] = 0;
+
+ stream.cur_ip = 0;
+ stream.bpf_pc = 0;
+
+ /*
+ * the first pass will emit the lengths of the instructions
+ * to create the reference table
+ */
+ emitm = emit_length;
+
+ pass = 0;
+ for (;;) {
+ ins = prog;
+
+ /* create the procedure header */
+ PUSH(EBP);
+ MOVrd(EBP, ESP);
+ PUSH(EDI);
+ PUSH(ESI);
+ PUSH(EBX);
+ MOVodd(EBX, EBP, 8);
+
+ for (i = 0; i < nins; i++) {
+ stream.bpf_pc++;
+
+ switch (ins->code) {
+ default:
+ /* Unknown instruction: fail the compile. */
+ if (pass > 0)
+ free(stream.ibuf, M_BPFJIT);
+ free(stream.refs, M_BPFJIT);
+ return NULL;
+
+ case BPF_RET|BPF_K:
+ MOVid(EAX, ins->k);
+ POP(EBX);
+ POP(ESI);
+ POP(EDI);
+ LEAVE_RET();
+ break;
+
+ case BPF_RET|BPF_A:
+ POP(EBX);
+ POP(ESI);
+ POP(EDI);
+ LEAVE_RET();
+ break;
+
+ case BPF_LD|BPF_W|BPF_ABS:
+ MOVid(ECX, ins->k);
+ MOVrd(ESI, ECX);
+ ADDib(ECX, sizeof(int));
+ CMPodd(ECX, EBP, 0x10);
+ JLEb(7);
+ ZERO_EAX();
+ POP(EBX);
+ POP(ESI);
+ POP(EDI);
+ LEAVE_RET();
+ MOVobd(EAX, EBX, ESI);
+ BSWAP(EAX);
+ break;
+
+ case BPF_LD|BPF_H|BPF_ABS:
+ ZERO_EAX();
+ MOVid(ECX, ins->k);
+ MOVrd(ESI, ECX);
+ ADDib(ECX, sizeof(short));
+ CMPodd(ECX, EBP, 0x10);
+ JLEb(5);
+ POP(EBX);
+ POP(ESI);
+ POP(EDI);
+ LEAVE_RET();
+ MOVobw(AX, EBX, ESI);
+ SWAP_AX();
+ break;
+
+ case BPF_LD|BPF_B|BPF_ABS:
+ ZERO_EAX();
+ MOVid(ECX, ins->k);
+ CMPodd(ECX, EBP, 0x10);
+ JLEb(5);
+ POP(EBX);
+ POP(ESI);
+ POP(EDI);
+ LEAVE_RET();
+ MOVobb(AL, EBX, ECX);
+ break;
+
+ case BPF_LD|BPF_W|BPF_LEN:
+ MOVodd(EAX, EBP, 0xc);
+ break;
+
+ case BPF_LDX|BPF_W|BPF_LEN:
+ MOVodd(EDX, EBP, 0xc);
+ break;
+
+ case BPF_LD|BPF_W|BPF_IND:
+ MOVid(ECX, ins->k);
+ ADDrd(ECX, EDX);
+ MOVrd(ESI, ECX);
+ ADDib(ECX, sizeof(int));
+ CMPodd(ECX, EBP, 0x10);
+ JLEb(7);
+ ZERO_EAX();
+ POP(EBX);
+ POP(ESI);
+ POP(EDI);
+ LEAVE_RET();
+ MOVobd(EAX, EBX, ESI);
+ BSWAP(EAX);
+ break;
+
+ case BPF_LD|BPF_H|BPF_IND:
+ ZERO_EAX();
+ MOVid(ECX, ins->k);
+ ADDrd(ECX, EDX);
+ MOVrd(ESI, ECX);
+ ADDib(ECX, sizeof(short));
+ CMPodd(ECX, EBP, 0x10);
+ JLEb(5);
+ POP(EBX);
+ POP(ESI);
+ POP(EDI);
+ LEAVE_RET();
+ MOVobw(AX, EBX, ESI);
+ SWAP_AX();
+ break;
+
+ case BPF_LD|BPF_B|BPF_IND:
+ ZERO_EAX();
+ MOVid(ECX, ins->k);
+ ADDrd(ECX, EDX);
+ CMPodd(ECX, EBP, 0x10);
+ JLEb(5);
+ POP(EBX);
+ POP(ESI);
+ POP(EDI);
+ LEAVE_RET();
+ MOVobb(AL, EBX, ECX);
+ break;
+
+ case BPF_LDX|BPF_MSH|BPF_B:
+ MOVid(ECX, ins->k);
+ CMPodd(ECX, EBP, 0x10);
+ JLEb(7);
+ ZERO_EAX();
+ POP(EBX);
+ POP(ESI);
+ POP(EDI);
+ LEAVE_RET();
+ ZERO_EDX();
+ MOVobb(DL, EBX, ECX);
+ ANDib(DL, 0xf);
+ SHLib(EDX, 2);
+ break;
+
+ case BPF_LD|BPF_IMM:
+ MOVid(EAX, ins->k);
+ break;
+
+ case BPF_LDX|BPF_IMM:
+ MOVid(EDX, ins->k);
+ break;
+
+ case BPF_LD|BPF_MEM:
+ MOVid(ECX, (uintptr_t)mem);
+ MOVid(ESI, ins->k * 4);
+ MOVobd(EAX, ECX, ESI);
+ break;
+
+ case BPF_LDX|BPF_MEM:
+ MOVid(ECX, (uintptr_t)mem);
+ MOVid(ESI, ins->k * 4);
+ MOVobd(EDX, ECX, ESI);
+ break;
+
+ case BPF_ST:
+ /*
+ * XXX this command and the following could
+ * be optimized if the previous instruction
+ * was already of this type
+ */
+ MOVid(ECX, (uintptr_t)mem);
+ MOVid(ESI, ins->k * 4);
+ MOVomd(ECX, ESI, EAX);
+ break;
+
+ case BPF_STX:
+ MOVid(ECX, (uintptr_t)mem);
+ MOVid(ESI, ins->k * 4);
+ MOVomd(ECX, ESI, EDX);
+ break;
+
+ case BPF_JMP|BPF_JA:
+ JMP(stream.refs[stream.bpf_pc + ins->k] -
+ stream.refs[stream.bpf_pc]);
+ break;
+
+ case BPF_JMP|BPF_JGT|BPF_K:
+ CMPid(EAX, ins->k);
+ /* 5 is the size of the following JMP */
+ JG(stream.refs[stream.bpf_pc + ins->jt] -
+ stream.refs[stream.bpf_pc] + 5);
+ JMP(stream.refs[stream.bpf_pc + ins->jf] -
+ stream.refs[stream.bpf_pc]);
+ break;
+
+ case BPF_JMP|BPF_JGE|BPF_K:
+ CMPid(EAX, ins->k);
+ JGE(stream.refs[stream.bpf_pc + ins->jt] -
+ stream.refs[stream.bpf_pc] + 5);
+ JMP(stream.refs[stream.bpf_pc + ins->jf] -
+ stream.refs[stream.bpf_pc]);
+ break;
+
+ case BPF_JMP|BPF_JEQ|BPF_K:
+ CMPid(EAX, ins->k);
+ JE(stream.refs[stream.bpf_pc + ins->jt] -
+ stream.refs[stream.bpf_pc] + 5);
+ JMP(stream.refs[stream.bpf_pc + ins->jf] -
+ stream.refs[stream.bpf_pc]);
+ break;
+
+ case BPF_JMP|BPF_JSET|BPF_K:
+ MOVrd(ECX, EAX);
+ ANDid(ECX, ins->k);
+ JE(stream.refs[stream.bpf_pc + ins->jf] -
+ stream.refs[stream.bpf_pc] + 5);
+ JMP(stream.refs[stream.bpf_pc + ins->jt] -
+ stream.refs[stream.bpf_pc]);
+ break;
+
+ case BPF_JMP|BPF_JGT|BPF_X:
+ CMPrd(EAX, EDX);
+ JA(stream.refs[stream.bpf_pc + ins->jt] -
+ stream.refs[stream.bpf_pc] + 5);
+ JMP(stream.refs[stream.bpf_pc + ins->jf] -
+ stream.refs[stream.bpf_pc]);
+ break;
+
+ case BPF_JMP|BPF_JGE|BPF_X:
+ CMPrd(EAX, EDX);
+ JAE(stream.refs[stream.bpf_pc + ins->jt] -
+ stream.refs[stream.bpf_pc] + 5);
+ JMP(stream.refs[stream.bpf_pc + ins->jf] -
+ stream.refs[stream.bpf_pc]);
+ break;
+
+ case BPF_JMP|BPF_JEQ|BPF_X:
+ CMPrd(EAX, EDX);
+ JE(stream.refs[stream.bpf_pc + ins->jt] -
+ stream.refs[stream.bpf_pc] + 5);
+ JMP(stream.refs[stream.bpf_pc + ins->jf] -
+ stream.refs[stream.bpf_pc]);
+ break;
+
+ case BPF_JMP|BPF_JSET|BPF_X:
+ MOVrd(ECX, EAX);
+ ANDrd(ECX, EDX);
+ JE(stream.refs[stream.bpf_pc + ins->jf] -
+ stream.refs[stream.bpf_pc] + 5);
+ JMP(stream.refs[stream.bpf_pc + ins->jt] -
+ stream.refs[stream.bpf_pc]);
+ break;
+
+ case BPF_ALU|BPF_ADD|BPF_X:
+ ADDrd(EAX, EDX);
+ break;
+
+ case BPF_ALU|BPF_SUB|BPF_X:
+ SUBrd(EAX, EDX);
+ break;
+
+ case BPF_ALU|BPF_MUL|BPF_X:
+ MOVrd(ECX, EDX);
+ MULrd(EDX);
+ MOVrd(EDX, ECX);
+ break;
+
+ case BPF_ALU|BPF_DIV|BPF_X:
+ CMPid(EDX, 0);
+ JNEb(7);
+ ZERO_EAX();
+ POP(EBX);
+ POP(ESI);
+ POP(EDI);
+ LEAVE_RET();
+ MOVrd(ECX, EDX);
+ ZERO_EDX();
+ DIVrd(ECX);
+ MOVrd(EDX, ECX);
+ break;
+
+ case BPF_ALU|BPF_AND|BPF_X:
+ ANDrd(EAX, EDX);
+ break;
+
+ case BPF_ALU|BPF_OR|BPF_X:
+ ORrd(EAX, EDX);
+ break;
+
+ case BPF_ALU|BPF_LSH|BPF_X:
+ MOVrd(ECX, EDX);
+ SHL_CLrb(EAX);
+ break;
+
+ case BPF_ALU|BPF_RSH|BPF_X:
+ MOVrd(ECX, EDX);
+ SHR_CLrb(EAX);
+ break;
+
+ case BPF_ALU|BPF_ADD|BPF_K:
+ ADD_EAXi(ins->k);
+ break;
+
+ case BPF_ALU|BPF_SUB|BPF_K:
+ SUB_EAXi(ins->k);
+ break;
+
+ case BPF_ALU|BPF_MUL|BPF_K:
+ MOVrd(ECX, EDX);
+ MOVid(EDX, ins->k);
+ MULrd(EDX);
+ MOVrd(EDX, ECX);
+ break;
+
+ case BPF_ALU|BPF_DIV|BPF_K:
+ MOVrd(ECX, EDX);
+ ZERO_EDX();
+ MOVid(ESI, ins->k);
+ DIVrd(ESI);
+ MOVrd(EDX, ECX);
+ break;
+
+ case BPF_ALU|BPF_AND|BPF_K:
+ ANDid(EAX, ins->k);
+ break;
+
+ case BPF_ALU|BPF_OR|BPF_K:
+ ORid(EAX, ins->k);
+ break;
+
+ case BPF_ALU|BPF_LSH|BPF_K:
+ SHLib(EAX, (ins->k) & 255);
+ break;
+
+ case BPF_ALU|BPF_RSH|BPF_K:
+ SHRib(EAX, (ins->k) & 255);
+ break;
+
+ case BPF_ALU|BPF_NEG:
+ NEGd(EAX);
+ break;
+
+ case BPF_MISC|BPF_TAX:
+ MOVrd(EDX, EAX);
+ break;
+
+ case BPF_MISC|BPF_TXA:
+ MOVrd(EAX, EDX);
+ break;
+ }
+ ins++;
+ }
+
+ pass++;
+ if (pass == 2)
+ break;
+
+ stream.ibuf = (char *)malloc(stream.cur_ip, M_BPFJIT, M_NOWAIT);
+ if (stream.ibuf == NULL) {
+ free(stream.refs, M_BPFJIT);
+ return NULL;
+ }
+
+ /*
+ * modify the reference table to contain the offsets and
+ * not the lengths of the instructions
+ */
+ for (i = 1; i < nins + 1; i++)
+ stream.refs[i] += stream.refs[i - 1];
+
+ /* Reset the counters */
+ stream.cur_ip = 0;
+ stream.bpf_pc = 0;
+
+ /* the second pass creates the actual code */
+ emitm = emit_code;
+ }
+
+ /*
+ * the reference table is needed only during compilation,
+ * now we can free it
+ */
+ free(stream.refs, M_BPFJIT);
+
+ return (bpf_filter_func)stream.ibuf;
+}
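
A hypothetical caller (editorial sketch, assuming the bpf_jitter.h typedef
u_int (*bpf_filter_func)(u_char *, u_int, u_int), i.e. packet, wire length,
captured length); the one-instruction program below accepts every packet:

	static struct bpf_insn prog[] = {
		BPF_STMT(BPF_RET | BPF_K, (u_int)-1),	/* accept whole packet */
	};
	int mem[BPF_MEMWORDS];
	bpf_filter_func f;
	u_int slen;

	f = bpf_jit_compile(prog, 1, mem);
	if (f != NULL)
		slen = f(pkt, wirelen, buflen);	/* 0 = drop, else snap length */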
Index: identcpu.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/identcpu.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -L sys/i386/i386/identcpu.c -L sys/i386/i386/identcpu.c -u -r1.4 -r1.5
--- sys/i386/i386/identcpu.c
+++ sys/i386/i386/identcpu.c
@@ -39,13 +39,14 @@
*/
#include <sys/cdefs.h>
-/*$FreeBSD: src/sys/i386/i386/identcpu.c,v 1.145.2.5 2006/08/08 08:41:34 mr Exp $ */
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: src/sys/i386/i386/identcpu.c,v 1.180 2007/05/29 19:39:18 des Exp $");
#include "opt_cpu.h"
#include <sys/param.h>
#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
@@ -75,14 +76,21 @@
void panicifcpuunsupported(void);
static void identifycyrix(void);
+static void init_exthigh(void);
+void setPQL2(int *const size, int *const ways);
+static void setPQL2_AMD(int *const size, int *const ways);
+static void setPQL2_INTEL(int *const size, int *const ways);
+static void get_INTEL_TLB(u_int data, int *const size, int *const ways);
static void print_AMD_info(void);
+static void print_INTEL_info(void);
+static void print_INTEL_TLB(u_int data);
static void print_AMD_assoc(int i);
static void print_transmeta_info(void);
int cpu_class;
u_int cpu_exthigh; /* Highest arg to extended CPUID */
u_int cyrix_did; /* Device ID of Cyrix CPU */
-char machine[] = "i386";
+char machine[] = MACHINE;
SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD,
machine, 0, "Machine class");
@@ -94,7 +102,6 @@
SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD,
&hw_clockrate, 0, "CPU instruction clock rate");
-#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
static char cpu_brand[48];
#define MAX_BRAND_INDEX 8
@@ -110,7 +117,6 @@
NULL,
"Intel Pentium 4"
};
-#endif
static struct {
char *cpu_name;
@@ -139,57 +145,53 @@
int has_f00f_bug = 0; /* Initialized so that it can be patched. */
#endif
+static void
+init_exthigh(void)
+{
+ static int done = 0;
+ u_int regs[4];
+
+ if (done == 0) {
+ if (cpu_high > 0 &&
+ (strcmp(cpu_vendor, "GenuineIntel") == 0 ||
+ strcmp(cpu_vendor, "AuthenticAMD") == 0 ||
+ strcmp(cpu_vendor, "GenuineTMx86") == 0 ||
+ strcmp(cpu_vendor, "TransmetaCPU") == 0 ||
+ strcmp(cpu_vendor, "Geode by NSC") == 0)) {
+ do_cpuid(0x80000000, regs);
+ if (regs[0] >= 0x80000000)
+ cpu_exthigh = regs[0];
+ }
+
+ done = 1;
+ }
+}
+
void
printcpuinfo(void)
{
-#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
u_int regs[4], i;
char *brand;
-#endif
cpu_class = i386_cpus[cpu].cpu_class;
printf("CPU: ");
strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof (cpu_model));
-#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
/* Check for extended CPUID information and a processor name. */
- if (cpu_high > 0 &&
- (strcmp(cpu_vendor, "GenuineIntel") == 0 ||
- strcmp(cpu_vendor, "AuthenticAMD") == 0 ||
- strcmp(cpu_vendor, "GenuineTMx86") == 0 ||
- strcmp(cpu_vendor, "TransmetaCPU") == 0 ||
- strcmp(cpu_vendor, "Geode by NSC") == 0)) {
- do_cpuid(0x80000000, regs);
- if (regs[0] >= 0x80000000) {
- cpu_exthigh = regs[0];
- if (cpu_exthigh >= 0x80000004) {
- brand = cpu_brand;
- for (i = 0x80000002; i < 0x80000005; i++) {
- do_cpuid(i, regs);
- memcpy(brand, regs, sizeof(regs));
- brand += sizeof(regs);
- }
- }
- }
- }
-
- /* Detect AMD features (PTE no-execute bit, 3dnow, 64 bit mode etc) */
- if (strcmp(cpu_vendor, "GenuineIntel") == 0 ||
- strcmp(cpu_vendor, "AuthenticAMD") == 0) {
- if (cpu_exthigh >= 0x80000001) {
- do_cpuid(0x80000001, regs);
- amd_feature = regs[3] & ~(cpu_feature & 0x0183f3ff);
- amd_feature2 = regs[2];
- }
- if (cpu_exthigh >= 0x80000008) {
- do_cpuid(0x80000008, regs);
- cpu_procinfo2 = regs[2];
+ init_exthigh();
+ if (cpu_exthigh >= 0x80000004) {
+ brand = cpu_brand;
+ for (i = 0x80000002; i < 0x80000005; i++) {
+ do_cpuid(i, regs);
+ memcpy(brand, regs, sizeof(regs));
+ brand += sizeof(regs);
}
}
if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
if ((cpu_id & 0xf00) > 0x300) {
u_int brand_index;
+ u_int model;
cpu_model[0] = '\0';
@@ -302,6 +304,16 @@
case 0xf00:
strcat(cpu_model, "Pentium 4");
cpu = CPU_P4;
+ model = (cpu_id & 0x0f0) >> 4;
+ if (model == 3 || model == 4 || model == 6) {
+ uint64_t tmp;
+
+ tmp = rdmsr(MSR_IA32_MISC_ENABLE);
+ wrmsr(MSR_IA32_MISC_ENABLE,
+ tmp & ~(1LL << 22));
+ do_cpuid(0, regs);
+ cpu_high = regs[0];
+ }
break;
default:
strcat(cpu_model, "unknown");
@@ -374,6 +386,14 @@
case 0x590:
strcat(cpu_model, "K6-III");
break;
+ case 0x5a0:
+ strcat(cpu_model, "Geode LX");
+ /*
+ * Make sure the TSC runs through suspension,
+ * otherwise we can't use it as timecounter
+ */
+ wrmsr(0x1900, rdmsr(0x1900) | 0x20ULL);
+ break;
default:
strcat(cpu_model, "Unknown");
break;
@@ -575,16 +595,12 @@
i = 0;
if (i & VIA_CPUID_HAS_RNG)
strcat(cpu_model, "+RNG");
-
if (i & VIA_CPUID_HAS_ACE)
strcat(cpu_model, "+AES");
-
if (i & VIA_CPUID_HAS_ACE2)
strcat(cpu_model, "+AES-CTR");
-
if (i & VIA_CPUID_HAS_PHE)
strcat(cpu_model, "+SHA1+SHA256");
-
if (i & VIA_CPUID_HAS_PMM)
strcat(cpu_model, "+RSA");
break;
@@ -616,8 +632,6 @@
if (*brand != '\0')
strcpy(cpu_model, brand);
-#endif
-
printf("%s (", cpu_model);
switch(cpu_class) {
case CPUCLASS_286:
@@ -654,7 +668,6 @@
printf("Unknown"); /* will panic below... */
}
printf("-class CPU)\n");
-#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
if(*cpu_vendor)
printf(" Origin = \"%s\"",cpu_vendor);
if(cpu_id)
@@ -858,7 +871,8 @@
else if (strcmp(cpu_vendor, "GenuineIntel") == 0 &&
(cpu_high >= 4)) {
cpuid_count(4, 0, regs);
- cmp = ((regs[0] & 0xfc000000) >> 26) + 1;
+ if ((regs[0] & 0x1f) != 0)
+ cmp = ((regs[0] >> 26) & 0x3f) + 1;
}
if (cmp > 1)
printf("\n Cores per package: %d", cmp);
@@ -879,24 +893,16 @@
if (*cpu_vendor || cpu_id)
printf("\n");
-#endif
-
if (!bootverbose)
return;
if (strcmp(cpu_vendor, "AuthenticAMD") == 0)
print_AMD_info();
+ else if (strcmp(cpu_vendor, "GenuineIntel") == 0)
+ print_INTEL_info();
else if (strcmp(cpu_vendor, "GenuineTMx86") == 0 ||
strcmp(cpu_vendor, "TransmetaCPU") == 0)
print_transmeta_info();
-
-#ifdef I686_CPU
- /*
- * XXX - Do PPro CPUID level=2 stuff here?
- *
- * No, but maybe in a print_Intel_info() function called from here.
- */
-#endif
}
void
@@ -1062,6 +1068,21 @@
write_eflags(eflags);
}
+/* Update TSC freq with the value indicated by the caller. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+ /* If there was an error during the transition, don't do anything. */
+ if (status != 0)
+ return;
+
+ /* Total setting for this level gives the new frequency in MHz. */
+ hw_clockrate = level->total_set.freq;
+}
+
+EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
+ EVENTHANDLER_PRI_ANY);
+
/*
* Final stage of CPU identification. -- Should I check TI?
*/
@@ -1072,7 +1093,20 @@
u_char ccr3;
u_int regs[4];
- if (strcmp(cpu_vendor, "CyrixInstead") == 0) {
+ /* Detect AMD features (PTE no-execute bit, 3dnow, 64 bit mode etc) */
+ if (strcmp(cpu_vendor, "GenuineIntel") == 0 ||
+ strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+ init_exthigh();
+ if (cpu_exthigh >= 0x80000001) {
+ do_cpuid(0x80000001, regs);
+ amd_feature = regs[3] & ~(cpu_feature & 0x0183f3ff);
+ amd_feature2 = regs[2];
+ }
+ if (cpu_exthigh >= 0x80000008) {
+ do_cpuid(0x80000008, regs);
+ cpu_procinfo2 = regs[2];
+ }
+ } else if (strcmp(cpu_vendor, "CyrixInstead") == 0) {
if (cpu == CPU_486) {
/*
* These conditions are equivalent to:
@@ -1231,6 +1265,506 @@
}
static void
+print_INTEL_info(void)
+{
+ u_int regs[4];
+ u_int rounds, regnum;
+ u_int nwaycode, nway;
+
+ if (cpu_high >= 2) {
+ rounds = 0;
+ do {
+ do_cpuid(0x2, regs);
+ if (rounds == 0 && (rounds = (regs[0] & 0xff)) == 0)
+ break; /* we have a buggy CPU */
+
+ for (regnum = 0; regnum <= 3; ++regnum) {
+ if (regs[regnum] & (1<<31))
+ continue;
+ if (regnum != 0)
+ print_INTEL_TLB(regs[regnum] & 0xff);
+ print_INTEL_TLB((regs[regnum] >> 8) & 0xff);
+ print_INTEL_TLB((regs[regnum] >> 16) & 0xff);
+ print_INTEL_TLB((regs[regnum] >> 24) & 0xff);
+ }
+ } while (--rounds > 0);
+ }
+
+ if (cpu_exthigh >= 0x80000006) {
+ do_cpuid(0x80000006, regs);
+ nwaycode = (regs[2] >> 12) & 0x0f;
+ if (nwaycode >= 0x02 && nwaycode <= 0x08)
+ nway = 1 << (nwaycode / 2);
+ else
+ nway = 0;
+ printf("\nL2 cache: %u kbytes, %u-way associative, %u bytes/line",
+ (regs[2] >> 16) & 0xffff, nway, regs[2] & 0xff);
+ }
+
+ printf("\n");
+}
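
A worked decoding of the 0x80000006 branch (editorial, register value
invented): if CPUID 0x80000006 returned ECX = 0x02004040, then

	bits 31..16 = 0x0200 -> 512 KB
	bits 15..12 = 0x4    -> nway = 1 << (4 / 2) = 4
	bits  7..0  = 0x40   -> 64 bytes per line

and the printf above would report
"L2 cache: 512 kbytes, 4-way associative, 64 bytes/line".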
+
+static void
+print_INTEL_TLB(u_int data)
+{
+ switch (data) {
+ case 0x0:
+ case 0x40:
+ default:
+ break;
+ case 0x1:
+ printf("\nInstruction TLB: 4 KB pages, 4-way set associative, 32 entries");
+ break;
+ case 0x2:
+ printf("\nInstruction TLB: 4 MB pages, fully associative, 2 entries");
+ break;
+ case 0x3:
+ printf("\nData TLB: 4 KB pages, 4-way set associative, 64 entries");
+ break;
+ case 0x4:
+ printf("\nData TLB: 4 MB Pages, 4-way set associative, 8 entries");
+ break;
+ case 0x6:
+ printf("\n1st-level instruction cache: 8 KB, 4-way set associative, 32 byte line size");
+ break;
+ case 0x8:
+ printf("\n1st-level instruction cache: 16 KB, 4-way set associative, 32 byte line size");
+ break;
+ case 0xa:
+ printf("\n1st-level data cache: 8 KB, 2-way set associative, 32 byte line size");
+ break;
+ case 0xc:
+ printf("\n1st-level data cache: 16 KB, 4-way set associative, 32 byte line size");
+ break;
+ case 0x22:
+ printf("\n3rd-level cache: 512 KB, 4-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x23:
+ printf("\n3rd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x25:
+ printf("\n3rd-level cache: 2 MB, 8-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x29:
+ printf("\n3rd-level cache: 4 MB, 8-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x2c:
+ printf("\n1st-level data cache: 32 KB, 8-way set associative, 64 byte line size");
+ break;
+ case 0x30:
+ printf("\n1st-level instruction cache: 32 KB, 8-way set associative, 64 byte line size");
+ break;
+ case 0x39:
+ printf("\n2nd-level cache: 128 KB, 4-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x3b:
+ printf("\n2nd-level cache: 128 KB, 2-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x3c:
+ printf("\n2nd-level cache: 256 KB, 4-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x41:
+ printf("\n2nd-level cache: 128 KB, 4-way set associative, 32 byte line size");
+ break;
+ case 0x42:
+ printf("\n2nd-level cache: 256 KB, 4-way set associative, 32 byte line size");
+ break;
+ case 0x43:
+ printf("\n2nd-level cache: 512 KB, 4-way set associative, 32 byte line size");
+ break;
+ case 0x44:
+ printf("\n2nd-level cache: 1 MB, 4-way set associative, 32 byte line size");
+ break;
+ case 0x45:
+ printf("\n2nd-level cache: 2 MB, 4-way set associative, 32 byte line size");
+ break;
+ case 0x46:
+ printf("\n3rd-level cache: 4 MB, 4-way set associative, 64 byte line size");
+ break;
+ case 0x47:
+ printf("\n3rd-level cache: 8 MB, 8-way set associative, 64 byte line size");
+ break;
+ case 0x50:
+ printf("\nInstruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 64 entries");
+ break;
+ case 0x51:
+ printf("\nInstruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 128 entries");
+ break;
+ case 0x52:
+ printf("\nInstruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 256 entries");
+ break;
+ case 0x5b:
+ printf("\nData TLB: 4 KB or 4 MB pages, fully associative, 64 entries");
+ break;
+ case 0x5c:
+ printf("\nData TLB: 4 KB or 4 MB pages, fully associative, 128 entries");
+ break;
+ case 0x5d:
+ printf("\nData TLB: 4 KB or 4 MB pages, fully associative, 256 entries");
+ break;
+ case 0x60:
+ printf("\n1st-level data cache: 16 KB, 8-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x66:
+ printf("\n1st-level data cache: 8 KB, 4-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x67:
+ printf("\n1st-level data cache: 16 KB, 4-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x68:
+ printf("\n1st-level data cache: 32 KB, 4 way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x70:
+ printf("\nTrace cache: 12K-uops, 8-way set associative");
+ break;
+ case 0x71:
+ printf("\nTrace cache: 16K-uops, 8-way set associative");
+ break;
+ case 0x72:
+ printf("\nTrace cache: 32K-uops, 8-way set associative");
+ break;
+ case 0x78:
+ printf("\n2nd-level cache: 1 MB, 4-way set associative, 64-byte line size");
+ break;
+ case 0x79:
+ printf("\n2nd-level cache: 128 KB, 8-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x7a:
+ printf("\n2nd-level cache: 256 KB, 8-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x7b:
+ printf("\n2nd-level cache: 512 KB, 8-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x7c:
+ printf("\n2nd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size");
+ break;
+ case 0x7d:
+ printf("\n2nd-level cache: 2-MB, 8-way set associative, 64-byte line size");
+ break;
+ case 0x7f:
+ printf("\n2nd-level cache: 512-KB, 2-way set associative, 64-byte line size");
+ break;
+ case 0x82:
+ printf("\n2nd-level cache: 256 KB, 8-way set associative, 32 byte line size");
+ break;
+ case 0x83:
+ printf("\n2nd-level cache: 512 KB, 8-way set associative, 32 byte line size");
+ break;
+ case 0x84:
+ printf("\n2nd-level cache: 1 MB, 8-way set associative, 32 byte line size");
+ break;
+ case 0x85:
+ printf("\n2nd-level cache: 2 MB, 8-way set associative, 32 byte line size");
+ break;
+ case 0x86:
+ printf("\n2nd-level cache: 512 KB, 4-way set associative, 64 byte line size");
+ break;
+ case 0x87:
+ printf("\n2nd-level cache: 1 MB, 8-way set associative, 64 byte line size");
+ break;
+ case 0xb0:
+ printf("\nInstruction TLB: 4 KB Pages, 4-way set associative, 128 entries");
+ break;
+ case 0xb3:
+ printf("\nData TLB: 4 KB Pages, 4-way set associative, 128 entries");
+ break;
+ }
+}
+
+
+static void
+setPQL2_AMD(int *const size, int *const ways) {
+ if (cpu_exthigh >= 0x80000006) {
+ u_int regs[4];
+
+ do_cpuid(0x80000006, regs);
+ *size = regs[2] >> 16;
+ *ways = (regs[2] >> 12) & 0x0f;
+ }
+}
+
+
+static void
+setPQL2_INTEL(int *const size, int *const ways)
+{
+ u_int rounds, regnum;
+ u_int regs[4];
+ u_int nwaycode;
+
+ if (cpu_high >= 2) {
+ rounds = 0;
+ do {
+ do_cpuid(0x2, regs);
+ if (rounds == 0 && (rounds = (regs[0] & 0xff)) == 0)
+ break; /* we have a buggy CPU */
+
+ for (regnum = 0; regnum <= 3; ++regnum) {
+ if (regs[regnum] & (1<<31))
+ continue;
+ if (regnum != 0)
+ get_INTEL_TLB(regs[regnum] & 0xff,
+ size, ways);
+ get_INTEL_TLB((regs[regnum] >> 8) & 0xff,
+ size, ways);
+ get_INTEL_TLB((regs[regnum] >> 16) & 0xff,
+ size, ways);
+ get_INTEL_TLB((regs[regnum] >> 24) & 0xff,
+ size, ways);
+ }
+ } while (--rounds > 0);
+ }
+
+ if (cpu_exthigh >= 0x80000006) {
+ do_cpuid(0x80000006, regs);
+ if (*size < ((regs[2] >> 16) & 0xffff)) {
+ *size = (regs[2] >> 16) & 0xffff;
+ nwaycode = (regs[2] >> 12) & 0x0f;
+ if (nwaycode >= 0x02 && nwaycode <= 0x08)
+ *ways = 1 << (nwaycode / 2);
+ else
+ *ways = 0;
+ }
+ }
+}
+
+static void
+get_INTEL_TLB(u_int data, int *const size, int *const ways)
+{
+ switch (data) {
+ default:
+ break;
+ case 0x22:
+ /* 3rd-level cache: 512 KB, 4-way set associative,
+ * sectored cache, 64 byte line size */
+ if (*size < 512) {
+ *size = 512;
+ *ways = 4;
+ }
+ break;
+ case 0x23:
+ /* 3rd-level cache: 1 MB, 8-way set associative,
+ * sectored cache, 64 byte line size */
+ if (*size < 1024) {
+ *size = 1024;
+ *ways = 8;
+ }
+ break;
+ case 0x25:
+ /* 3rd-level cache: 2 MB, 8-way set associative,
+ * sectored cache, 64 byte line size */
+ if (*size < 2048) {
+ *size = 2048;
+ *ways = 8;
+ }
+ break;
+ case 0x29:
+ /* 3rd-level cache: 4 MB, 8-way set associative,
+ * sectored cache, 64 byte line size */
+ if (*size < 4096) {
+ *size = 4096;
+ *ways = 8;
+ }
+ break;
+ case 0x39:
+ /* 2nd-level cache: 128 KB, 4-way set associative,
+ * sectored cache, 64 byte line size */
+ if (*size < 128) {
+ *size = 128;
+ *ways = 4;
+ }
+ break;
+ case 0x3b:
+ /* 2nd-level cache: 128 KB, 2-way set associative,
+ * sectored cache, 64 byte line size */
+ if (*size < 128) {
+ *size = 128;
+ *ways = 2;
+ }
+ break;
+ case 0x3c:
+ /* 2nd-level cache: 256 KB, 4-way set associative,
+ * sectored cache, 64 byte line size */
+ if (*size < 256) {
+ *size = 256;
+ *ways = 4;
+ }
+ break;
+ case 0x41:
+ /* 2nd-level cache: 128 KB, 4-way set associative,
+ * 32 byte line size */
+ if (*size < 128) {
+ *size = 128;
+ *ways = 4;
+ }
+ break;
+ case 0x42:
+ /* 2nd-level cache: 256 KB, 4-way set associative,
+ * 32 byte line size */
+ if (*size < 256) {
+ *size = 256;
+ *ways = 4;
+ }
+ break;
+ case 0x43:
+ /* 2nd-level cache: 512 KB, 4-way set associative,
+ * 32 byte line size */
+ if (*size < 512) {
+ *size = 512;
+ *ways = 4;
+ }
+ break;
+ case 0x44:
+ /* 2nd-level cache: 1 MB, 4-way set associative,
+ * 32 byte line size */
+ if (*size < 1024) {
+ *size = 1024;
+ *ways = 4;
+ }
+ break;
+ case 0x45:
+ /* 2nd-level cache: 2 MB, 4-way set associative,
+ * 32 byte line size */
+ if (*size < 2048) {
+ *size = 2048;
+ *ways = 4;
+ }
+ break;
+ case 0x46:
+ /* 3rd-level cache: 4 MB, 4-way set associative,
+ * 64 byte line size */
+ if (*size < 4096) {
+ *size = 4096;
+ *ways = 4;
+ }
+ break;
+ case 0x47:
+ /* 3rd-level cache: 8 MB, 8-way set associative,
+ * 64 byte line size */
+ if (*size < 8192) {
+ *size = 8192;
+ *ways = 8;
+ }
+ break;
+ case 0x78:
+ /* 2nd-level cache: 1 MB, 4-way set associative,
+ * 64-byte line size */
+ if (*size < 1024) {
+ *size = 1024;
+ *ways = 4;
+ }
+ break;
+ case 0x79:
+ /* 2nd-level cache: 128 KB, 8-way set associative,
+ * sectored cache, 64 byte line size */
+ if (*size < 128) {
+ *size = 128;
+ *ways = 8;
+ }
+ break;
+ case 0x7a:
+ /* 2nd-level cache: 256 KB, 8-way set associative,
+ * sectored cache, 64 byte line size */
+ if (*size < 256) {
+ *size = 256;
+ *ways = 8;
+ }
+ break;
+ case 0x7b:
+ /* 2nd-level cache: 512 KB, 8-way set associative,
+ * sectored cache, 64 byte line size */
+ if (*size < 512) {
+ *size = 512;
+ *ways = 8;
+ }
+ break;
+ case 0x7c:
+ /* 2nd-level cache: 1 MB, 8-way set associative,
+ * sectored cache, 64 byte line size */
+ if (*size < 1024) {
+ *size = 1024;
+ *ways = 8;
+ }
+ break;
+ case 0x7d:
+ /* 2nd-level cache: 2 MB, 8-way set associative,
+ * 64-byte line size */
+ if (*size < 2048) {
+ *size = 2048;
+ *ways = 8;
+ }
+ break;
+ case 0x7f:
+ /* 2nd-level cache: 512 KB, 2-way set associative,
+ * 64-byte line size */
+ if (*size < 512) {
+ *size = 512;
+ *ways = 2;
+ }
+ break;
+ case 0x82:
+ /* 2nd-level cache: 256 KB, 8-way set associative,
+ * 32 byte line size */
+ if (*size < 256) {
+ *size = 256;
+ *ways = 8;
+ }
+ break;
+ case 0x83:
+ /* 2nd-level cache: 512 KB, 8-way set associative,
+ * 32 byte line size */
+ if (*size < 512) {
+ *size = 512;
+ *ways = 8;
+ }
+ break;
+ case 0x84:
+ /* 2nd-level cache: 1 MB, 8-way set associative,
+ * 32 byte line size */
+ if (*size < 1024) {
+ *size = 1024;
+ *ways = 8;
+ }
+ break;
+ case 0x85:
+ /* 2nd-level cache: 2 MB, 8-way set associative,
+ * 32 byte line size */
+ if (*size < 2048) {
+ *size = 2048;
+ *ways = 8;
+ }
+ break;
+ case 0x86:
+ /* 2nd-level cache: 512 KB, 4-way set associative,
+ * 64 byte line size */
+ if (*size < 512) {
+ *size = 512;
+ *ways = 4;
+ }
+ break;
+ case 0x87:
+ /* 2nd-level cache: 1 MB, 8-way set associative,
+ * 64 byte line size */
+ if (*size < 1024) {
+ *size = 1024;
+ *ways = 8;
+ }
+ break;
+ }
+}
+
+void
+setPQL2(int *const size, int *const ways)
+{
+ /* make sure the cpu_exthigh variable is initialized */
+ init_exthigh();
+
+ if (strcmp(cpu_vendor, "AuthenticAMD") == 0)
+ setPQL2_AMD(size, ways);
+ else if (strcmp(cpu_vendor, "GenuineIntel") == 0)
+ setPQL2_INTEL(size, ways);
+}
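
A minimal caller sketch (editorial; the real consumer is the VM page-colouring
code, and the variable names here are invented):

	int l2size = 0, l2ways = 0;

	setPQL2(&l2size, &l2ways);	/* size comes back in KB */
	if (l2size > 0 && l2ways > 0)
		;	/* derive cache colours from l2size / l2ways */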
+
+static void
print_transmeta_info()
{
u_int regs[4], nreg = 0;
Index: vm86bios.s
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/vm86bios.s,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/vm86bios.s -L sys/i386/i386/vm86bios.s -u -r1.1.1.1 -r1.2
--- sys/i386/i386/vm86bios.s
+++ sys/i386/i386/vm86bios.s
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/i386/i386/vm86bios.s,v 1.31 2005/04/13 18:13:40 peter Exp $
+ * $FreeBSD: src/sys/i386/i386/vm86bios.s,v 1.32 2006/12/17 05:07:01 kmacy Exp $
*/
#include "opt_npx.h"
@@ -128,9 +128,11 @@
#endif
movl %ecx,%cr3 /* new page tables */
movl SCR_VMFRAME(%edx),%esp /* switch to new stack */
-
- call vm86_prepcall /* finish setup */
+ pushl %esp
+ call vm86_prepcall /* finish setup */
+ add $4, %esp
+
/*
* Return via doreti
*/
Index: tsc.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/tsc.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/tsc.c -L sys/i386/i386/tsc.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/tsc.c
+++ sys/i386/i386/tsc.c
@@ -25,11 +25,14 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/tsc.c,v 1.204 2003/10/21 18:28:34 silby Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/tsc.c,v 1.208 2007/06/04 18:25:06 dwmalone Exp $");
#include "opt_clock.h"
#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/time.h>
@@ -41,9 +44,12 @@
#include <machine/md_var.h>
#include <machine/specialreg.h>
+#include "cpufreq_if.h"
+
uint64_t tsc_freq;
int tsc_is_broken;
u_int tsc_present;
+static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;
#ifdef SMP
static int smp_tsc;
@@ -52,14 +58,19 @@
TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);
#endif
+static void tsc_freq_changed(void *arg, const struct cf_level *level,
+ int status);
+static void tsc_freq_changing(void *arg, const struct cf_level *level,
+ int *status);
static unsigned tsc_get_timecount(struct timecounter *tc);
+static void tsc_levels_changed(void *arg, int unit);
static struct timecounter tsc_timecounter = {
tsc_get_timecount, /* get_timecount */
0, /* no poll_pps */
- ~0u, /* counter_mask */
+ ~0u, /* counter_mask */
0, /* frequency */
- "TSC", /* name */
+ "TSC", /* name */
800, /* quality (adjusted in code) */
};
@@ -86,18 +97,33 @@
tsc_freq = tscval[1] - tscval[0];
if (bootverbose)
printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
-}
+ /*
+ * Inform CPU accounting about our boot-time clock rate. Once the
+ * system is finished booting, we will get the real max clock rate
+ * via tsc_freq_max(). This also will be updated if someone loads
+ * a cpufreq driver after boot that discovers a new max frequency.
+ */
+ set_cputicker(rdtsc, tsc_freq, 1);
+
+ /* Register to find out about changes in CPU frequency. */
+ tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
+ tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
+ tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
+ tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
+ tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
+ tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
+}
void
init_TSC_tc(void)
{
/*
- * We can not use the TSC if we support APM. Precise timekeeping
+ * We can not use the TSC if we support APM. Precise timekeeping
* on an APM'ed machine is at best a fool's pursuit, since
* any and all of the time spent in various SMM code can't
* be reliably accounted for. Reading the RTC is your only
- * source of reliable time info. The i8254 looses too of course
+ * source of reliable time info. The i8254 loses too, of course,
* but we need to have some kind of time...
* We don't know at this point whether APM is going to be used
* or not, nor when it might be activated. Play it safe.
@@ -127,6 +153,72 @@
}
}
+/*
+ * When cpufreq levels change, find out about the (new) max frequency. We
+ * use this to update CPU accounting in case it got a lower estimate at boot.
+ */
+static void
+tsc_levels_changed(void *arg, int unit)
+{
+ device_t cf_dev;
+ struct cf_level *levels;
+ int count, error;
+ uint64_t max_freq;
+
+ /* Only use values from the first CPU, assuming all are equal. */
+ if (unit != 0)
+ return;
+
+ /* Find the appropriate cpufreq device instance. */
+ cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
+ if (cf_dev == NULL) {
+ printf("tsc_levels_changed() called but no cpufreq device?\n");
+ return;
+ }
+
+ /* Get settings from the device and find the max frequency. */
+ count = 64;
+ levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
+ if (levels == NULL)
+ return;
+ error = CPUFREQ_LEVELS(cf_dev, levels, &count);
+ if (error == 0 && count != 0) {
+ max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
+ set_cputicker(rdtsc, max_freq, 1);
+ } else
+ printf("tsc_levels_changed: no max freq found\n");
+ free(levels, M_TEMP);
+}
+
+/*
+ * If the TSC timecounter is in use, veto the pending change. It may be
+ * possible in the future to handle a dynamically-changing timecounter rate.
+ */
+static void
+tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
+{
+
+ if (*status != 0 || timecounter != &tsc_timecounter)
+ return;
+
+ printf("timecounter TSC must not be in use when "
+ "changing frequencies; change denied\n");
+ *status = EBUSY;
+}
+
+/* Update TSC freq with the value indicated by the caller. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+ /* If there was an error during the transition, don't do anything. */
+ if (status != 0)
+ return;
+
+ /* Total setting for this level gives the new frequency in MHz. */
+ tsc_freq = (uint64_t)level->total_set.freq * 1000000;
+ tsc_timecounter.tc_frequency = tsc_freq;
+}
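
To make the MHz-to-Hz conversion concrete (editorial note):

	/*
	 * Worked example (numbers invented): a transition to a cpufreq
	 * level with total_set.freq == 600 (MHz) sets
	 * tsc_freq = 600 * 1000000 = 600000000 Hz and propagates it to
	 * tsc_timecounter.tc_frequency, keeping the timecounter and the
	 * machdep.tsc_freq sysctl consistent with the new CPU speed.
	 */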
+
static int
sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
{
@@ -136,7 +228,7 @@
if (tsc_timecounter.tc_frequency == 0)
return (EOPNOTSUPP);
freq = tsc_freq;
- error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
+ error = sysctl_handle_quad(oidp, &freq, 0, req);
if (error == 0 && req->newptr != NULL) {
tsc_freq = freq;
tsc_timecounter.tc_frequency = tsc_freq;
@@ -145,7 +237,7 @@
}
SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_QUAD | CTLFLAG_RW,
- 0, sizeof(u_int), sysctl_machdep_tsc_freq, "IU", "");
+ 0, sizeof(u_int), sysctl_machdep_tsc_freq, "QU", "");
static unsigned
tsc_get_timecount(struct timecounter *tc)
Index: sys_machdep.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/sys_machdep.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/sys_machdep.c -L sys/i386/i386/sys_machdep.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/sys_machdep.c
+++ sys/i386/i386/sys_machdep.c
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.102.2.1 2005/09/26 19:38:11 jhb Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.112 2007/07/08 18:17:42 attilio Exp $");
#include "opt_kstack_pages.h"
#include "opt_mac.h"
@@ -38,9 +38,9 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysproto.h>
@@ -56,21 +56,22 @@
#include <machine/proc.h>
#include <machine/sysarch.h>
+#include <security/audit/audit.h>
+
#include <vm/vm_kern.h> /* for kernel_map */
#define MAX_LD 8192
#define LD_PER_PAGE 512
#define NEW_MAX_LD(num) ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
+#define NULL_LDT_BASE ((caddr_t)NULL)
-
-
+#ifdef SMP
+static void set_user_ldt_rv(struct vmspace *vmsp);
+#endif
static int i386_set_ldt_data(struct thread *, int start, int num,
union descriptor *descs);
static int i386_ldt_grow(struct thread *td, int len);
-#ifdef SMP
-static void set_user_ldt_rv(struct thread *);
-#endif
#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
@@ -93,6 +94,7 @@
uint32_t base;
struct segment_descriptor sd, *sdp;
+ AUDIT_ARG(cmd, uap->op);
switch (uap->op) {
case I386_GET_IOPERM:
case I386_SET_IOPERM:
@@ -112,7 +114,6 @@
break;
}
- mtx_lock(&Giant);
switch(uap->op) {
case I386_GET_LDT:
error = i386_get_ldt(td, &kargs.largs);
@@ -212,7 +213,6 @@
error = EINVAL;
break;
}
- mtx_unlock(&Giant);
return (error);
}
@@ -267,12 +267,12 @@
KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!"));
/* Switch to the new TSS. */
- mtx_lock_spin(&sched_lock);
+ critical_enter();
td->td_pcb->pcb_ext = ext;
- private_tss |= PCPU_GET(cpumask);
+ PCPU_SET(private_tss, 1);
*PCPU_GET(tss_gdt) = ext->ext_tssd;
ltr(GSEL(GPROC0_SEL, SEL_KPL));
- mtx_unlock_spin(&sched_lock);
+ critical_exit();
return 0;
}
@@ -285,11 +285,7 @@
int i, error;
char *iomap;
-#ifdef MAC
- if ((error = mac_check_sysarch_ioperm(td->td_ucred)) != 0)
- return (error);
-#endif
- if ((error = suser(td)) != 0)
+ if ((error = priv_check(td, PRIV_IO)) != 0)
return (error);
if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
return (error);
@@ -352,16 +348,19 @@
/*
* Update the GDT entry pointing to the LDT to point to the LDT of the
- * current process.
- *
- * This must be called with sched_lock held. Unfortunately, we can't use a
- * mtx_assert() here because cpu_switch() calls this function after changing
- * curproc but before sched_lock's owner is updated in mi_switch().
+ * current process. Acquires dt_lock itself if the caller does not
+ * already hold it, and in that case drops it again before returning.
*/
void
set_user_ldt(struct mdproc *mdp)
{
struct proc_ldt *pldt;
+ int dtlocked;
+
+ dtlocked = 0;
+ if (!mtx_owned(&dt_lock)) {
+ mtx_lock_spin(&dt_lock);
+ dtlocked = 1;
+ }
pldt = mdp->md_ldt;
#ifdef SMP
@@ -371,14 +370,18 @@
#endif
lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
+ if (dtlocked)
+ mtx_unlock_spin(&dt_lock);
}
#ifdef SMP
static void
-set_user_ldt_rv(struct thread *td)
+set_user_ldt_rv(struct vmspace *vmsp)
{
+ struct thread *td;
- if (td->td_proc != curthread->td_proc)
+ td = curthread;
+ if (vmsp != td->td_proc->p_vmspace)
return;
set_user_ldt(&td->td_proc->p_md);
@@ -386,17 +389,15 @@
#endif
/*
- * Must be called with either sched_lock free or held but not recursed.
- * If it does not return NULL, it will return with it owned.
+ * dt_lock must be held. Returns with dt_lock held.
*/
struct proc_ldt *
user_ldt_alloc(struct mdproc *mdp, int len)
{
struct proc_ldt *pldt, *new_ldt;
- if (mtx_owned(&sched_lock))
- mtx_unlock_spin(&sched_lock);
- mtx_assert(&sched_lock, MA_NOTOWNED);
+ mtx_assert(&dt_lock, MA_OWNED);
+ mtx_unlock_spin(&dt_lock);
MALLOC(new_ldt, struct proc_ldt *, sizeof(struct proc_ldt),
M_SUBPROC, M_WAITOK);
@@ -410,38 +411,35 @@
new_ldt->ldt_refcnt = 1;
new_ldt->ldt_active = 0;
- mtx_lock_spin(&sched_lock);
+ mtx_lock_spin(&dt_lock);
gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);
- if ((pldt = mdp->md_ldt)) {
+ if ((pldt = mdp->md_ldt) != NULL) {
if (len > pldt->ldt_len)
len = pldt->ldt_len;
bcopy(pldt->ldt_base, new_ldt->ldt_base,
len * sizeof(union descriptor));
- } else {
+ } else
bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));
- }
- return new_ldt;
+
+ return (new_ldt);
}
/*
- * Must be called either with sched_lock free or held but not recursed.
- * If md_ldt is not NULL, it will return with sched_lock released.
+ * Must be called with dt_lock held. Returns with dt_lock released.
*/
void
user_ldt_free(struct thread *td)
{
struct mdproc *mdp = &td->td_proc->p_md;
- struct proc_ldt *pldt = mdp->md_ldt;
+ struct proc_ldt *pldt;
- if (pldt == NULL)
+ mtx_assert(&dt_lock, MA_OWNED);
+ if ((pldt = mdp->md_ldt) == NULL)
return;
- if (!mtx_owned(&sched_lock))
- mtx_lock_spin(&sched_lock);
- mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
if (td == PCPU_GET(curthread)) {
lldt(_default_ldt);
PCPU_SET(currentldt, _default_ldt);
@@ -449,12 +447,12 @@
mdp->md_ldt = NULL;
if (--pldt->ldt_refcnt == 0) {
- mtx_unlock_spin(&sched_lock);
+ mtx_unlock_spin(&dt_lock);
kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base,
pldt->ldt_len * sizeof(union descriptor));
FREE(pldt, M_SUBPROC);
} else
- mtx_unlock_spin(&sched_lock);
+ mtx_unlock_spin(&dt_lock);
}
/*
@@ -469,7 +467,7 @@
struct i386_ldt_args *uap;
{
int error = 0;
- struct proc_ldt *pldt = td->td_proc->p_md.md_ldt;
+ struct proc_ldt *pldt;
int nldt, num;
union descriptor *lp;
@@ -478,11 +476,14 @@
uap->start, uap->num, (void *)uap->descs);
#endif
- if (pldt) {
+ mtx_lock_spin(&dt_lock);
+ if ((pldt = td->td_proc->p_md.md_ldt) != NULL) {
nldt = pldt->ldt_len;
- num = min(uap->num, nldt);
lp = &((union descriptor *)(pldt->ldt_base))[uap->start];
+ mtx_unlock_spin(&dt_lock);
+ num = min(uap->num, nldt);
} else {
+ mtx_unlock_spin(&dt_lock);
nldt = sizeof(ldt)/sizeof(ldt[0]);
num = min(uap->num, nldt);
lp = &ldt[uap->start];
@@ -532,10 +533,10 @@
}
if (uap->num <= 0)
return (EINVAL);
- mtx_lock_spin(&sched_lock);
- pldt = mdp->md_ldt;
- if (pldt == NULL || uap->start >= pldt->ldt_len) {
- mtx_unlock_spin(&sched_lock);
+ mtx_lock_spin(&dt_lock);
+ if ((pldt = mdp->md_ldt) == NULL ||
+ uap->start >= pldt->ldt_len) {
+ mtx_unlock_spin(&dt_lock);
return (0);
}
largest_ld = uap->start + uap->num;
@@ -544,7 +545,7 @@
i = largest_ld - uap->start;
bzero(&((union descriptor *)(pldt->ldt_base))[uap->start],
sizeof(union descriptor) * i);
- mtx_unlock_spin(&sched_lock);
+ mtx_unlock_spin(&dt_lock);
return (0);
}
@@ -627,15 +628,15 @@
if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
/* Allocate a free slot */
- pldt = mdp->md_ldt;
- if (pldt == NULL) {
- error = i386_ldt_grow(td, NLDT + 1);
- if (error)
+ mtx_lock_spin(&dt_lock);
+ if ((pldt = mdp->md_ldt) == NULL) {
+ if ((error = i386_ldt_grow(td, NLDT + 1))) {
+ mtx_unlock_spin(&dt_lock);
return (error);
+ }
pldt = mdp->md_ldt;
}
again:
- mtx_lock_spin(&sched_lock);
/*
* start scanning a bit up to leave room for NVidia and
* Wine, which still use the "Blat" method of allocation.
@@ -647,24 +648,23 @@
dp++;
}
if (i >= pldt->ldt_len) {
- mtx_unlock_spin(&sched_lock);
- error = i386_ldt_grow(td, pldt->ldt_len+1);
- if (error)
+ if ((error = i386_ldt_grow(td, pldt->ldt_len+1))) {
+ mtx_unlock_spin(&dt_lock);
return (error);
+ }
goto again;
}
uap->start = i;
error = i386_set_ldt_data(td, i, 1, descs);
- mtx_unlock_spin(&sched_lock);
+ mtx_unlock_spin(&dt_lock);
} else {
largest_ld = uap->start + uap->num;
- error = i386_ldt_grow(td, largest_ld);
- if (error == 0) {
- mtx_lock_spin(&sched_lock);
+ mtx_lock_spin(&dt_lock);
+ if (!(error = i386_ldt_grow(td, largest_ld))) {
error = i386_set_ldt_data(td, uap->start, uap->num,
descs);
- mtx_unlock_spin(&sched_lock);
}
+ mtx_unlock_spin(&dt_lock);
}
if (error == 0)
td->td_retval[0] = uap->start;
@@ -678,7 +678,7 @@
struct mdproc *mdp = &td->td_proc->p_md;
struct proc_ldt *pldt = mdp->md_ldt;
- mtx_assert(&sched_lock, MA_OWNED);
+ mtx_assert(&dt_lock, MA_OWNED);
/* Fill in range */
bcopy(descs,
@@ -691,9 +691,11 @@
i386_ldt_grow(struct thread *td, int len)
{
struct mdproc *mdp = &td->td_proc->p_md;
- struct proc_ldt *pldt;
- caddr_t old_ldt_base;
- int old_ldt_len;
+ struct proc_ldt *new_ldt, *pldt;
+ caddr_t old_ldt_base = NULL_LDT_BASE;
+ int old_ldt_len = 0;
+
+ mtx_assert(&dt_lock, MA_OWNED);
if (len > MAX_LD)
return (ENOMEM);
@@ -701,52 +703,58 @@
len = NLDT + 1;
/* Allocate a user ldt. */
- pldt = mdp->md_ldt;
- if (!pldt || len > pldt->ldt_len) {
- struct proc_ldt *new_ldt;
-
+ if ((pldt = mdp->md_ldt) == NULL || len > pldt->ldt_len) {
new_ldt = user_ldt_alloc(mdp, len);
if (new_ldt == NULL)
return (ENOMEM);
pldt = mdp->md_ldt;
- /* sched_lock was acquired by user_ldt_alloc. */
- if (pldt) {
- if (new_ldt->ldt_len > pldt->ldt_len) {
- old_ldt_base = pldt->ldt_base;
- old_ldt_len = pldt->ldt_len;
- pldt->ldt_sd = new_ldt->ldt_sd;
- pldt->ldt_base = new_ldt->ldt_base;
- pldt->ldt_len = new_ldt->ldt_len;
- mtx_unlock_spin(&sched_lock);
- kmem_free(kernel_map, (vm_offset_t)old_ldt_base,
- old_ldt_len * sizeof(union descriptor));
- FREE(new_ldt, M_SUBPROC);
- mtx_lock_spin(&sched_lock);
- } else {
+ if (pldt != NULL) {
+ if (new_ldt->ldt_len <= pldt->ldt_len) {
/*
- * If other threads already did the work,
- * do nothing.
+ * We just lost the race for allocation, so
+ * free the new object and return.
*/
- mtx_unlock_spin(&sched_lock);
+ mtx_unlock_spin(&dt_lock);
kmem_free(kernel_map,
(vm_offset_t)new_ldt->ldt_base,
new_ldt->ldt_len * sizeof(union descriptor));
FREE(new_ldt, M_SUBPROC);
+ mtx_lock_spin(&dt_lock);
return (0);
}
- } else {
+
+ /*
+ * We have to substitute the current LDT entry for
+ * curproc with the new one since its size grew.
+ */
+ old_ldt_base = pldt->ldt_base;
+ old_ldt_len = pldt->ldt_len;
+ pldt->ldt_sd = new_ldt->ldt_sd;
+ pldt->ldt_base = new_ldt->ldt_base;
+ pldt->ldt_len = new_ldt->ldt_len;
+ } else
mdp->md_ldt = pldt = new_ldt;
- }
#ifdef SMP
- mtx_unlock_spin(&sched_lock);
- /* signal other cpus to reload ldt */
+ /*
+ * Signal other cpus to reload ldt. We need to unlock dt_lock
+ * here because other CPUs will contend for it, since their
+ * curthreads won't hold the lock and will block when trying
+ * to acquire it.
+ */
+ mtx_unlock_spin(&dt_lock);
smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv,
- NULL, td);
+ NULL, td->td_proc->p_vmspace);
#else
- set_user_ldt(mdp);
- mtx_unlock_spin(&sched_lock);
+ set_user_ldt(&td->td_proc->p_md);
+ mtx_unlock_spin(&dt_lock);
#endif
+ if (old_ldt_base != NULL_LDT_BASE) {
+ kmem_free(kernel_map, (vm_offset_t)old_ldt_base,
+ old_ldt_len * sizeof(union descriptor));
+ FREE(new_ldt, M_SUBPROC);
+ }
+ mtx_lock_spin(&dt_lock);
}
return (0);
}
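
The i386_ldt_grow() rework above converts the old sched_lock protocol to the new dt_lock spin mutex with a consistent discipline: dt_lock is dropped around every allocation and free (allocating or calling kmem_free() under a spin mutex is not allowed), and the size is re-tested after the lock is reacquired, so a thread that loses the allocation race just frees its buffer and succeeds anyway. A minimal userland sketch of that discipline, with hypothetical names (table_grow, table_lock) and a pthread mutex standing in for the spin lock:

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct table {
	void	*base;
	size_t	 len;
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Grow "tbl" to at least "len" bytes.  Called with table_lock held;
 * returns with it held.  Mirrors the i386_ldt_grow() structure: all
 * allocation and freeing happens with the lock released, and the size
 * is re-checked afterwards in case another thread grew the table first.
 */
static int
table_grow(struct table *tbl, size_t len)
{
	void *new_base, *old_base;
	size_t old_len;

	if (tbl->len >= len)
		return (0);

	pthread_mutex_unlock(&table_lock);	/* no malloc under the lock */
	new_base = calloc(1, len);
	pthread_mutex_lock(&table_lock);
	if (new_base == NULL)
		return (ENOMEM);

	if (tbl->len >= len) {
		/* Lost the race: another thread already grew the table. */
		pthread_mutex_unlock(&table_lock);
		free(new_base);
		pthread_mutex_lock(&table_lock);
		return (0);
	}
	old_base = tbl->base;
	old_len = tbl->len;
	if (old_len != 0)
		memcpy(new_base, old_base, old_len);
	tbl->base = new_base;
	tbl->len = len;

	/* Free the replaced buffer outside the lock as well. */
	pthread_mutex_unlock(&table_lock);
	free(old_base);
	pthread_mutex_lock(&table_lock);
	return (0);
}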
Index: geode.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/geode.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/geode.c -L sys/i386/i386/geode.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/geode.c
+++ sys/i386/i386/geode.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/geode.c,v 1.5.8.1 2005/08/16 22:47:14 phk Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/geode.c,v 1.10 2007/09/18 09:19:44 phk Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -49,6 +49,16 @@
}
};
+static struct bios_oem bios_soekris_55 = {
+ { 0xf0000, 0xf1000 },
+ {
+ { "Soekris", 0, 8 }, /* Soekris Engineering. */
+ { "net5", 0, 8 }, /* net5xxx */
+ { "comBIOS", 0, 54 }, /* comBIOS ver. 1.26a 20040819 ... */
+ { NULL, 0, 0 },
+ }
+};
+
static struct bios_oem bios_pcengines = {
{ 0xf9000, 0xfa000 },
{
@@ -94,6 +104,25 @@
outl(gpio, u);
}
+static void
+cs5536_led_func(void *ptr, int onoff)
+{
+ int bit;
+ uint16_t a;
+
+ bit = *(int *)ptr;
+ if (bit < 0) {
+ bit = -bit;
+ onoff = !onoff;
+ }
+
+ a = rdmsr(0x5140000c);
+ if (onoff)
+ outl(a, 1 << bit);
+ else
+ outl(a, 1 << (bit + 16));
+}
+
static unsigned
geode_get_timecount(struct timecounter *tc)
@@ -110,6 +139,20 @@
1000
};
+static uint64_t
+geode_cputicks(void)
+{
+ unsigned c;
+ static unsigned last;
+ static uint64_t offset;
+
+ c = inl(geode_counter);
+ if (c < last)
+ offset += (1LL << 32);
+ last = c;
+ return (offset | c);
+}
+
/*
* The GEODE watchdog runs from a 32kHz frequency. One period of that is
* 31250 nanoseconds which we round down to 2^14 nanoseconds. The watchdog
@@ -122,7 +165,7 @@
u_int u, p, r;
u = cmd & WD_INTERVAL;
- if (cmd && u >= 14 && u <= 43) {
+ if (u >= 14 && u <= 43) {
u -= 14;
if (u > 16) {
p = u - 16;
@@ -144,6 +187,43 @@
}
/*
+ * We run MFGPT0 off the 32kHz frequency and prescale by 16384 giving a
+ * period of half a second.
+ * Range becomes 2^30 (= 1 sec) to 2^44 (almost 5 hours)
+ */
+static void
+cs5536_watchdog(void *foo __unused, u_int cmd, int *error)
+{
+ u_int u, p;
+ uint16_t a;
+ uint32_t m;
+
+ a = rdmsr(0x5140000d);
+ m = rdmsr(0x51400029);
+ m &= ~(1 << 24);
+ wrmsr(0x51400029, m);
+
+ u = cmd & WD_INTERVAL;
+ if (u >= 30 && u <= 44) {
+ p = 1 << (u - 29);
+
+ /* Set up MFGPT0, 32khz, prescaler 16k, C2 event */
+ outw(a + 6, 0x030e);
+ /* set comparator 2 */
+ outw(a + 2, p);
+ /* reset counter */
+ outw(a + 4, 0);
+ /* Arm reset mechanism */
+ m |= (1 << 24);
+ wrmsr(0x51400029, m);
+ /* Start counter */
+ outw(a + 6, 0x8000);
+
+ *error = 0;
+ }
+}
+
+/*
* The Advantech PCM-582x watchdog expects 0x1 at I/O port 0x0443
* every 1.6 secs +/- 30%. Writing 0x0 disables the watchdog
* NB: reading the I/O port enables the timer as well
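
The cs5536_watchdog() hunk above rests on a small unit conversion: watchdog(9) encodes the timeout as an exponent u meaning "at least 2^u nanoseconds", and MFGPT0 ticks at 32768 Hz / 16384 = 2 Hz, one tick per half second. A standalone sketch of the comparator math (illustrative only):

#include <stdint.h>
#include <stdio.h>

/*
 * For u in [30, 44] the driver programs the comparator to
 * 2^(u - 29) half-second ticks, i.e. 2^(u - 30) seconds.  Since
 * 2^30 ns is about 1.07 s, the programmed period is slightly
 * shorter than the requested 2^u ns, which errs on the safe side
 * for a watchdog.  u = 44 gives 16384 s, the "almost 5 hours"
 * mentioned in the comment above.
 */
int
main(void)
{
	unsigned u;

	for (u = 30; u <= 44; u++) {
		uint32_t ticks = 1u << (u - 29);
		printf("u=%2u -> %6u ticks = %5u s\n", u, ticks, ticks / 2);
	}
	return (0);
}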
@@ -151,8 +231,15 @@
static void
advantech_watchdog(void *foo __unused, u_int cmd, int *error)
{
- outb(0x0443, (cmd & WD_INTERVAL) ? 1 : 0);
- *error = 0;
+ u_int u;
+
+ u = cmd & WD_INTERVAL;
+ if (u > 0 && u <= WD_TO_1SEC) {
+ outb(0x0443, 1);
+ *error = 0;
+ } else {
+ outb(0x0443, 0);
+ }
}
static int
@@ -161,7 +248,8 @@
#define BIOS_OEM_MAXLEN 80
static u_char bios_oem[BIOS_OEM_MAXLEN] = "\0";
- if (pci_get_devid(self) == 0x0515100b) {
+ switch (pci_get_devid(self)) {
+ case 0x0515100b:
if (geode_counter == 0) {
/*
* The address of the CBA is written to this register
@@ -176,8 +264,10 @@
tc_init(&geode_timecounter);
EVENTHANDLER_REGISTER(watchdog_list, geode_watchdog,
NULL, 0);
+ set_cputicker(geode_cputicks, 27000000, 0);
}
- } else if (pci_get_devid(self) == 0x0510100b) {
+ break;
+ case 0x0510100b:
gpio = pci_read_config(self, PCIR_BAR(0), 4);
gpio &= ~0x1f;
printf("Geode GPIO@ = %x\n", gpio);
@@ -201,13 +291,26 @@
}
if ( strlen(bios_oem) )
printf("Geode %s\n", bios_oem);
- } else if (pci_get_devid(self) == 0x01011078) {
+ break;
+ case 0x01011078:
if ( bios_oem_strings(&bios_advantech,
bios_oem, BIOS_OEM_MAXLEN) > 0 ) {
printf("Geode %s\n", bios_oem);
EVENTHANDLER_REGISTER(watchdog_list, advantech_watchdog,
NULL, 0);
}
+ break;
+ case 0x20801022:
+ if ( bios_oem_strings(&bios_soekris_55,
+ bios_oem, BIOS_OEM_MAXLEN) > 0 ) {
+ printf("Geode LX: %s\n", bios_oem);
+ led1b = 6;
+ led1 = led_create(cs5536_led_func, &led1b, "error");
+ }
+ printf("MFGPT bar: %jx\n", rdmsr(0x5140000d));
+ EVENTHANDLER_REGISTER(watchdog_list, cs5536_watchdog,
+ NULL, 0);
+ break;
}
return (ENXIO);
}
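
Besides the watchdog and LED hooks, the geode.c diff registers geode_cputicks() as a CPU ticker. The technique it uses, widening a free-running 32-bit hardware counter to 64 bits by detecting wrap-around, is worth isolating. A sketch with a hypothetical name (extend_counter32); like the original, it assumes it is called at least once per wrap period and is not re-entered concurrently:

#include <stdint.h>

/*
 * Extend a free-running 32-bit counter to 64 bits in software.
 * Whenever the raw value goes backwards, the counter must have
 * wrapped, so add 2^32 to a running offset.  Because the offset's
 * low 32 bits are always zero, OR-ing in the raw value is the same
 * as adding it.
 */
static uint64_t
extend_counter32(uint32_t now)
{
	static uint32_t last;
	static uint64_t offset;

	if (now < last)
		offset += (uint64_t)1 << 32;
	last = now;
	return (offset | now);
}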
Index: initcpu.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/initcpu.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/i386/i386/initcpu.c -L sys/i386/i386/initcpu.c -u -r1.3 -r1.4
--- sys/i386/i386/initcpu.c
+++ sys/i386/i386/initcpu.c
@@ -28,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/initcpu.c,v 1.52.2.3 2006/07/21 15:12:02 mr Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/initcpu.c,v 1.56 2007/04/06 18:15:02 ru Exp $");
#include "opt_cpu.h"
@@ -41,6 +41,9 @@
#include <machine/md_var.h>
#include <machine/specialreg.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
@@ -686,6 +689,15 @@
break;
}
}
+#ifdef PAE
+ if ((amd_feature & AMDID_NX) != 0) {
+ uint64_t msr;
+
+ msr = rdmsr(MSR_EFER) | EFER_NXE;
+ wrmsr(MSR_EFER, msr);
+ pg_nx = PG_NX;
+ }
+#endif
break;
#endif
default:
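
The initcpu.c hunk above enables no-execute page protection on PAE kernels by setting the NXE bit in the EFER MSR, after which pmap can set PG_NX in page table entries. A ring-0-only sketch using GCC inline assembly; the MSR number 0xc0000080 and bit 11 are architectural, while the helper names here are illustrative:

#include <stdint.h>

#define MSR_EFER	0xc0000080u	/* extended feature enable register */
#define EFER_NXE	(1u << 11)	/* no-execute enable */

/* rdmsr/wrmsr are privileged instructions; this must run in ring 0. */
static inline uint64_t
rdmsr64(uint32_t msr)
{
	uint32_t lo, hi;

	__asm__ __volatile__("rdmsr" : "=a"(lo), "=d"(hi) : "c"(msr));
	return ((uint64_t)hi << 32 | lo);
}

static inline void
wrmsr64(uint32_t msr, uint64_t val)
{
	__asm__ __volatile__("wrmsr" : :
	    "c"(msr), "a"((uint32_t)val), "d"((uint32_t)(val >> 32)));
}

/*
 * Set EFER.NXE, as the diff does after checking the AMDID_NX CPUID
 * bit; from then on the top bit of a PAE PTE marks the page
 * non-executable.
 */
static void
enable_nx(void)
{
	wrmsr64(MSR_EFER, rdmsr64(MSR_EFER) | EFER_NXE);
}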
Index: bios.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/bios.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/bios.c -L sys/i386/i386/bios.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/bios.c
+++ sys/i386/i386/bios.c
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/bios.c,v 1.72.2.1 2005/08/16 22:47:14 phk Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/bios.c,v 1.74 2007/04/19 09:18:51 phk Exp $");
/*
* Code for dealing with the BIOS in x86 PC systems.
@@ -475,7 +475,8 @@
return (i);
}
-int bios_oem_strings(struct bios_oem *oem, u_char *buffer, size_t maxlen)
+int
+bios_oem_strings(struct bios_oem *oem, u_char *buffer, size_t maxlen)
{
size_t idx = 0;
struct bios_oem_signature *sig;
Index: legacy.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/legacy.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/legacy.c -L sys/i386/i386/legacy.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/legacy.c
+++ sys/i386/i386/legacy.c
@@ -28,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/legacy.c,v 1.61 2005/02/15 07:21:20 njl Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/legacy.c,v 1.63 2007/09/30 11:05:16 marius Exp $");
/*
* This code implements a system driver for legacy systems that do not
@@ -110,10 +110,10 @@
{
/*
- * Add child device with order of 1 so it gets probed
- * after ACPI (which is at order 0.
+ * Add child device with order of 11 so it gets probed
+ * after ACPI (which is at order 10).
*/
- if (BUS_ADD_CHILD(parent, 1, "legacy", 0) == NULL)
+ if (BUS_ADD_CHILD(parent, 11, "legacy", 0) == NULL)
panic("legacy: could not attach");
}
@@ -228,6 +228,9 @@
struct legacy_device *atdev = DEVTOAT(child);
switch (which) {
+ case LEGACY_IVAR_PCIDOMAIN:
+ *result = 0;
+ break;
case LEGACY_IVAR_PCIBUS:
*result = atdev->lg_pcibus;
break;
@@ -244,6 +247,8 @@
struct legacy_device *atdev = DEVTOAT(child);
switch (which) {
+ case LEGACY_IVAR_PCIDOMAIN:
+ return EINVAL;
case LEGACY_IVAR_PCIBUS:
atdev->lg_pcibus = value;
break;
Index: machdep.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/machdep.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -L sys/i386/i386/machdep.c -L sys/i386/i386/machdep.c -u -r1.4 -r1.5
--- sys/i386/i386/machdep.c
+++ sys/i386/i386/machdep.c
@@ -14,7 +14,11 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
@@ -34,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.616.2.2 2006/02/07 00:29:33 davidxu Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.658.2.1.2.1 2008/01/19 18:15:03 kib Exp $");
#include "opt_apic.h"
#include "opt_atalk.h"
@@ -49,6 +53,7 @@
#include "opt_msgbuf.h"
#include "opt_npx.h"
#include "opt_perfmon.h"
+#include "opt_xbox.h"
#include <sys/param.h>
#include <sys/proc.h>
@@ -57,6 +62,7 @@
#include <sys/buf.h>
#include <sys/bus.h>
#include <sys/callout.h>
+#include <sys/clock.h>
#include <sys/cons.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
@@ -129,6 +135,13 @@
#include <i386/isa/icu.h>
#endif
+#ifdef XBOX
+#include <machine/xbox.h>
+
+int arch_i386_is_xbox = 0;
+uint32_t arch_i386_xbox_memsize = 0;
+#endif
+
/* Sanity check for __curthread() */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
@@ -161,24 +174,35 @@
extern vm_offset_t ksym_start, ksym_end;
#endif
+/* Intel ICH registers */
+#define ICH_PMBASE 0x400
+#define ICH_SMI_EN ICH_PMBASE + 0x30
+
int _udatasel, _ucodesel;
u_int basemem;
int cold = 1;
#ifdef COMPAT_43
-static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code);
+static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
#endif
#ifdef COMPAT_FREEBSD4
-static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask,
- u_long code);
+static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
#endif
long Maxmem = 0;
long realmem = 0;
-vm_paddr_t phys_avail[10];
-vm_paddr_t dump_avail[10];
+/*
+ * The number of PHYSMAP entries must be one less than the number of
+ * PHYSSEG entries because the PHYSMAP entry that spans the largest
+ * physical address that is accessible by ISA DMA is split into two
+ * PHYSSEG entries.
+ */
+#define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1))
+
+vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
+vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
@@ -199,6 +223,27 @@
cpu_startup(dummy)
void *dummy;
{
+ char *sysenv;
+
+ /*
+	 * On MacBooks, we need to prevent the legacy USB circuit from
+	 * generating an SMI#, because this can cause several problems,
+ * namely: incorrect CPU frequency detection and failure to
+ * start the APs.
+ * We do this by disabling a bit in the SMI_EN (SMI Control and
+ * Enable register) of the Intel ICH LPC Interface Bridge.
+ */
+ sysenv = getenv("smbios.system.product");
+ if (sysenv != NULL) {
+ if (strncmp(sysenv, "MacBook", 7) == 0) {
+ if (bootverbose)
+ printf("Disabling LEGACY_USB_EN bit on "
+ "Intel ICH.\n");
+ outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
+ }
+ freeenv(sysenv);
+ }
+
/*
* Good {morning,afternoon,evening,night}.
*/
@@ -257,22 +302,20 @@
*/
#ifdef COMPAT_43
static void
-osendsig(catcher, sig, mask, code)
- sig_t catcher;
- int sig;
- sigset_t *mask;
- u_long code;
+osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
struct osigframe sf, *fp;
struct proc *p;
struct thread *td;
struct sigacts *psp;
struct trapframe *regs;
+ int sig;
int oonstack;
td = curthread;
p = td->td_proc;
PROC_LOCK_ASSERT(p, MA_OWNED);
+ sig = ksi->ksi_signo;
psp = p->p_sigacts;
mtx_assert(&psp->ps_mtx, MA_OWNED);
regs = td->td_frame;
@@ -300,12 +343,12 @@
/* Signal handler installed with SA_SIGINFO. */
sf.sf_arg2 = (register_t)&fp->sf_siginfo;
sf.sf_siginfo.si_signo = sig;
- sf.sf_siginfo.si_code = code;
+ sf.sf_siginfo.si_code = ksi->ksi_code;
sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
} else {
/* Old FreeBSD-style arguments. */
- sf.sf_arg2 = code;
- sf.sf_addr = regs->tf_err;
+ sf.sf_arg2 = ksi->ksi_code;
+ sf.sf_addr = (register_t)ksi->ksi_addr;
sf.sf_ahu.sf_handler = catcher;
}
mtx_unlock(&psp->ps_mtx);
@@ -387,22 +430,20 @@
#ifdef COMPAT_FREEBSD4
static void
-freebsd4_sendsig(catcher, sig, mask, code)
- sig_t catcher;
- int sig;
- sigset_t *mask;
- u_long code;
+freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
struct sigframe4 sf, *sfp;
struct proc *p;
struct thread *td;
struct sigacts *psp;
struct trapframe *regs;
+ int sig;
int oonstack;
td = curthread;
p = td->td_proc;
PROC_LOCK_ASSERT(p, MA_OWNED);
+ sig = ksi->ksi_signo;
psp = p->p_sigacts;
mtx_assert(&psp->ps_mtx, MA_OWNED);
regs = td->td_frame;
@@ -443,12 +484,12 @@
/* Fill in POSIX parts */
sf.sf_si.si_signo = sig;
- sf.sf_si.si_code = code;
- sf.sf_si.si_addr = (void *)regs->tf_err;
+ sf.sf_si.si_code = ksi->ksi_code;
+ sf.sf_si.si_addr = ksi->ksi_addr;
} else {
/* Old FreeBSD-style arguments. */
- sf.sf_siginfo = code;
- sf.sf_addr = regs->tf_err;
+ sf.sf_siginfo = ksi->ksi_code;
+ sf.sf_addr = (register_t)ksi->ksi_addr;
sf.sf_ahu.sf_handler = catcher;
}
mtx_unlock(&psp->ps_mtx);
@@ -508,11 +549,7 @@
#endif /* COMPAT_FREEBSD4 */
void
-sendsig(catcher, sig, mask, code)
- sig_t catcher;
- int sig;
- sigset_t *mask;
- u_long code;
+sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
struct sigframe sf, *sfp;
struct proc *p;
@@ -520,22 +557,24 @@
struct sigacts *psp;
char *sp;
struct trapframe *regs;
+ int sig;
int oonstack;
td = curthread;
p = td->td_proc;
PROC_LOCK_ASSERT(p, MA_OWNED);
+ sig = ksi->ksi_signo;
psp = p->p_sigacts;
mtx_assert(&psp->ps_mtx, MA_OWNED);
#ifdef COMPAT_FREEBSD4
if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
- freebsd4_sendsig(catcher, sig, mask, code);
+ freebsd4_sendsig(catcher, ksi, mask);
return;
}
#endif
#ifdef COMPAT_43
if (SIGISMEMBER(psp->ps_osigset, sig)) {
- osendsig(catcher, sig, mask, code);
+ osendsig(catcher, ksi, mask);
return;
}
#endif
@@ -581,13 +620,12 @@
sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
/* Fill in POSIX parts */
- sf.sf_si.si_signo = sig;
- sf.sf_si.si_code = code;
- sf.sf_si.si_addr = (void *)regs->tf_err;
+ sf.sf_si = ksi->ksi_info;
+ sf.sf_si.si_signo = sig; /* maybe a translated signal */
} else {
/* Old FreeBSD-style arguments. */
- sf.sf_siginfo = code;
- sf.sf_addr = regs->tf_err;
+ sf.sf_siginfo = ksi->ksi_code;
+ sf.sf_addr = (register_t)ksi->ksi_addr;
sf.sf_ahu.sf_handler = catcher;
}
mtx_unlock(&psp->ps_mtx);
@@ -646,26 +684,6 @@
}
/*
- * Build siginfo_t for SA thread
- */
-void
-cpu_thread_siginfo(int sig, u_long code, siginfo_t *si)
-{
- struct proc *p;
- struct thread *td;
-
- td = curthread;
- p = td->td_proc;
- PROC_LOCK_ASSERT(p, MA_OWNED);
-
- bzero(si, sizeof(*si));
- si->si_signo = sig;
- si->si_code = code;
- si->si_addr = (void *)td->td_frame->tf_err;
- /* XXXKSE fill other fields */
-}
-
-/*
* System call to cleanup state after a signal
* has been taken. Reset signal mask and
* stack state from context left by sendsig (above).
@@ -689,6 +707,7 @@
struct osigcontext *scp;
struct proc *p = td->td_proc;
int eflags, error;
+ ksiginfo_t ksi;
regs = td->td_frame;
error = copyin(uap->sigcntxp, &sc, sizeof(sc));
@@ -711,8 +730,13 @@
return (EINVAL);
/* Go back to user mode if both flags are set. */
- if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
- trapsignal(td, SIGBUS, 0);
+ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGBUS;
+ ksi.ksi_code = BUS_OBJERR;
+ ksi.ksi_addr = (void *)regs->tf_eip;
+ trapsignal(td, &ksi);
+ }
if (vm86->vm86_has_vme) {
eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
@@ -753,7 +777,12 @@
* other selectors, invalid %eip's and invalid %esp's.
*/
if (!CS_SECURE(scp->sc_cs)) {
- trapsignal(td, SIGBUS, T_PROTFLT);
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGBUS;
+ ksi.ksi_code = BUS_OBJERR;
+ ksi.ksi_trapno = T_PROTFLT;
+ ksi.ksi_addr = (void *)regs->tf_eip;
+ trapsignal(td, &ksi);
return (EINVAL);
}
regs->tf_ds = scp->sc_ds;
@@ -807,6 +836,7 @@
struct trapframe *regs;
const struct ucontext4 *ucp;
int cs, eflags, error;
+ ksiginfo_t ksi;
error = copyin(uap->sigcntxp, &uc, sizeof(uc));
if (error != 0)
@@ -829,9 +859,13 @@
return (EINVAL);
/* Go back to user mode if both flags are set. */
- if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
- trapsignal(td, SIGBUS, 0);
-
+ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGBUS;
+ ksi.ksi_code = BUS_OBJERR;
+ ksi.ksi_addr = (void *)regs->tf_eip;
+ trapsignal(td, &ksi);
+ }
if (vm86->vm86_has_vme) {
eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
(eflags & VME_USERCHANGE) | PSL_VM;
@@ -876,7 +910,12 @@
cs = ucp->uc_mcontext.mc_cs;
if (!CS_SECURE(cs)) {
printf("freebsd4_sigreturn: cs = 0x%x\n", cs);
- trapsignal(td, SIGBUS, T_PROTFLT);
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGBUS;
+ ksi.ksi_code = BUS_OBJERR;
+ ksi.ksi_trapno = T_PROTFLT;
+ ksi.ksi_addr = (void *)regs->tf_eip;
+ trapsignal(td, &ksi);
return (EINVAL);
}
@@ -906,7 +945,7 @@
sigreturn(td, uap)
struct thread *td;
struct sigreturn_args /* {
- const __ucontext *sigcntxp;
+ const struct __ucontext *sigcntxp;
} */ *uap;
{
ucontext_t uc;
@@ -914,6 +953,7 @@
struct trapframe *regs;
const ucontext_t *ucp;
int cs, eflags, error, ret;
+ ksiginfo_t ksi;
error = copyin(uap->sigcntxp, &uc, sizeof(uc));
if (error != 0)
@@ -936,8 +976,13 @@
return (EINVAL);
/* Go back to user mode if both flags are set. */
- if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
- trapsignal(td, SIGBUS, 0);
+ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGBUS;
+ ksi.ksi_code = BUS_OBJERR;
+ ksi.ksi_addr = (void *)regs->tf_eip;
+ trapsignal(td, &ksi);
+ }
if (vm86->vm86_has_vme) {
eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
@@ -983,7 +1028,12 @@
cs = ucp->uc_mcontext.mc_cs;
if (!CS_SECURE(cs)) {
printf("sigreturn: cs = 0x%x\n", cs);
- trapsignal(td, SIGBUS, T_PROTFLT);
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGBUS;
+ ksi.ksi_code = BUS_OBJERR;
+ ksi.ksi_trapno = T_PROTFLT;
+ ksi.ksi_addr = (void *)regs->tf_eip;
+ trapsignal(td, &ksi);
return (EINVAL);
}
@@ -1039,9 +1089,9 @@
#ifdef SMP
/* Schedule ourselves on the indicated cpu. */
- mtx_lock_spin(&sched_lock);
+ thread_lock(curthread);
sched_bind(curthread, cpu_id);
- mtx_unlock_spin(&sched_lock);
+ thread_unlock(curthread);
#endif
/* Calibrate by measuring a short delay. */
@@ -1052,9 +1102,9 @@
intr_restore(reg);
#ifdef SMP
- mtx_lock_spin(&sched_lock);
+ thread_lock(curthread);
sched_unbind(curthread);
- mtx_unlock_spin(&sched_lock);
+ thread_unlock(curthread);
#endif
/*
@@ -1093,6 +1143,7 @@
* help lock contention somewhat, and this is critical for HTT. -Peter
*/
static int cpu_idle_hlt = 1;
+TUNABLE_INT("machdep.cpu_idle_hlt", &cpu_idle_hlt);
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
&cpu_idle_hlt, 0, "Idle loop HLT enable");
@@ -1151,8 +1202,11 @@
pcb->pcb_gs = _udatasel;
load_gs(_udatasel);
+ mtx_lock_spin(&dt_lock);
if (td->td_proc->p_md.md_ldt)
user_ldt_free(td);
+ else
+ mtx_unlock_spin(&dt_lock);
bzero((char *)regs, sizeof(struct trapframe));
regs->tf_eip = entry;
@@ -1218,38 +1272,28 @@
unsigned int cr0;
cr0 = rcr0();
+
/*
- * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the
- * BSP. See the comments there about why we set them.
+ * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support:
+ *
+ * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT
+ * instructions. We must set the CR0_MP bit and use the CR0_TS
+ * bit to control the trap, because setting the CR0_EM bit does
+ * not cause WAIT instructions to trap. It's important to trap
+ * WAIT instructions - otherwise the "wait" variants of no-wait
+ * control instructions would degenerate to the "no-wait" variants
+ * after FP context switches but work correctly otherwise. It's
+ * particularly important to trap WAITs when there is no NPX -
+ * otherwise the "wait" variants would always degenerate.
+ *
+ * Try setting CR0_NE to get correct error reporting on 486DX's.
+ * Setting it should fail or do nothing on lesser processors.
*/
cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
load_cr0(cr0);
load_gs(_udatasel);
}
-static int
-sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
-{
- int error;
- error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
- req);
- if (!error && req->newptr)
- resettodr();
- return (error);
-}
-
-SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
- &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
-
-SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
- CTLFLAG_RW, &disable_rtc_set, 0, "");
-
-SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
- CTLFLAG_RD, &bootinfo, bootinfo, "");
-
-SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
- CTLFLAG_RW, &wall_cmos_clock, 0, "");
-
u_long bootdev; /* not a struct cdev *- encoding is different */
SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)");
@@ -1268,8 +1312,7 @@
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
union descriptor ldt[NLDT]; /* local descriptor table */
struct region_descriptor r_gdt, r_idt; /* table descriptors */
-
-int private_tss; /* flag indicating private tss */
+struct mtx dt_lock; /* lock for GDT and LDT */
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
@@ -1540,8 +1583,6 @@
ip->gd_hioffset = ((int)func)>>16 ;
}
-#define IDTVEC(name) __CONCAT(X,name)
-
extern inthand_t
IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
@@ -1557,12 +1598,11 @@
DB_SHOW_COMMAND(idt, db_show_idt)
{
struct gate_descriptor *ip;
- int idx, quit;
+ int idx;
uintptr_t func;
ip = idt;
- db_setup_paging(db_simple_pager, &quit, db_lines_per_page);
- for (idx = 0, quit = 0; idx < NIDT; idx++) {
+ for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
func = (ip->gd_hioffset << 16 | ip->gd_looffset);
if (func != (uintptr_t)&IDTVEC(rsvd)) {
db_printf("%3d\t", idx);
@@ -1572,6 +1612,25 @@
ip++;
}
}
+
+/* Show privileged registers. */
+DB_SHOW_COMMAND(sysregs, db_show_sysregs)
+{
+ uint64_t idtr, gdtr;
+
+ idtr = ridt();
+ db_printf("idtr\t0x%08x/%04x\n",
+ (u_int)(idtr >> 16), (u_int)idtr & 0xffff);
+ gdtr = rgdt();
+ db_printf("gdtr\t0x%08x/%04x\n",
+ (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff);
+ db_printf("ldtr\t0x%04x\n", rldt());
+ db_printf("tr\t0x%04x\n", rtr());
+ db_printf("cr0\t0x%08x\n", rcr0());
+ db_printf("cr2\t0x%08x\n", rcr2());
+ db_printf("cr3\t0x%08x\n", rcr3());
+ db_printf("cr4\t0x%08x\n", rcr4());
+}
#endif
void
@@ -1588,8 +1647,6 @@
ssd->ssd_gran = sd->sd_gran;
}
-#define PHYSMAP_SIZE (2 * 8)
-
/*
* Populate the (physmap) array with base/bound pairs describing the
* available physical memory in the system, then test this memory and
@@ -1606,8 +1663,8 @@
static void
getmemsize(int first)
{
- int i, physmap_idx, pa_indx, da_indx;
- int hasbrokenint12;
+ int i, off, physmap_idx, pa_indx, da_indx;
+ int hasbrokenint12, has_smap;
u_long physmem_tunable;
u_int extmem;
struct vm86frame vmf;
@@ -1617,6 +1674,20 @@
struct bios_smap *smap;
quad_t dcons_addr, dcons_size;
+ has_smap = 0;
+#ifdef XBOX
+ if (arch_i386_is_xbox) {
+ /*
+ * We queried the memory size before, so chop off 4MB for
+ * the framebuffer and inform the OS of this.
+ */
+ physmap[0] = 0;
+ physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE;
+ physmap_idx = 0;
+ goto physmap_done;
+ }
+#endif
+
hasbrokenint12 = 0;
TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
bzero(&vmf, sizeof(vmf));
@@ -1703,6 +1774,7 @@
if (boothowto & RB_VERBOSE)
printf("SMAP type=%02x base=%016llx len=%016llx\n",
smap->type, smap->base, smap->length);
+ has_smap = 1;
if (smap->type != 0x01)
continue;
@@ -1722,7 +1794,7 @@
if (smap->base < physmap[i + 1]) {
if (boothowto & RB_VERBOSE)
printf(
- "Overlapping or non-montonic memory region, ignoring second region\n");
+ "Overlapping or non-monotonic memory region, ignoring second region\n");
continue;
}
}
@@ -1844,6 +1916,13 @@
if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
Maxmem = atop(physmem_tunable);
+ /*
+ * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend
+ * the amount of memory in the system.
+ */
+ if (has_smap && Maxmem > atop(physmap[physmap_idx + 1]))
+ Maxmem = atop(physmap[physmap_idx + 1]);
+
if (atop(physmap[physmap_idx + 1]) != Maxmem &&
(boothowto & RB_VERBOSE))
printf("Physical memory use set to %ldK\n", Maxmem * 4);
@@ -1856,7 +1935,7 @@
physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
/* call pmap initialization to make new kernel address space */
- pmap_bootstrap(first, 0);
+ pmap_bootstrap(first);
/*
* Size up each available chunk of physical memory.
@@ -2012,7 +2091,10 @@
/* Trim off space for the message buffer. */
phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
- avail_end = phys_avail[pa_indx];
+ /* Map the message buffer. */
+ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
+ pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
+ off);
}
void
@@ -2020,7 +2102,7 @@
int first;
{
struct gate_descriptor *gdp;
- int gsel_tss, metadata_missing, off, x;
+ int gsel_tss, metadata_missing, x;
struct pcpu *pc;
thread0.td_kstack = proc0kstack;
@@ -2031,7 +2113,7 @@
* This may be done better later if it gets more high level
* components in it. If so just link td->td_proc here.
*/
- proc_linkup(&proc0, &ksegrp0, &thread0);
+ proc_linkup0(&proc0, &thread0);
metadata_missing = 0;
if (bootinfo.bi_modulep) {
@@ -2073,6 +2155,7 @@
r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
r_gdt.rd_base = (int) gdt;
+ mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
lgdt(&r_gdt);
pcpu_init(pc, 0, sizeof(struct pcpu));
@@ -2089,8 +2172,7 @@
* under witness.
*/
mutex_init();
- mtx_init(&clock_lock, "clk", NULL, MTX_SPIN);
- mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
+ mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);
/* make ldt memory segments */
ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
@@ -2150,6 +2232,34 @@
r_idt.rd_base = (int) idt;
lidt(&r_idt);
+#ifdef XBOX
+ /*
+ * The following code queries the PCI ID of 0:0:0. For the XBOX,
+	 * this should be 0x10de / 0x02a5.
+ *
+ * This is exactly what Linux does.
+ */
+ outl(0xcf8, 0x80000000);
+ if (inl(0xcfc) == 0x02a510de) {
+ arch_i386_is_xbox = 1;
+ pic16l_setled(XBOX_LED_GREEN);
+
+ /*
+ * We are an XBOX, but we may have either 64MB or 128MB of
+ * memory. The PCI host bridge should be programmed for this,
+ * so we just query it.
+ */
+ outl(0xcf8, 0x80000084);
+ arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64;
+ }
+#endif /* XBOX */
+
+ /*
+ * Initialize the i8254 before the console so that console
+ * initialization can use DELAY().
+ */
+ i8254_init();
+
/*
* Initialize the console before we print anything out.
*/
@@ -2188,7 +2298,6 @@
KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16);
PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
- private_tss = 0;
PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
@@ -2220,10 +2329,6 @@
/* now running on new page tables, configured,and u/iom is accessible */
- /* Map the message buffer. */
- for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
- pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
-
msgbufinit(msgbufp, MSGBUF_SIZE);
/* make a call gate to reenter kernel with */
@@ -2249,7 +2354,7 @@
_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
/* setup proc 0's pcb */
- thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
+ thread0.td_pcb->pcb_flags = 0;
#ifdef PAE
thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
#else
@@ -2650,8 +2755,8 @@
}
#ifdef DEV_NPX
#ifdef CPU_ENABLE_SSE
- if (cpu_fxsr)
- addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask;
+ if (cpu_fxsr)
+ addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask;
#endif
/*
* XXX we violate the dubious requirement that npxsetregs()
@@ -2726,7 +2831,6 @@
{
struct pcb *pcb;
int i;
- u_int32_t mask1, mask2;
if (td == NULL) {
load_dr0(dbregs->dr[0]);
@@ -2744,10 +2848,12 @@
* result in undefined behaviour and can lead to an unexpected
* TRCTRAP.
*/
- for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8;
- i++, mask1 <<= 2, mask2 <<= 2)
- if ((dbregs->dr[7] & mask1) == mask2)
+ for (i = 0; i < 4; i++) {
+ if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
return (EINVAL);
+ if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02)
+ return (EINVAL);
+ }
pcb = td->td_pcb;
@@ -2765,25 +2871,25 @@
* from within kernel mode?
*/
- if (dbregs->dr[7] & 0x3) {
+ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
/* dr0 is enabled */
if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
return (EINVAL);
}
- if (dbregs->dr[7] & (0x3<<2)) {
+ if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
/* dr1 is enabled */
if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
return (EINVAL);
}
- if (dbregs->dr[7] & (0x3<<4)) {
+ if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
/* dr2 is enabled */
if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
return (EINVAL);
}
- if (dbregs->dr[7] & (0x3<<6)) {
+ if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
/* dr3 is enabled */
if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
return (EINVAL);
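
The set_dbregs() hunks above replace hand-rolled shift-and-mask arithmetic with the DBREG_DR7_* accessors. Their authoritative definitions live in <machine/reg.h>; plausible equivalents, given the architectural DR7 layout, look like this sketch:

/*
 * Sketch of DR7 field accessors (the real ones are in
 * <machine/reg.h>).  For debug register i (0..3):
 *   - bits 2i and 2i+1 are the local/global enable pair,
 *   - bits 16+4i..17+4i encode the access type (R/W),
 *   - bits 18+4i..19+4i encode the watched length.
 */
#define DBREG_DR7_ENABLED(d, i)	(((d) >> ((i) * 2)) & 0x3)
#define DBREG_DR7_ACCESS(d, i)	(((d) >> (16 + (i) * 4)) & 0x3)
#define DBREG_DR7_LEN(d, i)	(((d) >> (16 + (i) * 4 + 2)) & 0x3)

An access type of 0x02 is undefined, and a length of 0x02 (8 bytes) is likewise undefined on i386, which is why the loop above rejects both with EINVAL.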
@@ -2855,9 +2961,8 @@
addr[nbp++] = (caddr_t)rdr3();
}
- for (i=0; i<nbp; i++) {
- if (addr[i] <
- (caddr_t)VM_MAXUSER_ADDRESS) {
+ for (i = 0; i < nbp; i++) {
+ if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
/*
* addr[i] is in user space
*/
@@ -2886,7 +2991,7 @@
}
void *
-ioapic_create(uintptr_t addr, int32_t id, int intbase)
+ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase)
{
return (NULL);
}
@@ -2944,7 +3049,7 @@
}
void
-lapic_init(uintptr_t addr)
+lapic_init(vm_paddr_t addr)
{
}
Index: pmap.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/pmap.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/i386/i386/pmap.c -L sys/i386/i386/pmap.c -u -r1.2 -r1.3
--- sys/i386/i386/pmap.c
+++ sys/i386/i386/pmap.c
@@ -75,7 +75,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/pmap.c,v 1.523.2.6 2006/03/08 23:59:41 tegge Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/pmap.c,v 1.594.2.4.2.1 2008/01/19 18:15:03 kib Exp $");
/*
* Manages physical address maps.
@@ -106,6 +106,8 @@
#include "opt_cpu.h"
#include "opt_pmap.h"
#include "opt_msgbuf.h"
+#include "opt_smp.h"
+#include "opt_xbox.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -144,6 +146,10 @@
#include <machine/smp.h>
#endif
+#ifdef XBOX
+#include <machine/xbox.h>
+#endif
+
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
@@ -157,11 +163,18 @@
#endif
#if !defined(PMAP_DIAGNOSTIC)
-#define PMAP_INLINE __inline
+#define PMAP_INLINE __gnu89_inline
#else
#define PMAP_INLINE
#endif
+#define PV_STATS
+#ifdef PV_STATS
+#define PV_STAT(x) do { x ; } while (0)
+#else
+#define PV_STAT(x) do { } while (0)
+#endif
+
/*
* Get PDEs and PTEs for user/kernel address space
*/
@@ -183,7 +196,6 @@
static struct pmaplist allpmaps;
static struct mtx allpmaps_lock;
-vm_paddr_t avail_end; /* PA of last available physical page */
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
int pgeflag = 0; /* PG_G or-in */
@@ -194,16 +206,19 @@
extern u_int32_t KERNend;
#ifdef PAE
+pt_entry_t pg_nx;
static uma_zone_t pdptzone;
#endif
/*
* Data for the pv entry allocation mechanism
*/
-static uma_zone_t pvzone;
-static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
-int pmap_pagedaemon_waken;
+static int shpgperproc = PMAP_SHPGPERPROC;
+
+struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */
+int pv_maxchunks; /* How many chunks we have KVA for */
+vm_offset_t pv_vafree; /* freelist stored in the PTE */
/*
* All those kernel PT submaps that BSD is so fond of
@@ -249,23 +264,28 @@
"Number of times pmap_pte_quick didn't change PMAP1");
static struct mtx PMAP2mutex;
-static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
-static pv_entry_t get_pv_entry(void);
-static void pmap_clear_ptes(vm_page_t m, int bit);
+static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
+static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
-static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
-static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
+static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
+ vm_page_t m, vm_prot_t prot, vm_page_t mpte);
+static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
+ vm_page_t *free);
+static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
+ vm_page_t *free);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
vm_offset_t va);
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
+static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
+ vm_page_t m);
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
-static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m);
+static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
static void pmap_pte_release(pt_entry_t *pte);
-static int pmap_unuse_pt(pmap_t, vm_offset_t);
+static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
#ifdef PAE
static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
@@ -303,9 +323,7 @@
* (physical) address starting relative to 0]
*/
void
-pmap_bootstrap(firstaddr, loadaddr)
- vm_paddr_t firstaddr;
- vm_paddr_t loadaddr;
+pmap_bootstrap(vm_paddr_t firstaddr)
{
vm_offset_t va;
pt_entry_t *pte, *unused;
@@ -334,7 +352,7 @@
kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
kernel_pmap->pm_active = -1; /* don't allow deactivation */
- TAILQ_INIT(&kernel_pmap->pm_pvlist);
+ TAILQ_INIT(&kernel_pmap->pm_pvchunk);
LIST_INIT(&allpmaps);
mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
mtx_lock_spin(&allpmaps_lock);
@@ -392,14 +410,71 @@
virtual_avail = va;
*CMAP1 = 0;
- for (i = 0; i < NKPT; i++)
+
+ /*
+ * Leave in place an identity mapping (virt == phys) for the low 1 MB
+ * physical memory region that is used by the ACPI wakeup code. This
+ * mapping must not have PG_G set.
+ */
+#ifdef XBOX
+ /* FIXME: This is gross, but needed for the XBOX. Since we are in such
+	 * an early stage, we cannot yet neatly map video memory ... :-(
+ * Better fixes are very welcome! */
+ if (!arch_i386_is_xbox)
+#endif
+ for (i = 1; i < NKPT; i++)
PTD[i] = 0;
+ /* Initialize the PAT MSR if present. */
+ pmap_init_pat();
+
/* Turn on PG_G on kernel page(s) */
pmap_set_pg();
}
/*
+ * Setup the PAT MSR.
+ */
+void
+pmap_init_pat(void)
+{
+ uint64_t pat_msr;
+
+ /* Bail if this CPU doesn't implement PAT. */
+ if (!(cpu_feature & CPUID_PAT))
+ return;
+
+#ifdef PAT_WORKS
+ /*
+ * Leave the indices 0-3 at the default of WB, WT, UC, and UC-.
+ * Program 4 and 5 as WP and WC.
+ * Leave 6 and 7 as UC and UC-.
+ */
+ pat_msr = rdmsr(MSR_PAT);
+ pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5));
+ pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
+ PAT_VALUE(5, PAT_WRITE_COMBINING);
+#else
+ /*
+ * Due to some Intel errata, we can only safely use the lower 4
+ * PAT entries. Thus, just replace PAT Index 2 with WC instead
+ * of UC-.
+ *
+ * Intel Pentium III Processor Specification Update
+ * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
+ * or Mode C Paging)
+ *
+ * Intel Pentium IV Processor Specification Update
+ * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
+ */
+ pat_msr = rdmsr(MSR_PAT);
+ pat_msr &= ~PAT_MASK(2);
+ pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
+#endif
+ wrmsr(MSR_PAT, pat_msr);
+}
+
+/*
* Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on.
*/
void
@@ -463,6 +538,61 @@
#endif
/*
+ * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
+ * Requirements:
+ * - Must deal with pages in order to ensure that none of the PG_* bits
+ * are ever set, PG_V in particular.
+ * - Assumes we can write to ptes without pte_store() atomic ops, even
+ * on PAE systems. This should be ok.
+ * - Assumes nothing will ever test these addresses for 0 to indicate
+ * no mapping instead of correctly checking PG_V.
+ * - Assumes a vm_offset_t will fit in a pte (true for i386).
+ * Because PG_V is never set, there can be no mappings to invalidate.
+ */
+static vm_offset_t
+pmap_ptelist_alloc(vm_offset_t *head)
+{
+ pt_entry_t *pte;
+ vm_offset_t va;
+
+ va = *head;
+ if (va == 0)
+ return (va); /* Out of memory */
+ pte = vtopte(va);
+ *head = *pte;
+ if (*head & PG_V)
+ panic("pmap_ptelist_alloc: va with PG_V set!");
+ *pte = 0;
+ return (va);
+}
+
+static void
+pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
+{
+ pt_entry_t *pte;
+
+ if (va & PG_V)
+ panic("pmap_ptelist_free: freeing va with PG_V set!");
+ pte = vtopte(va);
+ *pte = *head; /* virtual! PG_V is 0 though */
+ *head = va;
+}
+
+static void
+pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
+{
+ int i;
+ vm_offset_t va;
+
+ *head = 0;
+ for (i = npages - 1; i >= 0; i--) {
+ va = (vm_offset_t)base + i * PAGE_SIZE;
+ pmap_ptelist_free(head, va);
+ }
+}
+
+
+/*
* Initialize the pmap module.
* Called by vm_init, to initialize any structures that the pmap
* system needs to map virtual memory.
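
The pmap_ptelist_* trio above threads a freelist of unused kernel virtual addresses through the page-table entries that back them: each free VA's own PTE slot stores the next free VA, which is safe precisely because PG_V is never set on those entries. A generic userland sketch of the same intrusive-freelist idea, with an array standing in for the PTE slots and index 0 playing the role of the "va == 0, out of memory" sentinel:

#include <assert.h>
#include <stddef.h>

#define NSLOT	16

static size_t slot[NSLOT];	/* slot[i] backs "address" i, like a PTE */
static size_t head;		/* 0 means the list is empty */

static void
freelist_free(size_t idx)
{
	slot[idx] = head;	/* store the old head in the freed slot */
	head = idx;
}

static size_t
freelist_alloc(void)
{
	size_t idx;

	idx = head;
	if (idx == 0)
		return (0);	/* out of entries */
	head = slot[idx];	/* the next free index lives in the slot */
	slot[idx] = 0;
	return (idx);
}

int
main(void)
{
	size_t i;

	/* Initialize in reverse so allocation order is ascending. */
	for (i = NSLOT - 1; i >= 1; i--)
		freelist_free(i);
	assert(freelist_alloc() == 1);
	assert(freelist_alloc() == 2);
	return (0);
}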
@@ -470,21 +600,24 @@
void
pmap_init(void)
{
- int shpgperproc = PMAP_SHPGPERPROC;
/*
* Initialize the address space (zone) for the pv entries. Set a
* high water mark so that the system can recover from excessive
* numbers of pv entries.
*/
- pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
+ pv_entry_max = roundup(pv_entry_max, _NPCPV);
pv_entry_high_water = 9 * (pv_entry_max / 10);
- uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
+ pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
+ pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
+ PAGE_SIZE * pv_maxchunks);
+ if (pv_chunkbase == NULL)
+ panic("pmap_init: not enough kvm for pv chunks");
+ pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
#ifdef PAE
pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
@@ -493,53 +626,114 @@
#endif
}
-void
-pmap_init2()
-{
-}
+SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
+ "Max number of PV entries");
+SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
+ "Page share factor per proc");
/***************************************************
* Low level helper routines.....
***************************************************/
-#if defined(PMAP_DIAGNOSTIC)
-
/*
- * This code checks for non-writeable/modified pages.
- * This should be an invalid condition.
+ * Determine the appropriate bits to set in a PTE or PDE for a specified
+ * caching mode.
*/
static int
-pmap_nw_modified(pt_entry_t ptea)
+pmap_cache_bits(int mode, boolean_t is_pde)
{
- int pte;
-
- pte = (int) ptea;
-
- if ((pte & (PG_M|PG_RW)) == PG_M)
- return 1;
- else
- return 0;
-}
-#endif
+ int pat_flag, pat_index, cache_bits;
+ /* The PAT bit is different for PTE's and PDE's. */
+ pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
-/*
- * this routine defines the region(s) of memory that should
- * not be tested for the modified bit.
- */
-static PMAP_INLINE int
-pmap_track_modified(vm_offset_t va)
-{
- if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
- return 1;
- else
- return 0;
+ /* If we don't support PAT, map extended modes to older ones. */
+ if (!(cpu_feature & CPUID_PAT)) {
+ switch (mode) {
+ case PAT_UNCACHEABLE:
+ case PAT_WRITE_THROUGH:
+ case PAT_WRITE_BACK:
+ break;
+ case PAT_UNCACHED:
+ case PAT_WRITE_COMBINING:
+ case PAT_WRITE_PROTECTED:
+ mode = PAT_UNCACHEABLE;
+ break;
+ }
+ }
+
+ /* Map the caching mode to a PAT index. */
+ switch (mode) {
+#ifdef PAT_WORKS
+ case PAT_UNCACHEABLE:
+ pat_index = 3;
+ break;
+ case PAT_WRITE_THROUGH:
+ pat_index = 1;
+ break;
+ case PAT_WRITE_BACK:
+ pat_index = 0;
+ break;
+ case PAT_UNCACHED:
+ pat_index = 2;
+ break;
+ case PAT_WRITE_COMBINING:
+ pat_index = 5;
+ break;
+ case PAT_WRITE_PROTECTED:
+ pat_index = 4;
+ break;
+#else
+ case PAT_UNCACHED:
+ case PAT_UNCACHEABLE:
+ case PAT_WRITE_PROTECTED:
+ pat_index = 3;
+ break;
+ case PAT_WRITE_THROUGH:
+ pat_index = 1;
+ break;
+ case PAT_WRITE_BACK:
+ pat_index = 0;
+ break;
+ case PAT_WRITE_COMBINING:
+ pat_index = 2;
+ break;
+#endif
+ default:
+ panic("Unknown caching mode %d\n", mode);
+ }
+
+ /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
+ cache_bits = 0;
+ if (pat_index & 0x4)
+ cache_bits |= pat_flag;
+ if (pat_index & 0x2)
+ cache_bits |= PG_NC_PCD;
+ if (pat_index & 0x1)
+ cache_bits |= PG_NC_PWT;
+ return (cache_bits);
}
-
#ifdef SMP
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
+ *
+ * N.B.: Before calling any of the following TLB invalidation functions,
+ * the calling processor must ensure that all stores updating a non-
+ * kernel page table are globally performed. Otherwise, another
+ * processor could cache an old, pre-update entry without being
+ * invalidated. This can happen one of two ways: (1) The pmap becomes
+ * active on another processor after its pm_active field is checked by
+ * one of the following functions but before a store updating the page
+ * table is globally performed. (2) The pmap becomes active on another
+ * processor before its pm_active field is checked but due to
+ * speculative loads one of the following functions still reads the
+ * pmap as inactive on the other processor.
+ *
+ * The kernel page table is exempt because its pm_active field is
+ * immutable. The kernel page table is always active on every
+ * processor.
*/
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
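
Before the TLB hunks: pmap_cache_bits() above ends by scattering its 3-bit PAT table index across the PAT, PCD, and PWT bits of a PTE. A small self-checking sketch of that last step; the bit values are the architectural i386 PTE bits, and the function name is illustrative:

#include <assert.h>

#define PG_NC_PWT	0x008	/* page write-through (bit 3) */
#define PG_NC_PCD	0x010	/* page cache-disable (bit 4) */
#define PG_PTE_PAT	0x080	/* PAT bit in a PTE (bit 7) */

/* Scatter a PAT index in [0,7] across the three PTE caching bits. */
static int
pat_index_to_bits(int idx)
{
	return ((idx & 4 ? PG_PTE_PAT : 0) |
	    (idx & 2 ? PG_NC_PCD : 0) |
	    (idx & 1 ? PG_NC_PWT : 0));
}

int
main(void)
{
	/* Errata-safe layout: write-combining is index 2, PCD only. */
	assert(pat_index_to_bits(2) == PG_NC_PCD);
	assert(pat_index_to_bits(3) == (PG_NC_PCD | PG_NC_PWT));
	return (0);
}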
@@ -547,18 +741,7 @@
u_int cpumask;
u_int other_cpus;
- if (smp_started) {
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- } else
- critical_enter();
- /*
- * We need to disable interrupt preemption but MUST NOT have
- * interrupts disabled here.
- * XXX we may need to hold schedlock to get a coherent pm_active
- * XXX critical sections disable interrupts again
- */
+ sched_pin();
if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
invlpg(va);
smp_invlpg(va);
@@ -570,10 +753,7 @@
if (pmap->pm_active & other_cpus)
smp_masked_invlpg(pmap->pm_active & other_cpus, va);
}
- if (smp_started)
- mtx_unlock_spin(&smp_ipi_mtx);
- else
- critical_exit();
+ sched_unpin();
}
void
@@ -583,18 +763,7 @@
u_int other_cpus;
vm_offset_t addr;
- if (smp_started) {
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- } else
- critical_enter();
- /*
- * We need to disable interrupt preemption but MUST NOT have
- * interrupts disabled here.
- * XXX we may need to hold schedlock to get a coherent pm_active
- * XXX critical sections disable interrupts again
- */
+ sched_pin();
if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
@@ -609,10 +778,7 @@
smp_masked_invlpg_range(pmap->pm_active & other_cpus,
sva, eva);
}
- if (smp_started)
- mtx_unlock_spin(&smp_ipi_mtx);
- else
- critical_exit();
+ sched_unpin();
}
void
@@ -621,18 +787,7 @@
u_int cpumask;
u_int other_cpus;
- if (smp_started) {
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- } else
- critical_enter();
- /*
- * We need to disable interrupt preemption but MUST NOT have
- * interrupts disabled here.
- * XXX we may need to hold schedlock to get a coherent pm_active
- * XXX critical sections disable interrupts again
- */
+ sched_pin();
if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
invltlb();
smp_invltlb();
@@ -644,10 +799,17 @@
if (pmap->pm_active & other_cpus)
smp_masked_invltlb(pmap->pm_active & other_cpus);
}
- if (smp_started)
- mtx_unlock_spin(&smp_ipi_mtx);
- else
- critical_exit();
+ sched_unpin();
+}
+
+void
+pmap_invalidate_cache(void)
+{
+
+ sched_pin();
+ wbinvd();
+ smp_cache_flush();
+ sched_unpin();
}
#else /* !SMP */
/*
@@ -679,6 +841,13 @@
if (pmap == kernel_pmap || pmap->pm_active)
invltlb();
}
+
+PMAP_INLINE void
+pmap_invalidate_cache(void)
+{
+
+ wbinvd();
+}
#endif /* !SMP */
/*
@@ -808,7 +977,7 @@
pde = pmap->pm_pdir[va >> PDRSHIFT];
if (pde != 0) {
if ((pde & PG_PS) != 0) {
- rtval = (pde & ~PDRMASK) | (va & PDRMASK);
+ rtval = (pde & PG_PS_FRAME) | (va & PDRMASK);
PMAP_UNLOCK(pmap);
return rtval;
}
@@ -841,7 +1010,7 @@
if (pde != 0) {
if (pde & PG_PS) {
if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
- m = PHYS_TO_VM_PAGE((pde & ~PDRMASK) |
+ m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
(va & PDRMASK));
vm_page_hold(m);
}
@@ -878,6 +1047,15 @@
pte_store(pte, pa | PG_RW | PG_V | pgeflag);
}
+PMAP_INLINE void
+pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
+{
+ pt_entry_t *pte;
+
+ pte = vtopte(va);
+ pte_store(pte, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
+}
+
/*
* Remove a page from the kernel pagetables.
* Note: not SMP coherent.
@@ -930,17 +1108,22 @@
* Note: SMP coherent. Uses a ranged shootdown IPI.
*/
void
-pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
+pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
- vm_offset_t va;
+ pt_entry_t *endpte, oldpte, *pte;
- va = sva;
- while (count-- > 0) {
- pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
- va += PAGE_SIZE;
- m++;
- }
- pmap_invalidate_range(kernel_pmap, sva, va);
+ oldpte = 0;
+ pte = vtopte(sva);
+ endpte = pte + count;
+ while (pte < endpte) {
+ oldpte |= *pte;
+ pte_store(pte, VM_PAGE_TO_PHYS(*ma) | pgeflag | PG_RW | PG_V);
+ pte++;
+ ma++;
+ }
+ if ((oldpte & PG_V) != 0)
+ pmap_invalidate_range(kernel_pmap, sva, sva + count *
+ PAGE_SIZE);
}
/*
@@ -964,24 +1147,35 @@
/***************************************************
* Page table page management routines.....
***************************************************/
+static __inline void
+pmap_free_zero_pages(vm_page_t free)
+{
+ vm_page_t m;
+
+ while (free != NULL) {
+ m = free;
+ free = m->right;
+ vm_page_free_zero(m);
+ }
+}
/*
* This routine unholds page table pages, and if the hold count
* drops to zero, then it decrements the wire count.
*/
-static PMAP_INLINE int
-pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
+static __inline int
+pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
{
--m->wire_count;
if (m->wire_count == 0)
- return _pmap_unwire_pte_hold(pmap, m);
+ return _pmap_unwire_pte_hold(pmap, m, free);
else
return 0;
}
static int
-_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
+_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
{
vm_offset_t pteva;
@@ -992,14 +1186,26 @@
--pmap->pm_stats.resident_count;
/*
+ * This is a release store so that the ordinary store unmapping
+ * the page table page is globally performed before TLB shoot-
+ * down is begun.
+ */
+ atomic_subtract_rel_int(&cnt.v_wire_count, 1);
+
+ /*
* Do an invltlb to make the invalidated mapping
* take effect immediately.
*/
pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
pmap_invalidate_page(pmap, pteva);
- vm_page_free_zero(m);
- atomic_subtract_int(&cnt.v_wire_count, 1);
+ /*
+ * Put page on a list so that it is released after
+ * *ALL* TLB shootdown is done
+ */
+ m->right = *free;
+ *free = m;
+
return 1;
}
@@ -1008,7 +1214,7 @@
* conditionally free the page, and manage the hold/wire counts.
*/
static int
-pmap_unuse_pt(pmap_t pmap, vm_offset_t va)
+pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
{
pd_entry_t ptepde;
vm_page_t mpte;
@@ -1017,12 +1223,11 @@
return 0;
ptepde = *pmap_pde(pmap, va);
mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
- return pmap_unwire_pte_hold(pmap, mpte);
+ return pmap_unwire_pte_hold(pmap, mpte, free);
}
void
-pmap_pinit0(pmap)
- struct pmap *pmap;
+pmap_pinit0(pmap_t pmap)
{
PMAP_LOCK_INIT(pmap);
@@ -1032,7 +1237,7 @@
#endif
pmap->pm_active = 0;
PCPU_SET(curpmap, pmap);
- TAILQ_INIT(&pmap->pm_pvlist);
+ TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
mtx_lock_spin(&allpmaps_lock);
LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
@@ -1043,9 +1248,8 @@
* Initialize a preallocated and zeroed pmap structure,
* such as one in a vmspace structure.
*/
-void
-pmap_pinit(pmap)
- register struct pmap *pmap;
+int
+pmap_pinit(pmap_t pmap)
{
vm_page_t m, ptdpg[NPGPTD];
vm_paddr_t pa;
@@ -1061,6 +1265,11 @@
if (pmap->pm_pdir == NULL) {
pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
NBPTD);
+
+ if (pmap->pm_pdir == NULL) {
+ PMAP_LOCK_DESTROY(pmap);
+ return (0);
+ }
#ifdef PAE
pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
KASSERT(((vm_offset_t)pmap->pm_pdpt &
@@ -1112,8 +1321,10 @@
}
pmap->pm_active = 0;
- TAILQ_INIT(&pmap->pm_pvlist);
+ TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+
+ return (1);
}
/*
@@ -1203,7 +1414,7 @@
* hold count, and activate it.
*/
if (ptepa) {
- m = PHYS_TO_VM_PAGE(ptepa);
+ m = PHYS_TO_VM_PAGE(ptepa & PG_FRAME);
m->wire_count++;
} else {
/*
@@ -1238,6 +1449,9 @@
{
u_int mymask = PCPU_GET(cpumask);
+#ifdef COUNT_IPIS
+ (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
+#endif
if (rcr3() == lazyptd)
load_cr3(PCPU_GET(curpcb)->pcb_cr3);
atomic_clear_int(lazymask, mymask);
@@ -1259,7 +1473,7 @@
{
u_int mymask;
u_int mask;
- register u_int spins;
+ u_int spins;
while ((mask = pmap->pm_active) != 0) {
spins = 50000000;
@@ -1333,7 +1547,8 @@
mtx_unlock_spin(&allpmaps_lock);
for (i = 0; i < NPGPTD; i++)
- ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i]);
+ ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] &
+ PG_FRAME);
bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
sizeof(*pmap->pm_pdir));
@@ -1343,7 +1558,6 @@
pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
- vm_page_lock_queues();
for (i = 0; i < NPGPTD; i++) {
m = ptdpg[i];
#ifdef PAE
@@ -1354,7 +1568,6 @@
atomic_subtract_int(&cnt.v_wire_count, 1);
vm_page_free_zero(m);
}
- vm_page_unlock_queues();
PMAP_LOCK_DESTROY(pmap);
}
@@ -1450,35 +1663,260 @@
* page management routines.
***************************************************/
+CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
+CTASSERT(_NPCM == 11);
+
+static __inline struct pv_chunk *
+pv_to_chunk(pv_entry_t pv)
+{
+
+ return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK);
+}
+
+#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
+
+#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */
+#define PC_FREE10 0x0000fffful /* Free values for index 10 */
+
+static uint32_t pc_freemask[11] = {
+ PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
+ PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
+ PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
+ PC_FREE0_9, PC_FREE10
+};
+
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
+ "Current number of pv entries");
+
+#ifdef PV_STATS
+static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
+
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
+ "Current number of pv entry chunks");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
+ "Current number of pv entry chunks allocated");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
+ "Current number of pv entry chunks frees");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
+ "Number of times tried to get a chunk page but failed.");
+
+static long pv_entry_frees, pv_entry_allocs;
+static int pv_entry_spare;
+
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
+ "Current number of pv entry frees");
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
+ "Current number of pv entry allocs");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
+ "Current number of spare pv entries");
+
+static int pmap_collect_inactive, pmap_collect_active;
+
+SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
+ "Current number times pmap_collect called on inactive queue");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
+ "Current number times pmap_collect called on active queue");
+#endif
+
+/*
+ * We are in a serious low memory condition. Resort to
+ * drastic measures to free some pages so we can allocate
+ * another pv entry chunk. This is normally called to
+ * unmap inactive pages, and if necessary, active pages.
+ */
+static void
+pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
+{
+ pmap_t pmap;
+ pt_entry_t *pte, tpte;
+ pv_entry_t next_pv, pv;
+ vm_offset_t va;
+ vm_page_t m, free;
+
+ sched_pin();
+ TAILQ_FOREACH(m, &vpq->pl, pageq) {
+ if (m->hold_count || m->busy)
+ continue;
+ TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
+ va = pv->pv_va;
+ pmap = PV_PMAP(pv);
+ /* Avoid deadlock and lock recursion. */
+ if (pmap > locked_pmap)
+ PMAP_LOCK(pmap);
+ else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
+ continue;
+ pmap->pm_stats.resident_count--;
+ pte = pmap_pte_quick(pmap, va);
+ tpte = pte_load_clear(pte);
+ KASSERT((tpte & PG_W) == 0,
+ ("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
+ if (tpte & PG_A)
+ vm_page_flag_set(m, PG_REFERENCED);
+ if (tpte & PG_M) {
+ KASSERT((tpte & PG_RW),
+ ("pmap_collect: modified page not writable: va: %#x, pte: %#jx",
+ va, (uintmax_t)tpte));
+ vm_page_dirty(m);
+ }
+ free = NULL;
+ pmap_unuse_pt(pmap, va, &free);
+ pmap_invalidate_page(pmap, va);
+ pmap_free_zero_pages(free);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ if (TAILQ_EMPTY(&m->md.pv_list))
+ vm_page_flag_clear(m, PG_WRITEABLE);
+ m->md.pv_list_count--;
+ free_pv_entry(pmap, pv);
+ if (pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ }
+ }
+ sched_unpin();
+}
+
+
/*
* free the pv_entry back to the free list
*/
-static PMAP_INLINE void
-free_pv_entry(pv_entry_t pv)
+static void
+free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
+ vm_page_t m;
+ struct pv_chunk *pc;
+ int idx, field, bit;
+
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PV_STAT(pv_entry_frees++);
+ PV_STAT(pv_entry_spare++);
pv_entry_count--;
- uma_zfree(pvzone, pv);
+ pc = pv_to_chunk(pv);
+ idx = pv - &pc->pc_pventry[0];
+ field = idx / 32;
+ bit = idx % 32;
+ pc->pc_map[field] |= 1ul << bit;
+ /* move to head of list */
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ for (idx = 0; idx < _NPCM; idx++)
+ if (pc->pc_map[idx] != pc_freemask[idx])
+ return;
+ PV_STAT(pv_entry_spare -= _NPCPV);
+ PV_STAT(pc_chunk_count--);
+ PV_STAT(pc_chunk_frees++);
+ /* entire chunk is free, return it */
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
+ pmap_qremove((vm_offset_t)pc, 1);
+ vm_page_unwire(m, 0);
+ vm_page_free(m);
+ pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
}
/*
* get a new pv_entry, allocating a block from the system
* when needed.
- * the memory allocation is performed bypassing the malloc code
- * because of the possibility of allocations at interrupt time.
*/
static pv_entry_t
-get_pv_entry(void)
+get_pv_entry(pmap_t pmap, int try)
{
+ static const struct timeval printinterval = { 60, 0 };
+ static struct timeval lastprint;
+ static vm_pindex_t colour;
+ int bit, field;
+ pv_entry_t pv;
+ struct pv_chunk *pc;
+ vm_page_t m;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ PV_STAT(pv_entry_allocs++);
pv_entry_count++;
- if ((pv_entry_count > pv_entry_high_water) &&
- (pmap_pagedaemon_waken == 0)) {
- pmap_pagedaemon_waken = 1;
- wakeup (&vm_pages_needed);
+ if (pv_entry_count > pv_entry_high_water)
+ if (ratecheck(&lastprint, &printinterval))
+ printf("Approaching the limit on PV entries, consider "
+ "increasing either the vm.pmap.shpgperproc or the "
+ "vm.pmap.pv_entry_max tunable.\n");
+ pc = TAILQ_FIRST(&pmap->pm_pvchunk);
+ if (pc != NULL) {
+ for (field = 0; field < _NPCM; field++) {
+ if (pc->pc_map[field]) {
+ bit = bsfl(pc->pc_map[field]);
+ break;
+ }
+ }
+ if (field < _NPCM) {
+ pv = &pc->pc_pventry[field * 32 + bit];
+ pc->pc_map[field] &= ~(1ul << bit);
+ /* If this was the last item, move it to tail */
+ for (field = 0; field < _NPCM; field++)
+ if (pc->pc_map[field] != 0) {
+ PV_STAT(pv_entry_spare--);
+ return (pv); /* not full, return */
+ }
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+ PV_STAT(pv_entry_spare--);
+ return (pv);
+ }
}
- return uma_zalloc(pvzone, M_NOWAIT);
+ pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
+ m = vm_page_alloc(NULL, colour, VM_ALLOC_NORMAL |
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
+ if (m == NULL || pc == NULL) {
+ if (try) {
+ pv_entry_count--;
+ PV_STAT(pc_chunk_tryfail++);
+ if (m) {
+ vm_page_lock_queues();
+ vm_page_unwire(m, 0);
+ vm_page_free(m);
+ vm_page_unlock_queues();
+ }
+ if (pc)
+ pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
+ return (NULL);
+ }
+ /*
+ * Reclaim pv entries: At first, destroy mappings to
+ * inactive pages. After that, if a pv chunk entry
+ * is still needed, destroy mappings to active pages.
+ */
+ PV_STAT(pmap_collect_inactive++);
+ pmap_collect(pmap, &vm_page_queues[PQ_INACTIVE]);
+ if (m == NULL)
+ m = vm_page_alloc(NULL, colour, VM_ALLOC_NORMAL |
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
+ if (pc == NULL)
+ pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
+ if (m == NULL || pc == NULL) {
+ PV_STAT(pmap_collect_active++);
+ pmap_collect(pmap, &vm_page_queues[PQ_ACTIVE]);
+ if (m == NULL)
+ m = vm_page_alloc(NULL, colour,
+ VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED);
+ if (pc == NULL)
+ pc = (struct pv_chunk *)
+ pmap_ptelist_alloc(&pv_vafree);
+ if (m == NULL || pc == NULL)
+ panic("get_pv_entry: increase vm.pmap.shpgperproc");
+ }
+ }
+ PV_STAT(pc_chunk_count++);
+ PV_STAT(pc_chunk_allocs++);
+ colour++;
+ pmap_qenter((vm_offset_t)pc, &m, 1);
+ pc->pc_pmap = pmap;
+ pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
+ for (field = 1; field < _NPCM; field++)
+ pc->pc_map[field] = pc_freemask[field];
+ pv = &pc->pc_pventry[0];
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ PV_STAT(pv_entry_spare += _NPCPV - 1);
+ return (pv);
}
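
The chunk allocator above finds a free pv slot by scanning a small array of
32-bit bitmaps with bsfl (find-first-set). A standalone sketch of the same
technique, using ffs() in place of the instruction and illustrative sizes:

/* 1 bit per slot; 1 = free.  NWORDS is illustrative, like _NPCM above. */
#define	NWORDS	11
static uint32_t map[NWORDS];

static int
slot_alloc(void)
{
	int field, bit;

	for (field = 0; field < NWORDS; field++) {
		if (map[field] != 0) {
			bit = ffs(map[field]) - 1;	/* ffs() is 1-based */
			map[field] &= ~(1u << bit);
			return (field * 32 + bit);
		}
	}
	return (-1);				/* chunk exhausted */
}

static void
slot_free(int idx)
{
	map[idx / 32] |= 1u << (idx % 32);
}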
-
static void
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
{
@@ -1486,24 +1924,16 @@
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- if (pmap == pv->pv_pmap && va == pv->pv_va)
- break;
- }
- } else {
- TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
- if (va == pv->pv_va)
- break;
- }
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ if (pmap == PV_PMAP(pv) && va == pv->pv_va)
+ break;
}
KASSERT(pv != NULL, ("pmap_remove_entry: pv not found"));
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
m->md.pv_list_count--;
if (TAILQ_EMPTY(&m->md.pv_list))
vm_page_flag_clear(m, PG_WRITEABLE);
- TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
- free_pv_entry(pv);
+ free_pv_entry(pmap, pv);
}
/*
@@ -1515,24 +1945,39 @@
{
pv_entry_t pv;
- pv = get_pv_entry();
- if (pv == NULL)
- panic("no pv entries: increase vm.pmap.shpgperproc");
- pv->pv_va = va;
- pv->pv_pmap = pmap;
-
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
+ pv = get_pv_entry(pmap, FALSE);
+ pv->pv_va = va;
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
m->md.pv_list_count++;
}
/*
+ * Conditionally create a pv entry.
+ */
+static boolean_t
+pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
+{
+ pv_entry_t pv;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ if (pv_entry_count < pv_entry_high_water &&
+ (pv = get_pv_entry(pmap, TRUE)) != NULL) {
+ pv->pv_va = va;
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ m->md.pv_list_count++;
+ return (TRUE);
+ } else
+ return (FALSE);
+}
+
+/*
* pmap_remove_pte: do the things to unmap a page in a process
*/
static int
-pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
+pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
{
pt_entry_t oldpte;
vm_page_t m;
@@ -1550,30 +1995,25 @@
pmap_invalidate_page(kernel_pmap, va);
pmap->pm_stats.resident_count -= 1;
if (oldpte & PG_MANAGED) {
- m = PHYS_TO_VM_PAGE(oldpte);
+ m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME);
if (oldpte & PG_M) {
-#if defined(PMAP_DIAGNOSTIC)
- if (pmap_nw_modified((pt_entry_t) oldpte)) {
- printf(
- "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
- va, oldpte);
- }
-#endif
- if (pmap_track_modified(va))
- vm_page_dirty(m);
+ KASSERT((oldpte & PG_RW),
+ ("pmap_remove_pte: modified page not writable: va: %#x, pte: %#jx",
+ va, (uintmax_t)oldpte));
+ vm_page_dirty(m);
}
if (oldpte & PG_A)
vm_page_flag_set(m, PG_REFERENCED);
pmap_remove_entry(pmap, m, va);
}
- return (pmap_unuse_pt(pmap, va));
+ return (pmap_unuse_pt(pmap, va, free));
}
/*
* Remove a single page from a process address space
*/
static void
-pmap_remove_page(pmap_t pmap, vm_offset_t va)
+pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
{
pt_entry_t *pte;
@@ -1582,7 +2022,7 @@
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0)
return;
- pmap_remove_pte(pmap, pte, va);
+ pmap_remove_pte(pmap, pte, va, free);
pmap_invalidate_page(pmap, va);
}
@@ -1598,6 +2038,7 @@
vm_offset_t pdnxt;
pd_entry_t ptpaddr;
pt_entry_t *pte;
+ vm_page_t free = NULL;
int anyvalid;
/*
@@ -1619,7 +2060,7 @@
*/
if ((sva + PAGE_SIZE == eva) &&
((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
- pmap_remove_page(pmap, sva);
+ pmap_remove_page(pmap, sva, &free);
goto out;
}
@@ -1665,17 +2106,24 @@
sva += PAGE_SIZE) {
if (*pte == 0)
continue;
- anyvalid = 1;
- if (pmap_remove_pte(pmap, pte, sva))
+
+ /*
+ * The TLB entry for a PG_G mapping is invalidated
+ * by pmap_remove_pte().
+ */
+ if ((*pte & PG_G) == 0)
+ anyvalid = 1;
+ if (pmap_remove_pte(pmap, pte, sva, &free))
break;
}
}
out:
sched_unpin();
- vm_page_unlock_queues();
if (anyvalid)
pmap_invalidate_all(pmap);
+ vm_page_unlock_queues();
PMAP_UNLOCK(pmap);
+ pmap_free_zero_pages(free);
}
/*
@@ -1694,8 +2142,10 @@
void
pmap_remove_all(vm_page_t m)
{
- register pv_entry_t pv;
+ pv_entry_t pv;
+ pmap_t pmap;
pt_entry_t *pte, tpte;
+ vm_page_t free;
#if defined(PMAP_DIAGNOSTIC)
/*
@@ -1709,12 +2159,13 @@
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
sched_pin();
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
- PMAP_LOCK(pv->pv_pmap);
- pv->pv_pmap->pm_stats.resident_count--;
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pmap->pm_stats.resident_count--;
+ pte = pmap_pte_quick(pmap, pv->pv_va);
tpte = pte_load_clear(pte);
if (tpte & PG_W)
- pv->pv_pmap->pm_stats.wired_count--;
+ pmap->pm_stats.wired_count--;
if (tpte & PG_A)
vm_page_flag_set(m, PG_REFERENCED);
@@ -1722,23 +2173,19 @@
* Update the vm_page_t clean and reference bits.
*/
if (tpte & PG_M) {
-#if defined(PMAP_DIAGNOSTIC)
- if (pmap_nw_modified((pt_entry_t) tpte)) {
- printf(
- "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
- pv->pv_va, tpte);
- }
-#endif
- if (pmap_track_modified(pv->pv_va))
- vm_page_dirty(m);
+ KASSERT((tpte & PG_RW),
+ ("pmap_remove_all: modified page not writable: va: %#x, pte: %#jx",
+ pv->pv_va, (uintmax_t)tpte));
+ vm_page_dirty(m);
}
- pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
- TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
+ free = NULL;
+ pmap_unuse_pt(pmap, pv->pv_va, &free);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ pmap_free_zero_pages(free);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
m->md.pv_list_count--;
- pmap_unuse_pt(pv->pv_pmap, pv->pv_va);
- PMAP_UNLOCK(pv->pv_pmap);
- free_pv_entry(pv);
+ free_pv_entry(pmap, pv);
+ PMAP_UNLOCK(pmap);
}
vm_page_flag_clear(m, PG_WRITEABLE);
sched_unpin();
@@ -1761,8 +2208,14 @@
return;
}
+#ifdef PAE
+ if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
+ (VM_PROT_WRITE|VM_PROT_EXECUTE))
+ return;
+#else
if (prot & VM_PROT_WRITE)
return;
+#endif
anychanged = 0;
@@ -1770,7 +2223,8 @@
sched_pin();
PMAP_LOCK(pmap);
for (; sva < eva; sva = pdnxt) {
- unsigned obits, pbits, pdirindex;
+ pt_entry_t obits, pbits;
+ unsigned pdirindex;
pdnxt = (sva + NBPDR) & ~PDRMASK;
@@ -1788,7 +2242,12 @@
* Check for large page.
*/
if ((ptpaddr & PG_PS) != 0) {
- pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
+ if ((prot & VM_PROT_WRITE) == 0)
+ pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
+#ifdef PAE
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ pmap->pm_pdir[pdirindex] |= pg_nx;
+#endif
anychanged = 1;
continue;
}
@@ -1806,28 +2265,39 @@
* size, PG_RW, PG_A, and PG_M are among the least
* significant 32 bits.
*/
- obits = pbits = *(u_int *)pte;
+ obits = pbits = *pte;
+ if ((pbits & PG_V) == 0)
+ continue;
if (pbits & PG_MANAGED) {
m = NULL;
if (pbits & PG_A) {
- m = PHYS_TO_VM_PAGE(*pte);
+ m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
vm_page_flag_set(m, PG_REFERENCED);
pbits &= ~PG_A;
}
- if ((pbits & PG_M) != 0 &&
- pmap_track_modified(sva)) {
+ if ((pbits & PG_M) != 0) {
if (m == NULL)
- m = PHYS_TO_VM_PAGE(*pte);
+ m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
vm_page_dirty(m);
}
}
- pbits &= ~(PG_RW | PG_M);
+ if ((prot & VM_PROT_WRITE) == 0)
+ pbits &= ~(PG_RW | PG_M);
+#ifdef PAE
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ pbits |= pg_nx;
+#endif
if (pbits != obits) {
+#ifdef PAE
+ if (!atomic_cmpset_64(pte, obits, pbits))
+ goto retry;
+#else
if (!atomic_cmpset_int((u_int *)pte, obits,
pbits))
goto retry;
+#endif
if (obits & PG_G)
pmap_invalidate_page(pmap, sva);
else
@@ -1836,9 +2306,9 @@
}
}
sched_unpin();
- vm_page_unlock_queues();
if (anychanged)
pmap_invalidate_all(pmap);
+ vm_page_unlock_queues();
PMAP_UNLOCK(pmap);
}
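
Both pmap_protect() above and pmap_change_attr() further down update live PTEs
with a compare-and-swap retry loop, so that PG_A/PG_M bits set concurrently by
the MMU are never lost. The pattern in isolation (a sketch; pte stands for any
word updated concurrently by other CPUs or by hardware):

/*
 * Lock-free read-modify-write: reread and retry until the CAS
 * succeeds, preserving bits set concurrently elsewhere.
 */
static void
pte_clear_bits(volatile u_int *pte, u_int bits)
{
	u_int old, new;

	do {
		old = *pte;
		new = old & ~bits;
	} while (old != new && !atomic_cmpset_int(pte, old, new));
}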
@@ -1859,13 +2329,14 @@
boolean_t wired)
{
vm_paddr_t pa;
- register pt_entry_t *pte;
+ pd_entry_t *pde;
+ pt_entry_t *pte;
vm_paddr_t opa;
pt_entry_t origpte, newpte;
vm_page_t mpte, om;
boolean_t invlva;
- va &= PG_FRAME;
+ va = trunc_page(va);
#ifdef PMAP_DIAGNOSTIC
if (va > VM_MAX_KERNEL_ADDRESS)
panic("pmap_enter: toobig");
@@ -1897,6 +2368,9 @@
}
#endif
+ pde = pmap_pde(pmap, va);
+ if ((*pde & PG_PS) != 0)
+ panic("pmap_enter: attempted pmap_enter on 4MB page");
pte = pmap_pte_quick(pmap, va);
/*
@@ -1912,16 +2386,6 @@
origpte = *pte;
opa = origpte & PG_FRAME;
- if (origpte & PG_PS) {
- /*
- * Yes, I know this will truncate upper address bits for PAE,
- * but I'm actually more interested in the lower bits
- */
- printf("pmap_enter: va %p, pte %p, origpte %p\n",
- (void *)va, (void *)pte, (void *)(uintptr_t)origpte);
- panic("pmap_enter: attempted pmap_enter on 4MB page");
- }
-
/*
* Mapping has not changed, must be protection or wiring change.
*/
@@ -1977,6 +2441,8 @@
* Enter on the PV list if part of our managed memory.
*/
if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
+ KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
+ ("pmap_enter: managed mapping within the clean submap"));
pmap_insert_entry(pmap, va, m);
pa |= PG_MANAGED;
}
@@ -1992,8 +2458,14 @@
* Now validate mapping with desired protection/wiring.
*/
newpte = (pt_entry_t)(pa | PG_V);
- if ((prot & VM_PROT_WRITE) != 0)
+ if ((prot & VM_PROT_WRITE) != 0) {
newpte |= PG_RW;
+ vm_page_flag_set(m, PG_WRITEABLE);
+ }
+#ifdef PAE
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ newpte |= pg_nx;
+#endif
if (wired)
newpte |= PG_W;
if (va < VM_MAXUSER_ADDRESS)
@@ -2014,13 +2486,17 @@
vm_page_flag_set(om, PG_REFERENCED);
if (opa != VM_PAGE_TO_PHYS(m))
invlva = TRUE;
+#ifdef PAE
+ if ((origpte & PG_NX) == 0 &&
+ (newpte & PG_NX) != 0)
+ invlva = TRUE;
+#endif
}
if (origpte & PG_M) {
KASSERT((origpte & PG_RW),
- ("pmap_enter: modified page not writable:"
- " va: 0x%x, pte: 0x%x", va, origpte));
- if ((origpte & PG_MANAGED) &&
- pmap_track_modified(va))
+ ("pmap_enter: modified page not writable: va: %#x, pte: %#jx",
+ va, (uintmax_t)origpte));
+ if ((origpte & PG_MANAGED) != 0)
vm_page_dirty(om);
if ((prot & VM_PROT_WRITE) == 0)
invlva = TRUE;
@@ -2036,6 +2512,38 @@
}
/*
+ * Maps a sequence of resident pages belonging to the same object.
+ * The sequence begins with the given page m_start. This page is
+ * mapped at the given virtual address start. Each subsequent page is
+ * mapped at a virtual address that is offset from start by the same
+ * amount as the page is offset from m_start within the object. The
+ * last page in the sequence is the page with the largest offset from
+ * m_start that can be mapped at a virtual address less than the given
+ * virtual address end. Not every virtual page between start and end
+ * is mapped; only those for which a resident page exists with the
+ * corresponding offset from m_start are mapped.
+ */
+void
+pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
+ vm_page_t m_start, vm_prot_t prot)
+{
+ vm_page_t m, mpte;
+ vm_pindex_t diff, psize;
+
+ VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
+ psize = atop(end - start);
+ mpte = NULL;
+ m = m_start;
+ PMAP_LOCK(pmap);
+ while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
+ mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
+ prot, mpte);
+ m = TAILQ_NEXT(m, listq);
+ }
+ PMAP_UNLOCK(pmap);
+}
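
The comment above pins down the arithmetic: a page diff slots past m_start in
the object lands diff pages past start in the address space. As a one-line
sketch (an illustrative helper, not part of the patch):

static vm_offset_t
object_page_va(vm_offset_t start, vm_pindex_t m_start_pindex,
    vm_pindex_t m_pindex)
{
	return (start + ptoa(m_pindex - m_start_pindex));
}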
+
+/*
* this code makes some *MAJOR* assumptions:
* 1. Current pmap & pmap exists.
* 2. Not wired.
@@ -2044,16 +2552,28 @@
* but is *MUCH* faster than pmap_enter...
*/
-vm_page_t
-pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
- vm_page_t mpte)
+void
+pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
+{
+
+ PMAP_LOCK(pmap);
+ (void) pmap_enter_quick_locked(pmap, va, m, prot, NULL);
+ PMAP_UNLOCK(pmap);
+}
+
+static vm_page_t
+pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ vm_prot_t prot, vm_page_t mpte)
{
pt_entry_t *pte;
vm_paddr_t pa;
+ vm_page_t free;
+ KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
+ (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
+ ("pmap_enter_quick_locked: managed mapping within the clean submap"));
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
- PMAP_LOCK(pmap);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
* In the case that a page table page is not
@@ -2070,7 +2590,6 @@
if (mpte && (mpte->pindex == ptepindex)) {
mpte->wire_count++;
} else {
-retry:
/*
* Get the page directory entry
*/
@@ -2083,23 +2602,13 @@
if (ptepa) {
if (ptepa & PG_PS)
panic("pmap_enter_quick: unexpected mapping into 4MB page");
- mpte = PHYS_TO_VM_PAGE(ptepa);
+ mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME);
mpte->wire_count++;
} else {
mpte = _pmap_allocpte(pmap, ptepindex,
M_NOWAIT);
- if (mpte == NULL) {
- PMAP_UNLOCK(pmap);
- vm_page_busy(m);
- vm_page_unlock_queues();
- VM_OBJECT_UNLOCK(m->object);
- VM_WAIT;
- VM_OBJECT_LOCK(m->object);
- vm_page_lock_queues();
- vm_page_wakeup(m);
- PMAP_LOCK(pmap);
- goto retry;
- }
+ if (mpte == NULL)
+ return (mpte);
}
}
} else {
@@ -2115,19 +2624,28 @@
pte = vtopte(va);
if (*pte) {
if (mpte != NULL) {
- pmap_unwire_pte_hold(pmap, mpte);
+ mpte->wire_count--;
mpte = NULL;
}
- goto out;
+ return (mpte);
}
/*
- * Enter on the PV list if part of our managed memory. Note that we
- * raise IPL while manipulating pv_table since pmap_enter can be
- * called at interrupt time.
+ * Enter on the PV list if part of our managed memory.
*/
- if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
- pmap_insert_entry(pmap, va, m);
+ if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
+ !pmap_try_insert_pv_entry(pmap, va, m)) {
+ if (mpte != NULL) {
+ free = NULL;
+ if (pmap_unwire_pte_hold(pmap, mpte, &free)) {
+ pmap_invalidate_page(pmap, va);
+ pmap_free_zero_pages(free);
+ }
+
+ mpte = NULL;
+ }
+ return (mpte);
+ }
/*
* Increment counters
@@ -2135,6 +2653,10 @@
pmap->pm_stats.resident_count++;
pa = VM_PAGE_TO_PHYS(m);
+#ifdef PAE
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ pa |= pg_nx;
+#endif
/*
* Now validate mapping with RO protection
@@ -2143,8 +2665,6 @@
pte_store(pte, pa | PG_V | PG_U);
else
pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
-out:
- PMAP_UNLOCK(pmap);
return mpte;
}
@@ -2193,7 +2713,6 @@
retry:
p = vm_page_lookup(object, pindex);
if (p != NULL) {
- vm_page_lock_queues();
if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
goto retry;
} else {
@@ -2212,8 +2731,8 @@
p = vm_page_lookup(object, pindex);
vm_page_lock_queues();
vm_page_wakeup(p);
+ vm_page_unlock_queues();
}
- vm_page_unlock_queues();
ptepa = VM_PAGE_TO_PHYS(p);
if (ptepa & (NBPDR - 1))
@@ -2244,12 +2763,9 @@
* The mapping must already exist in the pmap.
*/
void
-pmap_change_wiring(pmap, va, wired)
- register pmap_t pmap;
- vm_offset_t va;
- boolean_t wired;
+pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
{
- register pt_entry_t *pte;
+ pt_entry_t *pte;
PMAP_LOCK(pmap);
pte = pmap_pte(pmap, va);
@@ -2282,10 +2798,10 @@
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t src_addr)
{
+ vm_page_t free;
vm_offset_t addr;
vm_offset_t end_addr = src_addr + len;
vm_offset_t pdnxt;
- vm_page_t m;
if (dst_addr != src_addr)
return;
@@ -2311,15 +2827,6 @@
if (addr >= UPT_MIN_ADDRESS)
panic("pmap_copy: invalid to pmap_copy page tables");
- /*
- * Don't let optional prefaulting of pages make us go
- * way below the low water mark of free pages or way
- * above high water mark of used pv entries.
- */
- if (cnt.v_free_count < cnt.v_free_reserved ||
- pv_entry_count > pv_entry_high_water)
- break;
-
pdnxt = (addr + NBPDR) & ~PDRMASK;
ptepindex = addr >> PDRSHIFT;
@@ -2329,14 +2836,15 @@
if (srcptepaddr & PG_PS) {
if (dst_pmap->pm_pdir[ptepindex] == 0) {
- dst_pmap->pm_pdir[ptepindex] = srcptepaddr;
+ dst_pmap->pm_pdir[ptepindex] = srcptepaddr &
+ ~PG_W;
dst_pmap->pm_stats.resident_count +=
NBPDR / PAGE_SIZE;
}
continue;
}
- srcmpte = PHYS_TO_VM_PAGE(srcptepaddr);
+ srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
if (srcmpte->wire_count == 0)
panic("pmap_copy: source page table page is unused");
@@ -2351,28 +2859,31 @@
* we only virtual copy managed pages
*/
if ((ptetemp & PG_MANAGED) != 0) {
- /*
- * We have to check after allocpte for the
- * pte still being around... allocpte can
- * block.
- */
dstmpte = pmap_allocpte(dst_pmap, addr,
M_NOWAIT);
if (dstmpte == NULL)
break;
dst_pte = pmap_pte_quick(dst_pmap, addr);
- if (*dst_pte == 0) {
+ if (*dst_pte == 0 &&
+ pmap_try_insert_pv_entry(dst_pmap, addr,
+ PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) {
/*
- * Clear the modified and
+ * Clear the wired, modified, and
* accessed (referenced) bits
* during the copy.
*/
- m = PHYS_TO_VM_PAGE(ptetemp);
- *dst_pte = ptetemp & ~(PG_M | PG_A);
+ *dst_pte = ptetemp & ~(PG_W | PG_M |
+ PG_A);
dst_pmap->pm_stats.resident_count++;
- pmap_insert_entry(dst_pmap, addr, m);
- } else
- pmap_unwire_pte_hold(dst_pmap, dstmpte);
+ } else {
+ free = NULL;
+ if (pmap_unwire_pte_hold(dst_pmap,
+ dstmpte, &free)) {
+ pmap_invalidate_page(dst_pmap,
+ addr);
+ pmap_free_zero_pages(free);
+ }
+ }
if (dstmpte->wire_count >= srcmpte->wire_count)
break;
}
@@ -2508,9 +3019,7 @@
* subset of pmaps for proper page aging.
*/
boolean_t
-pmap_page_exists_quick(pmap, m)
- pmap_t pmap;
- vm_page_t m;
+pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
pv_entry_t pv;
int loops = 0;
@@ -2520,7 +3029,7 @@
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- if (pv->pv_pmap == pmap) {
+ if (PV_PMAP(pv) == pmap) {
return TRUE;
}
loops++;
@@ -2530,7 +3039,6 @@
return (FALSE);
}
-#define PMAP_REMOVE_PAGES_CURPROC_ONLY
/*
* Remove all pages from specified address space
* this aids process exit speeds. Also, this code
@@ -2540,85 +3048,103 @@
* in the case of running down an entire address space.
*/
void
-pmap_remove_pages(pmap, sva, eva)
- pmap_t pmap;
- vm_offset_t sva, eva;
+pmap_remove_pages(pmap_t pmap)
{
pt_entry_t *pte, tpte;
- vm_page_t m;
- pv_entry_t pv, npv;
+ vm_page_t m, free = NULL;
+ pv_entry_t pv;
+ struct pv_chunk *pc, *npc;
+ int field, idx;
+ int32_t bit;
+ uint32_t inuse, bitmask;
+ int allfree;
-#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
printf("warning: pmap_remove_pages called with non-current pmap\n");
return;
}
-#endif
vm_page_lock_queues();
PMAP_LOCK(pmap);
sched_pin();
- for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
-
- if (pv->pv_va >= eva || pv->pv_va < sva) {
- npv = TAILQ_NEXT(pv, pv_plist);
- continue;
- }
-
-#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
- pte = vtopte(pv->pv_va);
-#else
- pte = pmap_pte_quick(pmap, pv->pv_va);
-#endif
- tpte = *pte;
-
- if (tpte == 0) {
- printf("TPTE at %p IS ZERO @ VA %08x\n",
- pte, pv->pv_va);
- panic("bad pte");
- }
+ TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
+ allfree = 1;
+ for (field = 0; field < _NPCM; field++) {
+ inuse = (~(pc->pc_map[field])) & pc_freemask[field];
+ while (inuse != 0) {
+ bit = bsfl(inuse);
+ bitmask = 1UL << bit;
+ idx = field * 32 + bit;
+ pv = &pc->pc_pventry[idx];
+ inuse &= ~bitmask;
+
+ pte = vtopte(pv->pv_va);
+ tpte = *pte;
+
+ if (tpte == 0) {
+ printf(
+ "TPTE at %p IS ZERO @ VA %08x\n",
+ pte, pv->pv_va);
+ panic("bad pte");
+ }
/*
* We cannot remove wired pages from a process' mapping at this time
*/
- if (tpte & PG_W) {
- npv = TAILQ_NEXT(pv, pv_plist);
- continue;
- }
-
- m = PHYS_TO_VM_PAGE(tpte);
- KASSERT(m->phys_addr == (tpte & PG_FRAME),
- ("vm_page_t %p phys_addr mismatch %016jx %016jx",
- m, (uintmax_t)m->phys_addr, (uintmax_t)tpte));
-
- KASSERT(m < &vm_page_array[vm_page_array_size],
- ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte));
+ if (tpte & PG_W) {
+ allfree = 0;
+ continue;
+ }
- pmap->pm_stats.resident_count--;
+ m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
+ KASSERT(m->phys_addr == (tpte & PG_FRAME),
+ ("vm_page_t %p phys_addr mismatch %016jx %016jx",
+ m, (uintmax_t)m->phys_addr,
+ (uintmax_t)tpte));
+
+ KASSERT(m < &vm_page_array[vm_page_array_size],
+ ("pmap_remove_pages: bad tpte %#jx",
+ (uintmax_t)tpte));
- pte_clear(pte);
+ pmap->pm_stats.resident_count--;
- /*
- * Update the vm_page_t clean and reference bits.
- */
- if (tpte & PG_M) {
- vm_page_dirty(m);
- }
+ pte_clear(pte);
- npv = TAILQ_NEXT(pv, pv_plist);
- TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
+ /*
+ * Update the vm_page_t clean/reference bits.
+ */
+ if (tpte & PG_M)
+ vm_page_dirty(m);
- m->md.pv_list_count--;
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_flag_clear(m, PG_WRITEABLE);
+ /* Mark free */
+ PV_STAT(pv_entry_frees++);
+ PV_STAT(pv_entry_spare++);
+ pv_entry_count--;
+ pc->pc_map[field] |= bitmask;
+ m->md.pv_list_count--;
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ if (TAILQ_EMPTY(&m->md.pv_list))
+ vm_page_flag_clear(m, PG_WRITEABLE);
- pmap_unuse_pt(pmap, pv->pv_va);
- free_pv_entry(pv);
+ pmap_unuse_pt(pmap, pv->pv_va, &free);
+ }
+ }
+ if (allfree) {
+ PV_STAT(pv_entry_spare -= _NPCPV);
+ PV_STAT(pc_chunk_count--);
+ PV_STAT(pc_chunk_frees++);
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
+ pmap_qremove((vm_offset_t)pc, 1);
+ vm_page_unwire(m, 0);
+ vm_page_free(m);
+ pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
+ }
}
sched_unpin();
pmap_invalidate_all(pmap);
- PMAP_UNLOCK(pmap);
vm_page_unlock_queues();
+ PMAP_UNLOCK(pmap);
+ pmap_free_zero_pages(free);
}
/*
@@ -2632,6 +3158,7 @@
{
pv_entry_t pv;
pt_entry_t *pte;
+ pmap_t pmap;
boolean_t rv;
rv = FALSE;
@@ -2641,17 +3168,11 @@
sched_pin();
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- /*
- * if the bit being tested is the modified bit, then
- * mark clean_map and ptes as never
- * modified.
- */
- if (!pmap_track_modified(pv->pv_va))
- continue;
- PMAP_LOCK(pv->pv_pmap);
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pte = pmap_pte_quick(pmap, pv->pv_va);
rv = (*pte & PG_M) != 0;
- PMAP_UNLOCK(pv->pv_pmap);
+ PMAP_UNLOCK(pmap);
if (rv)
break;
}
@@ -2682,82 +3203,46 @@
}
/*
- * Clear the given bit in each of the given page's ptes. The bit is
- * expressed as a 32-bit mask. Consequently, if the pte is 64 bits in
- * size, only a bit within the least significant 32 can be cleared.
+ * Clear the write and modified bits in each of the given page's mappings.
*/
-static __inline void
-pmap_clear_ptes(vm_page_t m, int bit)
+void
+pmap_remove_write(vm_page_t m)
{
- register pv_entry_t pv;
- pt_entry_t pbits, *pte;
+ pv_entry_t pv;
+ pmap_t pmap;
+ pt_entry_t oldpte, *pte;
- if ((m->flags & PG_FICTITIOUS) ||
- (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0))
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ if ((m->flags & PG_FICTITIOUS) != 0 ||
+ (m->flags & PG_WRITEABLE) == 0)
return;
-
sched_pin();
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- /*
- * Loop over all current mappings setting/clearing as appropos If
- * setting RO do we need to clear the VAC?
- */
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- /*
- * don't write protect pager mappings
- */
- if (bit == PG_RW) {
- if (!pmap_track_modified(pv->pv_va))
- continue;
- }
-
- PMAP_LOCK(pv->pv_pmap);
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pte = pmap_pte_quick(pmap, pv->pv_va);
retry:
- pbits = *pte;
- if (pbits & bit) {
- if (bit == PG_RW) {
- /*
- * Regardless of whether a pte is 32 or 64 bits
- * in size, PG_RW and PG_M are among the least
- * significant 32 bits.
- */
- if (!atomic_cmpset_int((u_int *)pte, pbits,
- pbits & ~(PG_RW | PG_M)))
- goto retry;
- if (pbits & PG_M) {
- vm_page_dirty(m);
- }
- } else {
- atomic_clear_int((u_int *)pte, bit);
- }
- pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+ oldpte = *pte;
+ if ((oldpte & PG_RW) != 0) {
+ /*
+ * Regardless of whether a pte is 32 or 64 bits
+ * in size, PG_RW and PG_M are among the least
+ * significant 32 bits.
+ */
+ if (!atomic_cmpset_int((u_int *)pte, oldpte,
+ oldpte & ~(PG_RW | PG_M)))
+ goto retry;
+ if ((oldpte & PG_M) != 0)
+ vm_page_dirty(m);
+ pmap_invalidate_page(pmap, pv->pv_va);
}
- PMAP_UNLOCK(pv->pv_pmap);
+ PMAP_UNLOCK(pmap);
}
- if (bit == PG_RW)
- vm_page_flag_clear(m, PG_WRITEABLE);
+ vm_page_flag_clear(m, PG_WRITEABLE);
sched_unpin();
}
/*
- * pmap_page_protect:
- *
- * Lower the permission for all mappings to a given page.
- */
-void
-pmap_page_protect(vm_page_t m, vm_prot_t prot)
-{
- if ((prot & VM_PROT_WRITE) == 0) {
- if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
- pmap_clear_ptes(m, PG_RW);
- } else {
- pmap_remove_all(m);
- }
- }
-}
-
-/*
* pmap_ts_referenced:
*
* Return a count of reference bits for a page, clearing those bits.
@@ -2772,48 +3257,35 @@
int
pmap_ts_referenced(vm_page_t m)
{
- register pv_entry_t pv, pvf, pvn;
+ pv_entry_t pv, pvf, pvn;
+ pmap_t pmap;
pt_entry_t *pte;
- pt_entry_t v;
int rtval = 0;
if (m->flags & PG_FICTITIOUS)
return (rtval);
-
sched_pin();
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
-
pvf = pv;
-
do {
pvn = TAILQ_NEXT(pv, pv_list);
-
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
-
- if (!pmap_track_modified(pv->pv_va))
- continue;
-
- PMAP_LOCK(pv->pv_pmap);
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
-
- if (pte && ((v = pte_load(pte)) & PG_A) != 0) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pte = pmap_pte_quick(pmap, pv->pv_va);
+ if ((*pte & PG_A) != 0) {
atomic_clear_int((u_int *)pte, PG_A);
- pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
-
+ pmap_invalidate_page(pmap, pv->pv_va);
rtval++;
- if (rtval > 4) {
- PMAP_UNLOCK(pv->pv_pmap);
- break;
- }
+ if (rtval > 4)
+ pvn = NULL;
}
- PMAP_UNLOCK(pv->pv_pmap);
+ PMAP_UNLOCK(pmap);
} while ((pv = pvn) != NULL && pv != pvf);
}
sched_unpin();
-
return (rtval);
}
@@ -2823,7 +3295,30 @@
void
pmap_clear_modify(vm_page_t m)
{
- pmap_clear_ptes(m, PG_M);
+ pv_entry_t pv;
+ pmap_t pmap;
+ pt_entry_t *pte;
+
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ if ((m->flags & PG_FICTITIOUS) != 0)
+ return;
+ sched_pin();
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pte = pmap_pte_quick(pmap, pv->pv_va);
+ if ((*pte & PG_M) != 0) {
+ /*
+ * Regardless of whether a pte is 32 or 64 bits
+ * in size, PG_M is among the least significant
+ * 32 bits.
+ */
+ atomic_clear_int((u_int *)pte, PG_M);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ }
+ PMAP_UNLOCK(pmap);
+ }
+ sched_unpin();
}
/*
@@ -2834,7 +3329,30 @@
void
pmap_clear_reference(vm_page_t m)
{
- pmap_clear_ptes(m, PG_A);
+ pv_entry_t pv;
+ pmap_t pmap;
+ pt_entry_t *pte;
+
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ if ((m->flags & PG_FICTITIOUS) != 0)
+ return;
+ sched_pin();
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pte = pmap_pte_quick(pmap, pv->pv_va);
+ if ((*pte & PG_A) != 0) {
+ /*
+ * Regardless of whether a pte is 32 or 64 bits
+ * in size, PG_A is among the least significant
+ * 32 bits.
+ */
+ atomic_clear_int((u_int *)pte, PG_A);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ }
+ PMAP_UNLOCK(pmap);
+ }
+ sched_unpin();
}
/*
@@ -2848,9 +3366,7 @@
* NOT real memory.
*/
void *
-pmap_mapdev(pa, size)
- vm_paddr_t pa;
- vm_size_t size;
+pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
{
vm_offset_t va, tmpva, offset;
@@ -2866,25 +3382,38 @@
panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
for (tmpva = va; size > 0; ) {
- pmap_kenter(tmpva, pa);
+ pmap_kenter_attr(tmpva, pa, mode);
size -= PAGE_SIZE;
tmpva += PAGE_SIZE;
pa += PAGE_SIZE;
}
pmap_invalidate_range(kernel_pmap, va, tmpva);
+ pmap_invalidate_cache();
return ((void *)(va + offset));
}
+void *
+pmap_mapdev(vm_paddr_t pa, vm_size_t size)
+{
+
+ return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE));
+}
+
+void *
+pmap_mapbios(vm_paddr_t pa, vm_size_t size)
+{
+
+ return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK));
+}
+
void
-pmap_unmapdev(va, size)
- vm_offset_t va;
- vm_size_t size;
+pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
vm_offset_t base, offset, tmpva;
if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
return;
- base = va & PG_FRAME;
+ base = trunc_page(va);
offset = va & PAGE_MASK;
size = roundup(offset + size, PAGE_SIZE);
for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
@@ -2893,13 +3422,72 @@
kmem_free(kernel_map, base, size);
}
+int
+pmap_change_attr(va, size, mode)
+ vm_offset_t va;
+ vm_size_t size;
+ int mode;
+{
+ vm_offset_t base, offset, tmpva;
+ pt_entry_t *pte;
+ u_int opte, npte;
+ pd_entry_t *pde;
+
+ base = trunc_page(va);
+ offset = va & PAGE_MASK;
+ size = roundup(offset + size, PAGE_SIZE);
+
+ /* Only supported on kernel virtual addresses. */
+ if (base <= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+
+ /* 4MB pages and pages that aren't mapped aren't supported. */
+ for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
+ pde = pmap_pde(kernel_pmap, tmpva);
+ if (*pde & PG_PS)
+ return (EINVAL);
+ if (*pde == 0)
+ return (EINVAL);
+ pte = vtopte(tmpva);
+ if (*pte == 0)
+ return (EINVAL);
+ }
+
+ /*
+ * OK, all the pages exist and are 4K, so run through them updating
+ * their cache mode.
+ */
+ for (tmpva = base; size > 0; ) {
+ pte = vtopte(tmpva);
+
+ /*
+ * The cache mode bits are all in the low 32-bits of the
+ * PTE, so we can just spin on updating the low 32-bits.
+ */
+ do {
+ opte = *(u_int *)pte;
+ npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
+ npte |= pmap_cache_bits(mode, 0);
+ } while (npte != opte &&
+ !atomic_cmpset_int((u_int *)pte, opte, npte));
+ tmpva += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+
+ /*
+ * Flush CPU caches so that no stale data remains cached under the
+ * old attributes.
+ */
+ pmap_invalidate_range(kernel_pmap, base, tmpva);
+ pmap_invalidate_cache();
+ return (0);
+}
+
/*
* perform the pmap work for mincore
*/
int
-pmap_mincore(pmap, addr)
- pmap_t pmap;
- vm_offset_t addr;
+pmap_mincore(pmap_t pmap, vm_offset_t addr)
{
pt_entry_t *ptep, pte;
vm_page_t m;
@@ -2960,7 +3548,6 @@
void
pmap_activate(struct thread *td)
{
- struct proc *p = td->td_proc;
pmap_t pmap, oldpmap;
u_int32_t cr3;
@@ -2979,18 +3566,10 @@
#else
cr3 = vtophys(pmap->pm_pdir);
#endif
- /* XXXKSE this is wrong.
+ /*
* pmap_activate is for the current thread on the current cpu
*/
- if (p->p_flag & P_SA) {
- /* Make sure all other cr3 entries are updated. */
- /* what if they are running? XXXKSE (maybe abort them) */
- FOREACH_THREAD_IN_PROC(p, td) {
- td->td_pcb->pcb_cr3 = cr3;
- }
- } else {
- td->td_pcb->pcb_cr3 = cr3;
- }
+ td->td_pcb->pcb_cr3 = cr3;
load_cr3(cr3);
PCPU_SET(curpmap, pmap);
critical_exit();
@@ -3018,7 +3597,7 @@
int index;
sx_slock(&allproc_lock);
- LIST_FOREACH(p, &allproc, p_list) {
+ FOREACH_PROC_IN_SYSTEM(p) {
if (p->p_pid != pid)
continue;
@@ -3048,7 +3627,7 @@
pt_entry_t pa;
vm_page_t m;
pa = *pte;
- m = PHYS_TO_VM_PAGE(pa);
+ m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
va, pa, m->hold_count, m->wire_count, m->flags);
npte++;
@@ -3077,8 +3656,7 @@
/* print address space of pmap*/
static void
-pads(pm)
- pmap_t pm;
+pads(pmap_t pm)
{
int i, j;
vm_paddr_t va;
@@ -3102,17 +3680,18 @@
}
void
-pmap_pvdump(pa)
- vm_paddr_t pa;
+pmap_pvdump(vm_paddr_t pa)
{
pv_entry_t pv;
+ pmap_t pmap;
vm_page_t m;
printf("pa %x", pa);
m = PHYS_TO_VM_PAGE(pa);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
- pads(pv->pv_pmap);
+ pmap = PV_PMAP(pv);
+ printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
+ pads(pmap);
}
printf(" ");
}
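
A note on PV_PMAP(), used throughout the pmap.c changes above: the pv_pmap
field is gone from each pv entry, and the owning pmap is instead recovered
from the header of the page-aligned chunk that contains the entry. A sketch of
how such a lookup works (the layout is inferred from the code above, not
quoted from the headers):

/* Assumed chunk recovery: mask the entry's address down to its page. */
#define	pv_to_chunk(pv)						\
	((struct pv_chunk *)((uintptr_t)(pv) & ~(uintptr_t)PAGE_MASK))
#define	PV_PMAP(pv)	(pv_to_chunk(pv)->pc_pmap)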
Index: io.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/io.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/io.c -L sys/i386/i386/io.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/io.c
+++ sys/i386/i386/io.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/io.c,v 1.1 2004/08/01 11:40:52 markm Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/io.c,v 1.2 2006/11/06 13:41:59 rwatson Exp $");
#include <sys/param.h>
#include <sys/conf.h>
@@ -33,6 +33,7 @@
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/systm.h>
@@ -54,7 +55,7 @@
{
int error;
- error = suser(td);
+ error = priv_check(td, PRIV_IO);
if (error != 0)
return (error);
error = securelevel_gt(td->td_ucred, 0);
Index: apic_vector.s
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/apic_vector.s,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/apic_vector.s -L sys/i386/i386/apic_vector.s -u -r1.1.1.1 -r1.2
--- sys/i386/i386/apic_vector.s
+++ sys/i386/i386/apic_vector.s
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* from: vector.s, 386BSD 0.1 unknown origin
- * $FreeBSD: src/sys/i386/i386/apic_vector.s,v 1.103.2.1 2005/10/04 15:15:21 jhb Exp $
+ * $FreeBSD: src/sys/i386/i386/apic_vector.s,v 1.113 2006/12/17 05:07:00 kmacy Exp $
*/
/*
@@ -36,31 +36,14 @@
* as well as IPI handlers.
*/
+#include "opt_smp.h"
+
#include <machine/asmacros.h>
#include <machine/apicreg.h>
-#include <machine/smptests.h>
#include "assym.s"
/*
- * Macros to create and destroy a trap frame.
- */
-#define PUSH_FRAME \
- pushl $0 ; /* dummy error code */ \
- pushl $0 ; /* dummy trap type */ \
- pushal ; /* 8 ints */ \
- pushl %ds ; /* save data and extra segments ... */ \
- pushl %es ; \
- pushl %fs
-
-#define POP_FRAME \
- popl %fs ; \
- popl %es ; \
- popl %ds ; \
- popal ; \
- addl $4+4,%esp
-
-/*
* I/O Interrupt Entry Point. Rather than having one entry point for
* each interrupt source, we use one entry point for each 32-bit word
* in the ISR. The handler determines the highest bit set in the ISR,
@@ -72,11 +55,7 @@
SUPERALIGN_TEXT ; \
IDTVEC(vec_name) ; \
PUSH_FRAME ; \
- movl $KDSEL, %eax ; /* reload with kernel's data segment */ \
- movl %eax, %ds ; \
- movl %eax, %es ; \
- movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \
- movl %eax, %fs ; \
+ SET_KERNEL_SREGS ; \
FAKE_MCOUNT(TF_EIP(%esp)) ; \
movl lapic, %edx ; /* pointer to local APIC */ \
movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \
@@ -84,9 +63,10 @@
jz 2f ; \
addl $(32 * index),%eax ; \
1: ; \
+ pushl %esp ; \
pushl %eax ; /* pass the IRQ */ \
call lapic_handle_intr ; \
- addl $4, %esp ; /* discard parameter */ \
+ addl $8, %esp ; /* discard parameter */ \
MEXITCOUNT ; \
jmp doreti ; \
2: movl $-1, %eax ; /* send a vector of -1 */ \
@@ -122,20 +102,11 @@
SUPERALIGN_TEXT
IDTVEC(timerint)
PUSH_FRAME
- movl $KDSEL, %eax /* reload with kernel's data segment */
- movl %eax, %ds
- movl %eax, %es
- movl $KPSEL, %eax
- movl %eax, %fs
-
- movl lapic, %edx
- movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */
-
+ SET_KERNEL_SREGS
FAKE_MCOUNT(TF_EIP(%esp))
-
- pushl $0 /* XXX convert trapframe to clockframe */
+ pushl %esp
call lapic_handle_timer
- addl $4, %esp /* XXX convert clockframe to trapframe */
+ add $4, %esp
MEXITCOUNT
jmp doreti
@@ -264,97 +235,71 @@
iret
/*
- * Forward hardclock to another CPU. Pushes a clockframe and calls
- * forwarded_hardclock().
+ * Invalidate cache.
*/
.text
SUPERALIGN_TEXT
-IDTVEC(ipi_intr_bitmap_handler)
-
- PUSH_FRAME
- movl $KDSEL, %eax /* reload with kernel's data segment */
+IDTVEC(invlcache)
+ pushl %eax
+ pushl %ds
+ movl $KDSEL, %eax /* Kernel data selector */
movl %eax, %ds
- movl %eax, %es
- movl $KPSEL, %eax
+
+#ifdef COUNT_IPIS
+ pushl %fs
+ movl $KPSEL, %eax /* Private space selector */
movl %eax, %fs
+ movl PCPU(CPUID), %eax
+ popl %fs
+ movl ipi_invlcache_counts(,%eax,4),%eax
+ incl (%eax)
+#endif
+
+ wbinvd
+
+ movl lapic, %eax
+ movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
+
+ lock
+ incl smp_tlb_wait
+
+ popl %ds
+ popl %eax
+ iret
+
+/*
+ * Handler for IPIs sent via the per-cpu IPI bitmap.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(ipi_intr_bitmap_handler)
+ PUSH_FRAME
+ SET_KERNEL_SREGS
movl lapic, %edx
movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */
FAKE_MCOUNT(TF_EIP(%esp))
- pushl $0 /* XXX convert trapframe to clockframe */
call ipi_bitmap_handler
- addl $4, %esp /* XXX convert clockframe to trapframe */
MEXITCOUNT
jmp doreti
/*
- * Executed by a CPU when it receives an Xcpustop IPI from another CPU,
- *
- * - Signals its receipt.
- * - Waits for permission to restart.
- * - Signals its restart.
+ * Executed by a CPU when it receives an IPI_STOP from another CPU.
*/
.text
SUPERALIGN_TEXT
IDTVEC(cpustop)
- pushl %ebp
- movl %esp, %ebp
- pushl %eax
- pushl %ecx
- pushl %edx
- pushl %ds /* save current data segment */
- pushl %es
- pushl %fs
-
- movl $KDSEL, %eax
- movl %eax, %ds /* use KERNEL data segment */
- movl %eax, %es
- movl $KPSEL, %eax
- movl %eax, %fs
+ PUSH_FRAME
+ SET_KERNEL_SREGS
movl lapic, %eax
movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
- movl PCPU(CPUID), %eax
- imull $PCB_SIZE, %eax
- leal CNAME(stoppcbs)(%eax), %eax
- pushl %eax
- call CNAME(savectx) /* Save process context */
- addl $4, %esp
-
- movl PCPU(CPUID), %eax
-
- lock
- btsl %eax, CNAME(stopped_cpus) /* stopped_cpus |= (1<<id) */
-1:
- btl %eax, CNAME(started_cpus) /* while (!(started_cpus & (1<<id))) */
- jnc 1b
-
- lock
- btrl %eax, CNAME(started_cpus) /* started_cpus &= ~(1<<id) */
- lock
- btrl %eax, CNAME(stopped_cpus) /* stopped_cpus &= ~(1<<id) */
+ call cpustop_handler
- test %eax, %eax
- jnz 2f
-
- movl CNAME(cpustop_restartfunc), %eax
- test %eax, %eax
- jz 2f
- movl $0, CNAME(cpustop_restartfunc) /* One-shot */
-
- call *%eax
-2:
- popl %fs
- popl %es
- popl %ds /* restore previous data segment */
- popl %edx
- popl %ecx
- popl %eax
- movl %ebp, %esp
- popl %ebp
+ POP_FRAME
iret
/*
@@ -366,11 +311,7 @@
SUPERALIGN_TEXT
IDTVEC(rendezvous)
PUSH_FRAME
- movl $KDSEL, %eax
- movl %eax, %ds /* use KERNEL data segment */
- movl %eax, %es
- movl $KPSEL, %eax
- movl %eax, %fs
+ SET_KERNEL_SREGS
#ifdef COUNT_IPIS
movl PCPU(CPUID), %eax
@@ -391,20 +332,11 @@
SUPERALIGN_TEXT
IDTVEC(lazypmap)
PUSH_FRAME
- movl $KDSEL, %eax
- movl %eax, %ds /* use KERNEL data segment */
- movl %eax, %es
- movl $KPSEL, %eax
- movl %eax, %fs
+ SET_KERNEL_SREGS
-#ifdef COUNT_IPIS
- movl PCPU(CPUID), %eax
- movl ipi_lazypmap_counts(,%eax,4), %eax
- incl (%eax)
-#endif
call pmap_lazyfix_action
- movl lapic, %eax
+ movl lapic, %eax
movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
POP_FRAME
iret
Index: nexus.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/nexus.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/nexus.c -L sys/i386/i386/nexus.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/nexus.c
+++ sys/i386/i386/nexus.c
@@ -12,7 +12,7 @@
* no representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied
* warranty.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
@@ -28,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/nexus.c,v 1.62 2005/05/10 12:02:15 nyan Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/nexus.c,v 1.73 2007/05/08 21:29:14 jhb Exp $");
/*
* This code implements a `root nexus' for Intel Architecture
@@ -41,6 +41,7 @@
* and I/O memory address space.
*/
+#include "opt_apic.h"
#include "opt_isa.h"
#include <sys/param.h>
@@ -61,6 +62,10 @@
#include <machine/resource.h>
+#ifdef DEV_APIC
+#include "pcib_if.h"
+#endif
+
#ifdef DEV_ISA
#include <isa/isavar.h>
#ifdef PC98
@@ -97,13 +102,21 @@
static int nexus_release_resource(device_t, device_t, int, int,
struct resource *);
static int nexus_setup_intr(device_t, device_t, struct resource *, int flags,
- void (*)(void *), void *, void **);
+ driver_filter_t filter, void (*)(void *), void *,
+ void **);
static int nexus_teardown_intr(device_t, device_t, struct resource *,
void *);
static struct resource_list *nexus_get_reslist(device_t dev, device_t child);
static int nexus_set_resource(device_t, device_t, int, int, u_long, u_long);
static int nexus_get_resource(device_t, device_t, int, int, u_long *, u_long *);
static void nexus_delete_resource(device_t, device_t, int, int);
+#ifdef DEV_APIC
+static int nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs);
+static int nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs);
+static int nexus_alloc_msix(device_t pcib, device_t dev, int *irq);
+static int nexus_release_msix(device_t pcib, device_t dev, int irq);
+static int nexus_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data);
+#endif
static device_method_t nexus_methods[] = {
/* Device interface */
@@ -129,6 +142,15 @@
DEVMETHOD(bus_get_resource, nexus_get_resource),
DEVMETHOD(bus_delete_resource, nexus_delete_resource),
+ /* pcib interface */
+#ifdef DEV_APIC
+ DEVMETHOD(pcib_alloc_msi, nexus_alloc_msi),
+ DEVMETHOD(pcib_release_msi, nexus_release_msi),
+ DEVMETHOD(pcib_alloc_msix, nexus_alloc_msix),
+ DEVMETHOD(pcib_release_msix, nexus_release_msix),
+ DEVMETHOD(pcib_map_msi, nexus_map_msi),
+#endif
+
{ 0, 0 }
};
@@ -144,11 +166,11 @@
static int
nexus_probe(device_t dev)
{
- int irq, last;
+ int irq;
device_quiet(dev); /* suppress attach message for neatness */
- /*
+ /*
* XXX working notes:
*
* - IRQ resource creation should be moved to the PIC/APIC driver.
@@ -177,18 +199,10 @@
* We search for regions of existing IRQs and add those to the IRQ
* resource manager.
*/
- last = -1;
for (irq = 0; irq < NUM_IO_INTS; irq++)
- if (intr_lookup_source(irq) != NULL) {
- if (last == -1)
- last = irq;
- } else if (last != -1) {
- if (rman_manage_region(&irq_rman, last, irq - 1) != 0)
+ if (intr_lookup_source(irq) != NULL)
+ if (rman_manage_region(&irq_rman, irq, irq) != 0)
panic("nexus_probe irq_rman add");
- last = -1;
- }
- if (last != -1 && rman_manage_region(&irq_rman, last, irq - 1) != 0)
- panic("nexus_probe irq_rman add");
/*
* ISA DMA on PCI systems is implemented in the ISA part of each
@@ -251,7 +265,7 @@
if (STAILQ_FIRST(rl))
retval += printf(" at");
-
+
retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
retval += resource_list_print_type(rl, "iomem", SYS_RES_MEMORY, "%#lx");
retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
@@ -284,7 +298,7 @@
return(0);
resource_list_init(&ndev->nx_resources);
- child = device_add_child_ordered(bus, order, name, unit);
+ child = device_add_child_ordered(bus, order, name, unit);
/* should we free this in nexus_child_detached? */
device_set_ivars(child, ndev);
@@ -306,9 +320,6 @@
struct resource_list_entry *rle;
struct rman *rm;
int needactivate = flags & RF_ACTIVE;
-#ifdef PC98
- bus_space_handle_t bh;
-#endif
/*
* If this is an allocation of the "default" range for a given RID, and
@@ -352,40 +363,15 @@
rv = rman_reserve_resource(rm, start, end, count, flags, child);
if (rv == 0)
return 0;
-
- if (type == SYS_RES_MEMORY) {
- rman_set_bustag(rv, I386_BUS_SPACE_MEM);
- } else if (type == SYS_RES_IOPORT) {
- rman_set_bustag(rv, I386_BUS_SPACE_IO);
-#ifndef PC98
- rman_set_bushandle(rv, rman_get_start(rv));
-#endif
- }
-
-#ifdef PC98
- if ((type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) &&
- i386_bus_space_handle_alloc(rman_get_bustag(rv),
- rman_get_start(rv), count, &bh) != 0) {
- rman_release_resource(rv);
- return 0;
- }
- rman_set_bushandle(rv, bh);
-#endif
+ rman_set_rid(rv, *rid);
if (needactivate) {
if (bus_activate_resource(child, type, *rid, rv)) {
-#ifdef PC98
- if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) {
- bh = rman_get_bushandle(rv);
- i386_bus_space_handle_free(rman_get_bustag(rv),
- bh, bh->bsh_sz);
- }
-#endif
rman_release_resource(rv);
return 0;
}
}
-
+
return rv;
}
@@ -395,34 +381,40 @@
{
#ifdef PC98
bus_space_handle_t bh;
+ int error;
#endif
+ void *vaddr;
+
/*
* If this is a memory resource, map it into the kernel.
*/
- if (rman_get_bustag(r) == I386_BUS_SPACE_MEM) {
- caddr_t vaddr = 0;
-
- if (rman_get_end(r) < 1024 * 1024) {
- /*
- * The first 1Mb is mapped at KERNBASE.
- */
- vaddr = (caddr_t)(uintptr_t)(KERNBASE + rman_get_start(r));
- } else {
- u_int32_t paddr;
- u_int32_t psize;
- u_int32_t poffs;
-
- paddr = rman_get_start(r);
- psize = rman_get_size(r);
-
- poffs = paddr - trunc_page(paddr);
- vaddr = (caddr_t) pmap_mapdev(paddr-poffs, psize+poffs) + poffs;
- }
+ switch (type) {
+ case SYS_RES_IOPORT:
+#ifdef PC98
+ error = i386_bus_space_handle_alloc(I386_BUS_SPACE_IO,
+ rman_get_start(r), rman_get_size(r), &bh);
+ if (error)
+ return (error);
+ rman_set_bushandle(r, bh);
+#else
+ rman_set_bushandle(r, rman_get_start(r));
+#endif
+ rman_set_bustag(r, I386_BUS_SPACE_IO);
+ break;
+ case SYS_RES_MEMORY:
+#ifdef PC98
+ error = i386_bus_space_handle_alloc(I386_BUS_SPACE_MEM,
+ rman_get_start(r), rman_get_size(r), &bh);
+ if (error)
+ return (error);
+#endif
+ vaddr = pmap_mapdev(rman_get_start(r), rman_get_size(r));
rman_set_virtual(r, vaddr);
+ rman_set_bustag(r, I386_BUS_SPACE_MEM);
#ifdef PC98
/* PC-98: the type of bus_space_handle_t is the structure. */
- bh = rman_get_bushandle(r);
bh->bsh_base = (bus_addr_t) vaddr;
+ rman_set_bushandle(r, bh);
#else
/* IBM-PC: the type of bus_space_handle_t is u_int */
rman_set_bushandle(r, (bus_space_handle_t) vaddr);
@@ -435,17 +427,22 @@
nexus_deactivate_resource(device_t bus, device_t child, int type, int rid,
struct resource *r)
{
+
/*
* If this is a memory resource, unmap it.
*/
- if ((rman_get_bustag(r) == I386_BUS_SPACE_MEM) &&
- (rman_get_end(r) >= 1024 * 1024)) {
- u_int32_t psize;
+ if (type == SYS_RES_MEMORY) {
+ pmap_unmapdev((vm_offset_t)rman_get_virtual(r),
+ rman_get_size(r));
+ }
+#ifdef PC98
+ if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) {
+ bus_space_handle_t bh;
- psize = rman_get_size(r);
- pmap_unmapdev((vm_offset_t)rman_get_virtual(r), psize);
+ bh = rman_get_bushandle(r);
+ i386_bus_space_handle_free(rman_get_bustag(r), bh, bh->bsh_sz);
}
-
+#endif
return (rman_deactivate_resource(r));
}
@@ -458,14 +455,6 @@
if (error)
return error;
}
-#ifdef PC98
- if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) {
- bus_space_handle_t bh;
-
- bh = rman_get_bushandle(r);
- i386_bus_space_handle_free(rman_get_bustag(r), bh, bh->bsh_sz);
- }
-#endif
return (rman_release_resource(r));
}
@@ -477,7 +466,8 @@
*/
static int
nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
- int flags, void (*ihand)(void *), void *arg, void **cookiep)
+ int flags, driver_filter_t filter, void (*ihand)(void *),
+ void *arg, void **cookiep)
{
int error;
@@ -497,7 +487,7 @@
return (error);
error = intr_add_handler(device_get_nameunit(child),
- rman_get_start(irq), ihand, arg, flags, cookiep);
+ rman_get_start(irq), filter, ihand, arg, flags, cookiep);
return (error);
}
@@ -560,9 +550,133 @@
resource_list_delete(rl, type, rid);
}
+/* Called from the MSI code to add new IRQs to the IRQ rman. */
+void
+nexus_add_irq(u_long irq)
+{
+
+ if (rman_manage_region(&irq_rman, irq, irq) != 0)
+ panic("%s: failed", __func__);
+}
+
+#ifdef DEV_APIC
+static int
+nexus_alloc_msix(device_t pcib, device_t dev, int *irq)
+{
+
+ return (msix_alloc(dev, irq));
+}
+
+static int
+nexus_release_msix(device_t pcib, device_t dev, int irq)
+{
+
+ return (msix_release(irq));
+}
+
+static int
+nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs)
+{
+
+ return (msi_alloc(dev, count, maxcount, irqs));
+}
+
+static int
+nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs)
+{
+
+ return (msi_release(irqs, count));
+}
+
+static int
+nexus_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data)
+{
+
+ return (msi_map(irq, addr, data));
+}
+#endif
+
+/* Placeholder for system RAM. */
+static void
+ram_identify(driver_t *driver, device_t parent)
+{
+
+ if (resource_disabled("ram", 0))
+ return;
+ if (BUS_ADD_CHILD(parent, 0, "ram", 0) == NULL)
+ panic("ram_identify");
+}
+
+static int
+ram_probe(device_t dev)
+{
+
+ device_quiet(dev);
+ device_set_desc(dev, "System RAM");
+ return (0);
+}
+
+static int
+ram_attach(device_t dev)
+{
+ struct resource *res;
+ vm_paddr_t *p;
+ int error, i, rid;
+
+ /*
+ * We use the dump_avail[] array rather than phys_avail[] for
+ * the memory map as phys_avail[] contains holes for kernel
+ * memory, page 0, the message buffer, and the dcons buffer.
+ * We test the end address in the loop instead of the start
+ * since the start address for the first segment is 0.
+ *
+ * XXX: It would be preferable to use the SMAP if it exists
+ * instead since if the SMAP is very fragmented we may not
+ * include some memory regions in dump_avail[] and phys_avail[].
+ */
+ for (i = 0, p = dump_avail; p[1] != 0; i++, p += 2) {
+ rid = i;
+#ifdef PAE
+ /*
+ * Resources use long's to track resources, so we can't
+ * include memory regions above 4GB.
+ */
+ if (p[0] >= ~0ul)
+ break;
+#endif
+ error = bus_set_resource(dev, SYS_RES_MEMORY, rid, p[0],
+ p[1] - p[0]);
+ if (error)
+ panic("ram_attach: resource %d failed set with %d", i,
+ error);
+ res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, 0);
+ if (res == NULL)
+ panic("ram_attach: resource %d failed to attach", i);
+ }
+ return (0);
+}
+
+static device_method_t ram_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_identify, ram_identify),
+ DEVMETHOD(device_probe, ram_probe),
+ DEVMETHOD(device_attach, ram_attach),
+ { 0, 0 }
+};
+
+static driver_t ram_driver = {
+ "ram",
+ ram_methods,
+ 1, /* no softc */
+};
+
+static devclass_t ram_devclass;
+
+DRIVER_MODULE(ram, nexus, ram_driver, ram_devclass, 0, 0);
+
#ifdef DEV_ISA
/*
- * Placeholder which claims PnP 'devices' which describe system
+ * Placeholder which claims PnP 'devices' which describe system
* resources.
*/
static struct isa_pnp_id sysresource_ids[] = {
@@ -575,7 +689,7 @@
sysresource_probe(device_t dev)
{
int result;
-
+
if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, sysresource_ids)) <= 0) {
device_quiet(dev);
}
Index: genassym.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/genassym.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/genassym.c -L sys/i386/i386/genassym.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/genassym.c
+++ sys/i386/i386/genassym.c
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/genassym.c,v 1.151 2005/04/13 22:57:17 peter Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/genassym.c,v 1.160 2007/09/17 21:55:28 peter Exp $");
#include "opt_apic.h"
#include "opt_compat.h"
@@ -78,12 +78,13 @@
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
-ASSYM(P_SFLAG, offsetof(struct proc, p_sflag));
ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
+ASSYM(TD_LOCK, offsetof(struct thread, td_lock));
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
ASSYM(TD_MD, offsetof(struct thread, td_md));
+ASSYM(TD_TID, offsetof(struct thread, td_tid));
ASSYM(P_MD, offsetof(struct proc, p_md));
ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
@@ -140,7 +141,6 @@
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
-ASSYM(PCB_SWITCHOUT, offsetof(struct pcb, pcb_switchout));
ASSYM(PCB_SIZE, sizeof(struct pcb));
ASSYM(PCB_VM86CALL, PCB_VM86CALL);
@@ -173,6 +173,7 @@
ASSYM(ENOENT, ENOENT);
ASSYM(EFAULT, EFAULT);
ASSYM(ENAMETOOLONG, ENAMETOOLONG);
+ASSYM(MAXCPU, MAXCPU);
ASSYM(MAXCOMLEN, MAXCOMLEN);
ASSYM(MAXPATHLEN, MAXPATHLEN);
ASSYM(BOOTINFO_SIZE, sizeof(struct bootinfo));
@@ -198,6 +199,7 @@
ASSYM(PC_CURRENTLDT, offsetof(struct pcpu, pc_currentldt));
ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
+ASSYM(PC_PRIVATE_TSS, offsetof(struct pcpu, pc_private_tss));
#ifdef DEV_APIC
ASSYM(LA_VER, offsetof(struct LAPIC, version));
--- /dev/null
+++ sys/i386/i386/minidump_machdep.c
@@ -0,0 +1,405 @@
+/*-
+ * Copyright (c) 2006 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/minidump_machdep.c,v 1.3.4.1 2008/01/30 21:21:50 ru Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/cons.h>
+#include <sys/kernel.h>
+#include <sys/kerneldump.h>
+#include <sys/msgbuf.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/atomic.h>
+#include <machine/elf.h>
+#include <machine/md_var.h>
+#include <machine/vmparam.h>
+#include <machine/minidump.h>
+
+CTASSERT(sizeof(struct kerneldumpheader) == 512);
+
+/*
+ * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
+ * is to protect us from metadata and to protect metadata from us.
+ */
+#define SIZEOF_METADATA (64*1024)
+
+#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
+#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
+
+uint32_t *vm_page_dump;
+int vm_page_dump_size;
+
+static struct kerneldumpheader kdh;
+static off_t dumplo;
+
+/* Handle chunked writes. */
+static size_t fragsz;
+static void *dump_va;
+static uint64_t counter, progress;
+
+CTASSERT(sizeof(*vm_page_dump) == 4);
+
+static int
+is_dumpable(vm_paddr_t pa)
+{
+ int i;
+
+ for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
+ if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
+ return (1);
+ }
+ return (0);
+}
+
+/* XXX should be MI */
+static void
+mkdumpheader(struct kerneldumpheader *kdh, uint32_t archver, uint64_t dumplen,
+ uint32_t blksz)
+{
+
+ bzero(kdh, sizeof(*kdh));
+ strncpy(kdh->magic, KERNELDUMPMAGIC, sizeof(kdh->magic));
+ strncpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
+ kdh->version = htod32(KERNELDUMPVERSION);
+ kdh->architectureversion = htod32(archver);
+ kdh->dumplength = htod64(dumplen);
+ kdh->dumptime = htod64(time_second);
+ kdh->blocksize = htod32(blksz);
+ strncpy(kdh->hostname, hostname, sizeof(kdh->hostname));
+ strncpy(kdh->versionstring, version, sizeof(kdh->versionstring));
+ if (panicstr != NULL)
+ strncpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
+ kdh->parity = kerneldump_parity(kdh);
+}
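
Since the dump is bracketed by this 512-byte header at both ends (see the CTASSERT above), a consumer such as savecore can validate it cheaply. A minimal sketch, assuming the kerneldumpheader layout and the XOR-parity convention of kerneldump_parity() from <sys/kerneldump.h>:

/* Sketch, not part of this commit: validate a header read back from disk. */
static int
kdh_is_valid(struct kerneldumpheader *h)
{

	if (strncmp(h->magic, KERNELDUMPMAGIC, sizeof(h->magic)) != 0)
		return (0);
	/*
	 * kerneldump_parity() XORs every 32-bit word of the header,
	 * including the stored parity field, so a correctly sealed
	 * header folds to zero.
	 */
	return (kerneldump_parity(h) == 0);
}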
+
+#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
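
A quick sanity check of PG2MB(): with 4 KB pages there are 1 << 8 = 256 pages per megabyte, so the macro converts a page count to megabytes, rounding up. For example, PG2MB(1) and PG2MB(256) both evaluate to 1, while PG2MB(257) evaluates to 2; it is used below to report remaining progress in MB.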
+
+static int
+blk_flush(struct dumperinfo *di)
+{
+ int error;
+
+ if (fragsz == 0)
+ return (0);
+
+ error = dump_write(di, dump_va, 0, dumplo, fragsz);
+ dumplo += fragsz;
+ fragsz = 0;
+ return (error);
+}
+
+static int
+blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
+{
+ size_t len;
+ int error, i, c;
+
+ error = 0;
+ if ((sz % PAGE_SIZE) != 0) {
+ printf("size not page aligned\n");
+ return (EINVAL);
+ }
+ if (ptr != NULL && pa != 0) {
+ printf("cant have both va and pa!\n");
+ return (EINVAL);
+ }
+ if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
+ printf("address not page aligned\n");
+ return (EINVAL);
+ }
+ if (ptr != NULL) {
+ /* If we're doing a virtual dump, flush any pre-existing pa pages */
+ error = blk_flush(di);
+ if (error)
+ return (error);
+ }
+ while (sz) {
+ len = (MAXDUMPPGS * PAGE_SIZE) - fragsz;
+ if (len > sz)
+ len = sz;
+ counter += len;
+ progress -= len;
+ if (counter >> 24) {
+ printf(" %lld", PG2MB(progress >> PAGE_SHIFT));
+ counter &= (1<<24) - 1;
+ }
+ if (ptr) {
+ error = dump_write(di, ptr, 0, dumplo, len);
+ if (error)
+ return (error);
+ dumplo += len;
+ ptr += len;
+ sz -= len;
+ } else {
+ for (i = 0; i < len; i += PAGE_SIZE)
+ dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
+ fragsz += len;
+ pa += len;
+ sz -= len;
+ if (fragsz == (MAXDUMPPGS * PAGE_SIZE)) {
+ error = blk_flush(di);
+ if (error)
+ return (error);
+ }
+ }
+
+ /* Check for user abort. */
+ c = cncheckc();
+ if (c == 0x03)
+ return (ECANCELED);
+ if (c != -1)
+ printf(" (CTRL-C to abort) ");
+ }
+
+ return (0);
+}
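
blk_write() is thus called in exactly one of two modes: with a mapped kernel virtual address (ptr != NULL, pa == 0), or with a bare physical address (ptr == NULL, pa != 0) that it maps itself via pmap_kenter_temporary() and batches up to MAXDUMPPGS pages before flushing. The two call styles, mirroring the calls made later in minidumpsys():

	/* Virtual mode: the object is already mapped (e.g. the msgbuf). */
	error = blk_write(di, (char *)msgbufp->msg_ptr, 0,
	    round_page(msgbufp->msg_size));

	/* Physical mode: a raw page frame; blk_write() maps it itself. */
	error = blk_write(di, NULL, pa, PAGE_SIZE);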
+
+/* A fake page table page, to avoid having to handle both 4K and 2M pages */
+static pt_entry_t fakept[NPTEPG];
+
+void
+minidumpsys(struct dumperinfo *di)
+{
+ uint64_t dumpsize;
+ uint32_t ptesize;
+ vm_offset_t va;
+ int error;
+ uint32_t bits;
+ uint64_t pa;
+ pd_entry_t *pd;
+ pt_entry_t *pt;
+ int i, j, k, bit;
+ struct minidumphdr mdhdr;
+
+ counter = 0;
+ /* Walk page table pages, set bits in vm_page_dump */
+ ptesize = 0;
+ for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
+ /*
+ * We always write a page, even if it is zero. Each
+ * page written corresponds to 2 MB of kernel virtual address space.
+ */
+ ptesize += PAGE_SIZE;
+ pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE); /* always mapped! */
+ j = va >> PDRSHIFT;
+ if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
+ /* This is an entire 2M page. */
+ pa = pd[j] & PG_PS_FRAME;
+ for (k = 0; k < NPTEPG; k++) {
+ if (is_dumpable(pa))
+ dump_add_page(pa);
+ pa += PAGE_SIZE;
+ }
+ continue;
+ }
+ if ((pd[j] & PG_V) == PG_V) {
+ /* set bit for each valid page in this 2MB block */
+ pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0);
+ for (k = 0; k < NPTEPG; k++) {
+ if ((pt[k] & PG_V) == PG_V) {
+ pa = pt[k] & PG_FRAME;
+ if (is_dumpable(pa))
+ dump_add_page(pa);
+ }
+ }
+ } else {
+ /* nothing, we're going to dump a null page */
+ }
+ }
+
+ /* Calculate dump size. */
+ dumpsize = ptesize;
+ dumpsize += round_page(msgbufp->msg_size);
+ dumpsize += round_page(vm_page_dump_size);
+ for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+ bits = vm_page_dump[i];
+ while (bits) {
+ bit = bsfl(bits);
+ pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
+ /* Clear out undumpable pages now if needed */
+ if (is_dumpable(pa)) {
+ dumpsize += PAGE_SIZE;
+ } else {
+ dump_drop_page(pa);
+ }
+ bits &= ~(1ul << bit);
+ }
+ }
+ dumpsize += PAGE_SIZE;
+
+ /* Determine dump offset on device. */
+ if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
+ error = ENOSPC;
+ goto fail;
+ }
+ dumplo = di->mediaoffset + di->mediasize - dumpsize;
+ dumplo -= sizeof(kdh) * 2;
+ progress = dumpsize;
+
+ /* Initialize mdhdr */
+ bzero(&mdhdr, sizeof(mdhdr));
+ strcpy(mdhdr.magic, MINIDUMP_MAGIC);
+ mdhdr.version = MINIDUMP_VERSION;
+ mdhdr.msgbufsize = msgbufp->msg_size;
+ mdhdr.bitmapsize = vm_page_dump_size;
+ mdhdr.ptesize = ptesize;
+ mdhdr.kernbase = KERNBASE;
+#ifdef PAE
+ mdhdr.paemode = 1;
+#endif
+
+ mkdumpheader(&kdh, KERNELDUMP_I386_VERSION, dumpsize, di->blocksize);
+
+ printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
+ printf("Dumping %llu MB:", (long long)dumpsize >> 20);
+
+ /* Dump leader */
+ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
+ if (error)
+ goto fail;
+ dumplo += sizeof(kdh);
+
+ /* Dump my header */
+ bzero(&fakept, sizeof(fakept));
+ bcopy(&mdhdr, &fakept, sizeof(mdhdr));
+ error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+ if (error)
+ goto fail;
+
+ /* Dump msgbuf up front */
+ error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
+ if (error)
+ goto fail;
+
+ /* Dump bitmap */
+ error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
+ if (error)
+ goto fail;
+
+ /* Dump kernel page table pages */
+ for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
+ /* We always write a page, even if it is zero */
+ pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE); /* always mapped! */
+ j = va >> PDRSHIFT;
+ if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
+ /* This is a single 2M block. Generate a fake PTP */
+ pa = pd[j] & PG_PS_FRAME;
+ for (k = 0; k < NPTEPG; k++) {
+ fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
+ }
+ error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+ if (error)
+ goto fail;
+ /* flush, in case we reuse fakept in the same block */
+ error = blk_flush(di);
+ if (error)
+ goto fail;
+ continue;
+ }
+ if ((pd[j] & PG_V) == PG_V) {
+ pa = pd[j] & PG_FRAME;
+ error = blk_write(di, 0, pa, PAGE_SIZE);
+ if (error)
+ goto fail;
+ } else {
+ bzero(fakept, sizeof(fakept));
+ error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+ if (error)
+ goto fail;
+ /* flush, in case we reuse fakept in the same block */
+ error = blk_flush(di);
+ if (error)
+ goto fail;
+ }
+ }
+
+ /* Dump memory chunks */
+ /* XXX cluster it up and use blk_dump() */
+ for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+ bits = vm_page_dump[i];
+ while (bits) {
+ bit = bsfl(bits);
+ pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
+ error = blk_write(di, 0, pa, PAGE_SIZE);
+ if (error)
+ goto fail;
+ bits &= ~(1ul << bit);
+ }
+ }
+
+ error = blk_flush(di);
+ if (error)
+ goto fail;
+
+ /* Dump trailer */
+ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
+ if (error)
+ goto fail;
+ dumplo += sizeof(kdh);
+
+ /* Signal completion, signoff and exit stage left. */
+ dump_write(di, NULL, 0, 0, 0);
+ printf("\nDump complete\n");
+ return;
+
+ fail:
+ if (error < 0)
+ error = -error;
+
+ if (error == ECANCELED)
+ printf("\nDump aborted\n");
+ else if (error == ENOSPC)
+ printf("\nDump failed. Partition too small.\n");
+ else
+ printf("\n** DUMP FAILED (ERROR %d) **\n", error);
+}
+
+void
+dump_add_page(vm_paddr_t pa)
+{
+ int idx, bit;
+
+ pa >>= PAGE_SHIFT;
+ idx = pa >> 5; /* 2^5 = 32 */
+ bit = pa & 31;
+ atomic_set_int(&vm_page_dump[idx], 1ul << bit);
+}
+
+void
+dump_drop_page(vm_paddr_t pa)
+{
+ int idx, bit;
+
+ pa >>= PAGE_SHIFT;
+ idx = pa >> 5; /* 2^5 = 32 */
+ bit = pa & 31;
+ atomic_clear_int(&vm_page_dump[idx], 1ul << bit);
+}
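
dump_add_page() and dump_drop_page() treat vm_page_dump as a flat bitmap over page frame numbers: one 32-bit word covers 32 frames, so frame pfn lives at word pfn >> 5, bit pfn & 31. A hypothetical helper, not in the commit, that queries the same bitmap:

static int
dump_page_is_marked(vm_paddr_t pa)
{
	vm_paddr_t pfn = pa >> PAGE_SHIFT;

	/* One uint32_t word covers 32 page frames. */
	return ((vm_page_dump[pfn >> 5] & (1ul << (pfn & 31))) != 0);
}

The atomic_set_int()/atomic_clear_int() calls above let callers mark and unmark pages without any additional locking.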
+
Index: locore.s
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/locore.s,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/locore.s -L sys/i386/i386/locore.s -u -r1.1.1.1 -r1.2
--- sys/i386/i386/locore.s
+++ sys/i386/i386/locore.s
@@ -30,7 +30,7 @@
* SUCH DAMAGE.
*
* from: @(#)locore.s 7.3 (Berkeley) 5/13/91
- * $FreeBSD: src/sys/i386/i386/locore.s,v 1.186 2005/05/16 09:47:53 obrien Exp $
+ * $FreeBSD: src/sys/i386/i386/locore.s,v 1.188 2007/03/24 19:53:22 alc Exp $
*
* originally from: locore.s, by William F. Jolitz
*
@@ -777,21 +777,6 @@
movl %esi, R(SMPpt) /* relocated to KVM space */
#endif /* SMP */
-/* Map page zero read-write so bios32 calls can use it */
- xorl %eax, %eax
- movl $PG_RW,%edx
- movl $1,%ecx
- fillkptphys(%edx)
-
-/* Map read-only from page 1 to the beginning of the kernel text section */
- movl $PAGE_SIZE, %eax
- xorl %edx,%edx
- movl $R(btext),%ecx
- addl $PAGE_MASK,%ecx
- subl %eax,%ecx
- shrl $PAGE_SHIFT,%ecx
- fillkptphys(%edx)
-
/*
* Enable PSE and PGE.
*/
@@ -815,22 +800,21 @@
#endif
/*
- * Write page tables for the kernel starting at btext and
- * until the end. Make sure to map read+write. We do this even
+ * Initialize page table pages mapping physical address zero through the
+ * end of the kernel. All of the page table entries allow read and write
+ * access. Write access to the first physical page is required by bios32
+ * calls, and write access to the first 1 MB of physical memory is required
+ * by ACPI for implementing suspend and resume. We do this even
* if we've enabled PSE above, we'll just switch the corresponding kernel
* PDEs before we turn on paging.
*
* XXX: We waste some pages here in the PSE case! DON'T BLINDLY REMOVE
* THIS! SMP needs the page table to be there to map the kernel P==V.
*/
- movl $R(btext),%eax
- addl $PAGE_MASK, %eax
- andl $~PAGE_MASK, %eax
- movl $PG_RW,%edx
+ xorl %eax, %eax
movl R(KERNend),%ecx
- subl %eax,%ecx
shrl $PAGE_SHIFT,%ecx
- fillkptphys(%edx)
+ fillkptphys($PG_RW)
/* Map page directory. */
#ifdef PAE
@@ -901,17 +885,43 @@
fillkpt(R(SMPptpa), $PG_RW)
#endif /* SMP */
-/* install a pde for temporary double map of bottom of VA */
+/*
+ * Create an identity mapping for low physical memory, including the kernel.
+ * The part of this mapping that covers the first 1 MB of physical memory
+ * becomes a permanent part of the kernel's address space. The rest of this
+ * mapping is destroyed in pmap_bootstrap(). Ordinarily, the same page table
+ * pages are shared by the identity mapping and the kernel's native mapping.
+ * However, the permanent identity mapping cannot contain PG_G mappings.
+ * Thus, if the kernel is loaded within the permanent identity mapping, that
+ * page table page must be duplicated and not shared.
+ *
+ * N.B. Due to errata concerning large pages and physical address zero,
+ * a PG_PS mapping is not used.
+ */
movl R(KPTphys), %eax
xorl %ebx, %ebx
movl $NKPT, %ecx
fillkpt(R(IdlePTD), $PG_RW)
+#if KERNLOAD < (1 << PDRSHIFT)
+ testl $PG_G, R(pgeflag)
+ jz 1f
+ ALLOCPAGES(1)
+ movl %esi, %edi
+ movl R(IdlePTD), %eax
+ movl (%eax), %esi
+ movl %edi, (%eax)
+ movl $PAGE_SIZE, %ecx
+ cld
+ rep
+ movsb
+1:
+#endif
/*
- * For the non-PSE case, install PDEs for PTs covering the kernel.
+ * For the non-PSE case, install PDEs for PTs covering the KVA.
* For the PSE case, do the same, but clobber the ones corresponding
- * to the kernel (from btext to KERNend) with 4M ('PS') PDEs immediately
- * after.
+ * to the kernel (from btext to KERNend) with 4M (2M for PAE) ('PS')
+ * PDEs immediately after.
*/
movl R(KPTphys), %eax
movl $KPTDI, %ebx
Index: elan-mmcr.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/elan-mmcr.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/elan-mmcr.c -L sys/i386/i386/elan-mmcr.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/elan-mmcr.c
+++ sys/i386/i386/elan-mmcr.c
@@ -39,7 +39,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/elan-mmcr.c,v 1.31.2.1 2005/08/16 22:47:14 phk Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/elan-mmcr.c,v 1.35 2007/06/04 18:25:06 dwmalone Exp $");
#include "opt_cpu.h"
#include <sys/param.h>
@@ -313,7 +313,7 @@
int error;
f = elan_timecounter.tc_frequency * 4;
- error = sysctl_handle_int(oidp, &f, sizeof(f), req);
+ error = sysctl_handle_int(oidp, &f, 0, req);
if (error == 0 && req->newptr != NULL)
elan_timecounter.tc_frequency = (f + 3) / 4;
return (error);
@@ -367,11 +367,11 @@
static void
elan_watchdog(void *foo __unused, u_int spec, int *error)
{
- u_int u, v;
+ u_int u, v, w;
static u_int cur;
u = spec & WD_INTERVAL;
- if (spec && u <= 35) {
+ if (u > 0 && u <= 35) {
u = imax(u - 5, 24);
v = 2 << (u - 24);
v |= 0xc000;
@@ -383,7 +383,7 @@
* for other reasons. Save and restore the GP echo mode
* around our hardware tom-foolery.
*/
- u = elan_mmcr->GPECHO;
+ w = elan_mmcr->GPECHO;
elan_mmcr->GPECHO = 0;
if (v != cur) {
/* Clear the ENB bit */
@@ -401,19 +401,17 @@
elan_mmcr->WDTMRCTL = 0xaaaa;
elan_mmcr->WDTMRCTL = 0x5555;
}
- elan_mmcr->GPECHO = u;
+ elan_mmcr->GPECHO = w;
*error = 0;
- return;
} else {
- u = elan_mmcr->GPECHO;
+ w = elan_mmcr->GPECHO;
elan_mmcr->GPECHO = 0;
elan_mmcr->WDTMRCTL = 0x3333;
elan_mmcr->WDTMRCTL = 0xcccc;
elan_mmcr->WDTMRCTL = 0x4080;
- elan_mmcr->WDTMRCTL = u;
- elan_mmcr->GPECHO = u;
+ elan_mmcr->WDTMRCTL = w; /* XXX What does this statement do? */
+ elan_mmcr->GPECHO = w;
cur = 0;
- return;
}
}
--- /dev/null
+++ sys/i386/i386/bpf_jit_machdep.h
@@ -0,0 +1,404 @@
+/*-
+ * Copyright (c) 2002 - 2003 NetGroup, Politecnico di Torino (Italy)
+ * Copyright (c) 2005 Jung-uk Kim <jkim at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Politecnico di Torino nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/i386/bpf_jit_machdep.h,v 1.3 2005/12/06 20:11:07 jkim Exp $
+ */
+
+#ifndef _BPF_JIT_MACHDEP_H_
+#define _BPF_JIT_MACHDEP_H_
+
+/*
+ * Registers
+ */
+#define EAX 0
+#define ECX 1
+#define EDX 2
+#define EBX 3
+#define ESP 4
+#define EBP 5
+#define ESI 6
+#define EDI 7
+
+#define AX 0
+#define CX 1
+#define DX 2
+#define BX 3
+#define SP 4
+#define BP 5
+#define SI 6
+#define DI 7
+
+#define AL 0
+#define CL 1
+#define DL 2
+#define BL 3
+
+/* A stream of native binary code. */
+typedef struct bpf_bin_stream {
+ /* Current native instruction pointer. */
+ int cur_ip;
+
+ /*
+ * Current BPF instruction pointer, i.e. position in
+ * the BPF program reached by the jitter.
+ */
+ int bpf_pc;
+
+ /* Instruction buffer, contains the generated native code. */
+ char *ibuf;
+
+ /* Jumps reference table. */
+ u_int *refs;
+} bpf_bin_stream;
+
+/*
+ * Prototype of the emit functions.
+ *
+ * Different emit functions are used to create the reference table and
+ * to generate the actual filtering code. This allows the instruction
+ * macros to be simpler.
+ * The first parameter is the stream that will receive the data.
+ * The second is a variable containing the data.
+ * The third is the length, which can be 1, 2, or 4, since it is possible
+ * to emit a byte, a short, or a word at a time.
+ */
+typedef void (*emit_func)(bpf_bin_stream *stream, u_int value, u_int n);
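
The companion bpf_jit_machdep.c (added by this commit, though not part of this header hunk) drives these macros in two passes: the first pass plugs in an emitter that only counts bytes, sizing the code buffer and populating the refs[] jump table, and the second pass plugs in one that actually stores opcodes into ibuf. A minimal sketch of the counting emitter, with hypothetical naming:

/* Pass 1: measure only; no instruction bytes are written. */
static void
emit_length(bpf_bin_stream *stream, u_int value, u_int n)
{

	if (stream->refs != NULL)
		(stream->refs)[stream->bpf_pc] += n;
	stream->cur_ip += n;	/* advance as if n bytes had been emitted */
}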
+
+/*
+ * Native instruction macros
+ */
+
+/* mov r32,i32 */
+#define MOVid(r32, i32) do { \
+ emitm(&stream, (11 << 4) | (1 << 3) | (r32 & 0x7), 1); \
+ emitm(&stream, i32, 4); \
+} while (0)
+
+/* mov dr32,sr32 */
+#define MOVrd(dr32, sr32) do { \
+ emitm(&stream, (8 << 4) | 3 | (1 << 3), 1); \
+ emitm(&stream, \
+ (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1); \
+} while (0)
+
+/* mov dr32,sr32[off] */
+#define MOVodd(dr32, sr32, off) do { \
+ emitm(&stream, (8 << 4) | 3 | (1 << 3), 1); \
+ emitm(&stream, \
+ (1 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1); \
+ emitm(&stream, off, 1); \
+} while (0)
+
+/* mov dr32,sr32[or32] */
+#define MOVobd(dr32, sr32, or32) do { \
+ emitm(&stream, (8 << 4) | 3 | (1 << 3), 1); \
+ emitm(&stream, ((dr32 & 0x7) << 3) | 4, 1); \
+ emitm(&stream, ((or32 & 0x7) << 3) | (sr32 & 0x7), 1); \
+} while (0)
+
+/* mov dr16,sr32[or32] */
+#define MOVobw(dr32, sr32, or32) do { \
+ emitm(&stream, 0x66, 1); \
+ emitm(&stream, (8 << 4) | 3 | (1 << 3), 1); \
+ emitm(&stream, ((dr32 & 0x7) << 3) | 4, 1); \
+ emitm(&stream, ((or32 & 0x7) << 3) | (sr32 & 0x7), 1); \
+} while (0)
+
+/* mov dr8,sr32[or32] */
+#define MOVobb(dr8, sr32, or32) do { \
+ emitm(&stream, 0x8a, 1); \
+ emitm(&stream, ((dr8 & 0x7) << 3) | 4, 1); \
+ emitm(&stream, ((or32 & 0x7) << 3) | (sr32 & 0x7), 1); \
+} while (0)
+
+/* mov [dr32][or32],sr32 */
+#define MOVomd(dr32, or32, sr32) do { \
+ emitm(&stream, 0x89, 1); \
+ emitm(&stream, ((sr32 & 0x7) << 3) | 4, 1); \
+ emitm(&stream, ((or32 & 0x7) << 3) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* bswap dr32 */
+#define BSWAP(dr32) do { \
+ emitm(&stream, 0xf, 1); \
+ emitm(&stream, (0x19 << 3) | dr32, 1); \
+} while (0)
+
+/* xchg al,ah */
+#define SWAP_AX() do { \
+ emitm(&stream, 0x86, 1); \
+ emitm(&stream, 0xc4, 1); \
+} while (0)
+
+/* push r32 */
+#define PUSH(r32) do { \
+ emitm(&stream, (5 << 4) | (0 << 3) | (r32 & 0x7), 1); \
+} while (0)
+
+/* pop r32 */
+#define POP(r32) do { \
+ emitm(&stream, (5 << 4) | (1 << 3) | (r32 & 0x7), 1); \
+} while (0)
+
+/* leave/ret */
+#define LEAVE_RET() do { \
+ emitm(&stream, 0xc9, 1); \
+ emitm(&stream, 0xc3, 1); \
+} while (0)
+
+/* add dr32,sr32 */
+#define ADDrd(dr32, sr32) do { \
+ emitm(&stream, 0x03, 1); \
+ emitm(&stream, \
+ (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1); \
+} while (0)
+
+/* add eax,i32 */
+#define ADD_EAXi(i32) do { \
+ emitm(&stream, 0x05, 1); \
+ emitm(&stream, i32, 4); \
+} while (0)
+
+/* add r32,i32 */
+#define ADDid(r32, i32) do { \
+ emitm(&stream, 0x81, 1); \
+ emitm(&stream, (24 << 3) | r32, 1); \
+ emitm(&stream, i32, 4); \
+} while (0)
+
+/* add r32,i8 */
+#define ADDib(r32, i8) do { \
+ emitm(&stream, 0x83, 1); \
+ emitm(&stream, (24 << 3) | r32, 1); \
+ emitm(&stream, i8, 1); \
+} while (0)
+
+/* sub dr32,sr32 */
+#define SUBrd(dr32, sr32) do { \
+ emitm(&stream, 0x2b, 1); \
+ emitm(&stream, \
+ (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1); \
+} while (0)
+
+/* sub eax,i32 */
+#define SUB_EAXi(i32) do { \
+ emitm(&stream, 0x2d, 1); \
+ emitm(&stream, i32, 4); \
+} while (0)
+
+/* mul r32 */
+#define MULrd(r32) do { \
+ emitm(&stream, 0xf7, 1); \
+ emitm(&stream, (7 << 5) | (r32 & 0x7), 1); \
+} while (0)
+
+/* div r32 */
+#define DIVrd(r32) do { \
+ emitm(&stream, 0xf7, 1); \
+ emitm(&stream, (15 << 4) | (r32 & 0x7), 1); \
+} while (0)
+
+/* and r8,i8 */
+#define ANDib(r8, i8) do { \
+ emitm(&stream, 0x80, 1); \
+ emitm(&stream, (7 << 5) | r8, 1); \
+ emitm(&stream, i8, 1); \
+} while (0)
+
+/* and r32,i32 */
+#define ANDid(r32, i32) do { \
+ if (r32 == EAX) { \
+ emitm(&stream, 0x25, 1); \
+ emitm(&stream, i32, 4); \
+ } else { \
+ emitm(&stream, 0x81, 1); \
+ emitm(&stream, (7 << 5) | r32, 1); \
+ emitm(&stream, i32, 4); \
+ } \
+} while (0)
+
+/* and dr32,sr32 */
+#define ANDrd(dr32, sr32) do { \
+ emitm(&stream, 0x23, 1); \
+ emitm(&stream, \
+ (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1); \
+} while (0)
+
+/* or dr32,sr32 */
+#define ORrd(dr32, sr32) do { \
+ emitm(&stream, 0x0b, 1); \
+ emitm(&stream, \
+ (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1); \
+} while (0)
+
+/* or r32,i32 */
+#define ORid(r32, i32) do { \
+ if (r32 == EAX) { \
+ emitm(&stream, 0x0d, 1); \
+ emitm(&stream, i32, 4); \
+ } else { \
+ emitm(&stream, 0x81, 1); \
+ emitm(&stream, (25 << 3) | r32, 1); \
+ emitm(&stream, i32, 4); \
+ } \
+} while (0)
+
+/* shl r32,i8 */
+#define SHLib(r32, i8) do { \
+ emitm(&stream, 0xc1, 1); \
+ emitm(&stream, (7 << 5) | (r32 & 0x7), 1); \
+ emitm(&stream, i8, 1); \
+} while (0)
+
+/* shl dr32,cl */
+#define SHL_CLrb(dr32) do { \
+ emitm(&stream, 0xd3, 1); \
+ emitm(&stream, (7 << 5) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* shr r32,i8 */
+#define SHRib(r32, i8) do { \
+ emitm(&stream, 0xc1, 1); \
+ emitm(&stream, (29 << 3) | (r32 & 0x7), 1); \
+ emitm(&stream, i8, 1); \
+} while (0)
+
+/* shr dr32,cl */
+#define SHR_CLrb(dr32) do { \
+ emitm(&stream, 0xd3, 1); \
+ emitm(&stream, (29 << 3) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* neg r32 */
+#define NEGd(r32) do { \
+ emitm(&stream, 0xf7, 1); \
+ emitm(&stream, (27 << 3) | (r32 & 0x7), 1); \
+} while (0)
+
+/* cmp dr32,sr32[off] */
+#define CMPodd(dr32, sr32, off) do { \
+ emitm(&stream, (3 << 4) | 3 | (1 << 3), 1); \
+ emitm(&stream, \
+ (1 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1); \
+ emitm(&stream, off, 1); \
+} while (0)
+
+/* cmp dr32,sr32 */
+#define CMPrd(dr32, sr32) do { \
+ emitm(&stream, 0x3b, 1); \
+ emitm(&stream, \
+ (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1); \
+} while (0)
+
+/* cmp dr32,i32 */
+#define CMPid(dr32, i32) do { \
+ if (dr32 == EAX){ \
+ emitm(&stream, 0x3d, 1); \
+ emitm(&stream, i32, 4); \
+ } else { \
+ emitm(&stream, 0x81, 1); \
+ emitm(&stream, (0x1f << 3) | (dr32 & 0x7), 1); \
+ emitm(&stream, i32, 4); \
+ } \
+} while (0)
+
+/* jne off8 */
+#define JNEb(off8) do { \
+ emitm(&stream, 0x75, 1); \
+ emitm(&stream, off8, 1); \
+} while (0)
+
+/* je off32 */
+#define JE(off32) do { \
+ emitm(&stream, 0x0f, 1); \
+ emitm(&stream, 0x84, 1); \
+ emitm(&stream, off32, 4); \
+} while (0)
+
+/* jle off32 */
+#define JLE(off32) do { \
+ emitm(&stream, 0x0f, 1); \
+ emitm(&stream, 0x8e, 1); \
+ emitm(&stream, off32, 4); \
+} while (0)
+
+/* jle off8 */
+#define JLEb(off8) do { \
+ emitm(&stream, 0x7e, 1); \
+ emitm(&stream, off8, 1); \
+} while (0)
+
+/* ja off32 */
+#define JA(off32) do { \
+ emitm(&stream, 0x0f, 1); \
+ emitm(&stream, 0x87, 1); \
+ emitm(&stream, off32, 4); \
+} while (0)
+
+/* jae off32 */
+#define JAE(off32) do { \
+ emitm(&stream, 0x0f, 1); \
+ emitm(&stream, 0x83, 1); \
+ emitm(&stream, off32, 4); \
+} while (0)
+
+/* jg off32 */
+#define JG(off32) do { \
+ emitm(&stream, 0x0f, 1); \
+ emitm(&stream, 0x8f, 1); \
+ emitm(&stream, off32, 4); \
+} while (0)
+
+/* jge off32 */
+#define JGE(off32) do { \
+ emitm(&stream, 0x0f, 1); \
+ emitm(&stream, 0x8d, 1); \
+ emitm(&stream, off32, 4); \
+} while (0)
+
+/* jmp off32 */
+#define JMP(off32) do { \
+ emitm(&stream, 0xe9, 1); \
+ emitm(&stream, off32, 4); \
+} while (0)
+
+/* xor eax,eax */
+#define ZERO_EAX() do { \
+ emitm(&stream, 0x31, 1); \
+ emitm(&stream, 0xc0, 1); \
+} while (0)
+
+/* xor edx,edx */
+#define ZERO_EDX() do { \
+ emitm(&stream, 0x31, 1); \
+ emitm(&stream, 0xd2, 1); \
+} while (0)
+
+#endif /* _BPF_JIT_MACHDEP_H_ */
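
As a concrete check of the macro arithmetic: MOVid(EAX, 1) first emits (11 << 4) | (1 << 3) | 0 = 0xb8 and then the 4-byte immediate, so the stream receives b8 01 00 00 00, the standard encoding of "mov eax, 1". Similarly, PUSH(EBP) emits the single byte (5 << 4) | (0 << 3) | 5 = 0x55, i.e. "push ebp".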
--- /dev/null
+++ sys/i386/i386/msi.c
@@ -0,0 +1,506 @@
+/*-
+ * Copyright (c) 2006 Yahoo!, Inc.
+ * All rights reserved.
+ * Written by: John Baldwin <jhb at FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Support for PCI Message Signaled Interrupts (MSI). MSI interrupts on
+ * x86 are basically APIC messages that the northbridge delivers directly
+ * to the local APICs as if they had come from an I/O APIC.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/msi.c,v 1.6.2.1 2007/10/30 18:00:56 jhb Exp $");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/sx.h>
+#include <sys/systm.h>
+#include <machine/apicreg.h>
+#include <machine/md_var.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
+#include <dev/pci/pcivar.h>
+
+/* Fields in address for Intel MSI messages. */
+#define MSI_INTEL_ADDR_DEST 0x000ff000
+#define MSI_INTEL_ADDR_RH 0x00000008
+# define MSI_INTEL_ADDR_RH_ON 0x00000008
+# define MSI_INTEL_ADDR_RH_OFF 0x00000000
+#define MSI_INTEL_ADDR_DM 0x00000004
+# define MSI_INTEL_ADDR_DM_PHYSICAL 0x00000000
+# define MSI_INTEL_ADDR_DM_LOGICAL 0x00000004
+
+/* Fields in data for Intel MSI messages. */
+#define MSI_INTEL_DATA_TRGRMOD IOART_TRGRMOD /* Trigger mode. */
+# define MSI_INTEL_DATA_TRGREDG IOART_TRGREDG
+# define MSI_INTEL_DATA_TRGRLVL IOART_TRGRLVL
+#define MSI_INTEL_DATA_LEVEL 0x00004000 /* Polarity. */
+# define MSI_INTEL_DATA_DEASSERT 0x00000000
+# define MSI_INTEL_DATA_ASSERT 0x00004000
+#define MSI_INTEL_DATA_DELMOD IOART_DELMOD /* Delivery mode. */
+# define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED
+# define MSI_INTEL_DATA_DELLOPRI IOART_DELLOPRI
+# define MSI_INTEL_DATA_DELSMI IOART_DELSMI
+# define MSI_INTEL_DATA_DELNMI IOART_DELNMI
+# define MSI_INTEL_DATA_DELINIT IOART_DELINIT
+# define MSI_INTEL_DATA_DELEXINT IOART_DELEXINT
+#define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */
+
+/*
+ * Build Intel MSI message and data values from a source. AMD64 systems
+ * seem to be compatible, so we use the same function for both.
+ */
+#define INTEL_ADDR(msi) \
+ (MSI_INTEL_ADDR_BASE | (msi)->msi_cpu << 12 | \
+ MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL)
+#define INTEL_DATA(msi) \
+ (MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED | (msi)->msi_vector)
+
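To make the macros concrete: assuming the conventional MSI_INTEL_ADDR_BASE of 0xfee00000 from apicreg.h, a message routed to local APIC ID 1 with IDT vector 0x60 yields an address of 0xfee01000 (base | 1 << 12, redirection hint off, physical destination mode) and a data value of 0x0060, since the edge-trigger and fixed-delivery encodings are both zero, leaving only the vector bits.
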
+static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI");
+
+/*
+ * MSI sources are bunched into groups. This is because MSI forces
+ * all of the messages to share the address and data registers and
+ * thus certain properties (such as the local APIC ID target on x86).
+ * Each group has a 'first' source that contains information global to
+ * the group. These fields are marked with (g) below.
+ *
+ * Note that local APIC ID is kind of special. Each message will be
+ * assigned an ID by the system; however, a group will use the ID from
+ * the first message.
+ *
+ * For MSI-X, each message is isolated.
+ */
+struct msi_intsrc {
+ struct intsrc msi_intsrc;
+ device_t msi_dev; /* Owning device. (g) */
+ struct msi_intsrc *msi_first; /* First source in group. */
+ u_int msi_irq; /* IRQ cookie. */
+ u_int msi_msix; /* MSI-X message. */
+ u_int msi_vector:8; /* IDT vector. */
+ u_int msi_cpu:8; /* Local APIC ID. (g) */
+ u_int msi_count:8; /* Messages in this group. (g) */
+};
+
+static void msi_create_source(void);
+static void msi_enable_source(struct intsrc *isrc);
+static void msi_disable_source(struct intsrc *isrc, int eoi);
+static void msi_eoi_source(struct intsrc *isrc);
+static void msi_enable_intr(struct intsrc *isrc);
+static void msi_disable_intr(struct intsrc *isrc);
+static int msi_vector(struct intsrc *isrc);
+static int msi_source_pending(struct intsrc *isrc);
+static int msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
+ enum intr_polarity pol);
+static void msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
+
+struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source,
+ msi_enable_intr, msi_disable_intr, msi_vector,
+ msi_source_pending, NULL, NULL, msi_config_intr,
+ msi_assign_cpu };
+
+static int msi_enabled;
+static int msi_last_irq;
+static struct mtx msi_lock;
+
+static void
+msi_enable_source(struct intsrc *isrc)
+{
+}
+
+static void
+msi_disable_source(struct intsrc *isrc, int eoi)
+{
+
+ if (eoi == PIC_EOI)
+ lapic_eoi();
+}
+
+static void
+msi_eoi_source(struct intsrc *isrc)
+{
+
+ lapic_eoi();
+}
+
+static void
+msi_enable_intr(struct intsrc *isrc)
+{
+ struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
+
+ apic_enable_vector(msi->msi_vector);
+}
+
+static void
+msi_disable_intr(struct intsrc *isrc)
+{
+ struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
+
+ apic_disable_vector(msi->msi_vector);
+}
+
+static int
+msi_vector(struct intsrc *isrc)
+{
+ struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
+
+ return (msi->msi_irq);
+}
+
+static int
+msi_source_pending(struct intsrc *isrc)
+{
+
+ return (0);
+}
+
+static int
+msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
+ enum intr_polarity pol)
+{
+
+ return (ENODEV);
+}
+
+static void
+msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
+{
+ struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
+
+ msi->msi_cpu = apic_id;
+ if (bootverbose)
+ printf("msi: Assigning %s IRQ %d to local APIC %u\n",
+ msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq,
+ msi->msi_cpu);
+ pci_remap_msi_irq(msi->msi_dev, msi->msi_irq);
+}
+
+void
+msi_init(void)
+{
+
+ /* Check if we have a supported CPU. */
+ if (!(strcmp(cpu_vendor, "GenuineIntel") == 0 ||
+ strcmp(cpu_vendor, "AuthenticAMD") == 0))
+ return;
+
+ msi_enabled = 1;
+ intr_register_pic(&msi_pic);
+ mtx_init(&msi_lock, "msi", NULL, MTX_DEF);
+}
+
+void
+msi_create_source(void)
+{
+ struct msi_intsrc *msi;
+ u_int irq;
+
+ mtx_lock(&msi_lock);
+ if (msi_last_irq >= NUM_MSI_INTS) {
+ mtx_unlock(&msi_lock);
+ return;
+ }
+ irq = msi_last_irq + FIRST_MSI_INT;
+ msi_last_irq++;
+ mtx_unlock(&msi_lock);
+
+ msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO);
+ msi->msi_intsrc.is_pic = &msi_pic;
+ msi->msi_irq = irq;
+ intr_register_source(&msi->msi_intsrc);
+ nexus_add_irq(irq);
+}
+
+/*
+ * Try to allocate 'count' interrupt sources with contiguous IDT values. If
+ * we allocate any new sources, then their IRQ values will be at the end of
+ * the irqs[] array, since newly created sources are assigned the highest
+ * unused IRQ numbers.
+ */
+int
+msi_alloc(device_t dev, int count, int maxcount, int *irqs)
+{
+ struct msi_intsrc *msi, *fsrc;
+ int cnt, i, vector;
+
+ if (!msi_enabled)
+ return (ENXIO);
+
+again:
+ mtx_lock(&msi_lock);
+
+ /* Try to find 'count' free IRQs. */
+ cnt = 0;
+ for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
+ msi = (struct msi_intsrc *)intr_lookup_source(i);
+
+ /* End of allocated sources, so break. */
+ if (msi == NULL)
+ break;
+
+ /* If this is a free one, save its IRQ in the array. */
+ if (msi->msi_dev == NULL) {
+ irqs[cnt] = i;
+ cnt++;
+ if (cnt == count)
+ break;
+ }
+ }
+
+ /* Do we need to create some new sources? */
+ if (cnt < count) {
+ /* If we would exceed the max, give up. */
+ if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) {
+ mtx_unlock(&msi_lock);
+ return (ENXIO);
+ }
+ mtx_unlock(&msi_lock);
+
+ /* We need count - cnt more sources. */
+ while (cnt < count) {
+ msi_create_source();
+ cnt++;
+ }
+ goto again;
+ }
+
+ /* Ok, we now have the IRQs allocated. */
+ KASSERT(cnt == count, ("count mismatch"));
+
+ /* Allocate 'count' IDT vectors. */
+ vector = apic_alloc_vectors(irqs, count, maxcount);
+ if (vector == 0) {
+ mtx_unlock(&msi_lock);
+ return (ENOSPC);
+ }
+
+ /* Assign IDT vectors and make these messages owned by 'dev'. */
+ fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
+ for (i = 0; i < count; i++) {
+ msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
+ msi->msi_dev = dev;
+ msi->msi_vector = vector + i;
+ if (bootverbose)
+ printf("msi: routing MSI IRQ %d to vector %u\n",
+ msi->msi_irq, msi->msi_vector);
+ msi->msi_first = fsrc;
+ KASSERT(msi->msi_intsrc.is_handlers == 0,
+ ("dead MSI has handlers"));
+ }
+ fsrc->msi_count = count;
+ mtx_unlock(&msi_lock);
+
+ return (0);
+}
+
+int
+msi_release(int *irqs, int count)
+{
+ struct msi_intsrc *msi, *first;
+ int i;
+
+ mtx_lock(&msi_lock);
+ first = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
+ if (first == NULL) {
+ mtx_unlock(&msi_lock);
+ return (ENOENT);
+ }
+
+ /* Make sure this isn't an MSI-X message. */
+ if (first->msi_msix) {
+ mtx_unlock(&msi_lock);
+ return (EINVAL);
+ }
+
+ /* Make sure this message is allocated to a group. */
+ if (first->msi_first == NULL) {
+ mtx_unlock(&msi_lock);
+ return (ENXIO);
+ }
+
+ /*
+ * Make sure this is the start of a group and that we are releasing
+ * the entire group.
+ */
+ if (first->msi_first != first || first->msi_count != count) {
+ mtx_unlock(&msi_lock);
+ return (EINVAL);
+ }
+ KASSERT(first->msi_dev != NULL, ("unowned group"));
+
+ /* Clear all the extra messages in the group. */
+ for (i = 1; i < count; i++) {
+ msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
+ KASSERT(msi->msi_first == first, ("message not in group"));
+ KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch"));
+ msi->msi_first = NULL;
+ msi->msi_dev = NULL;
+ apic_free_vector(msi->msi_vector, msi->msi_irq);
+ msi->msi_vector = 0;
+ }
+
+ /* Clear out the first message. */
+ first->msi_first = NULL;
+ first->msi_dev = NULL;
+ apic_free_vector(first->msi_vector, first->msi_irq);
+ first->msi_vector = 0;
+ first->msi_count = 0;
+
+ mtx_unlock(&msi_lock);
+ return (0);
+}
+
+int
+msi_map(int irq, uint64_t *addr, uint32_t *data)
+{
+ struct msi_intsrc *msi;
+
+ mtx_lock(&msi_lock);
+ msi = (struct msi_intsrc *)intr_lookup_source(irq);
+ if (msi == NULL) {
+ mtx_unlock(&msi_lock);
+ return (ENOENT);
+ }
+
+ /* Make sure this message is allocated to a device. */
+ if (msi->msi_dev == NULL) {
+ mtx_unlock(&msi_lock);
+ return (ENXIO);
+ }
+
+ /*
+ * If this message isn't an MSI-X message, make sure it's part
+ * of a group, and switch to the first message in the
+ * group.
+ */
+ if (!msi->msi_msix) {
+ if (msi->msi_first == NULL) {
+ mtx_unlock(&msi_lock);
+ return (ENXIO);
+ }
+ msi = msi->msi_first;
+ }
+
+ *addr = INTEL_ADDR(msi);
+ *data = INTEL_DATA(msi);
+ mtx_unlock(&msi_lock);
+ return (0);
+}
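
A hypothetical consumer sketch, not part of this commit: a host bridge driver feeds the routed pair into a function's MSI capability registers. The capability offset msi_cap and the 32-bit address layout are assumptions here; 64-bit capable devices place the data register differently.

	uint64_t addr;
	uint32_t data;

	if (msi_map(irq, &addr, &data) == 0) {
		pci_write_config(dev, msi_cap + PCIR_MSI_ADDR, addr, 4);
		pci_write_config(dev, msi_cap + PCIR_MSI_DATA, data, 2);
	}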
+
+int
+msix_alloc(device_t dev, int *irq)
+{
+ struct msi_intsrc *msi;
+ int i, vector;
+
+ if (!msi_enabled)
+ return (ENXIO);
+
+again:
+ mtx_lock(&msi_lock);
+
+ /* Find a free IRQ. */
+ for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
+ msi = (struct msi_intsrc *)intr_lookup_source(i);
+
+ /* End of allocated sources, so break. */
+ if (msi == NULL)
+ break;
+
+ /* Stop at the first free source. */
+ if (msi->msi_dev == NULL)
+ break;
+ }
+
+ /* Do we need to create a new source? */
+ if (msi == NULL) {
+ /* If we would exceed the max, give up. */
+ if (i + 1 > FIRST_MSI_INT + NUM_MSI_INTS) {
+ mtx_unlock(&msi_lock);
+ return (ENXIO);
+ }
+ mtx_unlock(&msi_lock);
+
+ /* Create a new source. */
+ msi_create_source();
+ goto again;
+ }
+
+ /* Allocate an IDT vector. */
+ vector = apic_alloc_vector(i);
+ if (bootverbose)
+ printf("msi: routing MSI-X IRQ %d to vector %u\n", msi->msi_irq,
+ vector);
+
+ /* Setup source. */
+ msi->msi_dev = dev;
+ msi->msi_vector = vector;
+ msi->msi_msix = 1;
+
+ KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers"));
+ mtx_unlock(&msi_lock);
+
+ *irq = i;
+ return (0);
+}
+
+int
+msix_release(int irq)
+{
+ struct msi_intsrc *msi;
+
+ mtx_lock(&msi_lock);
+ msi = (struct msi_intsrc *)intr_lookup_source(irq);
+ if (msi == NULL) {
+ mtx_unlock(&msi_lock);
+ return (ENOENT);
+ }
+
+ /* Make sure this is an MSI-X message. */
+ if (!msi->msi_msix) {
+ mtx_unlock(&msi_lock);
+ return (EINVAL);
+ }
+
+ KASSERT(msi->msi_dev != NULL, ("unowned message"));
+
+ /* Clear out the message. */
+ msi->msi_dev = NULL;
+ apic_free_vector(msi->msi_vector, msi->msi_irq);
+ msi->msi_vector = 0;
+ msi->msi_msix = 0;
+
+ mtx_unlock(&msi_lock);
+ return (0);
+}
Index: dump_machdep.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/dump_machdep.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/dump_machdep.c -L sys/i386/i386/dump_machdep.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/dump_machdep.c
+++ sys/i386/i386/dump_machdep.c
@@ -25,12 +25,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/dump_machdep.c,v 1.11 2005/07/02 19:57:31 marcel Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/dump_machdep.c,v 1.12.4.1 2008/01/30 21:21:50 ru Exp $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cons.h>
+#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <vm/vm.h>
@@ -40,6 +41,11 @@
CTASSERT(sizeof(struct kerneldumpheader) == 512);
+int do_minidump = 1;
+TUNABLE_INT("debug.minidump", &do_minidump);
+SYSCTL_INT(_debug, OID_AUTO, minidump, CTLFLAG_RW, &do_minidump, 0,
+ "Enable mini crash dumps");
+
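With this default, the minidump path is taken unless the administrator opts out; because debug.minidump is both a boot tunable and a read-write sysctl, it can be set to 0 in loader.conf or changed at runtime with sysctl(8) to fall back to the traditional full-memory ELF dump below.
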
/*
* Don't touch the first SIZEOF_METADATA bytes on the dump device. This
* is to protect us from metadata and to protect metadata from us.
@@ -134,7 +140,7 @@
ptr += len;
sz -= len;
if (fragsz == DEV_BSIZE) {
- error = di->dumper(di->priv, buffer, 0, dumplo,
+ error = dump_write(di, buffer, 0, dumplo,
DEV_BSIZE);
if (error)
return error;
@@ -154,7 +160,7 @@
if (fragsz == 0)
return (0);
- error = di->dumper(di->priv, buffer, 0, dumplo, DEV_BSIZE);
+ error = dump_write(di, buffer, 0, dumplo, DEV_BSIZE);
dumplo += DEV_BSIZE;
fragsz = 0;
return (error);
@@ -195,7 +201,7 @@
a = pa + i * PAGE_SIZE;
va = pmap_kenter_temporary(trunc_page(a), i);
}
- error = di->dumper(di->priv, va, 0, dumplo, sz);
+ error = dump_write(di, va, 0, dumplo, sz);
if (error)
break;
dumplo += sz;
@@ -272,6 +278,10 @@
size_t hdrsz;
int error;
+ if (do_minidump) {
+ minidumpsys(di);
+ return;
+ }
bzero(&ehdr, sizeof(ehdr));
ehdr.e_ident[EI_MAG0] = ELFMAG0;
ehdr.e_ident[EI_MAG1] = ELFMAG1;
@@ -317,7 +327,7 @@
ehdr.e_phnum);
/* Dump leader */
- error = di->dumper(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
if (error)
goto fail;
dumplo += sizeof(kdh);
@@ -348,12 +358,12 @@
goto fail;
/* Dump trailer */
- error = di->dumper(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
if (error)
goto fail;
/* Signal completion, signoff and exit stage left. */
- di->dumper(di->priv, NULL, 0, 0, 0);
+ dump_write(di, NULL, 0, 0, 0);
printf("\nDump complete\n");
return;
Index: ptrace_machdep.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/ptrace_machdep.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/i386/i386/ptrace_machdep.c -L sys/i386/i386/ptrace_machdep.c -u -r1.3 -r1.4
--- sys/i386/i386/ptrace_machdep.c
+++ sys/i386/i386/ptrace_machdep.c
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/ptrace_machdep.c,v 1.3.2.1 2005/08/11 14:28:42 tobez Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/ptrace_machdep.c,v 1.6 2006/05/30 23:44:21 davidxu Exp $");
#include "opt_cpu.h"
Index: exception.s
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/exception.s,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/exception.s -L sys/i386/i386/exception.s -u -r1.1.1.1 -r1.2
--- sys/i386/i386/exception.s
+++ sys/i386/i386/exception.s
@@ -27,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/i386/i386/exception.s,v 1.113.2.1 2005/07/28 03:30:53 jkoshy Exp $
+ * $FreeBSD: src/sys/i386/i386/exception.s,v 1.117 2006/12/17 05:07:00 kmacy Exp $
*/
#include "opt_apic.h"
@@ -74,6 +74,8 @@
MCOUNT_LABEL(user)
MCOUNT_LABEL(btrap)
+#define TRAP(a) pushl $(a) ; jmp alltraps
+
IDTVEC(div)
pushl $0; TRAP(T_DIVIDE)
IDTVEC(dbg)
@@ -116,8 +118,9 @@
/*
* alltraps entry point. Interrupts are enabled if this was a trap
* gate (TGT), else disabled if this was an interrupt gate (IGT).
- * Note that int0x80_syscall is a trap gate. Only page faults
- * use an interrupt gate.
+ * Note that int0x80_syscall is a trap gate. Interrupt gates are
+ * used by page faults, non-maskable interrupts, debug and breakpoint
+ * exceptions.
*/
SUPERALIGN_TEXT
@@ -129,15 +132,13 @@
pushl %es
pushl %fs
alltraps_with_regs_pushed:
- movl $KDSEL,%eax
- movl %eax,%ds
- movl %eax,%es
- movl $KPSEL,%eax
- movl %eax,%fs
+ SET_KERNEL_SREGS
FAKE_MCOUNT(TF_EIP(%esp))
calltrap:
+ pushl %esp
call trap
-
+ add $4, %esp
+
/*
* Return via doreti to handle ASTs.
*/
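
The pushl %esp / add $4, %esp pairs added here and in the two system call stubs below reflect the kernel-wide switch to passing trap() and syscall() a pointer to the trapframe rather than the frame by value; SET_KERNEL_SREGS likewise replaces the previously open-coded segment register loads.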
@@ -166,13 +167,11 @@
pushl %ds
pushl %es
pushl %fs
- movl $KDSEL,%eax /* switch to kernel segments */
- movl %eax,%ds
- movl %eax,%es
- movl $KPSEL,%eax
- movl %eax,%fs
+ SET_KERNEL_SREGS
FAKE_MCOUNT(TF_EIP(%esp))
+ pushl %esp
call syscall
+ add $4, %esp
MEXITCOUNT
jmp doreti
@@ -191,13 +190,11 @@
pushl %ds
pushl %es
pushl %fs
- movl $KDSEL,%eax /* switch to kernel segments */
- movl %eax,%ds
- movl %eax,%es
- movl $KPSEL,%eax
- movl %eax,%fs
+ SET_KERNEL_SREGS
FAKE_MCOUNT(TF_EIP(%esp))
+ pushl %esp
call syscall
+ add $4, %esp
MEXITCOUNT
jmp doreti
Index: mptable_pci.c
===================================================================
RCS file: /home/cvs/src/sys/i386/i386/mptable_pci.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/i386/i386/mptable_pci.c -L sys/i386/i386/mptable_pci.c -u -r1.1.1.1 -r1.2
--- sys/i386/i386/mptable_pci.c
+++ sys/i386/i386/mptable_pci.c
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/mptable_pci.c,v 1.2.8.1 2005/09/18 02:55:10 imp Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/mptable_pci.c,v 1.8 2007/05/02 17:50:35 jhb Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -72,6 +72,37 @@
return (bus_generic_attach(dev));
}
+/* Pass MSI requests up to the nexus. */
+static int
+mptable_hostb_alloc_msi(device_t pcib, device_t dev, int count, int maxcount,
+ int *irqs)
+{
+ device_t bus;
+
+ bus = device_get_parent(pcib);
+ return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount,
+ irqs));
+}
+
+static int
+mptable_hostb_alloc_msix(device_t pcib, device_t dev, int *irq)
+{
+ device_t bus;
+
+ bus = device_get_parent(pcib);
+ return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq));
+}
+
+static int
+mptable_hostb_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr,
+ uint32_t *data)
+{
+ device_t bus;
+
+ bus = device_get_parent(pcib);
+ return (PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data));
+}
+
static device_method_t mptable_hostb_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, mptable_hostb_probe),
@@ -96,17 +127,19 @@
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt),
+ DEVMETHOD(pcib_alloc_msi, mptable_hostb_alloc_msi),
+ DEVMETHOD(pcib_release_msi, pcib_release_msi),
+ DEVMETHOD(pcib_alloc_msix, mptable_hostb_alloc_msix),
+ DEVMETHOD(pcib_release_msix, pcib_release_msix),
+ DEVMETHOD(pcib_map_msi, mptable_hostb_map_msi),
{ 0, 0 }
};
-static driver_t mptable_hostb_driver = {
- "pcib",
- mptable_hostb_methods,
- 1,
-};
+static devclass_t hostb_devclass;
-DRIVER_MODULE(mptable_pcib, legacy, mptable_hostb_driver, pcib_devclass, 0, 0);
+DEFINE_CLASS_0(pcib, mptable_hostb_driver, mptable_hostb_methods, 1);
+DRIVER_MODULE(mptable_pcib, legacy, mptable_hostb_driver, hostb_devclass, 0, 0);
/* PCI to PCI bridge driver. */
@@ -151,15 +184,17 @@
DEVMETHOD(pcib_read_config, pcib_read_config),
DEVMETHOD(pcib_write_config, pcib_write_config),
DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt),
+ DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
+ DEVMETHOD(pcib_release_msi, pcib_release_msi),
+ DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
+ DEVMETHOD(pcib_release_msix, pcib_release_msix),
+ DEVMETHOD(pcib_map_msi, pcib_map_msi),
{0, 0}
};
-static driver_t mptable_pcib_driver = {
- "pcib",
- mptable_pcib_pci_methods,
- sizeof(struct pcib_softc),
-};
+static devclass_t pcib_devclass;
+DEFINE_CLASS_0(pcib, mptable_pcib_driver, mptable_pcib_pci_methods,
+ sizeof(struct pcib_softc));
DRIVER_MODULE(mptable_pcib, pci, mptable_pcib_driver, pcib_devclass, 0, 0);
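
For readers unfamiliar with the macro: DEFINE_CLASS_0(pcib, mptable_pcib_driver, methods, size) from <sys/bus.h> expands to essentially the driver_t initializer the old code spelled out by hand, so the conversion is mechanical. Roughly, modulo kobj bookkeeping:

	static driver_t mptable_pcib_driver = {
		"pcib",
		mptable_pcib_pci_methods,
		sizeof(struct pcib_softc),
	};

The same applies to the host bridge driver above; each driver now also declares its own file-local devclass instead of sharing a global one.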
-