1 |
/*- |
2 |
* Copyright (c) 2005 |
3 |
* Bill Paul <wpaul@windriver.com>. All rights reserved. |
4 |
* |
5 |
* Redistribution and use in source and binary forms, with or without |
6 |
* modification, are permitted provided that the following conditions |
7 |
* are met: |
8 |
* 1. Redistributions of source code must retain the above copyright |
9 |
* notice, this list of conditions and the following disclaimer. |
10 |
* 2. Redistributions in binary form must reproduce the above copyright |
11 |
* notice, this list of conditions and the following disclaimer in the |
12 |
* documentation and/or other materials provided with the distribution. |
13 |
* 3. All advertising materials mentioning features or use of this software |
14 |
* must display the following acknowledgement: |
15 |
* This product includes software developed by Bill Paul. |
16 |
* 4. Neither the name of the author nor the names of any co-contributors |
17 |
* may be used to endorse or promote products derived from this software |
18 |
* without specific prior written permission. |
19 |
* |
20 |
* THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND |
21 |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
22 |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
23 |
* ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD |
24 |
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
25 |
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
26 |
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
27 |
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 |
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
29 |
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
30 |
* THE POSSIBILITY OF SUCH DAMAGE. |
31 |
* |
32 |
* $MidnightBSD$ |
33 |
*/ |
34 |
|
35 |
/* The 'ret' macro doesn't work in this file if GPROF is enabled. */ |
36 |
#ifdef GPROF |
37 |
#undef GPROF |
38 |
#endif |
39 |
|
40 |
#include <machine/asmacros.h> |
41 |
|
42 |
/* |
43 |
* This file contains assembly language wrappers for the different |
44 |
* calling conventions supported by Windows on the i386 architecture. |
45 |
* In FreeBSD, the whole OS typically uses the same C calling convention |
46 |
* everywhere, namely _cdecl. Windows, on the other hand, uses several |
47 |
* different C calling conventions depending on the circumstances: |
48 |
* |
49 |
* _stdcall: Used for most ordinary Windows APIs. With _stdcall, |
50 |
* arguments are passed on the stack, and the callee unwinds the stack |
51 |
* before returning control to the caller. Not suitable for variadic |
52 |
* functions. |
53 |
* |
54 |
* _fastcall: Used for some APIs that may be invoked frequently and |
55 |
* where speed is a critical factor (e.g. KeAcquireSpinLock() and |
56 |
* KeReleaseSpinLock()) Similar to _stdcall, except the first 2 32-bit |
57 |
* or smaller arguments are passed in the %ecx and %edx registers |
58 |
* instead of on the stack. Not suitable for variadic functions. |
59 |
* |
60 |
* _cdecl: Used for standard C library routines and for variadic |
61 |
* functions. |
62 |
* |
63 |
* _regparm(3): Used for certain assembly routines. All arguments |
64 |
* passed in %eax, %edx and %ecx. |
65 |
* |
66 |
* Furthermore, there is an additional wrinkle that's not obvious |
67 |
* with all code: Microsoft supports the use of exceptions in C |
68 |
* (__try/__except) both in user _and_ kernel mode. Sadly, Windows |
69 |
* structured exception handling uses machine-specific features |
70 |
* that conflict rather badly with FreeBSD. (See utility routines |
71 |
* at the end of this module for more details.) |
72 |
* |
73 |
* We want to support these calling conventions in as portable a manner |
74 |
* as possible. The trick is doing it not only with different versions |
75 |
* of GNU C, but with compilers other than GNU C (e.g. the Solaris |
76 |
* SunOne C compiler). The only sure fire method is with assembly |
77 |
* language trampoline code which both fixes up the argument passing, |
78 |
* stack unwinding and exception/thread context all at once. |
79 |
* |
80 |
* You'll notice that we call the thunk/unthunk routines in the |
81 |
* *_wrap() functions in an awkward way. Rather than branching |
82 |
* directly to the address, we load the address into a register |
83 |
* first as a literal value, then we branch to it. This is done |
84 |
* to ensure that the assembler doesn't translate the branch into |
85 |
* a relative branch. We use the *_wrap() routines here as templates |
86 |
* and create the actual trampolines at run time, at which point |
87 |
* we only know the absolute addresses of the thunk and unthunk |
88 |
* routines. So we need to make sure the templates have enough |
89 |
* room in them for the full address. |
90 |
* |
91 |
* Also note that when we call a thunk/unthunk routine after |
92 |
* invoking a wrapped function, we have to make sure to preserve |
93 |
* the value returned from that function. Most functions return |
94 |
* a 32-bit value in %eax, however some routines return 64-bit |
95 |
* values, which span both %eax and %edx. Consequently, we have |
96 |
* to preserve both registers. |
97 |
*/ |
98 |
|
99 |
/* |
100 |
* Handle _stdcall going from Windows to UNIX. |
101 |
* This is frustrating, because to do it right you have to |
102 |
* know how many arguments the called function takes, and there's |
103 |
* no way to figure this out on the fly: you just have to be told |
104 |
* ahead of time. We assume there will be 16 arguments. I don't |
105 |
* think there are any Windows APIs that require this many. |
106 |
*/ |
107 |
|
108 |
.globl x86_stdcall_wrap_call
.globl x86_stdcall_wrap_arg
.globl x86_stdcall_wrap_end

/*
 * x86_stdcall_wrap: template for a Windows -> UNIX _stdcall trampoline.
 *
 * On entry the return address is at 0(%esp) and the caller's argument
 * dwords start at 4(%esp).  Sixteen dwords are always copied into a
 * fresh 64-byte area so the wrapped routine sees them at the top of the
 * stack, regardless of how many arguments it really takes.
 *
 * Exported labels:
 *   x86_stdcall_wrap_call - the "movl $0,%eax" that holds the address of
 *                           the wrapped routine (zero in the template;
 *                           presumably patched when a trampoline is
 *                           instantiated -- the patching code is not in
 *                           this file).
 *   x86_stdcall_wrap_arg  - the "ret $imm16" whose operand is the number
 *                           of argument bytes to pop (0xFF in the
 *                           template; presumably patched likewise).
 *   x86_stdcall_wrap_end  - first byte past the template.
 *
 * The value returned by the wrapped routine in %edx:%eax is preserved
 * across the call to ctxsw_utow.
 */
ENTRY(x86_stdcall_wrap)
	pushl	%esi			# callee-saved, clobbered by the copy
	pushl	%edi			# likewise
	subl	$64,%esp		# 16 argument slots of 4 bytes each
	movl	%esp,%edi		# copy destination: the new slots
	movl	%esp,%esi
	addl	$64+8+4,%esi		# copy source: past slots, saved regs, return addr
	movl	$16,%ecx		# handle up to 16 args
	rep movsl

	movl	$ctxsw_wtou,%eax	# absolute address, see file header
	call	*%eax			# unthunk

x86_stdcall_wrap_call:
	movl	$0,%eax			# address of wrapped routine goes here
	call	*%eax			# jump to routine
	pushl	%eax			# preserve return val (low half)
	pushl	%edx			# preserve return val (high half)

	movl	$ctxsw_utow,%eax
	call	*%eax			# thunk

	popl	%edx
	popl	%eax			# restore return val

	addl	$64,%esp		# drop the 16 copied slots
	popl	%edi
	popl	%esi
x86_stdcall_wrap_arg:
	ret	$0xFF			# byte count is a template placeholder
x86_stdcall_wrap_end:
144 |
|
145 |
|
146 |
/* |
147 |
* Handle _stdcall going from UNIX to Windows. This routine |
148 |
* expects to be passed the function to be called, number of |
149 |
* args and the arguments for the Windows function on the stack. |
150 |
*/ |
151 |
|
152 |
ENTRY(x86_stdcall_call)
	/*
	 * Stack after the two pushes below:
	 *    0(%esp) saved %edi      4(%esp) saved %esi
	 *    8(%esp) return address 12(%esp) function to call
	 *   16(%esp) argument count 20(%esp) first forwarded argument
	 */
	pushl	%esi			# must preserve %esi
	pushl	%edi			# and %edi

	movl	16(%esp),%ecx		# arg cnt doubles as the copy count
	leal	20(%esp),%esi		# source: first argument to forward
	movl	%ecx,%eax
	shll	$2,%eax			# arg cnt -> size in bytes
	subl	%eax,%esp		# make room for the forwarded copy
	movl	%esp,%edi		# destination: the new room
	rep movsl			# %edi ends up at the saved-%edi slot

	call	ctxsw_utow		# thunk

	call	*12(%edi)		# branch to stdcall routine
	pushl	%eax			# preserve return val
	pushl	%edx

	call	ctxsw_wtou		# unthunk

	popl	%edx
	popl	%eax			# restore return val
	movl	%edi,%esp		# rewind to the saved registers
	popl	%edi			# restore %edi
	popl	%esi			# and %esi
	ret
180 |
|
181 |
/* |
182 |
* Fastcall support. Similar to _stdcall, except the first |
183 |
* two arguments are passed in %ecx and %edx. It happens we |
184 |
* only support a small number of _fastcall APIs, none of them |
185 |
* take more than three arguments. So to keep the code size |
186 |
* and complexity down, we only handle 3 arguments here. |
187 |
*/ |
188 |
|
189 |
/* Call _fastcall function going from Windows to UNIX. */ |
190 |
|
191 |
.globl x86_fastcall_wrap_call
.globl x86_fastcall_wrap_arg
.globl x86_fastcall_wrap_end

/*
 * x86_fastcall_wrap: template for a Windows -> UNIX _fastcall trampoline.
 *
 * On entry %ecx and %edx carry the first two arguments and 4(%esp) is
 * read as the third.  All three are pushed so the wrapped routine finds
 * them on the stack in order (first argument at the lowest address).
 *
 * Exported labels:
 *   x86_fastcall_wrap_call - the "movl $0,%eax" holding the address of
 *                            the wrapped routine (zero in the template;
 *                            presumably patched when a trampoline is
 *                            instantiated -- not shown in this file).
 *   x86_fastcall_wrap_arg  - the "ret $imm16" whose operand is the number
 *                            of stack-argument bytes to pop (0xFF in the
 *                            template; presumably patched likewise).
 *   x86_fastcall_wrap_end  - first byte past the template.
 *
 * The value returned in %edx:%eax is preserved across ctxsw_utow.
 */
ENTRY(x86_fastcall_wrap)
	movl	4(%esp),%eax		# third argument, taken from the stack
	pushl	%eax
	pushl	%edx			# second argument
	pushl	%ecx			# first argument

	movl	$ctxsw_wtou,%eax	# absolute address, see file header
	call	*%eax			# unthunk

x86_fastcall_wrap_call:
	movl	$0,%eax			# address of wrapped routine goes here
	call	*%eax			# branch to fastcall routine
	pushl	%eax			# preserve return val
	pushl	%edx

	movl	$ctxsw_utow,%eax
	call	*%eax			# thunk

	popl	%edx
	popl	%eax			# restore return val
	addl	$12,%esp		# drop the three pushed arguments
x86_fastcall_wrap_arg:
	ret	$0xFF			# byte count is a template placeholder
x86_fastcall_wrap_end:
219 |
|
220 |
/* |
221 |
* Call _fastcall function going from UNIX to Windows. |
222 |
* This routine isn't normally used since NDIS miniport drivers |
223 |
* only have _stdcall entry points, but it's provided anyway |
224 |
* to round out the API, and for testing purposes. |
225 |
*/ |
226 |
|
227 |
/*
 * x86_fastcall_call(func, arg1, arg2, arg3)
 *
 * Calls the _fastcall routine "func" with arg1 in %ecx, arg2 in %edx and
 * a copy of arg3 on the stack, bracketed by ctxsw_utow/ctxsw_wtou.
 * Returns whatever the routine left in %edx:%eax.
 *
 * A _fastcall callee removes its own stack arguments, so a routine that
 * really takes three arguments returns with the arg3 copy already
 * popped, while one taking two or fewer leaves it in place.  Popping a
 * fixed 4 bytes afterwards is therefore wrong in the three-argument case
 * (it would discard our return address).  Instead the stack pointer is
 * anchored in %esi and restored from it, which is correct either way.
 * This relies on ctxsw_utow, ctxsw_wtou and the called routine
 * preserving %esi, the same assumption x86_stdcall_call makes for %edi.
 */
ENTRY(x86_fastcall_call)
	pushl	%esi			# callee-saved, used as the stack anchor
	movl	%esp,%esi		# 4=ret addr, 8=func, 12/16/20=args 1-3
	pushl	20(%esi)		# third argument travels on the stack

	call	ctxsw_utow		# thunk

	movl	12(%esi),%ecx		# first argument
	movl	16(%esi),%edx		# second argument
	call	*8(%esi)		# branch to fastcall routine
	pushl	%eax			# preserve return val
	pushl	%edx

	call	ctxsw_wtou		# unthunk

	popl	%edx
	popl	%eax			# restore return val
	movl	%esi,%esp		# drop the arg copy if the callee did not
	popl	%esi
	ret
245 |
|
246 |
/* |
247 |
* Call regparm(3) function going from Windows to UNIX. Arguments |
248 |
* are passed in %eax, %edx and %ecx. Note that while additional |
249 |
* arguments are passed on the stack, we never bother with them, |
250 |
* since the only regparm(3) routines we need to wrap never take |
251 |
* more than 3 arguments. |
252 |
*/ |
253 |
|
254 |
.globl x86_regparm_wrap_call
.globl x86_regparm_wrap_end

/*
 * x86_regparm_wrap: template for a Windows -> UNIX regparm(3) trampoline.
 *
 * The three register arguments (%eax, %edx, %ecx) are pushed so that the
 * wrapped routine finds them on the stack in that order, first argument
 * at the lowest address.  Nothing is popped on behalf of the caller: the
 * routine ends in a plain "ret".
 *
 * Exported labels:
 *   x86_regparm_wrap_call - the "movl $0,%eax" holding the address of the
 *                           wrapped routine (zero in the template;
 *                           presumably patched when a trampoline is
 *                           instantiated -- not shown in this file).
 *   x86_regparm_wrap_end  - first byte past the template.
 *
 * The value returned in %edx:%eax is preserved across ctxsw_utow.
 */
ENTRY(x86_regparm_wrap)
	pushl	%ecx			# third argument
	pushl	%edx			# second argument
	pushl	%eax			# first argument

	movl	$ctxsw_wtou,%eax	# absolute address, see file header
	call	*%eax			# unthunk

x86_regparm_wrap_call:
	movl	$0,%eax			# address of wrapped routine goes here
	call	*%eax			# jump to routine
	pushl	%eax			# preserve return val (low half)
	pushl	%edx			# preserve return val (high half)

	movl	$ctxsw_utow,%eax
	call	*%eax			# thunk

	popl	%edx			# restore return val
	popl	%eax
	addl	$12,%esp		# drop the three pushed arguments
	ret
x86_regparm_wrap_end:
279 |
|
280 |
/* |
281 |
* Call regparm(3) function going from UNIX to Windows. |
282 |
* This routine isn't normally used since NDIS miniport drivers |
283 |
* only have _stdcall entry points, but it's provided anyway |
284 |
* to round out the API, and for testing purposes. |
285 |
*/ |
286 |
|
287 |
ENTRY(x86_regparm_call)
	/*
	 * Stack on entry: 0(%esp) return address, 4(%esp) function to
	 * call, 8/12/16(%esp) its first, second and third arguments.
	 * Nothing is pushed before the call, so these offsets hold
	 * throughout.
	 */
	call	ctxsw_utow		# thunk

	movl	8(%esp),%eax		# first argument
	movl	12(%esp),%edx		# second argument
	movl	16(%esp),%ecx		# third argument
	call	*4(%esp)		# branch to regparm routine
	pushl	%eax			# preserve return val (low half)
	pushl	%edx			# preserve return val (high half)

	call	ctxsw_wtou		# unthunk

	popl	%edx			# restore return val
	popl	%eax
	ret
302 |
|
303 |
/* |
304 |
* Ugly hack alert: |
305 |
* |
306 |
* On Win32/i386, using __try/__except results in code that tries to |
307 |
* manipulate what's supposed to be the Windows Thread Environment |
308 |
* Block (TEB), which one accesses via the %fs register. In particular, |
309 |
* %fs:0 (the first DWORD in the TEB) points to the exception |
310 |
* registration list. Unfortunately, FreeBSD uses %fs for the |
311 |
* per-cpu data structure (pcpu), and we can't allow Windows code |
312 |
* to muck with that. I don't even know what Solaris uses %fs for |
313 |
* (or if it even uses it at all). |
314 |
* |
315 |
* Even worse, in 32-bit protected mode, %fs is a selector that |
316 |
* refers to an entry in either the GDT or the LDT. Ideally, we would |
317 |
* like to be able to temporarily point it at another descriptor |
318 |
* while Windows code executes, but to do that we need a separate |
319 |
* descriptor entry of our own to play with. |
320 |
* |
321 |
* Therefore, we go to some trouble to learn the existing layout of |
322 |
* the GDT and update it to include an extra entry that we can use. |
323 |
* We need the following utility routines to help us do that. On |
324 |
* FreeBSD, index #7 in the GDT happens to be unused, so we turn |
325 |
* this into our own data segment descriptor. It would be better |
326 |
* if we could use a private LDT entry, but there's no easy way to |
327 |
* do that in SMP mode because of the way FreeBSD handles user LDTs. |
328 |
* |
329 |
* Once we have a custom descriptor, we have to thunk/unthunk whenever |
330 |
* we cross between FreeBSD code and Windows code. The thunking is |
331 |
* based on the premise that when executing instructions in the |
332 |
* Windows binary itself, we won't go to sleep. This is because in |
333 |
* order to yield the CPU, the code has to call back out to a FreeBSD |
334 |
* routine first, and when that happens we can unthunk in order to |
335 |
* restore FreeBSD context. What we're desperately trying to avoid is |
336 |
* being involuntarily pre-empted with the %fs register still pointing |
337 |
* to our fake TIB: if FreeBSD code runs with %fs pointing at our |
338 |
* Windows TIB instead of pcpu, we'll panic the kernel. Fortunately, |
339 |
* the only way involuntary preemption can occur is if an interrupt |
340 |
* fires, and the trap handler saves/restores %fs for us. |
341 |
* |
342 |
* The thunking routines themselves, ctxsw_utow() (Context SWitch UNIX |
343 |
* to Windows) and ctxsw_wtou() (Context SWitch Windows to UNIX), are |
344 |
* external to this module. This is done simply because it's easier |
345 |
* to manipulate data structures in C rather than assembly. |
346 |
*/ |
347 |
|
348 |
/*
 * x86_getldt(gdt, ldt)
 *
 * Despite the name, this captures both descriptor-table registers:
 * the GDT register is stored (sgdt) at the address in the first stack
 * argument and the LDT selector is stored (sldt) at the address in the
 * second.  Always returns 0 in %eax.
 */
ENTRY(x86_getldt)
	movl	4(%esp),%eax		# where to store the GDT register
	sgdtl	(%eax)
	movl	8(%esp),%eax		# where to store the LDT selector
	sldt	(%eax)
	xorl	%eax,%eax		# return 0
	ret
355 |
|
356 |
/*
 * x86_setldt(gdt, ldt)
 *
 * Despite the name, this loads both descriptor-table registers: the GDT
 * register is loaded (lgdt) from the memory addressed by the first stack
 * argument, then the LDT register is loaded (lldt) with the low 16 bits
 * of the second.  Always returns 0 in %eax.
 *
 * NOTE(review): the short jump over a nop right after lgdt looks like
 * the customary prefetch flush after reloading the GDT -- confirm.
 */
ENTRY(x86_setldt)
	movl	4(%esp),%eax		# address of the new GDT register image
	lgdt	(%eax)
	jmp	1f
	nop
1:
	movl	8(%esp),%eax		# new LDT selector
	lldt	%ax
	xorl	%eax,%eax		# return 0
	ret
366 |
|
367 |
/*
 * x86_getfs: return the current %fs selector in %ax.  Only the low
 * 16 bits of %eax are written.
 */
ENTRY(x86_getfs)
	mov	%fs,%ax
	ret
370 |
|
371 |
/*
 * x86_setfs(sel): load %fs with the selector passed as the first stack
 * argument.
 */
ENTRY(x86_setfs)
	mov	4(%esp),%fs
	ret
374 |
|
375 |
/*
 * x86_gettid: return the dword found at offset 12 of the segment that
 * %fs currently selects.
 *
 * NOTE(review): presumably a field of the fake Windows TIB described in
 * the comment block above -- confirm against the structure definition.
 */
ENTRY(x86_gettid)
	movl	%fs:12,%eax
	ret
378 |
|
379 |
/*
 * x86_critical_enter: disable maskable interrupts on this CPU (cli).
 * The previous interrupt-flag state is not saved.
 */
ENTRY(x86_critical_enter)
	cli
	ret
382 |
|
383 |
/*
 * x86_critical_exit: enable maskable interrupts on this CPU (sti).
 * This is unconditional; it does not restore whatever state was in
 * effect before x86_critical_enter, so the pair does not nest.
 */
ENTRY(x86_critical_exit)
	sti
	ret