1 /*-
2 * Copyright (c) 2005
3 * Bill Paul <wpaul@windriver.com>. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by Bill Paul.
16 * 4. Neither the name of the author nor the names of any co-contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30 * THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * $FreeBSD: releng/6.0/sys/compat/ndis/winx32_wrap.S 151742 2005-10-27 17:08:57Z wpaul $
33 */
34
35 #include <machine/asmacros.h>
36
37 /*
38 * This file contains assembly language wrappers for the different
39 * calling conventions supported by Windows on the i386 architecture.
40 * In FreeBSD, the whole OS typically uses the same C calling convention
41 * everywhere, namely _cdecl. Windows, on the other hand, uses several
42 * different C calling conventions depending on the circumstances:
43 *
44 * _stdcall: Used for most ordinary Windows APIs. With _stdcall,
45 * arguments are passed on the stack, and the callee unwinds the stack
46 * before returning control to the caller. Not suitable for variadic
47 * functions.
48 *
49 * _fastcall: Used for some APIs that may be invoked frequently and
50 * where speed is a critical factor (e.g. KeAcquireSpinLock() and
51 * KeReleaseSpinLock()) Similar to _stdcall, except the first 2 32-bit
52 * or smaller arguments are passed in the %ecx and %edx registers
53 * instead of on the stack. Not suitable for variadic functions.
54 *
55 * _cdecl: Used for standard C library routines and for variadic
56 * functions.
57 *
58 * _regparm(3): Used for certain assembly routines. All arguments
59 * passed in %eax, %edx and %ecx.
60 *
61 * Furthermore, there is an additional wrinkle that's not obvious
62 * with all code: Microsoft supports the use of exceptions in C
63 * (__try/__except) both in user _and_ kernel mode. Sadly, Windows
64 * structured exception handling uses machine-specific features
65 * that conflict rather badly with FreeBSD. (See utility routines
66 * at the end of this module for more details.)
67 *
68 * We want to support these calling conventions in as portable a manner
69 * as possible. The trick is doing it not only with different versions
70 * of GNU C, but with compilers other than GNU C (e.g. the Solaris
71 * SunOne C compiler). The only sure fire method is with assembly
72 * language trampoline code which both fixes up the argument passing,
73 * stack unwinding and exception/thread context all at once.
74 *
75 * You'll notice that we call the thunk/unthunk routines in the
76 * *_wrap() functions in an awkward way. Rather than branching
77 * directly to the address, we load the address into a register
78 * first as a literal value, then we branch to it. This is done
79 * to ensure that the assembler doesn't translate the branch into
80 * a relative branch. We use the *_wrap() routines here as templates
81 * and create the actual trampolines at run time, at which point
82 * we only know the absolute addresses of the thunk and unthunk
83 * routines. So we need to make sure the templates have enough
84 * room in them for the full address.
85 *
86 * Also note that when we call a thunk/unthunk routine after
87 * invoking a wrapped function, we have to make sure to preserve
88 * the value returned from that function. Most functions return
89 * a 32-bit value in %eax, however some routines return 64-bit
90 * values, which span both %eax and %edx. Consequently, we have
91 * to preserve both registers.
92 */
93
94 /*
95 * Handle _stdcall going from Windows to UNIX.
96 * This is frustrating, because to do it right you have to
97 * know how many arguments the called function takes, and there's
98 * no way to figure this out on the fly: you just have to be told
99 * ahead of time. We assume there will be 16 arguments. I don't
100 * think there are any Windows APIs that require this many.
101 */
102
/*
 * x86_stdcall_wrap: template for a Windows -> UNIX _stdcall trampoline.
 *
 * After the two register saves and the 64-byte reservation, the
 * incoming arguments begin at 64+8+4(%esp): 64 bytes of new argument
 * slots, 8 bytes of saved %esi/%edi and 4 bytes of return address.
 * Sixteen 32-bit slots are copied unconditionally, so the wrapped
 * routine finds its arguments at the top of the stack.
 *
 * The zero immediate at x86_stdcall_wrap_call and the 0xFF immediate
 * at x86_stdcall_wrap_arg are placeholders; presumably they are
 * overwritten with the real function address and the number of bytes
 * to pop when a trampoline is built from this template (confirm
 * against the code that instantiates it).  The instruction forms at
 * those two labels must therefore not be changed.
 */
	.globl	x86_stdcall_wrap_call
	.globl	x86_stdcall_wrap_arg
	.globl	x86_stdcall_wrap_end

ENTRY(x86_stdcall_wrap)
	pushl	%esi
	pushl	%edi
	subl	$64,%esp		# reserve 16 argument slots
	movl	%esp,%edi		# copy destination: the new slots
	leal	64+8+4(%esp),%esi	# copy source: first incoming arg
	movl	$16,%ecx		# handle up to 16 args
	rep
	movsl

	movl	$ctxsw_wtou,%eax	# absolute address, not pc-relative
	call	*%eax			# unthunk

x86_stdcall_wrap_call:
	movl	$0,%eax			# placeholder for routine address
	call	*%eax			# jump to routine
	pushl	%eax			# preserve return val (low half)
	pushl	%edx			# and high half of 64-bit results

	movl	$ctxsw_utow,%eax	# absolute address, not pc-relative
	call	*%eax			# thunk

	popl	%edx
	popl	%eax			# restore return val

	addl	$64,%esp		# clean the stack
	popl	%edi
	popl	%esi
x86_stdcall_wrap_arg:
	ret	$0xFF			# placeholder pop count
x86_stdcall_wrap_end:
139
140
141 /*
142 * Handle _stdcall going from UNIX to Windows. This routine
143 * expects to be passed the function to be called, number of
144 * args and the arguments for the Windows function on the stack.
145 */
146
ENTRY(x86_stdcall_call)
	pushl	%esi			# must preserve %esi
	pushl	%edi			# and %edi

	/*
	 * Stack at this point:
	 *    0(%esp) saved %edi      4(%esp) saved %esi
	 *    8(%esp) return address 12(%esp) function to call
	 *   16(%esp) argument count 20(%esp) first argument to forward
	 */
	movl	16(%esp),%ecx		# arg cnt, used as the copy count
	leal	20(%esp),%esi		# source: first argument to be forwarded
	leal	(,%ecx,4),%eax		# arg cnt scaled to a byte offset
	subl	%eax,%esp		# shift stack to new location
	movl	%esp,%edi		# store dest copy addr
	rep				# do the copy
	movsl

	/*
	 * The copy advanced %edi by the number of bytes copied, so it
	 * now equals the stack pointer from before the shift; 12(%edi)
	 * is the function pointer slot and %edi is used below to put
	 * the stack back.
	 */
	call	ctxsw_utow		# thunk

	call	*12(%edi)		# branch to stdcall routine
	pushl	%eax			# preserve return val
	pushl	%edx

	call	ctxsw_wtou		# unthunk

	popl	%edx
	popl	%eax			# restore return val
	movl	%edi,%esp		# restore stack
	popl	%edi			# restore %edi
	popl	%esi			# and %esi
	ret
175
176 /*
177 * Fastcall support. Similar to _stdcall, except the first
178 * two arguments are passed in %ecx and %edx. It happens we
179 * only support a small number of _fastcall APIs, none of them
180 * take more than three arguments. So to keep the code size
181 * and complexity down, we only handle 3 arguments here.
182 */
183
184 /* Call _fastcall function going from Windows to UNIX. */
185
/*
 * x86_fastcall_wrap: template for a Windows -> UNIX _fastcall
 * trampoline.  The two register arguments and the first stack
 * argument are pushed so the wrapped routine sees three ordinary
 * stack arguments.
 *
 * The zero immediate at x86_fastcall_wrap_call and the 0xFF immediate
 * at x86_fastcall_wrap_arg are placeholders; presumably they are
 * filled in when a trampoline is built from this template (confirm
 * against the code that instantiates it).  The instruction forms at
 * those two labels must therefore not be changed.
 */
	.globl	x86_fastcall_wrap_call
	.globl	x86_fastcall_wrap_arg
	.globl	x86_fastcall_wrap_end

ENTRY(x86_fastcall_wrap)
	pushl	4(%esp)			# third arg: slot above return addr
	pushl	%edx			# second arg
	pushl	%ecx			# first arg

	movl	$ctxsw_wtou,%eax	# absolute address, not pc-relative
	call	*%eax			# unthunk

x86_fastcall_wrap_call:
	movl	$0,%eax			# placeholder for routine address
	call	*%eax			# branch to fastcall routine
	pushl	%eax			# preserve return val
	pushl	%edx

	movl	$ctxsw_utow,%eax	# absolute address, not pc-relative
	call	*%eax			# thunk

	popl	%edx
	popl	%eax			# restore return val
	addl	$12,%esp		# clean the stack
x86_fastcall_wrap_arg:
	ret	$0xFF			# placeholder pop count
x86_fastcall_wrap_end:
214
215 /*
216 * Call _fastcall function going from UNIX to Windows.
217 * This routine isn't normally used since NDIS miniport drivers
218 * only have _stdcall entry points, but it's provided anyway
219 * to round out the API, and for testing purposes.
220 */
221
ENTRY(x86_fastcall_call)
	/*
	 * Anchor the frame in %esi.  A _fastcall callee removes its own
	 * stack arguments, so whether the pushed third argument is still
	 * on the stack after the call depends on how many arguments the
	 * callee really takes.  Restoring %esp from %esi afterwards is
	 * correct in both cases; a fixed "add $4,%esp" is not, because
	 * it skips the return address when the callee has already
	 * popped the argument.
	 *
	 * Frame relative to %esi:
	 *    0 saved %esi   4 return address   8 function to call
	 *   12 first arg   16 second arg      20 third arg
	 */
	push	%esi			# callee-saved, so it survives the calls
	mov	%esp,%esi
	push	20(%esi)		# third arg goes on the stack

	call	ctxsw_utow		# thunk

	mov	12(%esi),%ecx		# first arg in %ecx
	mov	16(%esi),%edx		# second arg in %edx
	call	*8(%esi)		# branch to fastcall routine
	mov	%esi,%esp		# drop stack arg if still present
	push	%eax			# preserve return val
	push	%edx

	call	ctxsw_wtou		# unthunk

	pop	%edx
	pop	%eax			# restore return val
	pop	%esi			# restore %esi
	ret
240
241 /*
242 * Call regparm(3) function going from Windows to UNIX. Arguments
243 * are passed in %eax, %edx and %ecx. Note that while additional
244 * arguments are passed on the stack, we never bother with them,
245 * since the only regparm(3) routines we need to wrap never take
246 * more than 3 arguments.
247 */
248
/*
 * x86_regparm_wrap: template for a Windows -> UNIX regparm(3)
 * trampoline.  The three register arguments are pushed in reverse
 * order so the wrapped routine receives them as ordinary stack
 * arguments: %eax first, then %edx, then %ecx.
 *
 * The zero immediate at x86_regparm_wrap_call is a placeholder;
 * presumably it is filled in with the real function address when a
 * trampoline is built from this template (confirm against the code
 * that instantiates it).  The instruction form at that label must
 * therefore not be changed.
 */
	.globl	x86_regparm_wrap_call
	.globl	x86_regparm_wrap_end

ENTRY(x86_regparm_wrap)
	pushl	%ecx			# third arg
	pushl	%edx			# second arg
	pushl	%eax			# first arg

	movl	$ctxsw_wtou,%eax	# absolute address, not pc-relative
	call	*%eax			# unthunk

x86_regparm_wrap_call:
	movl	$0,%eax			# placeholder for routine address
	call	*%eax			# jump to routine
	pushl	%eax			# preserve return val (low half)
	pushl	%edx			# preserve return val (high half)

	movl	$ctxsw_utow,%eax	# absolute address, not pc-relative
	call	*%eax			# thunk

	popl	%edx			# restore return val (high half)
	popl	%eax			# restore return val (low half)
	addl	$12,%esp		# drop the three pushed args
	ret
x86_regparm_wrap_end:
274
275 /*
276 * Call regparm(3) function going from UNIX to Windows.
277 * This routine isn't normally used since NDIS miniport drivers
278 * only have _stdcall entry points, but it's provided anyway
279 * to round out the API, and for testing purposes.
280 */
281
ENTRY(x86_regparm_call)
	/*
	 * Stack on entry: 0 return address, 4 function to call,
	 * 8/12/16 the three arguments.  Nothing is pushed before the
	 * indirect call, so these offsets stay valid throughout.
	 */
	call	ctxsw_utow		# thunk

	movl	16(%esp),%ecx		# third arg
	movl	12(%esp),%edx		# second arg
	movl	8(%esp),%eax		# first arg
	call	*4(%esp)		# branch to regparm routine
	pushl	%eax			# preserve return val (low half)
	pushl	%edx			# preserve return val (high half)

	call	ctxsw_wtou		# unthunk

	popl	%edx			# restore return val (high half)
	popl	%eax			# restore return val (low half)
	ret
297
298 /*
299 * Ugly hack alert:
300 *
301 * On Win32/i386, using __try/__except results in code that tries to
302 * manipulate what's supposed to be the Windows Thread Environment
303 * Block (TEB), which one accesses via the %fs register. In particular,
304 * %fs:0 (the first DWORD in the TEB) points to the exception
305 * registration list. Unfortunately, FreeBSD uses %fs for the
306 * per-cpu data structure (pcpu), and we can't allow Windows code
307 * to muck with that. I don't even know what Solaris uses %fs for
308 * (or if it even uses it at all).
309 *
310 * Even worse, in 32-bit protected mode, %fs is a selector that
311 * refers to an entry in either the GDT or the LDT. Ideally, we would
312 * like to be able to temporarily point it at another descriptor
313 * while Windows code executes, but to do that we need a separate
314 * descriptor entry of our own to play with.
315 *
316 * Therefore, we go to some trouble to learn the existing layout of
317 * the GDT and update it to include an extra entry that we can use.
318 * We need the following utility routines to help us do that. On
319 * FreeBSD, index #7 in the GDT happens to be unused, so we turn
320 * this into our own data segment descriptor. It would be better
321 * if we could use a private LDT entry, but there's no easy way to
322 * do that in SMP mode because of the way FreeBSD handles user LDTs.
323 *
324 * Once we have a custom descriptor, we have to thunk/unthunk whenever
325 * we cross between FreeBSD code and Windows code. The thunking is
326 * based on the premise that when executing instructions in the
327 * Windows binary itself, we won't go to sleep. This is because in
328 * order to yield the CPU, the code has to call back out to a FreeBSD
329 * routine first, and when that happens we can unthunk in order to
330 * restore FreeBSD context. What we're desperately trying to avoid is
331 * being involuntarily pre-empted with the %fs register still pointing
332 * to our fake TIB: if FreeBSD code runs with %fs pointing at our
333 * Windows TIB instead of pcpu, we'll panic the kernel. Fortunately,
334 * the only way involuntary preemption can occur is if an interrupt
335 * fires, and the trap handler saves/restores %fs for us.
336 *
337 * The thunking routines themselves, ctxsw_utow() (Context SWitch UNIX
338 * to Windows) and ctxsw_wtou() (Context SWitch Windows to UNIX), are
339 * external to this module. This is done simply because it's easier
340 * to manipulate data structures in C rather than assembly.
341 */
342
/*
 * x86_getldt: store the current GDT register contents at the address
 * given by the first argument and the current LDT selector at the
 * address given by the second argument.  Always returns 0.
 */
ENTRY(x86_getldt)
	movl	4(%esp),%ecx		# where to store the GDT register
	movl	8(%esp),%edx		# where to store the LDT selector
	sgdtl	(%ecx)
	sldt	(%edx)
	xorl	%eax,%eax		# return 0
	ret
350
/*
 * x86_setldt: load the GDT register from the descriptor the first
 * argument points to, then load the LDT register with the selector
 * passed as the second argument.  Always returns 0.
 */
ENTRY(x86_setldt)
	movl	4(%esp),%ecx		# pointer to the new GDT descriptor
	lgdt	(%ecx)
	jmp	1f			# branch right after the GDT reload
	nop
1:
	lldt	8(%esp)			# low 16 bits of the second arg
	xorl	%eax,%eax		# return 0
	ret
361
/*
 * x86_getfs: return the current %fs selector.
 *
 * A 16-bit move from a segment register leaves the upper half of
 * %eax untouched, so %eax is cleared first.  The selector is then
 * returned zero-extended and is well defined even for a caller that
 * reads the full 32-bit register.
 */
ENTRY(x86_getfs)
	xorl	%eax,%eax		# clear upper 16 bits of result
	movw	%fs,%ax
	ret
365
/*
 * x86_setfs: load %fs with the selector passed as the only argument.
 * A segment register load reads just the low 16 bits of the stack
 * slot.
 */
ENTRY(x86_setfs)
	movw	4(%esp),%fs
	ret
369
/*
 * x86_gettid: return the 32-bit word at offset 12 of the segment
 * that %fs currently selects.
 * NOTE(review): presumably this is a field of the fake Windows
 * thread block installed by the thunk code; confirm the layout
 * against the C structure definition.
 */
ENTRY(x86_gettid)
	movl	%fs:12,%eax
	ret
Cache object: a175b08dd1831f025743eff2eadff559
|