1 /*-
2 * Copyright (c) 2005
3 * Bill Paul <wpaul@windriver.com>. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by Bill Paul.
16 * 4. Neither the name of the author nor the names of any co-contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30 * THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * $FreeBSD: releng/8.0/sys/compat/ndis/winx32_wrap.S 187948 2009-01-31 11:37:21Z obrien $
33 */
34
35 /* The 'ret' macro doesn't work in this file if GPROF is enabled. */
36 #ifdef GPROF
37 #undef GPROF
38 #endif
39
40 #include <machine/asmacros.h>
41
42 /*
43 * This file contains assembly language wrappers for the different
44 * calling conventions supported by Windows on the i386 architecture.
45 * In FreeBSD, the whole OS typically uses the same C calling convention
46 * everywhere, namely _cdecl. Windows, on the other hand, uses several
47 * different C calling conventions depending on the circumstances:
48 *
49 * _stdcall: Used for most ordinary Windows APIs. With _stdcall,
50 * arguments are passed on the stack, and the callee unwinds the stack
51 * before returning control to the caller. Not suitable for variadic
52 * functions.
53 *
54 * _fastcall: Used for some APIs that may be invoked frequently and
55 * where speed is a critical factor (e.g. KeAcquireSpinLock() and
56 * KeReleaseSpinLock()). Similar to _stdcall, except the first 2 32-bit
57 * or smaller arguments are passed in the %ecx and %edx registers
58 * instead of on the stack. Not suitable for variadic functions.
59 *
60 * _cdecl: Used for standard C library routines and for variadic
61 * functions.
62 *
63 * _regparm(3): Used for certain assembly routines. The first three
64 * arguments are passed in %eax, %edx and %ecx, in that order.
65 *
66 * Furthermore, there is an additional wrinkle that's not obvious
67 * with all code: Microsoft supports the use of exceptions in C
68 * (__try/__except) both in user _and_ kernel mode. Sadly, Windows
69 * structured exception handling uses machine-specific features
70 * that conflict rather badly with FreeBSD. (See utility routines
71 * at the end of this module for more details.)
72 *
73 * We want to support these calling conventions in as portable a manner
74 * as possible. The trick is doing it not only with different versions
75 * of GNU C, but with compilers other than GNU C (e.g. the Solaris
76 * SunOne C compiler). The only sure fire method is with assembly
77 * language trampoline code which both fixes up the argument passing,
78 * stack unwinding and exception/thread context all at once.
79 *
80 * You'll notice that we call the thunk/unthunk routines in the
81 * *_wrap() functions in an awkward way. Rather than branching
82 * directly to the address, we load the address into a register
83 * first as a literal value, then we branch to it. This is done
84 * to ensure that the assembler doesn't translate the branch into
85 * a relative branch. We use the *_wrap() routines here as templates
86 * and create the actual trampolines at run time, at which point
87 * we only know the absolute addresses of the thunk and unthunk
88 * routines. So we need to make sure the templates have enough
89 * room in them for the full address.
90 *
91 * Also note that when we call a thunk/unthunk routine after
92 * invoking a wrapped function, we have to make sure to preserve
93 * the value returned from that function. Most functions return
94 * a 32-bit value in %eax, however some routines return 64-bit
95 * values, which span both %eax and %edx. Consequently, we have
96 * to preserve both registers.
97 */
98
99 /*
100 * Handle _stdcall going from Windows to UNIX.
101 * This is frustrating, because to do it right you have to
102 * know how many arguments the called function takes, and there's
103 * no way to figure this out on the fly: you just have to be told
104 * ahead of time. We assume there will be 16 arguments. I don't
105 * think there are any Windows APIs that require this many.
106 */
107
108 .globl x86_stdcall_wrap_call
109 .globl x86_stdcall_wrap_arg
110 .globl x86_stdcall_wrap_end
111 /* Template trampoline: Windows _stdcall caller -> FreeBSD routine; do not change its byte layout. */
112 ENTRY(x86_stdcall_wrap)
113 push %esi # save %esi; used as the copy source below
114 push %edi # save %edi; used as the copy destination below
115 sub $64,%esp # reserve 16 dwords for the outgoing argument copy
116 mov %esp,%esi
117 add $64+8+4,%esi # skip copy area + 2 saved regs + return addr: first incoming arg
118 mov %esp,%edi # copy destination is the area just reserved
119 mov $16,%ecx # handle up to 16 args
120 rep
121 movsl
122
123 movl $ctxsw_wtou, %eax # absolute address via register, never a relative branch
124 call *%eax # unthunk
125
126 x86_stdcall_wrap_call:
127 movl $0,%eax # placeholder; presumably patched with the wrapped routine's address -- TODO confirm
128 call *%eax # jump to routine
129 push %eax # preserve return val
130 push %edx # upper half of a possible 64-bit return val
131
132 movl $ctxsw_utow, %eax
133 call *%eax # thunk
134
135 pop %edx
136 pop %eax # restore return val
137
138 add $64,%esp # clean the stack
139 pop %edi
140 pop %esi
141 x86_stdcall_wrap_arg:
142 ret $0xFF # placeholder pop count; presumably patched with the real argument byte count -- TODO confirm
143 x86_stdcall_wrap_end:
144
145
146 /*
147 * Handle _stdcall going from UNIX to Windows. This routine
148 * expects to be passed the function to be called, number of
149 * args and the arguments for the Windows function on the stack.
150 */
151
152 ENTRY(x86_stdcall_call)
153 pushl %esi # callee-saved: becomes the copy source
154 pushl %edi # callee-saved: becomes the copy destination
155
156 movl 16(%esp),%ecx # %ecx = arg count, used directly as the copy count
157 leal 20(%esp),%esi # %esi -> first argument to be forwarded
158 leal 0(,%ecx,4),%eax # %eax = arg count * 4, the size of the copy in bytes
159 subl %eax,%esp # open up room for the copy below the saved registers
160 movl %esp,%edi # %edi -> start of the freshly reserved area
161 /* The copy advances %edi back up to the saved-%edi slot, so afterwards */
162 /* 12(%edi) is the function pointer and %edi marks the stack to restore. */
163 rep
164 movsl
165
166 call ctxsw_utow # thunk: switch to Windows context
167
168 call *12(%edi) # enter the _stdcall routine; it unwinds its own args
169 pushl %eax # keep the return value (low half)
170 pushl %edx # keep the return value (high half, if 64-bit)
171
172 call ctxsw_wtou # unthunk: switch back to FreeBSD context
173
174 popl %edx
175 popl %eax # return value is live again
176 movl %edi,%esp # put the stack back at the saved registers
177 popl %edi # hand back the caller's %edi
178 popl %esi # and %esi
179 ret
180
181 /*
182 * Fastcall support. Similar to _stdcall, except the first
183 * two arguments are passed in %ecx and %edx. It happens we
184 * only support a small number of _fastcall APIs, none of them
185 * take more than three arguments. So to keep the code size
186 * and complexity down, we only handle 3 arguments here.
187 */
188
189 /* Call _fastcall function going from Windows to UNIX. */
190
191 .globl x86_fastcall_wrap_call
192 .globl x86_fastcall_wrap_arg
193 .globl x86_fastcall_wrap_end
194 /* Template trampoline: Windows _fastcall caller -> FreeBSD routine; do not change its byte layout. */
195 ENTRY(x86_fastcall_wrap)
196 mov 4(%esp),%eax # fetch the one stack-passed argument (the third)
197 push %eax # re-push the args in reverse order: third...
198 push %edx # ...second (arrived in %edx)...
199 push %ecx # ...first (arrived in %ecx), now on top of the stack
200
201 movl $ctxsw_wtou, %eax # absolute address via register, never a relative branch
202 call *%eax # unthunk
203
204 x86_fastcall_wrap_call:
205 mov $0,%eax # placeholder; presumably patched with the wrapped routine's address -- TODO confirm
206 call *%eax # branch to fastcall routine
207 push %eax # preserve return val
208 push %edx # upper half of a possible 64-bit return val
209
210 movl $ctxsw_utow, %eax
211 call *%eax # thunk
212
213 pop %edx
214 pop %eax # restore return val
215 add $12,%esp # clean the stack
216 x86_fastcall_wrap_arg:
217 ret $0xFF # placeholder pop count; presumably patched with the real stack-argument byte count -- TODO confirm
218 x86_fastcall_wrap_end:
219
220 /*
221 * Call _fastcall function going from UNIX to Windows: takes the function
222 * pointer and three arguments on the stack. Not normally used since NDIS
223 * miniport drivers only have _stdcall entry points, but provided to round
224 * out the API, and for testing. %edi anchors the stack so it is restored
225 * correctly whether or not the callee popped the stack-passed argument.
226 */
227 ENTRY(x86_fastcall_call)
228 pushl %edi # callee-saved in both conventions; our stack anchor
229 movl %esp,%edi # 4(%edi)=return addr, 8=function, 12/16/20=args 1-3
230 pushl 20(%edi) # third arg travels on the stack
231 call ctxsw_utow # thunk
232
233 movl 12(%edi),%ecx # first arg goes in %ecx
234 movl 16(%edi),%edx # second arg goes in %edx
235 call *8(%edi) # branch to fastcall routine
236 pushl %eax # preserve return val
237 pushl %edx
238
239 call ctxsw_wtou # unthunk
240 popl %edx
241 popl %eax # restore return val
242 movl %edi,%esp # restore stack; a fixed pop breaks if the callee did 'ret $4'
243 popl %edi # restore %edi
244 ret
245
246 /*
247 * Call regparm(3) function going from Windows to UNIX. Arguments
248 * are passed in %eax, %edx and %ecx. Note that while additional
249 * arguments are passed on the stack, we never bother with them,
250 * since the only regparm(3) routines we need to wrap never take
251 * more than 3 arguments.
252 */
253
254 .globl x86_regparm_wrap_call
255 .globl x86_regparm_wrap_end
256 /* Template trampoline: Windows regparm(3) caller -> FreeBSD routine; do not change its byte layout. */
257 ENTRY(x86_regparm_wrap)
258 push %ecx # third arg
259 push %edx # second arg
260 push %eax # first arg, now on top of the stack
261
262 movl $ctxsw_wtou, %eax # absolute address via register, never a relative branch
263 call *%eax # unthunk
264
265 x86_regparm_wrap_call:
266 movl $0,%eax # placeholder; presumably patched with the wrapped routine's address -- TODO confirm
267 call *%eax # jump to routine
268 push %eax # preserve return val
269 push %edx # preserve return val
270
271 movl $ctxsw_utow, %eax
272 call *%eax # thunk
273
274 pop %edx # restore return val
275 pop %eax # restore return val
276 add $12,%esp # restore stack
277 ret # plain ret: the three pushed copies were dropped just above
278 x86_regparm_wrap_end:
279
280 /*
281 * Call regparm(3) function going from UNIX to Windows.
282 * This routine isn't normally used since NDIS miniport drivers
283 * only have _stdcall entry points, but it's provided anyway
284 * to round out the API, and for testing purposes.
285 */
286
287 ENTRY(x86_regparm_call)
288 call ctxsw_utow # thunk: switch to Windows context
289
290 movl 16(%esp),%ecx # third argument
291 movl 12(%esp),%edx # second argument
292 movl 8(%esp),%eax # first argument
293 call *4(%esp) # branch to regparm(3) routine
294 pushl %eax # keep the return value (low half)
295 pushl %edx # keep the return value (high half, if 64-bit)
296
297 call ctxsw_wtou # unthunk: switch back to FreeBSD context
298
299 popl %edx # return value is live again
300 popl %eax
301 ret
302
303 /*
304 * Ugly hack alert:
305 *
306 * On Win32/i386, using __try/__except results in code that tries to
307 * manipulate what's supposed to be the Windows Thread Environment
308 * Block (TEB), which one accesses via the %fs register. In particular,
309 * %fs:0 (the first DWORD in the TEB) points to the exception
310 * registration list. Unfortunately, FreeBSD uses %fs for the
311 * per-cpu data structure (pcpu), and we can't allow Windows code
312 * to muck with that. I don't even know what Solaris uses %fs for
313 * (or if it even uses it at all).
314 *
315 * Even worse, in 32-bit protected mode, %fs is a selector that
316 * refers to an entry in either the GDT or the LDT. Ideally, we would
317 * like to be able to temporarily point it at another descriptor
318 * while Windows code executes, but to do that we need a separate
319 * descriptor entry of our own to play with.
320 *
321 * Therefore, we go to some trouble to learn the existing layout of
322 * the GDT and update it to include an extra entry that we can use.
323 * We need the following utility routines to help us do that. On
324 * FreeBSD, index #7 in the GDT happens to be unused, so we turn
325 * this into our own data segment descriptor. It would be better
326 * if we could use a private LDT entry, but there's no easy way to
327 * do that in SMP mode because of the way FreeBSD handles user LDTs.
328 *
329 * Once we have a custom descriptor, we have to thunk/unthunk whenever
330 * we cross between FreeBSD code and Windows code. The thunking is
331 * based on the premise that when executing instructions in the
332 * Windows binary itself, we won't go to sleep. This is because in
333 * order to yield the CPU, the code has to call back out to a FreeBSD
334 * routine first, and when that happens we can unthunk in order to
335 * restore FreeBSD context. What we're desperately trying to avoid is
336 * being involuntarily pre-empted with the %fs register still pointing
337 * to our fake TIB: if FreeBSD code runs with %fs pointing at our
338 * Windows TIB instead of pcpu, we'll panic the kernel. Fortunately,
339 * the only way involuntary preemption can occur is if an interrupt
340 * fires, and the trap handler saves/restores %fs for us.
341 *
342 * The thunking routines themselves, ctxsw_utow() (Context SWitch UNIX
343 * to Windows) and ctxsw_wtou() (Context SWitch Windows to UNIX), are
344 * external to this module. This is done simply because it's easier
345 * to manipulate data structures in C rather than assembly.
346 */
347 /* x86_getldt: store the GDT register and the LDT selector through two pointer args; returns 0. */
348 ENTRY(x86_getldt)
349 movl 4(%esp),%eax # first arg: where to store the GDT register
350 sgdtl (%eax)
351 movl 8(%esp),%eax # second arg: where to store the LDT selector
352 sldt (%eax)
353 xor %eax,%eax # always return 0
354 ret
355 /* x86_setldt: load the GDT register from the first arg, then the LDT selector from the second; returns 0. */
356 ENTRY(x86_setldt)
357 movl 4(%esp),%eax # first arg: address of the GDT descriptor to load
358 lgdt (%eax)
359 jmp 1f # branch right after the GDT load; presumably to flush prefetched instructions -- TODO confirm
360 nop
361 1:
362 movl 8(%esp),%eax # second arg: LDT selector; only the low 16 bits are used
363 lldt %ax
364 xor %eax,%eax # always return 0
365 ret
366 /* x86_getfs: return the current %fs selector in %ax. */
367 ENTRY(x86_getfs)
368 mov %fs,%ax # 16-bit write: the upper half of %eax is left as it was
369 ret
370 /* x86_setfs: load %fs with the selector passed as the first stack argument. */
371 ENTRY(x86_setfs)
372 mov 4(%esp),%fs # only the low 16 bits of the argument are loaded
373 ret
374 /* x86_gettid: return the 32-bit value stored at offset 12 of the %fs segment. */
375 ENTRY(x86_gettid)
376 mov %fs:12,%eax # presumably a thread-id slot in the fake TIB -- TODO confirm against the TIB layout
377 ret
378 /* x86_critical_enter: disable maskable interrupts on the current CPU. */
379 ENTRY(x86_critical_enter)
380 cli # the previous interrupt-enable state is not saved
381 ret
382 /* x86_critical_exit: re-enable maskable interrupts on the current CPU. */
383 ENTRY(x86_critical_exit)
384 sti # unconditional: enables interrupts regardless of the state before x86_critical_enter
385 ret
Cache object: 5111ca4c82f1aa3f4c9b1f4cffb6e6b6
|