1 /*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * $FreeBSD: src/sys/i386/i386/support.s,v 1.41.2.8 1999/09/05 08:11:16 peter Exp $
34 */
35
36 #include "npx.h"
37 #include "opt_cpu.h"
38
39 #include <machine/asmacros.h>
40 #include <machine/cputypes.h>
41 #include <machine/pmap.h>
42 #include <machine/specialreg.h>
43
44 #include "assym.s"
45
46 #define KDSEL 0x10 /* kernel data selector */
47 #define IDXSHIFT 10
48
	.data
/*
 * Indirect-dispatch vectors for the bcopy/bzero/copyin/copyout family.
 * Each starts out pointing at the generic (safe-on-any-x86) routine;
 * CPU-identification code may later re-point them at the optimized
 * i486/i586 versions defined below.
 */
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
#if defined(I586_CPU) && NNPX > 0
/*
 * Byte lock guarding kernel use of the FPU.  Unlocked value is 0xfe:
 * `sarb $1' shifts it to 0xff with CF clear (lock acquired); a second
 * `sarb $1' on 0xff leaves 0xff with CF set (already held).  Released
 * by storing 0xfe again.
 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif
70
	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 *
 * Zero `len' bytes at `buf' with `rep stos'; works on every x86 CPU.
 */

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi		/* buf (arg offsets shifted by the push) */
	movl	12(%esp),%ecx		/* len */
	xorl	%eax,%eax		/* store zeroes */
	shrl	$2,%ecx			/* longword count */
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx			/* trailing 0-3 bytes */
	rep
	stosb
	popl	%edi
	ret
93
#if defined(I486_CPU)
/*
 * void i486_bzero(void *buf, u_int len)
 *
 * 486-tuned bzero: unrolled stores in decreasing chunk sizes
 * (64/16/4 bytes), then a jump table for the final 0-3 bytes.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */
	xorl	%eax,%eax		/* zero source */
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b			/* bytes remain; re-test chunk size */
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0			/* indexed by remaining count (0-3) */
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)		/* 3 bytes: word + byte */
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif
195
#if defined(I586_CPU) && NNPX > 0
/*
 * void i586_bzero(void *buf, u_int len)
 *
 * Pentium-tuned bzero: uses 64-bit FPU stores (`fstl' of 0.0) for
 * large buffers, `rep stos' for small ones.  Must take and release
 * kernel_fpu_lock, and preserve any FPU state/CR0_TS it disturbs.
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */

/*
 * The FPU register method is twice as fast as the integer register
 * method unless the target is in the L1 cache and we pre-allocate a
 * cache line for it (then the integer register method is 4-5 times
 * faster).  However, we never pre-allocate cache lines, since that
 * would make the integer method 25% or more slower for the common
 * case when the target isn't in either the L1 cache or the L2 cache.
 * Thus we normally use the FPU register method unless the overhead
 * would be too large.
 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

/*
 * The FPU registers may belong to an application or to fastmove()
 * or to another invocation of bcopy() or ourself in a higher level
 * interrupt or trap handler.  Preserving the registers is
 * complicated since we avoid it if possible at all levels.  We
 * want to localize the complications even when that increases them.
 * Here the extra work involves preserving CR0_TS in TS.
 * `npxproc != NULL' is supposed to be the condition that all the
 * FPU resources belong to an application, but npxproc and CR0_TS
 * aren't set atomically enough for this condition to work in
 * interrupt handlers.
 *
 * Case 1: FPU registers belong to the application: we must preserve
 * the registers if we use them, so we only use the FPU register
 * method if the target size is large enough to amortize the extra
 * overhead for preserving them.  CR0_TS must be preserved although
 * it is very likely to end up as set.
 *
 * Case 2: FPU registers belong to fastmove(): fastmove() currently
 * makes the registers look like they belong to an application so
 * that cpu_switch() and savectx() don't have to know about it, so
 * this case reduces to case 1.
 *
 * Case 3: FPU registers belong to the kernel: don't use the FPU
 * register method.  This case is unlikely, and supporting it would
 * be more complicated and might take too much stack.
 *
 * Case 4: FPU registers don't belong to anyone: the FPU registers
 * don't need to be preserved, so we always use the FPU register
 * method.  CR0_TS must be preserved although it is very likely to
 * always end up as clear.
 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* CF set -> lock already held */
	jc	intreg_i586_bzero
	smsw	%ax			/* remember CR0 (for TS bit) in %ax */
	clts
	subl	$108,%esp		/* room for an FPU save area */
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

/*
 * Align to an 8 byte boundary (misalignment in the main loop would
 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
 * already aligned) by always zeroing 8 bytes and using the part up
 * to the _next_ alignment position.
 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

/*
 * Similarly align `len' to a multiple of 8.
 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

/*
 * This wouldn't be any faster if it were unrolled, since the loop
 * control instructions are much faster than the fstl and/or done
 * in parallel with it so their overhead is insignificant.
 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc		/* did we save app FPU state above? */
	je	i586_bz3
	frstor	0(%esp)			/* yes: restore it */
	addl	$108,%esp
	lmsw	%ax			/* restore CR0 (TS) */
	movb	$0xfe,kernel_fpu_lock	/* release lock */
	ret

i586_bz3:
	fstpl	%st(0)			/* pop the 0.0 we loaded */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
/*
 * `rep stos' seems to be the best method in practice for small
 * counts.  Fancy methods usually take too long to start up due
 * to cache and BTB misses.
 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len (8(%esp) + the push) */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && NNPX > 0 */
337
/*
 * fillw(pat, base, cnt)
 *
 * Store `cnt' 16-bit copies of `pat' at `base' (e.g. for text-mode
 * video memory, where each cell is a char/attribute word).
 */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax		/* pattern */
	movl	12(%esp),%edi		/* base */
	movl	16(%esp),%ecx		/* count, in words */
	cld
	rep
	stosw
	popl	%edi
	ret
349
/*
 * bcopyb(src, dst, cnt)
 *
 * Byte-at-a-time copy that handles overlapping regions.  The unsigned
 * compare of (dst - src) against cnt detects the only dangerous case
 * (src < dst with overlap) and copies backwards for it.
 */
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld					/* leave direction flag clear */
	ret
381
/*
 * bcopy() and ovbcopy() dispatch through runtime-patchable vectors
 * so the best implementation for the detected CPU is used.
 */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector
389
390 /*
391 * generic_bcopy(src, dst, cnt)
392 * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
393 */
394 ENTRY(generic_bcopy)
395 pushl %esi
396 pushl %edi
397 movl 12(%esp),%esi
398 movl 16(%esp),%edi
399 movl 20(%esp),%ecx
400
401 movl %edi,%eax
402 subl %esi,%eax
403 cmpl %ecx,%eax /* overlapping && src < dst? */
404 jb 1f
405
406 shrl $2,%ecx /* copy by 32-bit words */
407 cld /* nope, copy forwards */
408 rep
409 movsl
410 movl 20(%esp),%ecx
411 andl $3,%ecx /* any bytes left? */
412 rep
413 movsb
414 popl %edi
415 popl %esi
416 ret
417
418 ALIGN_TEXT
419 1:
420 addl %ecx,%edi /* copy backwards */
421 addl %ecx,%esi
422 decl %edi
423 decl %esi
424 andl $3,%ecx /* any fractional bytes? */
425 std
426 rep
427 movsb
428 movl 20(%esp),%ecx /* copy remainder by 32-bit words */
429 shrl $2,%ecx
430 subl $3,%esi
431 subl $3,%edi
432 rep
433 movsl
434 popl %edi
435 popl %esi
436 cld
437 ret
438
#if defined(I586_CPU) && NNPX > 0
/*
 * i586_bcopy(src, dst, cnt)
 *
 * Pentium-tuned bcopy: for large (>= 1024 byte) non-overlapping
 * copies, uses 64-bit fildq/fistpq moves through the FPU, with a
 * read-ahead pass to prime the cache.  Takes kernel_fpu_lock and
 * preserves application FPU state and CR0_TS (kept in %dx).
 * Small or overlapping copies fall back to the rep-movs paths.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx			/* FPU method only pays off when large */
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock		/* CF set -> lock already held */
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx				/* remember CR0 (TS bit) in %dx */
	clts
	subl	$108,%esp			/* save app FPU state on the stack */
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx				/* total count; loop works in chunks */
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx	/* clamp chunk to ~half the D-cache */
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)			/* 0(%esp) = count left after chunk */
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	movl	0(%esi),%eax			/* touch one line per 32 bytes to */
	movl	32(%esi),%eax			/* pull the source into the cache */
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)				/* 64 bytes per iteration, 8 bytes */
	fildq	8(%esi)				/* at a time through the FPU stack */
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax				/* count left beyond this chunk */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b				/* another cache-sized chunk */

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)				/* restore app FPU state */
	addl	$108,%esp
i586_bc2:
	lmsw	%dx				/* restore CR0 (TS) */
	movb	$0xfe,kernel_fpu_lock		/* release lock */

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* overlapping: copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && NNPX > 0 */
576
577 /*
578 * Note: memcpy does not support overlapping copies
579 */
580 ENTRY(memcpy)
581 pushl %edi
582 pushl %esi
583 movl 12(%esp),%edi
584 movl 16(%esp),%esi
585 movl 20(%esp),%ecx
586 movl %edi,%eax
587 shrl $2,%ecx /* copy by 32-bit words */
588 cld /* nope, copy forwards */
589 rep
590 movsl
591 movl 20(%esp),%ecx
592 andl $3,%ecx /* any bytes left? */
593 rep
594 movsb
595 popl %esi
596 popl %edi
597 ret
598
599
600 /*****************************************************************************/
601 /* copyout and fubyte family */
602 /*****************************************************************************/
603 /*
604 * Access user memory from inside the kernel. These routines and possibly
605 * the math- and DOS emulators should be the only places that do this.
606 *
607 * We have to access the memory with user's permissions, so use a segment
608 * selector with RPL 3. For writes to user space we have to additionally
609 * check the PTE for write permission, because the 386 does not check
610 * write permissions when we are executing with EPL 0. The 486 does check
611 * this if the WP bit is set in CR0, so we can use a simpler version here.
612 *
613 * These routines set curpcb->onfault for the time they execute. When a
614 * protection violation occurs inside the functions, the trap handler
615 * returns to *curpcb->onfault instead of the function.
616 */
617
/*
 * copyout(from_kernel, to_user, len)
 * Dispatch through the runtime-patchable vector to the best
 * implementation for the detected CPU.  Returns 0 or EFAULT.
 */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector
622
/*
 * generic_copyout(from_kernel, to_user, len)
 *
 * Copy kernel memory to user space.  Sets curpcb->pcb_onfault so that
 * a page fault during the copy resumes at copyout_fault and returns
 * EFAULT.  On a plain 386 (no CR0_WP) the hardware will not fault on
 * kernel writes to read-only user pages, so each destination PTE is
 * checked (and trapwrite() called to simulate the fault) first.
 * The slow_copyout/done_copyout/copyout_fault labels are also used by
 * i586_copyout, which sets up an identical stack frame.
 */
ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi		/* from (kernel) */
	movl	20(%esp),%edi		/* to (user) */
	movl	24(%esp),%ebx		/* len */
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

/*
 * Check explicitly for non-user addresses.  If 486 write protection
 * is being used, this check is essential because we are in kernel
 * mode so the h/w does not provide any protection against writing
 * kernel addresses.
 */

/*
 * First, prevent address wrapping.
 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f			/* >386: hardware WP does the checking */
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl		/* longword-align the PTE offset */

1:
	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,_PTmap(%eax)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx		/* back to a virtual address */
	pushl	%edx
	call	_trapwrite		/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax		/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b			/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyout:				/* i586_copyout joins here for small copies */
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl			/* trailing 0-3 bytes */
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* clear fault handler */
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
748
#if defined(I586_CPU) && NNPX > 0
/*
 * i586_copyout(from_kernel, to_user, len)
 *
 * Pentium copyout: validates the destination range, then uses
 * fastmove() for copies >= 1024 bytes and falls through into
 * generic_copyout's slow_copyout path otherwise.  The stack frame
 * (esi/edi/ebx pushed) matches generic_copyout so its shared
 * labels and the fastmove fault unwinding work.
 */
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp			/* pop the len argument */
	jmp	done_copyout
#endif /* I586_CPU && NNPX > 0 */
802
/*
 * copyin(from_user, to_kernel, len)
 * Dispatch through the runtime-patchable vector.  Returns 0 or EFAULT.
 */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector
807
/*
 * generic_copyin(from_user, to_kernel, len)
 *
 * Copy user memory into the kernel.  A fault during the copy resumes
 * at copyin_fault (via curpcb->pcb_onfault) and returns EFAULT.  No
 * PTE walk is needed here: reads of kernel addresses are caught by
 * the explicit VM_MAXUSER_ADDRESS range check.  slow_copyin /
 * done_copyin / copyin_fault are also used by i586_copyin.
 */
ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx			/* end address; jc catches wrap */
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyin:				/* i586_copyin joins here for small copies */
#endif
	movb	%cl,%al			/* save low bits of len */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
done_copyin:
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* clear fault handler */
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
859
#if defined(I586_CPU) && NNPX > 0
/*
 * i586_copyin(from_user, to_kernel, len)
 *
 * Pentium copyin: validates the source range, then uses fastmove()
 * for copies >= 1024 bytes, falling through into generic_copyin's
 * slow_copyin path otherwise.  Pushes %ebx (unused here) so that
 * fastmove's fault unwinding sees the same frame layout as the
 * copyout path.
 */
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp			/* pop len and the dummy %ebx */
	jmp	done_copyin
#endif /* I586_CPU && NNPX > 0 */
894
#if defined(I586_CPU) && NNPX > 0
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage

   Pentium bulk-copy engine used by i586_copyin/i586_copyout.  Moves
   64 bytes at a time through the FPU (fildq/fistpq) when src, dst
   are 8-byte aligned and len > 63; otherwise just does a rep-movs
   tail copy.  Any application FPU state is parked in
   curpcb->pcb_savefpu (with a temporary copy on our stack frame so
   cpu_switch() can still save/restore it), and CR0_TS plus npxproc
   are restored afterwards.  Faults unwind through fastmove_fault /
   fastmove_tail_fault, which pop the *caller's* frame (the
   i586_copyin/copyout pushes) and return EFAULT.
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save area + 3 spill slots */

	movl	8(%ebp),%ecx		/* len */
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*    npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)		/* spill len/src/dst around the copy */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi		/* tmp area is at the frame bottom */
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)		/* -12(%ebp) = len beyond this chunk */
	cmpl	$1792,%ecx		/* clamp chunk (cache-friendly size) */
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	movl	0(%esi),%eax		/* touch one line per 32 bytes to */
	movl	32(%esi),%eax		/* pull the source into the cache */
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)			/* 64 bytes per iteration, 8 bytes */
	fildq	8(%esi)			/* at a time through the FPU stack */
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax		/* add back the len beyond the chunk */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b			/* another chunk */

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al			/* save low bits of remaining len */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	movl	_curpcb,%edi		/* put the saved FPU state back */
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax			/* re-set CR0_TS */
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	movl	%ebp,%esp		/* drop our frame... */
	popl	%ebp
	addl	$8,%esp			/* ...the return addr + pushed len... */
	popl	%ebx			/* ...and the caller's saved regs */
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && NNPX > 0 */
1082
1083 /*
1084 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
1085 */
1086 ENTRY(fuword)
1087 movl _curpcb,%ecx
1088 movl $fusufault,PCB_ONFAULT(%ecx)
1089 movl 4(%esp),%edx /* from */
1090
1091 cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */
1092 ja fusufault
1093
1094 movl (%edx),%eax
1095 movl $0,PCB_ONFAULT(%ecx)
1096 ret
1097
1098 /*
1099 * These two routines are called from the profiling code, potentially
1100 * at interrupt time. If they fail, that's okay, good things will
1101 * happen later. Fail all the time for now - until the trap code is
1102 * able to deal with this.
1103 */
1104 ALTENTRY(suswintr)
1105 ENTRY(fuswintr)
1106 movl $-1,%eax
1107 ret
1108
/*
 * fusword: fetch a 16-bit word (zero-extended) from user memory;
 * returns -1 via fusufault on error.
 */
ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1120
/*
 * fubyte: fetch a byte (zero-extended) from user memory.
 * fusufault: shared fault/error exit for the fu*/su* family —
 * clears pcb_onfault and returns -1.
 */
ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)	/* clear fault handler */
	decl	%eax			/* return -1 */
	ret
1140
1141 /*
1142 * su{byte,sword,word}: write a byte (word, longword) to user memory
1143 */
1144 ENTRY(suword)
1145 movl _curpcb,%ecx
1146 movl $fusufault,PCB_ONFAULT(%ecx)
1147 movl 4(%esp),%edx
1148
1149 #if defined(I386_CPU)
1150
1151 #if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
1152 cmpl $CPUCLASS_386,_cpu_class
1153 jne 2f /* we only have to set the right segment selector */
1154 #endif /* I486_CPU || I586_CPU || I686_CPU */
1155
1156 /* XXX - page boundary crossing is still not handled */
1157 movl %edx,%eax
1158 shrl $IDXSHIFT,%edx
1159 andb $0xfc,%dl
1160
1161 leal _PTmap(%edx),%ecx
1162 shrl $IDXSHIFT,%ecx
1163 andb $0xfc,%cl
1164 testb $PG_V,_PTmap(%ecx) /* PTE page must be valid */
1165 je 4f
1166 movb _PTmap(%edx),%dl
1167 andb $PG_V|PG_RW|PG_U,%dl /* page must be valid and user writable */
1168 cmpb $PG_V|PG_RW|PG_U,%dl
1169 je 1f
1170
1171 4:
1172 /* simulate a trap */
1173 pushl %eax
1174 call _trapwrite
1175 popl %edx /* remove junk parameter from stack */
1176 testl %eax,%eax
1177 jnz fusufault
1178 1:
1179 movl 4(%esp),%edx
1180 #endif
1181
1182 2:
1183 cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */
1184 ja fusufault
1185
1186 movl 8(%esp),%eax
1187 movl %eax,(%edx)
1188 xorl %eax,%eax
1189 movl _curpcb,%ecx
1190 movl %eax,PCB_ONFAULT(%ecx)
1191 ret
1192
/*
 * susword: store a 16-bit word to user memory.  Same structure as
 * suword, including the manual PTE check for the plain 386.
 */
ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax		/* save the target address */
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1241
/*
 * subyte/suibyte: store a byte to user memory (suibyte is the
 * instruction-space alias).  Same structure as suword, including
 * the manual PTE check for the plain 386.
 */
ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax		/* save the target address */
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1290
1291 /*
1292 * copyinstr(from, to, maxlen, int *lencopied)
1293 * copy a string from from to to, stop when a 0 character is reached.
1294 * return ENAMETOOLONG if string is longer than maxlen, and
1295 * EFAULT on protection violations. If lencopied is non-zero,
1296 * return the actual length in *lencopied.
1297 */
1298 ENTRY(copyinstr)
1299 pushl %esi
1300 pushl %edi
1301 movl _curpcb,%ecx
1302 movl $cpystrflt,PCB_ONFAULT(%ecx)
1303
1304 movl 12(%esp),%esi /* %esi = from */
1305 movl 16(%esp),%edi /* %edi = to */
1306 movl 20(%esp),%edx /* %edx = maxlen */
1307
1308 movl $VM_MAXUSER_ADDRESS,%eax
1309
1310 /* make sure 'from' is within bounds */
1311 subl %esi,%eax
1312 jbe cpystrflt
1313
1314 /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
1315 cmpl %edx,%eax
1316 jae 1f
1317 movl %eax,%edx
1318 movl %eax,20(%esp)
1319 1:
1320 incl %edx
1321 cld
1322
1323 2:
1324 decl %edx
1325 jz 3f
1326
1327 lodsb
1328 stosb
1329 orb %al,%al
1330 jnz 2b
1331
1332 /* Success -- 0 byte reached */
1333 decl %edx
1334 xorl %eax,%eax
1335 jmp cpystrflt_x
1336 3:
1337 /* edx is zero - return ENAMETOOLONG or EFAULT */
1338 cmpl $VM_MAXUSER_ADDRESS,%esi
1339 jae cpystrflt
1340 4:
1341 movl $ENAMETOOLONG,%eax
1342 jmp cpystrflt_x
1343
1344 cpystrflt:
1345 movl $EFAULT,%eax
1346
1347 cpystrflt_x:
1348 /* set *lencopied and return %eax */
1349 movl _curpcb,%ecx
1350 movl $0,PCB_ONFAULT(%ecx)
1351 movl 20(%esp),%ecx
1352 subl %edx,%ecx
1353 movl 24(%esp),%edx
1354 testl %edx,%edx
1355 jz 1f
1356 movl %ecx,(%edx)
1357 1:
1358 popl %edi
1359 popl %esi
1360 ret
1361
1362
1363 /*
1364 * copystr(from, to, maxlen, int *lencopied)
1365 */
1366 ENTRY(copystr)
1367 pushl %esi
1368 pushl %edi
1369
1370 movl 12(%esp),%esi /* %esi = from */
1371 movl 16(%esp),%edi /* %edi = to */
1372 movl 20(%esp),%edx /* %edx = maxlen */
1373 incl %edx
1374 cld
1375 1:
1376 decl %edx
1377 jz 4f
1378 lodsb
1379 stosb
1380 orb %al,%al
1381 jnz 1b
1382
1383 /* Success -- 0 byte reached */
1384 decl %edx
1385 xorl %eax,%eax
1386 jmp 6f
1387 4:
1388 /* edx is zero -- return ENAMETOOLONG */
1389 movl $ENAMETOOLONG,%eax
1390
1391 6:
1392 /* set *lencopied and return %eax */
1393 movl 20(%esp),%ecx
1394 subl %edx,%ecx
1395 movl 24(%esp),%edx
1396 testl %edx,%edx
1397 jz 7f
1398 movl %ecx,(%edx)
1399 7:
1400 popl %edi
1401 popl %esi
1402 ret
1403
/*
 * bcmp(b1, b2, len)
 *
 * Compare two byte strings; returns 0 if identical, non-zero (1)
 * otherwise.  Compares longword-wise first, then the 0-3 byte tail.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax		/* assume equal */

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax			/* differ: return 1 */
2:
	popl	%esi
	popl	%edi
	ret
1430
1431
1432 /*
1433 * Handling of special 386 registers and descriptor tables etc
1434 */
1435 /* void lgdt(struct region_descriptor *rdp); */
1436 ENTRY(lgdt)
1437 /* reload the descriptor table */
1438 movl 4(%esp),%eax
1439 lgdt (%eax)
1440
1441 /* flush the prefetch q */
1442 jmp 1f
1443 nop
1444 1:
1445 /* reload "stale" selectors */
1446 movl $KDSEL,%eax
1447 movl %ax,%ds
1448 movl %ax,%es
1449 movl %ax,%ss
1450
1451 /* reload code selector by turning return into intersegmental return */
1452 movl (%esp),%eax
1453 pushl %eax
1454 # movl $KCSEL,4(%esp)
1455 movl $8,4(%esp)
1456 lret
1457
1458 /*
1459 * void lidt(struct region_descriptor *rdp);
1460 */
1461 ENTRY(lidt)
1462 movl 4(%esp),%eax
1463 lidt (%eax)
1464 ret
1465
1466 /*
1467 * void lldt(u_short sel)
1468 */
1469 ENTRY(lldt)
1470 lldt 4(%esp)
1471 ret
1472
1473 /*
1474 * void ltr(u_short sel)
1475 */
1476 ENTRY(ltr)
1477 ltr 4(%esp)
1478 ret
1479
/*
 * ssdtosd(*ssdp, *sdp)
 *
 * Convert a software segment descriptor (flat fields) into the
 * scrambled bit layout of a hardware i386 segment descriptor,
 * storing the two result longwords at *sdp.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx		/* ssdp */
	movl	8(%ecx),%ebx		/* flags/limit-high word */
	shll	$16,%ebx
	movl	(%ecx),%edx		/* base address */
	roll	$16,%edx
	movb	%dh,%bl			/* scatter base 16-31 into place */
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax		/* limit */
	movw	%ax,%dx			/* limit 0-15 */
	andl	$0xf0000,%eax		/* limit 16-19 */
	orl	%eax,%ebx
	movl	12(%esp),%ecx		/* sdp */
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret
1500
/*
 * Control-register accessors.
 */
/* void load_cr0(u_int cr0) -- write CR0 */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* u_int rcr0(void) -- read CR0 */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* u_int rcr3(void) -- read CR3 (page directory base) */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3) -- write CR3; flushes non-global TLB entries */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret
1522
1523
1524 /*****************************************************************************/
1525 /* setjump, longjump */
1526 /*****************************************************************************/
1527
1528 ENTRY(setjmp)
1529 movl 4(%esp),%eax
1530 movl %ebx,(%eax) /* save ebx */
1531 movl %esp,4(%eax) /* save esp */
1532 movl %ebp,8(%eax) /* save ebp */
1533 movl %esi,12(%eax) /* save esi */
1534 movl %edi,16(%eax) /* save edi */
1535 movl (%esp),%edx /* get rta */
1536 movl %edx,20(%eax) /* save eip */
1537 xorl %eax,%eax /* return(0); */
1538 ret
1539
1540 ENTRY(longjmp)
1541 movl 4(%esp),%eax
1542 movl (%eax),%ebx /* restore ebx */
1543 movl 4(%eax),%esp /* restore esp */
1544 movl 8(%eax),%ebp /* restore ebp */
1545 movl 12(%eax),%esi /* restore esi */
1546 movl 16(%eax),%edi /* restore edi */
1547 movl 20(%eax),%edx /* get rta */
1548 movl %edx,(%esp) /* put in return frame */
1549 xorl %eax,%eax /* return(1); */
1550 incl %eax
1551 ret
1552
1553 /*
1554 * Here for doing BB-profiling (gcc -a).
1555 * We rely on the "bbset" instead, but need a dummy function.
1556 */
1557 NON_GPROF_ENTRY(__bb_init_func)
1558 movl 4(%esp),%eax
1559 movl $1,(%eax)
1560 .byte 0xc3 /* avoid macro for `ret' */
/* Cache object: cb58e94a00c6fa26377b8da13fac0eda */