/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_npx.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/intr_machdep.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>

#include "assym.s"

#define	IDXSHIFT	10

	.data
	.globl	bcopy_vector
bcopy_vector:
	.long	generic_bcopy
	.globl	bzero_vector
bzero_vector:
	.long	generic_bzero
	.globl	copyin_vector
copyin_vector:
	.long	generic_copyin
	.globl	copyout_vector
copyout_vector:
	.long	generic_copyout
#if defined(I586_CPU) && defined(DEV_NPX)
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif
	ALIGN_DATA
	.globl	intrcnt, eintrcnt
intrcnt:
	.space	INTRCNT_COUNT * 4
eintrcnt:

	.globl	intrnames, eintrnames
intrnames:
	.space	INTRCNT_COUNT * (MAXCOMLEN + 1)
eintrnames:

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

ENTRY(bzero)
	MEXITCOUNT
	jmp	*bzero_vector

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret
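
/*
 * For reference, generic_bzero above is roughly the following C (a sketch
 * of the word-then-byte strategy, not a drop-in replacement):
 *
 *	void
 *	generic_bzero(void *buf, u_int len)
 *	{
 *		u_int *wp = buf;
 *		u_char *bp;
 *		u_int n;
 *
 *		for (n = len >> 2; n != 0; n--)		(rep stosl)
 *			*wp++ = 0;
 *		bp = (u_char *)wp;
 *		for (n = len & 3; n != 0; n--)		(rep stosb)
 *			*bp++ = 0;
 *	}
 */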

#ifdef I486_CPU
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	*jtab(,%ecx,4)

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `fpcurthread != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but fpcurthread and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bz1

	/*
	 * XXX don't use the FPU for cases 1 and 2, since preemptive
	 * scheduling of ithreads broke these cases.  Note that we can
	 * no longer get here from an interrupt handler, since the
	 * context switch to the interrupt handler will have saved the
	 * FPU state.
	 */
	jmp	intreg_i586_bzero

	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx
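
	/*
	 * The branchless alignment above is easier to see in C.  A sketch
	 * (dst/len stand for %edx/%ecx; olddst is illustrative only):
	 *
	 *	*(double *)dst = 0.0;			(fstl 0(%edx))
	 *	olddst = dst;
	 *	dst = (dst + 8) & ~7;			(next 8-byte boundary)
	 *	len -= dst - olddst;			(skip bytes already zeroed)
	 *	*(double *)(dst + len - 8) = 0.0;	(fstl -8(%edx,%ecx))
	 *	len = (len - 1) & ~7;			(round len down, leaving
	 *						 the tail to the store above)
	 */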

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bz3

	/* XXX check that the condition for cases 1-2 stayed false. */
i586_bzero_oops:
	int	$3
	jmp	i586_bzero_oops

	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

i586_bz3:
	fstp	%st(0)
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && defined(DEV_NPX) */

ENTRY(sse2_pagezero)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	%ecx,%eax
	addl	$4096,%eax
	xor	%ebx,%ebx
1:
	movnti	%ebx,(%ecx)
	addl	$4,%ecx
	cmpl	%ecx,%eax
	jne	1b
	sfence
	popl	%ebx
	ret
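
/*
 * sse2_pagezero uses movnti, a non-temporal store, so the freshly zeroed
 * page does not displace useful lines from the caches; the sfence makes
 * the weakly-ordered stores globally visible before we return.  Roughly,
 * in C (nontemporal_store and store_fence are illustrative names, not
 * real kernel primitives):
 *
 *	void
 *	sse2_pagezero(void *page)
 *	{
 *		char *p;
 *
 *		for (p = page; p != (char *)page + 4096; p += 4)
 *			nontemporal_store(p, 0);	(movnti)
 *		store_fence();				(sfence)
 *	}
 */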

ENTRY(i686_pagezero)
	pushl	%edi
	pushl	%ebx

	movl	12(%esp), %edi
	movl	$1024, %ecx
	cld

	ALIGN_TEXT
1:
	xorl	%eax, %eax
	repe
	scasl
	jnz	2f

	popl	%ebx
	popl	%edi
	ret

	ALIGN_TEXT

2:
	incl	%ecx
	subl	$4, %edi

	movl	%ecx, %edx
	cmpl	$16, %ecx

	jge	3f

	movl	%edi, %ebx
	andl	$0x3f, %ebx
	shrl	%ebx
	shrl	%ebx
	movl	$16, %ecx
	subl	%ebx, %ecx

3:
	subl	%ecx, %edx
	rep
	stosl

	movl	%edx, %ecx
	testl	%edx, %edx
	jnz	1b

	popl	%ebx
	popl	%edi
	ret

/* fillw(pat, base, cnt) */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax
	movl	12(%esp),%edi
	movl	16(%esp),%ecx
	cld
	rep
	stosw
	popl	%edi
	ret
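
/*
 * fillw is the 16-bit analogue of the rep-stos loops above; a C sketch:
 *
 *	void
 *	fillw(int pat, void *base, size_t cnt)
 *	{
 *		u_short *p = base;
 *
 *		while (cnt-- != 0)			(rep stosw)
 *			*p++ = pat;
 *	}
 */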

ENTRY(bcopyb)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f
	cld				/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld
	ret

ENTRY(bcopy)
	MEXITCOUNT
	jmp	*bcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx			/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx		/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
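
/*
 * The forwards/backwards decision in bcopyb and generic_bcopy reduces to
 * a single unsigned compare: (dst - src) < len is true exactly for the
 * overlapping src < dst case, because the subtraction wraps around when
 * dst < src.  A sketch in C (copy_forwards/copy_backwards are
 * illustrative names for the rep movs sequences, not real functions):
 *
 *	void
 *	generic_bcopy(const void *src, void *dst, size_t len)
 *	{
 *		if ((uintptr_t)dst - (uintptr_t)src >= len)
 *			copy_forwards(src, dst, len);	(cld; rep movsl ...)
 *		else
 *			copy_backwards(src, dst, len);	(std; rep movsb ...)
 *	}
 */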

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock
	jc	small_i586_bcopy
	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bc1

	/* XXX turn off handling of cases 1-2, as above. */
	movb	$0xfe,kernel_fpu_lock
	jmp	small_i586_bcopy

	smsw	%dx
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bc2

	/* XXX check that the condition for cases 1-2 stayed false. */
i586_bcopy_oops:
	int	$3
	jmp	i586_bcopy_oops

	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx
	movb	$0xfe,kernel_fpu_lock

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && defined(DEV_NPX) */

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3. For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0.  The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */
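
/*
 * The onfault protocol used by all of these routines, sketched as C-like
 * pseudocode (copyout_fault is a label the trap handler resumes at, not
 * a function; the address check stands for the explicit tests below):
 *
 *	int
 *	generic_copyout(const void *kaddr, void *uaddr, size_t len)
 *	{
 *		curpcb->pcb_onfault = copyout_fault;
 *		if (uaddr + len wraps or exceeds VM_MAXUSER_ADDRESS)
 *			goto copyout_fault;
 *		bcopy(kaddr, uaddr, len);	(a fault jumps to copyout_fault)
 *		curpcb->pcb_onfault = NULL;
 *		return (0);
 *	copyout_fault:
 *		curpcb->pcb_onfault = NULL;
 *		return (EFAULT);
 *	}
 */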

/*
 * copyout(from_kernel, to_user, len)  -  MP SAFE
 */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*copyout_vector

ENTRY(generic_copyout)
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
	movl	%ebx,%ecx

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && defined(DEV_NPX) */

/*
 * copyin(from_user, to_kernel, len) - MP SAFE
 */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*copyin_vector

ENTRY(generic_copyin)
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
done_copyin:
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && defined(DEV_NPX) */

#if defined(I586_CPU) && defined(DEV_NPX)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
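/*
 * The overall flow of fastmove(), as C-like pseudocode (a sketch of the
 * control flow only; the real code keeps the saved FPU state and spilled
 * registers in the local stack frame):
 *
 *	if (len <= 63 || src or dst is not 8-byte aligned)
 *		goto fastmove_tail;		(plain rep movsl/movsb)
 *	disable_intr();
 *	if (fpcurthread != NULL)
 *		fnsave(&curpcb->pcb_savefpu);	(evict the owner's state)
 *	tmp = curpcb->pcb_savefpu;		(stash it on our stack)
 *	clts(); fpcurthread = curthread;	(claim the FPU)
 *	enable_intr();
 *	while (len >= 64)
 *		copy 64 bytes with fildq/fistpq, reading ahead in
 *		1792-byte slices to prime the cache;
 *	disable_intr();
 *	curpcb->pcb_savefpu = tmp;		(put the state back)
 *	set CR0_TS; fpcurthread = NULL;		(release the FPU)
 *	enable_intr();
 * fastmove_tail:
 *	copy the remaining 0-63 bytes with rep movsl/movsb;
 */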
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi			/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi			/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

	/* XXX grab FPU context atomically. */
	cli

	/* if (fpcurthread != NULL) { */
	cmpl	$0,PCPU(FPCURTHREAD)
	je	6f
	/*    fnsave(&curpcb->pcb_savefpu); */
	movl	PCPU(CURPCB),%eax
	fnsave	PCB_SAVEFPU(%eax)
	/*    FPCURTHREAD = NULL; */
	movl	$0,PCPU(FPCURTHREAD)
	/* } */
6:
	/* now we own the FPU. */

	/*
	 * The process' FP state is saved in the pcb, but if we get
	 * switched, the cpu_switch() will store our FP state in the
	 * pcb.  It should be possible to avoid all the copying for
	 * this, e.g., by setting a flag to tell cpu_switch() to
	 * save the state somewhere else.
	 */
	/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	PCPU(CURPCB),%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
	/* stop_emulating(); */
	clts
	/* fpcurthread = curthread; */
	movl	PCPU(CURTHREAD),%eax
	movl	%eax,PCPU(FPCURTHREAD)
	movl	PCPU(CURPCB),%eax

	/* XXX end of atomic FPU context grab. */
	sti

	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	/* XXX ungrab FPU context atomically. */
	cli

	/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	PCPU(CURPCB),%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

	/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	/* fpcurthread = NULL; */
	movl	$0,PCPU(FPCURTHREAD)

	/* XXX end of atomic FPU context ungrab. */
	sti

	ALIGN_TEXT
fastmove_tail:
	movl	PCPU(CURPCB),%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* XXX ungrab FPU context atomically. */
	cli

	movl	PCPU(CURPCB),%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,PCPU(FPCURTHREAD)

	/* XXX end of atomic FPU context ungrab. */
	sti

fastmove_tail_fault:
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && defined(DEV_NPX) */

/*
 * casuptr.  Compare and set user pointer.  Returns -1 on fault, otherwise
 * the value that was in *dst before the operation (which equals `old'
 * exactly when the store succeeded).
 */
ENTRY(casuptr)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* dst */
	movl	8(%esp),%eax			/* old */
	movl	12(%esp),%ecx			/* new */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%edx)			/* Compare and set. */

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */

	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	ret
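
/*
 * casuptr in C, for reference (a sketch; the load/compare/store triple is
 * a single atomic cmpxchgl in the code above):
 *
 *	intptr_t
 *	casuptr(intptr_t *p, intptr_t old, intptr_t new)
 *	{
 *		intptr_t val;
 *
 *		if ((uintptr_t)p > VM_MAXUSER_ADDRESS - 4)
 *			return (-1);			(via fusufault)
 *		val = *p;
 *		if (val == old)
 *			*p = new;
 *		return (val);
 *	}
 *
 * The caller detects success by comparing the return value with `old';
 * since -1 is also a legitimate in-memory value, it is not by itself an
 * unambiguous error indicator.
 */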

/*
 * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
 * memory.  All these functions are MPSAFE.
 */

ALTENTRY(fuword32)
ENTRY(fuword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
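
/*
 * All the fu*() routines follow the same pattern; fuword in C, for
 * reference (a sketch: curpcb is really reached through the per-CPU data,
 * and fusufault is a label, not a function):
 *
 *	long
 *	fuword(const void *addr)
 *	{
 *		long val;
 *
 *		if ((uintptr_t)addr > VM_MAXUSER_ADDRESS - 4)
 *			return (-1);
 *		curpcb->pcb_onfault = fusufault;	(fault => return -1)
 *		val = *(const long *)addr;
 *		curpcb->pcb_onfault = NULL;
 *		return (val);
 *	}
 */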

/*
 * fuswintr() and suswintr() are specialized variants of fuword16() and
 * suword16(), respectively.  They are called from the profiling code,
 * potentially at interrupt time.  If they fail, that's okay; good things
 * will happen later.  They always fail for now, until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret

ENTRY(fuword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

ENTRY(fubyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	movl	PCPU(CURPCB),%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax
	ret

/*
 * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory.
 * All these functions are MPSAFE.
 */

ALTENTRY(suword32)
ENTRY(suword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(suword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(subyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
 *
 * Copy a string from 'from' to 'to', stopping when a NUL character is
 * reached.  Return ENAMETOOLONG if the string is longer than maxlen, and
 * EFAULT on protection violations.  If lencopied is non-NULL, return the
 * actual number of bytes copied (counting the terminating NUL) in
 * *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	PCPU(CURPCB),%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
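
/*
 * copyinstr in C, for reference (a sketch; the assembly also clamps
 * maxlen so the loop cannot run past VM_MAXUSER_ADDRESS, and a fault
 * lands on cpystrflt and yields EFAULT):
 *
 *	int
 *	copyinstr(const void *from, void *to, size_t maxlen, size_t *lencopied)
 *	{
 *		const char *src = from;
 *		char *dst = to;
 *		size_t i;
 *		int error = ENAMETOOLONG;
 *
 *		for (i = 0; i < maxlen; i++) {
 *			dst[i] = src[i];
 *			if (dst[i] == '\0') {
 *				i++;			(count the NUL)
 *				error = 0;
 *				break;
 *			}
 *		}
 *		if (lencopied != NULL)
 *			*lencopied = i;
 *		return (error);
 *	}
 */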


/*
 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld				/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
1:
	setne	%al
	movsbl	%al,%eax
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%gs
	movl	%eax,%ss
	movl	$KPSEL,%eax
	movl	%eax,%fs

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	MEXITCOUNT
	lret

/* ssdtosd(*ssdp,*sdp) */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* void reset_dbregs() */
ENTRY(reset_dbregs)
	movl	$0,%eax
	movl	%eax,%dr7	/* disable all breakpoints first */
	movl	%eax,%dr0
	movl	%eax,%dr1
	movl	%eax,%dr2
	movl	%eax,%dr3
	movl	%eax,%dr6
	ret

/*****************************************************************************/
/* setjmp, longjmp                                                           */
/*****************************************************************************/

ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
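
/*
 * These are the kernel's private setjmp/longjmp: the jmp_buf records only
 * %ebx, %esp, %ebp, %esi, %edi and the return address, and longjmp always
 * makes setjmp "return" 1.  Typical use, sketched (the jmp_buf type and
 * the exact calling convention are as declared in the kernel headers):
 *
 *	static jmp_buf jb;
 *
 *	if (setjmp(jb) == 0) {
 *		... do the faultable work; on error some handler
 *		calls longjmp(jb) ...
 *	} else {
 *		... resumed here after the longjmp ...
 *	}
 */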

/*
 * Support for BB-profiling (gcc -a).  The kernbb program will extract
 * the data from the kernel.
 */

	.data
	ALIGN_DATA
	.globl	bbhead
bbhead:
	.long	0

	.text
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	movl	bbhead,%edx
	movl	%edx,16(%eax)
	movl	%eax,bbhead
	NON_GPROF_RET