1 /*-
2 * Copyright (c) 2001 Jake Burkholder.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <machine/asm.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <sys/errno.h>
31
32 #include <machine/asi.h>
33 #include <machine/asmacros.h>
34 #include <machine/fsr.h>
35 #include <machine/intr_machdep.h>
36 #include <machine/pcb.h>
37 #include <machine/pstate.h>
38 #include <machine/wstate.h>
39
40 #include "assym.s"
41
	/*
	 * Tell the assembler that we use the application-reserved global
	 * registers %g2, %g3 and %g6 freely, suppressing its warnings.
	 */
	.register %g2, #ignore
	.register %g3, #ignore
	.register %g6, #ignore
45
46 /*
47 * Common code for copy routines.
48 *
49 * We use large macros to generate functions for each of the copy routines.
50 * This allows the load and store instructions to be generated for the right
51 * operation, asi or not. It is possible to write an asi independent function
52 * but this would require 2 expensive wrs in the main loop to switch %asi.
53 * It would also screw up profiling (if we ever get it), but may save some I$.
54 * We assume that either one of dasi and sasi is empty, or that they are both
55 * the same (empty or non-empty). It is up to the caller to set %asi.
56 */
57
/*
 * ASI independent implementation of copystr(9).
 * Used to implement copyinstr() and copystr().
 *
 * Copies at most len bytes from src to dst, stopping after the
 * terminating NUL has been transferred.  Return value is in %g1:
 * 0 on success, ENAMETOOLONG if len was exhausted before the NUL.
 * If done is non-NULL, the number of bytes transferred (including
 * the NUL) is stored there.  %g2 is clobbered.
 * NOTE(review): when len is 0 on entry we branch straight to 4f and
 * %g1 is left unset by this macro — callers pass a non-zero len.
 */
#define	_COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \
	brz	len, 4f ; \
	mov	src, %g2 ; \
1:	deccc	1, len ; \
	bl,a,pn	%xcc, 2f ; \
	nop ; \
	LD(ub, sa) [src] sasi, %g1 ; \
	ST(b, da) %g1, [dst] dasi ; \
	brz,pn	%g1, 3f ; \
	inc	src ; \
	ba	%xcc, 1b ; \
	inc	dst ; \
2:	mov	ENAMETOOLONG, %g1 ; \
3:	sub	src, %g2, %g2 ; \
	brnz,a	done, 4f ; \
	stx	%g2, [done] ; \
4:
81
/*
 * ASI independent implementation of memset(3).
 * Used to implement bzero(), memset() and aszero().
 *
 * If the pattern is non-zero, duplicate it to fill 64 bits.
 * Store bytes until dst is 8-byte aligned, then store 8 bytes.
 * It has yet to be determined how much unrolling is beneficial.
 * Could also read and compare before writing to minimize snoop traffic.
 *
 * Arguments dst/pat/len are registers; da/dasi select either an
 * ASI-qualified store ("a, %asi") or a plain one ("EMPTY, EMPTY").
 * %g1 is clobbered.  Loop structure: 1 - byte stores until dst is
 * 8-byte aligned, 2 - 32-byte unrolled stores, 3 - single 8-byte
 * stores, 4 - trailing byte stores, 5 - done.  All counters may go
 * negative and are corrected in the annulled delay slots.
 *
 * XXX bzero() should be implemented as
 * #define bzero(dst, len) (void)memset((dst), 0, (len))
 * if at all.
 */
#define	_MEMSET(dst, pat, len, da, dasi) \
	brlez,pn len, 5f ; \
	and	pat, 0xff, pat ; \
	brz,pt	pat, 1f ; \
	sllx	pat, 8, %g1 ; \
	or	pat, %g1, pat ; \
	sllx	pat, 16, %g1 ; \
	or	pat, %g1, pat ; \
	sllx	pat, 32, %g1 ; \
	or	pat, %g1, pat ; \
	.align 16 ; \
1:	deccc	1, len ; \
	bl,pn	%xcc, 5f ; \
	btst	7, dst ; \
	bz,a,pt	%xcc, 2f ; \
	inc	1, len ; \
	ST(b, da) pat, [dst] dasi ; \
	ba	%xcc, 1b ; \
	inc	dst ; \
	.align 16 ; \
2:	deccc	32, len ; \
	bl,a,pn	%xcc, 3f ; \
	inc	32, len ; \
	ST(x, da) pat, [dst] dasi ; \
	ST(x, da) pat, [dst + 8] dasi ; \
	ST(x, da) pat, [dst + 16] dasi ; \
	ST(x, da) pat, [dst + 24] dasi ; \
	ba	%xcc, 2b ; \
	inc	32, dst ; \
	.align 16 ; \
3:	deccc	8, len ; \
	bl,a,pn	%xcc, 4f ; \
	inc	8, len ; \
	ST(x, da) pat, [dst] dasi ; \
	ba	%xcc, 3b ; \
	inc	8, dst ; \
	.align 16 ; \
4:	deccc	1, len ; \
	bl,a,pn	%xcc, 5f ; \
	nop ; \
	ST(b, da) pat, [dst] dasi ; \
	ba	%xcc, 4b ; \
	inc	1, dst ; \
5:
139
/*
 * ASI independent implementation of memcpy(3).
 * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
 * ascopyfrom() and ascopyto().
 *
 * Transfer bytes until dst is 8-byte aligned.  If src is then also 8 byte
 * aligned, transfer 8 bytes, otherwise finish with bytes.  The unaligned
 * case could be optimized, but it is expected that this is the uncommon
 * case and of questionable value.  The code to do so is also rather large
 * and ugly.  It has yet to be determined how much unrolling is beneficial.
 *
 * da/dasi and sa/sasi independently select ASI-qualified ("a, %asi") or
 * plain ("EMPTY, EMPTY") stores and loads.  %g1-%g4 are clobbered.
 * Loop structure: 1 - byte copies until dst is 8-byte aligned,
 * 2 - dispatch on src alignment, 3 - 32-byte unrolled 8-byte copies,
 * 4 - single 8-byte copies, 5 - trailing byte copies, 6 - done.
 *
 * XXX bcopy() must also check for overlap.  This is stupid.
 * XXX bcopy() should be implemented as
 * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
 * if at all.
 */
#define	_MEMCPY(dst, src, len, da, dasi, sa, sasi) \
1:	deccc	1, len ; \
	bl,pn	%xcc, 6f ; \
	btst	7, dst ; \
	bz,a,pt	%xcc, 2f ; \
	inc	1, len ; \
	LD(ub, sa) [src] sasi, %g1 ; \
	ST(b, da) %g1, [dst] dasi ; \
	inc	1, src ; \
	ba	%xcc, 1b ; \
	inc	1, dst ; \
	.align 16 ; \
2:	btst	7, src ; \
	bz,a,pt	%xcc, 3f ; \
	nop ; \
	ba,a	%xcc, 5f ; \
	.align 16 ; \
3:	deccc	32, len ; \
	bl,a,pn	%xcc, 4f ; \
	inc	32, len ; \
	LD(x, sa) [src] sasi, %g1 ; \
	LD(x, sa) [src + 8] sasi, %g2 ; \
	LD(x, sa) [src + 16] sasi, %g3 ; \
	LD(x, sa) [src + 24] sasi, %g4 ; \
	ST(x, da) %g1, [dst] dasi ; \
	ST(x, da) %g2, [dst + 8] dasi ; \
	ST(x, da) %g3, [dst + 16] dasi ; \
	ST(x, da) %g4, [dst + 24] dasi ; \
	inc	32, src ; \
	ba	%xcc, 3b ; \
	inc	32, dst ; \
	.align 16 ; \
4:	deccc	8, len ; \
	bl,a,pn	%xcc, 5f ; \
	inc	8, len ; \
	LD(x, sa) [src] sasi, %g1 ; \
	ST(x, da) %g1, [dst] dasi ; \
	inc	8, src ; \
	ba	%xcc, 4b ; \
	inc	8, dst ; \
	.align 16 ; \
5:	deccc	1, len ; \
	bl,a,pn	%xcc, 6f ; \
	nop ; \
	LD(ub, sa) [src] sasi, %g1 ; \
	ST(b, da) %g1, [dst] dasi ; \
	inc	src ; \
	ba	%xcc, 5b ; \
	inc	dst ; \
6:
206
/*
 * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
 *
 * Copy len bytes from src to dst with both sides accessed through the
 * caller-supplied ASI.
 */
ENTRY(ascopy)
	wr	%o0, 0, %asi		! select the caller's ASI for both sides
	_MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)
	retl
	nop
END(ascopy)
216
/*
 * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
 *
 * Copy len bytes from src (accessed through sasi) to a normal kernel
 * virtual destination.
 */
ENTRY(ascopyfrom)
	wr	%o0, 0, %asi		! ASI applies to loads only
	_MEMCPY(%o2, %o1, %o3, EMPTY, EMPTY, a, %asi)
	retl
	nop
END(ascopyfrom)
226
/*
 * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
 *
 * Copy len bytes from a normal kernel virtual source to dst accessed
 * through dasi.
 */
ENTRY(ascopyto)
	wr	%o1, 0, %asi		! ASI applies to stores only
	_MEMCPY(%o2, %o0, %o3, a, %asi, EMPTY, EMPTY)
	retl
	nop
END(ascopyto)
236
/*
 * void aszero(u_long asi, vm_offset_t pa, size_t len)
 *
 * Zero len bytes at pa, accessed through the caller-supplied ASI.
 */
ENTRY(aszero)
	wr	%o0, 0, %asi
	_MEMSET(%o1, %g0, %o2, a, %asi)	! %g0 as pattern: always zero
	retl
	nop
END(aszero)
246
/*
 * int bcmp(const void *b1, const void *b2, size_t len)
 *
 * Byte-wise compare; returns 0 if the regions are equal, otherwise a
 * non-zero value (the number of bytes remaining, not the difference).
 */
ENTRY(bcmp)
	brz,pn	%o2, 2f			! len == 0 -> equal
	clr	%o3			! %o3 = running byte index
1:	ldub	[%o0 + %o3], %o4
	ldub	[%o1 + %o3], %o5
	cmp	%o4, %o5
	bne,pn	%xcc, 2f		! mismatch: %o2 still non-zero
	inc	%o3			! delay slot: advance index
	deccc	%o2
	bne,pt	%xcc, 1b
	nop
2:	retl
	mov	%o2, %o0		! 0 iff all bytes matched
END(bcmp)
264
/*
 * void bcopy(const void *src, void *dst, size_t len)
 *
 * Overlap-safe copy: if dst lies within [src, src + len) the regions
 * overlap in the direction a forward copy would corrupt, so copy
 * backwards byte by byte; otherwise use the fast _MEMCPY path.
 */
ENTRY(bcopy)
	/*
	 * Check for overlap, and copy backwards if so.
	 * dst - src compared unsigned against len also handles dst < src
	 * (huge unsigned difference -> no overlap -> fast path).
	 */
	sub	%o1, %o0, %g1
	cmp	%g1, %o2
	bgeu,a,pt %xcc, 3f
	nop

	/*
	 * Copy backwards, starting from one past the last byte.
	 */
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
1:	deccc	1, %o2
	bl,a,pn	%xcc, 2f		! annulled: skip delay slot when done
	nop
	dec	1, %o0
	ldub	[%o0], %g1
	dec	1, %o1
	ba	%xcc, 1b
	stb	%g1, [%o1]		! delay slot: store the byte
2:	retl
	nop

	/*
	 * Do the fast version.
	 */
3:	_MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
	retl
	nop
END(bcopy)
300
/*
 * void bzero(void *b, size_t len)
 */
ENTRY(bzero)
	_MEMSET(%o0, %g0, %o1, EMPTY, EMPTY)	! zero pattern via %g0
	retl
	nop
END(bzero)
309
/*
 * int copystr(const void *src, void *dst, size_t len, size_t *done)
 *
 * Kernel-to-kernel string copy; see _COPYSTR for semantics.
 */
ENTRY(copystr)
	_COPYSTR(%o0, %o1, %o2, %o3, EMPTY, EMPTY, EMPTY, EMPTY)
	retl
	mov	%g1, %o0		! macro leaves the errno in %g1
END(copystr)
318
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * Returns dst; %o0 is preserved by doing the copy with a scratch
 * register as the destination pointer.
 */
ENTRY(memcpy)
	mov	%o0, %o3		! keep dst for the return value
	_MEMCPY(%o3, %o1, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
	retl
	nop
END(memcpy)
328
/*
 * void *memset(void *b, int c, size_t len)
 *
 * Returns b; %o0 is preserved by filling through a scratch register.
 */
ENTRY(memset)
	mov	%o0, %o3		! keep b for the return value
	_MEMSET(%o3, %o1, %o2, EMPTY, EMPTY)
	retl
	nop
END(memset)
338
	/*
	 * Start of the region in which faults on user addresses are
	 * redirected by the trap handlers to copy_fault.
	 */
	.globl	copy_nofault_begin
copy_nofault_begin:
	nop
342
/*
 * int copyin(const void *uaddr, void *kaddr, size_t len)
 *
 * Loads go through ASI_AIUP (user primary address space); faults are
 * caught by the copy_nofault region and return EFAULT via copy_fault.
 */
ENTRY(copyin)
	wr	%g0, ASI_AIUP, %asi
	_MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, a, %asi)
	retl
	clr	%o0			! success
END(copyin)
352
/*
 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
 *
 * String copy from user space; source loads use ASI_AIUP and faults
 * resolve to EFAULT via copy_fault.
 */
ENTRY(copyinstr)
	wr	%g0, ASI_AIUP, %asi
	_COPYSTR(%o0, %o1, %o2, %o3, a, %asi, EMPTY, EMPTY)
	retl
	mov	%g1, %o0		! 0 or errno from the macro
END(copyinstr)
362
/*
 * int copyout(const void *kaddr, void *uaddr, size_t len)
 *
 * Stores go through ASI_AIUP; faults resolve to EFAULT via copy_fault.
 */
ENTRY(copyout)
	wr	%g0, ASI_AIUP, %asi
	_MEMCPY(%o1, %o0, %o2, a, %asi, EMPTY, EMPTY)
	retl
	clr	%o0			! success
END(copyout)
372
	/*
	 * End of the copy fault-catching region.
	 */
	.globl	copy_nofault_end
copy_nofault_end:
	nop

/*
 * Landing pad for faults taken inside the copy_nofault region:
 * abort the copy and return EFAULT to the caller.
 */
ENTRY(copy_fault)
	retl
	mov	EFAULT, %o0
END(copy_fault)
381
	/*
	 * Start of the region in which faults in the fetch/store
	 * functions are redirected to fs_fault (which returns -1).
	 */
	.globl	fs_nofault_begin
fs_nofault_begin:
	nop

/*
 * Chatty aliases for fetch, store functions.
 * Note that fuword/suword and fuptr/suptr operate on 64-bit
 * quantities on sparc64.
 */
	.globl	fubyte, fusword, fuword, subyte, susword, suword
	.set	fubyte, fuword8
	.set	fusword, fuword16
	.set	fuword, fuword64
	.set	subyte, suword8
	.set	susword, suword16
	.set	suword, suword64

	.globl	casuword32, casuword, fuptr, suptr
	.set	casuword, casuword64
	.set	fuptr, fuword64
	.set	suptr, suword64
401
/*
 * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
 *
 * Atomic compare-and-swap on a user-space word: if *p == e, set *p = s.
 * Returns the previous value of *p either way.
 */
ENTRY(casuword32)
	casa	[%o0] ASI_AIUP, %o1, %o2
	retl
	mov	%o2, %o0		! old value read by casa
END(casuword32)
410
/*
 * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
 *
 * 64-bit atomic compare-and-swap on a user-space doubleword.
 */
ENTRY(casuword64)
	casxa	[%o0] ASI_AIUP, %o1, %o2
	retl
	mov	%o2, %o0		! old value read by casxa
END(casuword64)
419
/*
 * int fuword8(const void *base)
 *
 * Fetch a byte from user space; a fault returns -1 via fs_fault.
 */
ENTRY(fuword8)
	retl
	lduba	[%o0] ASI_AIUP, %o0	! delay slot: load executes before return
END(fuword8)
427
/*
 * int fuword16(const void *base)
 *
 * Fetch a halfword from user space; a fault returns -1 via fs_fault.
 */
ENTRY(fuword16)
	retl
	lduha	[%o0] ASI_AIUP, %o0
END(fuword16)
435
/*
 * int32_t fuword32(const void *base)
 *
 * Fetch a word from user space; a fault returns -1 via fs_fault.
 */
ENTRY(fuword32)
	retl
	lduwa	[%o0] ASI_AIUP, %o0
END(fuword32)
443
/*
 * int64_t fuword64(const void *base)
 *
 * Fetch a doubleword from user space; a fault returns -1 via fs_fault.
 */
ENTRY(fuword64)
	retl
	ldxa	[%o0] ASI_AIUP, %o0
END(fuword64)
451
/*
 * int suword8(const void *base, int word)
 *
 * Store a byte to user space; returns 0, or -1 via fs_fault on fault.
 */
ENTRY(suword8)
	stba	%o1, [%o0] ASI_AIUP
	retl
	clr	%o0			! success
END(suword8)
460
/*
 * int suword16(const void *base, int word)
 *
 * Store a halfword to user space; returns 0, or -1 via fs_fault on fault.
 */
ENTRY(suword16)
	stha	%o1, [%o0] ASI_AIUP
	retl
	clr	%o0
END(suword16)
469
/*
 * int suword32(const void *base, int32_t word)
 *
 * Store a word to user space; returns 0, or -1 via fs_fault on fault.
 */
ENTRY(suword32)
	stwa	%o1, [%o0] ASI_AIUP
	retl
	clr	%o0
END(suword32)
478
/*
 * int suword64(const void *base, int64_t word)
 *
 * Store a doubleword to user space; returns 0, or -1 via fs_fault on fault.
 */
ENTRY(suword64)
	stxa	%o1, [%o0] ASI_AIUP
	retl
	clr	%o0
END(suword64)
487
	/*
	 * Start of the sub-region whose fetch/store functions may be
	 * called from interrupt context (faults here must not sleep).
	 */
	.globl	fs_nofault_intr_begin
fs_nofault_intr_begin:
	nop

/*
 * int fuswintr(const void *base)
 *
 * Interrupt-safe variant of fusword; a fault returns -1 immediately.
 */
ENTRY(fuswintr)
	retl
	lduha	[%o0] ASI_AIUP, %o0
END(fuswintr)
499
/*
 * int suswintr(const void *base, int word)
 *
 * Interrupt-safe variant of susword; a fault returns -1 immediately.
 */
ENTRY(suswintr)
	stha	%o1, [%o0] ASI_AIUP
	retl
	clr	%o0			! success
END(suswintr)
508
	/*
	 * End markers for the interrupt-safe and general fetch/store
	 * fault-catching regions.
	 */
	.globl	fs_nofault_intr_end
fs_nofault_intr_end:
	nop

	.globl	fs_nofault_end
fs_nofault_end:
	nop
516
/*
 * Landing pad for faults taken inside the fs_nofault region: the
 * interrupted fetch/store function returns -1 to its caller.
 */
ENTRY(fs_fault)
	retl
	mov	-1, %o0
END(fs_fault)
521
	/*
	 * Start of the region in which faults in the fasword*()
	 * functions are redirected to fas_fault.
	 */
	.globl	fas_nofault_begin
fas_nofault_begin:

/*
 * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
 *
 * Fetch a byte through an arbitrary ASI and store it at *val.
 * Returns 0, or -1 via fas_fault on fault.
 */
ENTRY(fasword8)
	wr	%o0, 0, %asi
	membar	#Sync			! order against prior accesses
	lduba	[%o1] %asi, %o3
	membar	#Sync			! complete the ASI load before the store
	stb	%o3, [%o2]
	retl
	clr	%o0
END(fasword8)
537
/*
 * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
 *
 * Fetch a halfword through an arbitrary ASI and store it at *val.
 * Returns 0, or -1 via fas_fault on fault.
 */
ENTRY(fasword16)
	wr	%o0, 0, %asi
	membar	#Sync
	lduha	[%o1] %asi, %o3
	membar	#Sync
	sth	%o3, [%o2]
	retl
	clr	%o0
END(fasword16)
550
/*
 * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
 *
 * Fetch a word through an arbitrary ASI and store it at *val.
 * Returns 0, or -1 via fas_fault on fault.
 */
ENTRY(fasword32)
	wr	%o0, 0, %asi
	membar	#Sync
	lduwa	[%o1] %asi, %o3
	membar	#Sync
	stw	%o3, [%o2]
	retl
	clr	%o0
END(fasword32)
563
	/*
	 * End of the fasword*() fault-catching region.
	 */
	.globl	fas_nofault_end
fas_nofault_end:
	nop

/*
 * Landing pad for faults in the fas_nofault region; returns -1.
 */
	.globl	fas_fault
ENTRY(fas_fault)
	retl
	mov	-1, %o0
END(fas_fault)
573
	/*
	 * Start of the region covered by the FPU block-operation fault
	 * handling (see fpu_fault_end/fpu_fault_size below).
	 */
	.globl	fpu_fault_begin
fpu_fault_begin:
	nop
577
/*
 * void spitfire_block_copy(void *src, void *dst, size_t len)
 *
 * Copy len bytes using 64-byte VIS block loads/stores (%f0-%f46).
 * NOTE(review): callers appear to guarantee len is a non-zero multiple
 * of VIS_BLOCKSIZE and that src/dst are block aligned — confirm at the
 * call sites; nothing here checks.
 * If the interrupted context had the FPU enabled (FPRS_FEF set in the
 * trapframe), its FP registers are first saved into the PCB and the
 * pcb flags are marked so they get restored later.
 */
ENTRY(spitfire_block_copy)
	rdpr	%pstate, %o3
	wrpr	%g0, PSTATE_NORMAL, %pstate	! block interrupts while we
						! take over the FPU

	wr	%g0, ASI_BLK_S, %asi
	wr	%g0, FPRS_FEF, %fprs		! enable the FPU for ourselves

	sub	PCB_REG, TF_SIZEOF, %o4		! %o4 = current trapframe
	ldx	[%o4 + TF_FPRS], %o5
	andcc	%o5, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f			! FPU was unused: nothing to save
	nop
	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
	membar	#Sync

	andn	%o5, FPRS_FEF, %o5		! mark FPU clean in the trapframe
	stx	%o5, [%o4 + TF_FPRS]
	ldx	[PCB_REG + PCB_FLAGS], %o4
	or	%o4, PCB_FEF, %o4		! remember to restore from the PCB
	stx	%o4, [PCB_REG + PCB_FLAGS]

1:	wrpr	%o3, 0, %pstate			! re-enable interrupts

	ldda	[%o0] %asi, %f0			! prime the software pipeline
	add	%o0, VIS_BLOCKSIZE, %o0
	sub	%o2, VIS_BLOCKSIZE, %o2

	/*
	 * Double-buffered loop: load the next block into the alternate
	 * register bank while copying the previous one to %f32-%f46 and
	 * storing it.
	 */
2:	ldda	[%o0] %asi, %f16
	fsrc1	%f0, %f32
	fsrc1	%f2, %f34
	fsrc1	%f4, %f36
	fsrc1	%f6, %f38
	fsrc1	%f8, %f40
	fsrc1	%f10, %f42
	fsrc1	%f12, %f44
	fsrc1	%f14, %f46
	stda	%f32, [%o1] %asi
	add	%o0, VIS_BLOCKSIZE, %o0
	subcc	%o2, VIS_BLOCKSIZE, %o2
	bz,pn	%xcc, 3f
	add	%o1, VIS_BLOCKSIZE, %o1
	ldda	[%o0] %asi, %f0
	fsrc1	%f16, %f32
	fsrc1	%f18, %f34
	fsrc1	%f20, %f36
	fsrc1	%f22, %f38
	fsrc1	%f24, %f40
	fsrc1	%f26, %f42
	fsrc1	%f28, %f44
	fsrc1	%f30, %f46
	stda	%f32, [%o1] %asi
	add	%o0, VIS_BLOCKSIZE, %o0
	sub	%o2, VIS_BLOCKSIZE, %o2
	ba,pt	%xcc, 2b
	add	%o1, VIS_BLOCKSIZE, %o1

3:	membar	#Sync

	stda	%f16, [%o1] %asi		! drain the last pipelined block
	membar	#Sync

	retl
	wr	%g0, 0, %fprs			! give the FPU back
END(spitfire_block_copy)
648
/*
 * void zeus_block_copy(void *src, void *dst, size_t len)
 *
 * Block copy tuned for SPARC64-V (Zeus): 8-byte FP loads with heavy
 * software prefetching, block stores on the output side.  The FP
 * context save preamble is identical to spitfire_block_copy.
 * NOTE(review): as with spitfire_block_copy, len appears to be assumed
 * a multiple of VIS_BLOCKSIZE — confirm at the call sites.
 */
ENTRY(zeus_block_copy)
	prefetch [%o0 + (0 * VIS_BLOCKSIZE)], 0

	rdpr	%pstate, %o3
	wrpr	%g0, PSTATE_NORMAL, %pstate	! block interrupts while we
						! take over the FPU

	wr	%g0, ASI_BLK_S, %asi
	wr	%g0, FPRS_FEF, %fprs

	sub	PCB_REG, TF_SIZEOF, %o4		! %o4 = current trapframe
	ldx	[%o4 + TF_FPRS], %o5
	andcc	%o5, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f			! FPU was unused: nothing to save
	nop
	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
	membar	#Sync

	andn	%o5, FPRS_FEF, %o5
	stx	%o5, [%o4 + TF_FPRS]
	ldx	[PCB_REG + PCB_FLAGS], %o4
	or	%o4, PCB_FEF, %o4		! restore user FP state later
	stx	%o4, [PCB_REG + PCB_FLAGS]

1:	wrpr	%o3, 0, %pstate			! re-enable interrupts

	/*
	 * Prologue: load the first block into %f0-%f14, staging copies
	 * into %f32-%f42 interleaved with prefetches of upcoming blocks.
	 */
	ldd	[%o0 + (0 * 8)], %f0
	prefetch [%o0 + (1 * VIS_BLOCKSIZE)], 0
	ldd	[%o0 + (1 * 8)], %f2
	prefetch [%o0 + (2 * VIS_BLOCKSIZE)], 0
	fmovd	%f0, %f32
	ldd	[%o0 + (2 * 8)], %f4
	prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
	fmovd	%f2, %f34
	ldd	[%o0 + (3 * 8)], %f6
	prefetch [%o0 + (4 * VIS_BLOCKSIZE)], 1
	fmovd	%f4, %f36
	ldd	[%o0 + (4 * 8)], %f8
	prefetch [%o0 + (8 * VIS_BLOCKSIZE)], 1
	fmovd	%f6, %f38
	ldd	[%o0 + (5 * 8)], %f10
	prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
	fmovd	%f8, %f40
	ldd	[%o0 + (6 * 8)], %f12
	prefetch [%o0 + (16 * VIS_BLOCKSIZE)], 1
	fmovd	%f10, %f42
	ldd	[%o0 + (7 * 8)], %f14
	ldd	[%o0 + (8 * 8)], %f0		! first doubleword of next block
	sub	%o2, VIS_BLOCKSIZE, %o2
	add	%o0, VIS_BLOCKSIZE, %o0
	prefetch [%o0 + (19 * VIS_BLOCKSIZE)], 1
	ba,pt	%xcc, 2f
	prefetch [%o0 + (23 * VIS_BLOCKSIZE)], 1
	.align	32

	/*
	 * Main loop: finish staging the previous block, block-store it,
	 * and load/stage the next one.
	 */
2:	ldd	[%o0 + (1 * 8)], %f2
	fmovd	%f12, %f44
	ldd	[%o0 + (2 * 8)], %f4
	fmovd	%f14, %f46
	stda	%f32, [%o1] %asi
	ldd	[%o0 + (3 * 8)], %f6
	fmovd	%f0, %f32
	ldd	[%o0 + (4 * 8)], %f8
	fmovd	%f2, %f34
	ldd	[%o0 + (5 * 8)], %f10
	fmovd	%f4, %f36
	ldd	[%o0 + (6 * 8)], %f12
	fmovd	%f6, %f38
	ldd	[%o0 + (7 * 8)], %f14
	fmovd	%f8, %f40
	ldd	[%o0 + (8 * 8)], %f0
	fmovd	%f10, %f42
	sub	%o2, VIS_BLOCKSIZE, %o2
	prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
	add	%o1, VIS_BLOCKSIZE, %o1
	prefetch [%o0 + (24 * VIS_BLOCKSIZE)], 1
	add	%o0, VIS_BLOCKSIZE, %o0
	cmp	%o2, VIS_BLOCKSIZE + 8
	bgu,pt	%xcc, 2b
	prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
	/*
	 * Epilogue: copy the final two staged blocks without reading
	 * past the end of the source.
	 */
	ldd	[%o0 + (1 * 8)], %f2
	fsrc1	%f12, %f44
	ldd	[%o0 + (2 * 8)], %f4
	fsrc1	%f14, %f46
	stda	%f32, [%o1] %asi
	ldd	[%o0 + (3 * 8)], %f6
	fsrc1	%f0, %f32
	ldd	[%o0 + (4 * 8)], %f8
	fsrc1	%f2, %f34
	ldd	[%o0 + (5 * 8)], %f10
	fsrc1	%f4, %f36
	ldd	[%o0 + (6 * 8)], %f12
	fsrc1	%f6, %f38
	ldd	[%o0 + (7 * 8)], %f14
	fsrc1	%f8, %f40
	add	%o1, VIS_BLOCKSIZE, %o1
	fsrc1	%f10, %f42
	fsrc1	%f12, %f44
	fsrc1	%f14, %f46
	stda	%f32, [%o1] %asi
	membar	#Sync

	retl
	wr	%g0, 0, %fprs			! give the FPU back
END(zeus_block_copy)
759
/*
 * void spitfire_block_zero(void *dst, size_t len)
 * void zeus_block_zero(void *dst, size_t len)
 *
 * Zero len bytes using VIS block stores of an all-zero register bank.
 * NOTE(review): the loop subtracts 4 * VIS_BLOCKSIZE per iteration and
 * exits only when the counter reaches exactly zero, so len is assumed
 * to be a non-zero multiple of 4 * VIS_BLOCKSIZE — confirm at call sites.
 * The FP context save preamble matches spitfire_block_copy.
 */
ALTENTRY(zeus_block_zero)
ENTRY(spitfire_block_zero)
	rdpr	%pstate, %o3
	wrpr	%g0, PSTATE_NORMAL, %pstate	! block interrupts while we
						! take over the FPU

	wr	%g0, ASI_BLK_S, %asi
	wr	%g0, FPRS_FEF, %fprs

	sub	PCB_REG, TF_SIZEOF, %o4		! %o4 = current trapframe
	ldx	[%o4 + TF_FPRS], %o5
	andcc	%o5, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f			! FPU was unused: nothing to save
	nop
	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
	membar	#Sync

	andn	%o5, FPRS_FEF, %o5
	stx	%o5, [%o4 + TF_FPRS]
	ldx	[PCB_REG + PCB_FLAGS], %o4
	or	%o4, PCB_FEF, %o4		! restore user FP state later
	stx	%o4, [PCB_REG + PCB_FLAGS]

1:	wrpr	%o3, 0, %pstate			! re-enable interrupts

	fzero	%f0			! build an all-zero block register bank
	fzero	%f2
	fzero	%f4
	fzero	%f6
	fzero	%f8
	fzero	%f10
	fzero	%f12
	fzero	%f14

1:	stda	%f0, [%o0 + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f0, [%o0 + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f0, [%o0 + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f0, [%o0 + (3 * VIS_BLOCKSIZE)] %asi
	sub	%o1, (4 * VIS_BLOCKSIZE), %o1
	brnz,pt	%o1, 1b
	add	%o0, (4 * VIS_BLOCKSIZE), %o0
	membar	#Sync

	retl
	wr	%g0, 0, %fprs		! give the FPU back
END(spitfire_block_zero)
812
	/*
	 * End of the FPU block-operation region; fpu_fault_size lets
	 * the trap handlers test whether a faulting %pc lies inside it.
	 */
	.globl	fpu_fault_end
fpu_fault_end:
	nop

	.globl	fpu_fault_size
	.set	fpu_fault_size, fpu_fault_end - fpu_fault_begin
819
/*
 * longjmp(env, val): unwind register windows until %fp matches the
 * frame saved by setjmp(), restore %sp and %pc from the jmp_buf and
 * return val (coerced to 1 if val was 0, per longjmp semantics).
 * Panics if the saved frame cannot be reached.
 */
ENTRY(longjmp)
	set	1, %g3
	movrz	%o1, %o1, %g3		! %g3 = (val == 0) ? 1 : val
	mov	%o0, %g1		! %g1 = env
	ldx	[%g1 + _JB_FP], %g2	! %g2 = frame pointer saved by setjmp
1:	cmp	%fp, %g2
	bl,a,pt	%xcc, 1b		! unwind one window per iteration
	restore
	bne,pn	%xcc, 2f		! overshot the saved frame: botch
	ldx	[%g1 + _JB_SP], %o2
	cmp	%o2, %sp
	blt,pn	%xcc, 2f		! saved %sp below current: botch
	movge	%xcc, %o2, %sp		! delay slot: restore %sp when valid
	ldx	[%g1 + _JB_PC], %o7	! return to the setjmp call site
	retl
	mov	%g3, %o0
2:	PANIC("longjmp botch", %l1)
END(longjmp)
838
/*
 * setjmp(env): record %sp, the return address and %fp in the jmp_buf
 * and return 0.  longjmp() later resumes from this call site.
 */
ENTRY(setjmp)
	stx	%sp, [%o0 + _JB_SP]
	stx	%o7, [%o0 + _JB_PC]
	stx	%fp, [%o0 + _JB_FP]
	retl
	clr	%o0			! direct return: 0
END(setjmp)
846
/*
 * void ofw_entry(cell_t args[])
 *
 * Call into the Open Firmware client interface.  Runs with address
 * masking and interrupts disabled; if the kernel has taken over the
 * trap table, the window state is temporarily switched to the PROM
 * mixed mode for the duration of the call.
 */
ENTRY(ofw_entry)
	save	%sp, -CCFSZ, %sp
	SET(ofw_vec, %l7, %l6)
	ldx	[%l6], %l6		! %l6 = OFW client interface handler
	rdpr	%pstate, %l7
	andn	%l7, PSTATE_AM | PSTATE_IE, %l5
	wrpr	%l5, 0, %pstate		! no address masking, no interrupts
	SET(tba_taken_over, %l5, %l4)
	brz,pn	%l4, 1f			! trap table still the PROM's?
	rdpr	%wstate, %l5		! delay slot: save current wstate
	andn	%l5, WSTATE_PROM_MASK, %l3
	wrpr	%l3, WSTATE_PROM_KMIX, %wstate
1:	call	%l6			! enter the firmware
	mov	%i0, %o0		! delay slot: pass the args array
	brz,pn	%l4, 1f
	nop
	wrpr	%g0, %l5, %wstate	! restore our window state
1:	wrpr	%l7, 0, %pstate		! restore the original pstate
	ret
	restore	%o0, %g0, %o0		! propagate the firmware's result
END(ofw_entry)
871
/*
 * void ofw_exit(cell_t args[])
 *
 * Hand control back to the firmware permanently (e.g. for "exit" or
 * "boot"): restore the OFW trap table, switch to a stack in a locked
 * page, force context 0 and trap level 0, then call the client
 * interface.  Does not return.
 */
ENTRY(ofw_exit)
	save	%sp, -CCFSZ, %sp
	flushw				! spill all register windows
	SET(ofw_tba, %l7, %l5)
	ldx	[%l5], %l5		! %l5 = the firmware's %tba
	rdpr	%pstate, %l7
	andn	%l7, PSTATE_AM | PSTATE_IE, %l7
	wrpr	%l7, 0, %pstate		! no address masking, no interrupts
	rdpr	%wstate, %l7
	andn	%l7, WSTATE_PROM_MASK, %l7
	wrpr	%l7, WSTATE_PROM_KMIX, %wstate
	wrpr	%l5, 0, %tba			! restore the OFW trap table
	SET(ofw_vec, %l7, %l6)
	ldx	[%l6], %l6		! %l6 = OFW client interface handler
	SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0)
	sub	%l0, SPOFF, %fp			! setup a stack in a locked page
	sub	%l0, SPOFF + CCFSZ, %sp
	mov	AA_DMMU_PCXR, %l3		! force primary DMMU context 0
	sethi	%hi(KERNBASE), %l5
	stxa	%g0, [%l3] ASI_DMMU
	flush	%l5
	wrpr	%g0, 0, %tl			! force trap level 0
	call	%l6
	mov	%i0, %o0
	! never to return
END(ofw_exit)
901
#ifdef GPROF

/*
 * Boundary labels used by the kernel profiler to classify %pc samples
 * (user code, trap handling and interrupt handling ranges).  They are
 * ENTRY()s only so they show up as function symbols.
 */
ENTRY(user)
	nop

ENTRY(btrap)
	nop

ENTRY(etrap)
	nop

ENTRY(bintr)
	nop

ENTRY(eintr)
	nop
918
/*
 * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t
 * badness.  These must match the struct gmonparam layout and the
 * GMON_PROF_* constants in sys/gmon.h.
 */
#define	GM_STATE	0x0
#define	GMON_PROF_OFF	3
#define	GMON_PROF_HIRES	4

	/* gcc emits calls to _mcount; route them to our wrapper. */
	.globl	_mcount
	.set	_mcount, __cyg_profile_func_enter
929
/*
 * Function-entry profiling hook: tail-call mcount() unless profiling
 * is switched off in _gmonparam.
 */
ENTRY(__cyg_profile_func_enter)
	SET(_gmonparam, %o3, %o2)
	lduw	[%o2 + GM_STATE], %o3
	cmp	%o3, GMON_PROF_OFF
	be,a,pn	%icc, 1f		! profiling disabled: do nothing
	nop
	SET(mcount, %o3, %o2)
	jmpl	%o2, %g0		! tail call, preserving our caller's %o7
	nop
1:	retl
	nop
END(__cyg_profile_func_enter)
942
#ifdef GUPROF

/*
 * Function-exit profiling hook: tail-call mexitcount() only when
 * high-resolution profiling is active.
 */
ENTRY(__cyg_profile_func_exit)
	SET(_gmonparam, %o3, %o2)
	lduw	[%o2 + GM_STATE], %o3
	cmp	%o3, GMON_PROF_HIRES
	be,a,pn	%icc, 1f		! not in hires mode: do nothing
	nop
	SET(mexitcount, %o3, %o2)
	jmpl	%o2, %g0		! tail call, preserving our caller's %o7
	nop
1:	retl
	nop
END(__cyg_profile_func_exit)

#endif /* GUPROF */

#endif /* GPROF */
/* Cache object: abab2d4e5c276feb3fcac06c80516f16 */