/*	$NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/11.1/sys/mips/mips/cache_mipsNN.c 280691 2015-03-26 14:51:24Z br $");

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>

#include <machine/cache.h>
#include <machine/cache_r4k.h>
#include <machine/cpuinfo.h>

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)

#define	round_line32(x)		(((x) + 31) & ~31)
#define	trunc_line32(x)		((x) & ~31)

#define	round_line64(x)		(((x) + 63) & ~63)
#define	trunc_line64(x)		((x) & ~63)

#define	round_line128(x)	(((x) + 127) & ~127)
#define	trunc_line128(x)	((x) & ~127)
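/*
 * For example, with 32-byte lines, round_line32(0x1005) == 0x1020 and
 * trunc_line32(0x1005) == 0x1000: a range that only partially covers a
 * line is widened to whole-line boundaries before the cache ops below run.
 */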

#if defined(CPU_NLM)
static __inline void
xlp_sync(void)
{
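	/*
	 * Compute the address of the label below and jump to it through
	 * jr.hb, a jump with an instruction-hazard barrier.  The jump is
	 * emitted as a raw .word, presumably because not every assembler
	 * accepts the jr.hb mnemonic; 0x1000408 encodes "jr.hb $8".
	 */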
	__asm __volatile (
	    ".set push		\n"
	    ".set noreorder	\n"
	    ".set mips64	\n"
	    "dla	$8, 1f	\n"
	    "/* jr.hb $8 */	\n"
	    ".word	0x1000408	\n"
	    "nop		\n"
	    "1:	nop		\n"
	    ".set pop		\n"
	    : : : "$8");
}
#endif

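/*
 * SYNC orders outstanding loads and stores; SYNCI is used where the
 * instruction stream must also be resynchronized.  The doubled "sync"
 * for SB1250 pass 1 parts is presumably an erratum workaround; on most
 * CPUs a single "sync" suffices and SYNCI expands to nothing.
 */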
#if defined(SB1250_PASS1)
#define	SYNC	__asm volatile("sync; sync")
#elif defined(CPU_NLM)
#define	SYNC	xlp_sync()
#else
#define	SYNC	__asm volatile("sync")
#endif

#if defined(CPU_CNMIPS)
#define	SYNCI	mips_sync_icache();
#elif defined(CPU_NLM)
#define	SYNCI	xlp_sync()
#else
#define	SYNCI
#endif

/*
 * Exported variables for consumers like bus_dma code
 */
int mips_picache_linesize;
int mips_pdcache_linesize;

static int picache_size;
static int picache_stride;
static int picache_loopcount;
static int picache_way_mask;
static int pdcache_size;
static int pdcache_stride;
static int pdcache_loopcount;
static int pdcache_way_mask;
static int sdcache_size;
static int sdcache_stride;
static int sdcache_loopcount;
static int sdcache_way_mask;

void
mipsNN_cache_init(struct mips_cpuinfo *cpuinfo)
{
	int flush_multiple_lines_per_way;

	flush_multiple_lines_per_way =
	    cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize > PAGE_SIZE;
	if (cpuinfo->icache_virtual) {
		/*
		 * With a virtual Icache we don't need to flush
		 * multiples of the page size with index ops; we just
		 * need to flush one page's worth.
		 */
		flush_multiple_lines_per_way = 0;
	}

	if (flush_multiple_lines_per_way) {
		picache_stride = PAGE_SIZE;
		picache_loopcount = (cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize / PAGE_SIZE) *
		    cpuinfo->l1.ic_nways;
	} else {
		picache_stride = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize;
		picache_loopcount = cpuinfo->l1.ic_nways;
	}

	if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) {
		pdcache_stride = cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize;
		pdcache_loopcount = cpuinfo->l1.dc_nways;
	} else {
		pdcache_stride = PAGE_SIZE;
		pdcache_loopcount = (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize / PAGE_SIZE) *
		    cpuinfo->l1.dc_nways;
	}
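	/*
	 * Worked example (hypothetical 16 KB, 4-way D-cache with 32-byte
	 * lines and 4 KB pages): dc_nsets = 16384 / (4 * 32) = 128, so
	 * one way spans 128 * 32 = 4096 bytes.  That is not below
	 * PAGE_SIZE, so pdcache_stride = 4096 and pdcache_loopcount =
	 * (4096 / 4096) * 4 = 4: the index loops in the functions below
	 * repeat each op at four page-sized strides to touch every way.
	 */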

	mips_picache_linesize = cpuinfo->l1.ic_linesize;
	mips_pdcache_linesize = cpuinfo->l1.dc_linesize;

	picache_size = cpuinfo->l1.ic_size;
	picache_way_mask = cpuinfo->l1.ic_nways - 1;
	pdcache_size = cpuinfo->l1.dc_size;
	pdcache_way_mask = cpuinfo->l1.dc_nways - 1;

	sdcache_stride = cpuinfo->l2.dc_nsets * cpuinfo->l2.dc_linesize;
	sdcache_loopcount = cpuinfo->l2.dc_nways;
	sdcache_size = cpuinfo->l2.dc_size;
	sdcache_way_mask = cpuinfo->l2.dc_nways - 1;

#define	CACHE_DEBUG
#ifdef CACHE_DEBUG
	printf("Cache info:\n");
	if (cpuinfo->icache_virtual)
		printf("  icache is virtual\n");
	printf("  picache_stride    = %d\n", picache_stride);
	printf("  picache_loopcount = %d\n", picache_loopcount);
	printf("  pdcache_stride    = %d\n", pdcache_stride);
	printf("  pdcache_loopcount = %d\n", pdcache_loopcount);
#endif
}

void
mipsNN_icache_sync_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 16);
	}

	SYNC;
}

void
mipsNN_icache_sync_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}

	SYNC;
}

void
mipsNN_icache_sync_all_64(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 64);
	}

	SYNC;
}

void
mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 64;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 16;
	}
}

void
mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
	}
}

void
mipsNN_icache_sync_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line64(va + size);
	va = trunc_line64(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 64)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_64(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 64;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 64;
	}
}

void
mipsNN_pdcache_wbinv_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_all_64(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 64);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 64;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
	}
}

void
mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}

void
mipsNN_pdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line64(va + size);
	va = trunc_line64(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 64)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_64(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 64;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 64;
	}
}

void
mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 64;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 64;
	}

	SYNC;
}

#ifdef CPU_CNMIPS

void
mipsNN_icache_sync_all_128(void)
{
	SYNCI
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

#else

void
mipsNN_icache_sync_all_128(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 128);
	}

	SYNC;
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 128;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line128(va + size);
	va = trunc_line128(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (32 * 128)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_32lines_128(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32 * 128;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 128;
	}
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 128;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line128(va + size);
	va = trunc_line128(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (32 * 128)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_32lines_128(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32 * 128;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 128;
	}
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 128;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 128;
	}

	SYNC;
}

#endif

void
mipsNN_sdcache_wbinv_all_32(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
mipsNN_sdcache_wbinv_all_64(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 64);
	}
}

void
mipsNN_sdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line64(va + size);

	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 64;
	}
}

void
mipsNN_sdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 64;
	}
}

void
mipsNN_sdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line64(va + size);

	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 64;
	}
}

void
mipsNN_sdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

void
mipsNN_sdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line64(va + size);

	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 64;
	}
}

void
mipsNN_sdcache_wbinv_all_128(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}
}

void
mipsNN_sdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 128;
	}
}