/*	$NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $	*/

/*
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>

#include <machine/cache.h>
#include <machine/cache_r4k.h>
#include <machine/cpuinfo.h>

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)

#define	round_line32(x)		(((x) + 31) & ~31)
#define	trunc_line32(x)		((x) & ~31)

#define	round_line64(x)		(((x) + 63) & ~63)
#define	trunc_line64(x)		((x) & ~63)

#define	round_line128(x)	(((x) + 127) & ~127)
#define	trunc_line128(x)	((x) & ~127)
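
/*
 * For illustration: with 32-byte lines, trunc_line32(0x80000411) ==
 * 0x80000400 and round_line32(0x80000411) == 0x80000420, so rounding a
 * [va, va + size) range outward with these macros always yields
 * whole-line boundaries before any line-sized cache op is applied.
 */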

#if defined(CPU_NLM)
/*
 * Execute an instruction-hazard barrier on XLP: "jr.hb" is jr with the
 * hazard-barrier hint bit (bit 10) set, so "jr.hb $8" encodes as
 * 0x1000408; it is emitted with .word for assemblers that lack the
 * MIPS32r2 mnemonic.
 */
static __inline void
xlp_sync(void)
{
	__asm __volatile (
	    ".set push		\n"
	    ".set noreorder	\n"
	    ".set mips64	\n"
	    "dla $8, 1f		\n"
	    "/* jr.hb $8 */	\n"
	    ".word 0x1000408	\n"
	    "nop		\n"
	    "1: nop		\n"
	    ".set pop		\n"
	    : : : "$8");
}
#endif

#if defined(SB1250_PASS1)
#define	SYNC	__asm volatile("sync; sync")
#elif defined(CPU_NLM)
#define	SYNC	xlp_sync()
#else
#define	SYNC	__asm volatile("sync")
#endif

/*
 * SYNCI is defined without a trailing semicolon so that call sites can
 * use the normal "SYNCI;" statement form.
 */
#if defined(CPU_CNMIPS)
#define	SYNCI	mips_sync_icache()
#elif defined(CPU_NLM)
#define	SYNCI	xlp_sync()
#else
#define	SYNCI
#endif

/*
 * Exported variables for consumers like bus_dma code
 */
int mips_picache_linesize;
int mips_pdcache_linesize;
int mips_sdcache_linesize;
int mips_dcache_max_linesize;
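
/*
 * Illustrative sketch (not taken from the bus_dma code): a consumer
 * aligning DMA buffers can round a transfer length up to a whole
 * number of writeback lines with
 *
 *	len = roundup2(len, mips_dcache_max_linesize);
 *
 * so that a partial-line invalidation never clips adjacent data.
 */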

static int picache_size;
static int picache_stride;
static int picache_loopcount;
static int picache_way_mask;
static int pdcache_size;
static int pdcache_stride;
static int pdcache_loopcount;
static int pdcache_way_mask;
static int sdcache_size;
static int sdcache_stride;
static int sdcache_loopcount;
static int sdcache_way_mask;

void
mipsNN_cache_init(struct mips_cpuinfo *cpuinfo)
{
	int flush_multiple_lines_per_way;

	/* One way spans ic_nsets * ic_linesize bytes. */
	flush_multiple_lines_per_way =
	    cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize > PAGE_SIZE;
	if (cpuinfo->icache_virtual) {
		/*
		 * With a virtual Icache we don't need to flush
		 * multiples of the page size with index ops; we just
		 * need to flush one page's worth.
		 */
		flush_multiple_lines_per_way = 0;
	}

	if (flush_multiple_lines_per_way) {
		picache_stride = PAGE_SIZE;
		picache_loopcount = (cpuinfo->l1.ic_nsets *
		    cpuinfo->l1.ic_linesize / PAGE_SIZE) *
		    cpuinfo->l1.ic_nways;
	} else {
		picache_stride = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize;
		picache_loopcount = cpuinfo->l1.ic_nways;
	}

	if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) {
		pdcache_stride = cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize;
		pdcache_loopcount = cpuinfo->l1.dc_nways;
	} else {
		pdcache_stride = PAGE_SIZE;
		pdcache_loopcount = (cpuinfo->l1.dc_nsets *
		    cpuinfo->l1.dc_linesize / PAGE_SIZE) *
		    cpuinfo->l1.dc_nways;
	}
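
	/*
	 * Worked example (hypothetical configuration, for illustration
	 * only): a 16KB 4-way Icache has 4096-byte ways, which does not
	 * exceed a 4KB PAGE_SIZE, so picache_stride = 4096 and
	 * picache_loopcount = 4 (one pass per way).  A 32KB 4-way
	 * Icache has 8192-byte ways, so the index loops instead step by
	 * PAGE_SIZE with picache_loopcount = (8192 / 4096) * 4 = 8.
	 */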

	mips_picache_linesize = cpuinfo->l1.ic_linesize;
	mips_pdcache_linesize = cpuinfo->l1.dc_linesize;

	picache_size = cpuinfo->l1.ic_size;
	/* Mask of the bits that select a byte within a single way. */
	picache_way_mask = (cpuinfo->l1.ic_size / cpuinfo->l1.ic_nways) - 1;
	pdcache_size = cpuinfo->l1.dc_size;
	pdcache_way_mask = (cpuinfo->l1.dc_size / cpuinfo->l1.dc_nways) - 1;

	sdcache_stride = cpuinfo->l2.dc_nsets * cpuinfo->l2.dc_linesize;
	sdcache_loopcount = cpuinfo->l2.dc_nways;
	sdcache_size = cpuinfo->l2.dc_size;
	sdcache_way_mask = cpuinfo->l2.dc_nways - 1;

	mips_sdcache_linesize = cpuinfo->l2.dc_linesize;
	mips_dcache_max_linesize = MAX(mips_pdcache_linesize,
	    mips_sdcache_linesize);

#define	CACHE_DEBUG
#ifdef CACHE_DEBUG
	printf("Cache info:\n");
	if (cpuinfo->icache_virtual)
		printf("  icache is virtual\n");
	printf("  picache_stride    = %d\n", picache_stride);
	printf("  picache_loopcount = %d\n", picache_loopcount);
	printf("  pdcache_stride    = %d\n", pdcache_stride);
	printf("  pdcache_loopcount = %d\n", pdcache_loopcount);
	printf("  max line size     = %d\n", mips_dcache_max_linesize);
#endif
}

void
mipsNN_icache_sync_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

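	/*
	 * Write back and invalidate the dcache first so instruction
	 * fetch observes any freshly written code once the icache
	 * lines below are invalidated.
	 */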
	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 16);
	}

	SYNC;
}

void
mipsNN_icache_sync_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}

	SYNC;
}

void
mipsNN_icache_sync_all_64(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 64);
	}

	SYNC;
}

void
mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	mips_intern_dcache_wb_range(va, (eva - va));

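	/*
	 * The loop pattern used throughout this file: consume the range
	 * in unrolled 32-line batches (here 32 * 16 = 512 bytes per
	 * iteration) while at least that much remains, then finish the
	 * tail one line at a time.
	 */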
	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 64;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);
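	/*
	 * Example (hypothetical 16KB 4-way Icache, so one way spans
	 * 4096 bytes and picache_way_mask == 0xfff): va 0x00402a34
	 * masks down to 0xa34 and becomes KSEG0 address 0x80000a34;
	 * the stride/loopcount walk below then hits that index in
	 * every way.
	 */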

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 16;
	}
}

void
mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
	}
}

void
mipsNN_icache_sync_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line64(va + size);
	va = trunc_line64(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 64)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_64(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 64;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 64;
	}
}

void
mipsNN_pdcache_wbinv_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_all_64(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 64);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 64;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
	}
}

void
mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}

void
mipsNN_pdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line64(va + size);
	va = trunc_line64(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 64)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_64(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 64;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 64;
	}
}

void
mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 64;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 64;
	}

	SYNC;
}

#ifdef CPU_CNMIPS

/*
 * cnMIPS (Octeon) keeps its caches coherent in hardware and its L1
 * data cache is write-through, so the 128-byte-line variants reduce
 * to a barrier or to nothing at all.
 */
void
mipsNN_icache_sync_all_128(void)
{
	SYNCI;
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

#else

void
mipsNN_icache_sync_all_128(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 128);
	}

	SYNC;
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 128;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line128(va + size);
	va = trunc_line128(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (32 * 128)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_32lines_128(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32 * 128;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 128;
	}
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 128;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line128(va + size);
	va = trunc_line128(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (32 * 128)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_32lines_128(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32 * 128;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 128;
	}
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 128;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 128;
	}

	SYNC;
}

#endif

void
mipsNN_sdcache_wbinv_all_32(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
mipsNN_sdcache_wbinv_all_64(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 64);
	}
}

void
mipsNN_sdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line64(va + size);

	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 64;
	}
}

void
mipsNN_sdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));
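	/*
	 * Unlike the primary-cache index routines above, there is no
	 * stride/loopcount walk here: the VA is masked with the full
	 * (sdcache_size - 1), so the index op's way-selection bits are
	 * taken directly from the resulting KSEG0 address.
	 */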

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 64;
	}
}

void
mipsNN_sdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line64(va + size);

	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 64;
	}
}

void
mipsNN_sdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

void
mipsNN_sdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line64(va + size);

	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 64;
	}
}

void
mipsNN_sdcache_wbinv_all_128(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}
}

void
mipsNN_sdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 128;
	}
}