/* $NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $ */

/*
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>

#include <machine/cache.h>
#include <machine/cache_r4k.h>
#include <machine/cpuinfo.h>

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)

#define	round_line32(x)		(((x) + 31) & ~31)
#define	trunc_line32(x)		((x) & ~31)

#define	round_line64(x)		(((x) + 63) & ~63)
#define	trunc_line64(x)		((x) & ~63)

#define	round_line128(x)	(((x) + 127) & ~127)
#define	trunc_line128(x)	((x) & ~127)
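
/*
 * Example: with 32-byte lines, round_line32(0x80001005) == 0x80001020
 * and trunc_line32(0x80001005) == 0x80001000, so a [va, va + size)
 * range is widened outward to whole-line boundaries before any cache
 * ops are issued.
 */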

#if defined(CPU_NLM)
/*
 * XLP: jump to the next instruction through a "jr.hb" (jump register
 * with hazard barrier) so that all pending instruction hazards are
 * cleared.  The opcode is emitted with .word because the instruction
 * may not be known to older assemblers.
 */
static __inline void
xlp_sync(void)
{
	__asm __volatile (
	    ".set push\n"
	    ".set noreorder\n"
	    ".set mips64\n"
	    "dla $8, 1f\n"
	    "/* jr.hb $8 */\n"
	    ".word 0x1000408\n"
	    "nop\n"
	    "1: nop\n"
	    ".set pop\n"
	    : : : "$8");
}
#endif
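
/*
 * Barrier selection: SB-1 pass 1 parts use a doubled SYNC as an
 * errata workaround, XLP uses the jr.hb-based xlp_sync() above, and
 * everything else gets a plain SYNC.  SYNCI likewise selects the
 * per-CPU way of synchronizing the instruction stream.
 */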

#if defined(SB1250_PASS1)
#define	SYNC	__asm volatile("sync; sync")
#elif defined(CPU_NLM)
#define	SYNC	xlp_sync()
#else
#define	SYNC	__asm volatile("sync")
#endif

#if defined(CPU_CNMIPS)
#define	SYNCI	mips_sync_icache();
#elif defined(CPU_NLM)
#define	SYNCI	xlp_sync()
#else
#define	SYNCI
#endif

/*
 * Exported variables for consumers like bus_dma code.
 */
int mips_picache_linesize;
int mips_pdcache_linesize;
int mips_sdcache_linesize;
int mips_dcache_max_linesize;

static int picache_size;
static int picache_stride;
static int picache_loopcount;
static int picache_way_mask;
static int pdcache_size;
static int pdcache_stride;
static int pdcache_loopcount;
static int pdcache_way_mask;
static int sdcache_size;
static int sdcache_stride;
static int sdcache_loopcount;
static int sdcache_way_mask;

void
mipsNN_cache_init(struct mips_cpuinfo *cpuinfo)
{
	int flush_multiple_lines_per_way;

	/*
	 * One way of the Icache is ic_nsets * ic_linesize bytes.  When a
	 * way is larger than a page, index ops on one page's worth of
	 * addresses cannot reach every set, so each way has to be
	 * flushed a page at a time.
	 */
	flush_multiple_lines_per_way =
	    cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize > PAGE_SIZE;
	if (cpuinfo->icache_virtual) {
		/*
		 * With a virtual Icache we don't need to flush
		 * multiples of the page size with index ops; we just
		 * need to flush one page's worth.
		 */
		flush_multiple_lines_per_way = 0;
	}

	if (flush_multiple_lines_per_way) {
		picache_stride = PAGE_SIZE;
		picache_loopcount = (cpuinfo->l1.ic_nsets *
		    cpuinfo->l1.ic_linesize / PAGE_SIZE) *
		    cpuinfo->l1.ic_nways;
	} else {
		picache_stride = cpuinfo->l1.ic_nsets *
		    cpuinfo->l1.ic_linesize;
		picache_loopcount = cpuinfo->l1.ic_nways;
	}

	if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) {
		pdcache_stride = cpuinfo->l1.dc_nsets *
		    cpuinfo->l1.dc_linesize;
		pdcache_loopcount = cpuinfo->l1.dc_nways;
	} else {
		pdcache_stride = PAGE_SIZE;
		pdcache_loopcount = (cpuinfo->l1.dc_nsets *
		    cpuinfo->l1.dc_linesize / PAGE_SIZE) *
		    cpuinfo->l1.dc_nways;
	}
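
	/*
	 * Worked example (hypothetical geometry): a 32 KB, 4-way Icache
	 * with 32-byte lines has an 8 KB way (256 sets * 32 bytes).
	 * With 4 KB pages that is two pages per way, so picache_stride
	 * ends up as 4096 and picache_loopcount as (8192 / 4096) * 4 = 8.
	 * A 16 KB, 4-way Dcache with the same line size has a 4 KB way,
	 * giving pdcache_stride = 4096 and pdcache_loopcount = 4.
	 */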

	mips_picache_linesize = cpuinfo->l1.ic_linesize;
	mips_pdcache_linesize = cpuinfo->l1.dc_linesize;

	picache_size = cpuinfo->l1.ic_size;
	/* The way masks select the index bits within a single way. */
	picache_way_mask = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize - 1;
	pdcache_size = cpuinfo->l1.dc_size;
	pdcache_way_mask = cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize - 1;

	sdcache_stride = cpuinfo->l2.dc_nsets * cpuinfo->l2.dc_linesize;
	sdcache_loopcount = cpuinfo->l2.dc_nways;
	sdcache_size = cpuinfo->l2.dc_size;
	sdcache_way_mask = cpuinfo->l2.dc_nsets * cpuinfo->l2.dc_linesize - 1;

	mips_sdcache_linesize = cpuinfo->l2.dc_linesize;
	mips_dcache_max_linesize = MAX(mips_pdcache_linesize,
	    mips_sdcache_linesize);

#define CACHE_DEBUG
#ifdef CACHE_DEBUG
	printf("Cache info:\n");
	if (cpuinfo->icache_virtual)
		printf(" icache is virtual\n");
	printf(" picache_stride = %d\n", picache_stride);
	printf(" picache_loopcount = %d\n", picache_loopcount);
	printf(" pdcache_stride = %d\n", pdcache_stride);
	printf(" pdcache_loopcount = %d\n", pdcache_loopcount);
	printf(" max line size = %d\n", mips_dcache_max_linesize);
#endif
}
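
/*
 * Each operation below comes in one variant per supported line size
 * (16, 32, 64 and, further down, 128 bytes).  The MI cache code is
 * expected to install the variant matching the probed line size into
 * its table of cache function pointers at boot; nothing here
 * dispatches on line size at run time.
 */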

void
mipsNN_icache_sync_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 16);
	}

	SYNC;
}
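
/*
 * The _32, _64 and (further down) _128 variants are identical to the
 * _16 version except for the line size; each cache_r4k_op_32lines_*()
 * call is an unrolled batch of 32 "cache" instructions, so the loop
 * advances 32 lines per iteration.
 */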

void
mipsNN_icache_sync_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}

	SYNC;
}

void
mipsNN_icache_sync_all_64(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 64);
	}

	SYNC;
}

void
mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 64;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);
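
	/*
	 * Example: with a 4 KB way (picache_way_mask == 0xfff), va
	 * 0x80123465 reduces to the index bits 0x465 and becomes the
	 * KSEG0 address 0x80000465, which maps to the same cache sets.
	 */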

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 16;
	}
}

void
mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
	}
}

void
mipsNN_icache_sync_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line64(va + size);
	va = trunc_line64(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 64)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_64(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 64;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 64;
	}
}
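
/*
 * Primary Dcache maintenance comes in three flavors:
 *
 *	wbinv	write dirty lines back to memory, then invalidate;
 *	inv	invalidate without writing back, discarding any dirty
 *		data (typically used on DMA-receive buffers, so callers
 *		must not share a line between the buffer and live data);
 *	wb	write dirty lines back but leave them valid (typically
 *		used before a device DMAs the buffer out of memory).
 */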

void
mipsNN_pdcache_wbinv_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_all_64(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 64);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 64;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
	}
}

void
mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}

void
mipsNN_pdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line64(va + size);
	va = trunc_line64(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 64)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_64(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 64;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 64;
	}
}

void
mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 64;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 64;
	}

	SYNC;
}

#ifdef CPU_CNMIPS

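/*
 * cnMIPS (Cavium OCTEON) cores maintain coherence in hardware: the L1
 * Dcache is write-through and kept coherent with DMA, so most Dcache
 * ops reduce to a SYNC or to nothing, and Icache synchronization is
 * handled by mips_sync_icache() via the SYNCI macro above.
 */
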
void
mipsNN_icache_sync_all_128(void)
{
	SYNCI
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

#else

void
mipsNN_icache_sync_all_128(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 128);
	}

	SYNC;
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 128;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line128(va + size);
	va = trunc_line128(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (32 * 128)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_32lines_128(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32 * 128;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 128;
	}
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 128;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line128(va + size);
	va = trunc_line128(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (32 * 128)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_32lines_128(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32 * 128;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 128;
	}
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 128;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 128;
	}

	SYNC;
}

#endif

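/*
 * Secondary (L2) cache ops target CACHE_R4K_SD.  The index variants
 * mask the address with (sdcache_size - 1) and then walk addresses
 * spanning the full cache size, which reaches every way (the address
 * bits above the set index select the way for index ops), so no
 * explicit way loop like the primary-cache routines use is needed.
 */
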
void
mipsNN_sdcache_wbinv_all_32(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
mipsNN_sdcache_wbinv_all_64(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 64);
	}
}

void
mipsNN_sdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line64(va + size);

	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 64;
	}
}

void
mipsNN_sdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 64;
	}
}

void
mipsNN_sdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line64(va + size);

	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 64;
	}
}

void
mipsNN_sdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

void
mipsNN_sdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line64(va + size);

	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 64;
	}
}

void
mipsNN_sdcache_wbinv_all_128(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}
}

void
mipsNN_sdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 128;
	}
}