1 /* $NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $ */
2
3 /*
4 * Copyright 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD: releng/8.3/sys/mips/mips/cache_mipsNN.c 215938 2010-11-27 12:26:40Z jchandra $");
40
41 #include "opt_cputype.h"
42
43 #include <sys/types.h>
44 #include <sys/systm.h>
45 #include <sys/param.h>
46
47 #include <machine/cache.h>
48 #include <machine/cache_r4k.h>
49 #include <machine/cpuinfo.h>
50
/* Round up / truncate an address to a 16-byte cache-line boundary. */
#define round_line16(x) (((x) + 15) & ~15)
#define trunc_line16(x) ((x) & ~15)

/* Round up / truncate an address to a 32-byte cache-line boundary. */
#define round_line32(x) (((x) + 31) & ~31)
#define trunc_line32(x) ((x) & ~31)


#ifdef SB1250_PASS1
/*
 * Completion barrier issued after a batch of cache ops.  Pass-1
 * SB-1250 parts issue "sync" twice -- presumably a chip erratum
 * workaround; confirm against the SB-1250 errata sheet.
 */
#define SYNC __asm volatile("sync; sync")
#else
#define SYNC __asm volatile("sync")
#endif

#ifdef CPU_CNMIPS
/* Cavium Octeon: whole-I-cache flush goes through this helper. */
#define SYNCI mips_sync_icache();
#else
#define SYNCI
#endif
69
/*
 * Exported variables for consumers like bus_dma code
 */
int mips_picache_linesize;	/* primary I-cache line size, in bytes */
int mips_pdcache_linesize;	/* primary D-cache line size, in bytes */

/*
 * Flush parameters computed once by mipsNN_cache_init() and consumed
 * by the sync/wbinv routines below.
 */
static int picache_size;	/* total primary I-cache size, in bytes */
static int picache_stride;	/* address step between per-way index passes */
static int picache_loopcount;	/* index passes needed to cover every way */
static int picache_way_mask;	/* ic_nways - 1 */
static int pdcache_size;	/* total primary D-cache size, in bytes */
static int pdcache_stride;	/* address step between per-way index passes */
static int pdcache_loopcount;	/* index passes needed to cover every way */
static int pdcache_way_mask;	/* dc_nways - 1 */
84
85 void
86 mipsNN_cache_init(struct mips_cpuinfo * cpuinfo)
87 {
88 int flush_multiple_lines_per_way;
89
90 flush_multiple_lines_per_way = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize * cpuinfo->l1.ic_linesize > PAGE_SIZE;
91 if (cpuinfo->icache_virtual) {
92 /*
93 * With a virtual Icache we don't need to flush
94 * multiples of the page size with index ops; we just
95 * need to flush one pages' worth.
96 */
97 flush_multiple_lines_per_way = 0;
98 }
99
100 if (flush_multiple_lines_per_way) {
101 picache_stride = PAGE_SIZE;
102 picache_loopcount = (cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize / PAGE_SIZE) *
103 cpuinfo->l1.ic_nways;
104 } else {
105 picache_stride = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize;
106 picache_loopcount = cpuinfo->l1.ic_nways;
107 }
108
109 if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) {
110 pdcache_stride = cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize;
111 pdcache_loopcount = cpuinfo->l1.dc_nways;
112 } else {
113 pdcache_stride = PAGE_SIZE;
114 pdcache_loopcount = (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize / PAGE_SIZE) *
115 cpuinfo->l1.dc_nways;
116 }
117
118 mips_picache_linesize = cpuinfo->l1.ic_linesize;
119 mips_pdcache_linesize = cpuinfo->l1.dc_linesize;
120
121 picache_size = cpuinfo->l1.ic_size;
122 picache_way_mask = cpuinfo->l1.ic_nways - 1;
123 pdcache_size = cpuinfo->l1.dc_size;
124 pdcache_way_mask = cpuinfo->l1.dc_nways - 1;
125
126 #define CACHE_DEBUG
127 #ifdef CACHE_DEBUG
128 printf("Cache info:\n");
129 if (cpuinfo->icache_virtual)
130 printf(" icache is virtual\n");
131 printf(" picache_stride = %d\n", picache_stride);
132 printf(" picache_loopcount = %d\n", picache_loopcount);
133 printf(" pdcache_stride = %d\n", pdcache_stride);
134 printf(" pdcache_loopcount = %d\n", pdcache_loopcount);
135 #endif
136 }
137
138 void
139 mipsNN_icache_sync_all_16(void)
140 {
141 vm_offset_t va, eva;
142
143 va = MIPS_PHYS_TO_KSEG0(0);
144 eva = va + picache_size;
145
146 /*
147 * Since we're hitting the whole thing, we don't have to
148 * worry about the N different "ways".
149 */
150
151 mips_intern_dcache_wbinv_all();
152
153 while (va < eva) {
154 cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
155 va += (32 * 16);
156 }
157
158 SYNC;
159 }
160
161 void
162 mipsNN_icache_sync_all_32(void)
163 {
164 vm_offset_t va, eva;
165
166 va = MIPS_PHYS_TO_KSEG0(0);
167 eva = va + picache_size;
168
169 /*
170 * Since we're hitting the whole thing, we don't have to
171 * worry about the N different "ways".
172 */
173
174 mips_intern_dcache_wbinv_all();
175
176 while (va < eva) {
177 cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
178 va += (32 * 32);
179 }
180
181 SYNC;
182 }
183
184 void
185 mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
186 {
187 vm_offset_t eva;
188
189 eva = round_line16(va + size);
190 va = trunc_line16(va);
191
192 mips_intern_dcache_wb_range(va, (eva - va));
193
194 while ((eva - va) >= (32 * 16)) {
195 cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
196 va += (32 * 16);
197 }
198
199 while (va < eva) {
200 cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
201 va += 16;
202 }
203
204 SYNC;
205 }
206
207 void
208 mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
209 {
210 vm_offset_t eva;
211
212 eva = round_line32(va + size);
213 va = trunc_line32(va);
214
215 mips_intern_dcache_wb_range(va, (eva - va));
216
217 while ((eva - va) >= (32 * 32)) {
218 cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
219 va += (32 * 32);
220 }
221
222 while (va < eva) {
223 cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
224 va += 32;
225 }
226
227 SYNC;
228 }
229
230 void
231 mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
232 {
233 unsigned int eva, tmpva;
234 int i, stride, loopcount;
235
236 /*
237 * Since we're doing Index ops, we expect to not be able
238 * to access the address we've been given. So, get the
239 * bits that determine the cache index, and make a KSEG0
240 * address out of them.
241 */
242 va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);
243
244 eva = round_line16(va + size);
245 va = trunc_line16(va);
246
247 /*
248 * GCC generates better code in the loops if we reference local
249 * copies of these global variables.
250 */
251 stride = picache_stride;
252 loopcount = picache_loopcount;
253
254 mips_intern_dcache_wbinv_range_index(va, (eva - va));
255
256 while ((eva - va) >= (8 * 16)) {
257 tmpva = va;
258 for (i = 0; i < loopcount; i++, tmpva += stride)
259 cache_r4k_op_8lines_16(tmpva,
260 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
261 va += 8 * 16;
262 }
263
264 while (va < eva) {
265 tmpva = va;
266 for (i = 0; i < loopcount; i++, tmpva += stride)
267 cache_op_r4k_line(tmpva,
268 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
269 va += 16;
270 }
271 }
272
273 void
274 mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
275 {
276 unsigned int eva, tmpva;
277 int i, stride, loopcount;
278
279 /*
280 * Since we're doing Index ops, we expect to not be able
281 * to access the address we've been given. So, get the
282 * bits that determine the cache index, and make a KSEG0
283 * address out of them.
284 */
285 va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);
286
287 eva = round_line32(va + size);
288 va = trunc_line32(va);
289
290 /*
291 * GCC generates better code in the loops if we reference local
292 * copies of these global variables.
293 */
294 stride = picache_stride;
295 loopcount = picache_loopcount;
296
297 mips_intern_dcache_wbinv_range_index(va, (eva - va));
298
299 while ((eva - va) >= (8 * 32)) {
300 tmpva = va;
301 for (i = 0; i < loopcount; i++, tmpva += stride)
302 cache_r4k_op_8lines_32(tmpva,
303 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
304 va += 8 * 32;
305 }
306
307 while (va < eva) {
308 tmpva = va;
309 for (i = 0; i < loopcount; i++, tmpva += stride)
310 cache_op_r4k_line(tmpva,
311 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
312 va += 32;
313 }
314 }
315
316 void
317 mipsNN_pdcache_wbinv_all_16(void)
318 {
319 vm_offset_t va, eva;
320
321 va = MIPS_PHYS_TO_KSEG0(0);
322 eva = va + pdcache_size;
323
324 /*
325 * Since we're hitting the whole thing, we don't have to
326 * worry about the N different "ways".
327 */
328
329 while (va < eva) {
330 cache_r4k_op_32lines_16(va,
331 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
332 va += (32 * 16);
333 }
334
335 SYNC;
336 }
337
338 void
339 mipsNN_pdcache_wbinv_all_32(void)
340 {
341 vm_offset_t va, eva;
342
343 va = MIPS_PHYS_TO_KSEG0(0);
344 eva = va + pdcache_size;
345
346 /*
347 * Since we're hitting the whole thing, we don't have to
348 * worry about the N different "ways".
349 */
350
351 while (va < eva) {
352 cache_r4k_op_32lines_32(va,
353 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
354 va += (32 * 32);
355 }
356
357 SYNC;
358 }
359
360 void
361 mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
362 {
363 vm_offset_t eva;
364
365 eva = round_line16(va + size);
366 va = trunc_line16(va);
367
368 while ((eva - va) >= (32 * 16)) {
369 cache_r4k_op_32lines_16(va,
370 CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
371 va += (32 * 16);
372 }
373
374 while (va < eva) {
375 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
376 va += 16;
377 }
378
379 SYNC;
380 }
381
382 void
383 mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
384 {
385 vm_offset_t eva;
386
387 eva = round_line32(va + size);
388 va = trunc_line32(va);
389
390 while ((eva - va) >= (32 * 32)) {
391 cache_r4k_op_32lines_32(va,
392 CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
393 va += (32 * 32);
394 }
395
396 while (va < eva) {
397 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
398 va += 32;
399 }
400
401 SYNC;
402 }
403
/*
 * Write back and invalidate a range of the D-cache (16-byte lines)
 * with index ops, hitting the corresponding index in every way.
 */
void
mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 *
	 * NOTE(review): pdcache_way_mask is dc_nways - 1, which keeps
	 * only the low way-number bits of va rather than the cache
	 * index bits -- confirm this mask is the intended one.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	/* Widen the range to cover whole 16-byte lines. */
	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	/* 8 lines per call, repeated once per way (loopcount). */
	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 16;
	}

	/* Remaining lines, one index op per way at a time. */
	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
	}
}
444
/*
 * Write back and invalidate a range of the D-cache (32-byte lines)
 * with index ops, hitting the corresponding index in every way.
 */
void
mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 *
	 * NOTE(review): pdcache_way_mask is dc_nways - 1, which keeps
	 * only the low way-number bits of va rather than the cache
	 * index bits -- confirm this mask is the intended one.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	/* Widen the range to cover whole 32-byte lines. */
	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	/* 8 lines per call, repeated once per way (loopcount). */
	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 32;
	}

	/* Remaining lines, one index op per way at a time. */
	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}
485
486 void
487 mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
488 {
489 vm_offset_t eva;
490
491 eva = round_line16(va + size);
492 va = trunc_line16(va);
493
494 while ((eva - va) >= (32 * 16)) {
495 cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
496 va += (32 * 16);
497 }
498
499 while (va < eva) {
500 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
501 va += 16;
502 }
503
504 SYNC;
505 }
506
507 void
508 mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
509 {
510 vm_offset_t eva;
511
512 eva = round_line32(va + size);
513 va = trunc_line32(va);
514
515 while ((eva - va) >= (32 * 32)) {
516 cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
517 va += (32 * 32);
518 }
519
520 while (va < eva) {
521 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
522 va += 32;
523 }
524
525 SYNC;
526 }
527
528 void
529 mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
530 {
531 vm_offset_t eva;
532
533 eva = round_line16(va + size);
534 va = trunc_line16(va);
535
536 while ((eva - va) >= (32 * 16)) {
537 cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
538 va += (32 * 16);
539 }
540
541 while (va < eva) {
542 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
543 va += 16;
544 }
545
546 SYNC;
547 }
548
549 void
550 mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
551 {
552 vm_offset_t eva;
553
554 eva = round_line32(va + size);
555 va = trunc_line32(va);
556
557 while ((eva - va) >= (32 * 32)) {
558 cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
559 va += (32 * 32);
560 }
561
562 while (va < eva) {
563 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
564 va += 32;
565 }
566
567 SYNC;
568 }
569
570
#ifdef CPU_CNMIPS

/*
 * Cavium Octeon (cnMIPS) variants for 128-byte cache lines.
 *
 * NOTE(review): the empty/no-op bodies presumably rely on the Octeon
 * L1 D-cache being coherent, leaving only I-cache synchronization
 * (SYNCI -> mips_sync_icache()) and ordering barriers (SYNC) with
 * real work to do -- confirm against the Octeon hardware manual.
 */

/* Flush the whole I-cache via the SYNCI helper macro. */
void
mipsNN_icache_sync_all_128(void)
{
	SYNCI
}

/* Range sync: an ordering barrier only; no per-line ops. */
void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

/* Index-range sync: intentionally a no-op. */
void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
}


/* D-cache writeback/invalidate: intentionally a no-op. */
void
mipsNN_pdcache_wbinv_all_128(void)
{
}


/* Range writeback/invalidate: an ordering barrier only. */
void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

/* Index-range writeback/invalidate: intentionally a no-op. */
void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
}

/* Range invalidate: intentionally a no-op. */
void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
}

/* Range writeback: an ordering barrier only. */
void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

#endif
Cache object: 9f8c7a5f997682bcffa2586dc0c7f1be
|