1 /*
2 * include/asm-alpha/xor.h
3 *
4 * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
9 * any later version.
10 *
11 * You should have received a copy of the GNU General Public License
12 * (for example /usr/src/linux/COPYING); if not, write to the Free
13 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14 */
15
/*
 * Plain (non-prefetching) XOR routines; the bodies live in the file-scope
 * assembly in this header.
 *
 * bytes - length of each buffer in bytes; the assembly shifts it right by
 *         6 and handles 64 bytes per loop pass.
 * p1    - first source buffer, which also receives the XOR result.
 * p2-p5 - further source buffers, read only.
 */
extern void xor_alpha_2(unsigned long bytes,
			unsigned long *p1,
			unsigned long *p2);
extern void xor_alpha_3(unsigned long bytes,
			unsigned long *p1,
			unsigned long *p2,
			unsigned long *p3);
extern void xor_alpha_4(unsigned long bytes,
			unsigned long *p1,
			unsigned long *p2,
			unsigned long *p3,
			unsigned long *p4);
extern void xor_alpha_5(unsigned long bytes,
			unsigned long *p1,
			unsigned long *p2,
			unsigned long *p3,
			unsigned long *p4,
			unsigned long *p5);
23
/*
 * Prefetching XOR routines; same argument layout as the xor_alpha_N
 * family (byte count, then the destination/first source p1, then the
 * remaining sources).  The assembly bodies additionally issue loads into
 * $31 ahead of the data being processed.
 */
extern void xor_alpha_prefetch_2(unsigned long bytes,
				 unsigned long *p1,
				 unsigned long *p2);
extern void xor_alpha_prefetch_3(unsigned long bytes,
				 unsigned long *p1,
				 unsigned long *p2,
				 unsigned long *p3);
extern void xor_alpha_prefetch_4(unsigned long bytes,
				 unsigned long *p1,
				 unsigned long *p2,
				 unsigned long *p3,
				 unsigned long *p4);
extern void xor_alpha_prefetch_5(unsigned long bytes,
				 unsigned long *p1,
				 unsigned long *p2,
				 unsigned long *p3,
				 unsigned long *p4,
				 unsigned long *p5);
34
35 asm("
36 .text
37 .align 3
38 .ent xor_alpha_2
39 xor_alpha_2:
40 .prologue 0
41 srl $16, 6, $16
42 .align 4
43 2:
44 ldq $0,0($17)
45 ldq $1,0($18)
46 ldq $2,8($17)
47 ldq $3,8($18)
48
49 ldq $4,16($17)
50 ldq $5,16($18)
51 ldq $6,24($17)
52 ldq $7,24($18)
53
54 ldq $19,32($17)
55 ldq $20,32($18)
56 ldq $21,40($17)
57 ldq $22,40($18)
58
59 ldq $23,48($17)
60 ldq $24,48($18)
61 ldq $25,56($17)
62 xor $0,$1,$0 # 7 cycles from $1 load
63
64 ldq $27,56($18)
65 xor $2,$3,$2
66 stq $0,0($17)
67 xor $4,$5,$4
68
69 stq $2,8($17)
70 xor $6,$7,$6
71 stq $4,16($17)
72 xor $19,$20,$19
73
74 stq $6,24($17)
75 xor $21,$22,$21
76 stq $19,32($17)
77 xor $23,$24,$23
78
79 stq $21,40($17)
80 xor $25,$27,$25
81 stq $23,48($17)
82 subq $16,1,$16
83
84 stq $25,56($17)
85 addq $17,64,$17
86 addq $18,64,$18
87 bgt $16,2b
88
89 ret
90 .end xor_alpha_2
91
92 .align 3
93 .ent xor_alpha_3
94 xor_alpha_3:
95 .prologue 0
96 srl $16, 6, $16
97 .align 4
98 3:
99 ldq $0,0($17)
100 ldq $1,0($18)
101 ldq $2,0($19)
102 ldq $3,8($17)
103
104 ldq $4,8($18)
105 ldq $6,16($17)
106 ldq $7,16($18)
107 ldq $21,24($17)
108
109 ldq $22,24($18)
110 ldq $24,32($17)
111 ldq $25,32($18)
112 ldq $5,8($19)
113
114 ldq $20,16($19)
115 ldq $23,24($19)
116 ldq $27,32($19)
117 nop
118
119 xor $0,$1,$1 # 8 cycles from $0 load
120 xor $3,$4,$4 # 6 cycles from $4 load
121 xor $6,$7,$7 # 6 cycles from $7 load
122 xor $21,$22,$22 # 5 cycles from $22 load
123
124 xor $1,$2,$2 # 9 cycles from $2 load
125 xor $24,$25,$25 # 5 cycles from $25 load
126 stq $2,0($17)
127 xor $4,$5,$5 # 6 cycles from $5 load
128
129 stq $5,8($17)
130 xor $7,$20,$20 # 7 cycles from $20 load
131 stq $20,16($17)
132 xor $22,$23,$23 # 7 cycles from $23 load
133
134 stq $23,24($17)
135 xor $25,$27,$27 # 7 cycles from $27 load
136 stq $27,32($17)
137 nop
138
139 ldq $0,40($17)
140 ldq $1,40($18)
141 ldq $3,48($17)
142 ldq $4,48($18)
143
144 ldq $6,56($17)
145 ldq $7,56($18)
146 ldq $2,40($19)
147 ldq $5,48($19)
148
149 ldq $20,56($19)
150 xor $0,$1,$1 # 4 cycles from $1 load
151 xor $3,$4,$4 # 5 cycles from $4 load
152 xor $6,$7,$7 # 5 cycles from $7 load
153
154 xor $1,$2,$2 # 4 cycles from $2 load
155 xor $4,$5,$5 # 5 cycles from $5 load
156 stq $2,40($17)
157 xor $7,$20,$20 # 4 cycles from $20 load
158
159 stq $5,48($17)
160 subq $16,1,$16
161 stq $20,56($17)
162 addq $19,64,$19
163
164 addq $18,64,$18
165 addq $17,64,$17
166 bgt $16,3b
167 ret
168 .end xor_alpha_3
169
170 .align 3
171 .ent xor_alpha_4
172 xor_alpha_4:
173 .prologue 0
174 srl $16, 6, $16
175 .align 4
176 4:
177 ldq $0,0($17)
178 ldq $1,0($18)
179 ldq $2,0($19)
180 ldq $3,0($20)
181
182 ldq $4,8($17)
183 ldq $5,8($18)
184 ldq $6,8($19)
185 ldq $7,8($20)
186
187 ldq $21,16($17)
188 ldq $22,16($18)
189 ldq $23,16($19)
190 ldq $24,16($20)
191
192 ldq $25,24($17)
193 xor $0,$1,$1 # 6 cycles from $1 load
194 ldq $27,24($18)
195 xor $2,$3,$3 # 6 cycles from $3 load
196
197 ldq $0,24($19)
198 xor $1,$3,$3
199 ldq $1,24($20)
200 xor $4,$5,$5 # 7 cycles from $5 load
201
202 stq $3,0($17)
203 xor $6,$7,$7
204 xor $21,$22,$22 # 7 cycles from $22 load
205 xor $5,$7,$7
206
207 stq $7,8($17)
208 xor $23,$24,$24 # 7 cycles from $24 load
209 ldq $2,32($17)
210 xor $22,$24,$24
211
212 ldq $3,32($18)
213 ldq $4,32($19)
214 ldq $5,32($20)
215 xor $25,$27,$27 # 8 cycles from $27 load
216
217 ldq $6,40($17)
218 ldq $7,40($18)
219 ldq $21,40($19)
220 ldq $22,40($20)
221
222 stq $24,16($17)
223 xor $0,$1,$1 # 9 cycles from $1 load
224 xor $2,$3,$3 # 5 cycles from $3 load
225 xor $27,$1,$1
226
227 stq $1,24($17)
228 xor $4,$5,$5 # 5 cycles from $5 load
229 ldq $23,48($17)
230 ldq $24,48($18)
231
232 ldq $25,48($19)
233 xor $3,$5,$5
234 ldq $27,48($20)
235 ldq $0,56($17)
236
237 ldq $1,56($18)
238 ldq $2,56($19)
239 xor $6,$7,$7 # 8 cycles from $6 load
240 ldq $3,56($20)
241
242 stq $5,32($17)
243 xor $21,$22,$22 # 8 cycles from $22 load
244 xor $7,$22,$22
245 xor $23,$24,$24 # 5 cycles from $24 load
246
247 stq $22,40($17)
248 xor $25,$27,$27 # 5 cycles from $27 load
249 xor $24,$27,$27
250 xor $0,$1,$1 # 5 cycles from $1 load
251
252 stq $27,48($17)
253 xor $2,$3,$3 # 4 cycles from $3 load
254 xor $1,$3,$3
255 subq $16,1,$16
256
257 stq $3,56($17)
258 addq $20,64,$20
259 addq $19,64,$19
260 addq $18,64,$18
261
262 addq $17,64,$17
263 bgt $16,4b
264 ret
265 .end xor_alpha_4
266
267 .align 3
268 .ent xor_alpha_5
269 xor_alpha_5:
270 .prologue 0
271 srl $16, 6, $16
272 .align 4
273 5:
274 ldq $0,0($17)
275 ldq $1,0($18)
276 ldq $2,0($19)
277 ldq $3,0($20)
278
279 ldq $4,0($21)
280 ldq $5,8($17)
281 ldq $6,8($18)
282 ldq $7,8($19)
283
284 ldq $22,8($20)
285 ldq $23,8($21)
286 ldq $24,16($17)
287 ldq $25,16($18)
288
289 ldq $27,16($19)
290 xor $0,$1,$1 # 6 cycles from $1 load
291 ldq $28,16($20)
292 xor $2,$3,$3 # 6 cycles from $3 load
293
294 ldq $0,16($21)
295 xor $1,$3,$3
296 ldq $1,24($17)
297 xor $3,$4,$4 # 7 cycles from $4 load
298
299 stq $4,0($17)
300 xor $5,$6,$6 # 7 cycles from $6 load
301 xor $7,$22,$22 # 7 cycles from $22 load
302 xor $6,$23,$23 # 7 cycles from $23 load
303
304 ldq $2,24($18)
305 xor $22,$23,$23
306 ldq $3,24($19)
307 xor $24,$25,$25 # 8 cycles from $25 load
308
309 stq $23,8($17)
310 xor $25,$27,$27 # 8 cycles from $27 load
311 ldq $4,24($20)
312 xor $28,$0,$0 # 7 cycles from $0 load
313
314 ldq $5,24($21)
315 xor $27,$0,$0
316 ldq $6,32($17)
317 ldq $7,32($18)
318
319 stq $0,16($17)
320 xor $1,$2,$2 # 6 cycles from $2 load
321 ldq $22,32($19)
322 xor $3,$4,$4 # 4 cycles from $4 load
323
324 ldq $23,32($20)
325 xor $2,$4,$4
326 ldq $24,32($21)
327 ldq $25,40($17)
328
329 ldq $27,40($18)
330 ldq $28,40($19)
331 ldq $0,40($20)
332 xor $4,$5,$5 # 7 cycles from $5 load
333
334 stq $5,24($17)
335 xor $6,$7,$7 # 7 cycles from $7 load
336 ldq $1,40($21)
337 ldq $2,48($17)
338
339 ldq $3,48($18)
340 xor $7,$22,$22 # 7 cycles from $22 load
341 ldq $4,48($19)
342 xor $23,$24,$24 # 6 cycles from $24 load
343
344 ldq $5,48($20)
345 xor $22,$24,$24
346 ldq $6,48($21)
347 xor $25,$27,$27 # 7 cycles from $27 load
348
349 stq $24,32($17)
350 xor $27,$28,$28 # 8 cycles from $28 load
351 ldq $7,56($17)
352 xor $0,$1,$1 # 6 cycles from $1 load
353
354 ldq $22,56($18)
355 ldq $23,56($19)
356 ldq $24,56($20)
357 ldq $25,56($21)
358
359 xor $28,$1,$1
360 xor $2,$3,$3 # 9 cycles from $3 load
361 xor $3,$4,$4 # 9 cycles from $4 load
362 xor $5,$6,$6 # 8 cycles from $6 load
363
364 stq $1,40($17)
365 xor $4,$6,$6
366 xor $7,$22,$22 # 7 cycles from $22 load
367 xor $23,$24,$24 # 6 cycles from $24 load
368
369 stq $6,48($17)
370 xor $22,$24,$24
371 subq $16,1,$16
372 xor $24,$25,$25 # 8 cycles from $25 load
373
374 stq $25,56($17)
375 addq $21,64,$21
376 addq $20,64,$20
377 addq $19,64,$19
378
379 addq $18,64,$18
380 addq $17,64,$17
381 bgt $16,5b
382 ret
383 .end xor_alpha_5
384
385 .align 3
386 .ent xor_alpha_prefetch_2
387 xor_alpha_prefetch_2:
388 .prologue 0
389 srl $16, 6, $16
390
391 ldq $31, 0($17)
392 ldq $31, 0($18)
393
394 ldq $31, 64($17)
395 ldq $31, 64($18)
396
397 ldq $31, 128($17)
398 ldq $31, 128($18)
399
400 ldq $31, 192($17)
401 ldq $31, 192($18)
402 .align 4
403 2:
404 ldq $0,0($17)
405 ldq $1,0($18)
406 ldq $2,8($17)
407 ldq $3,8($18)
408
409 ldq $4,16($17)
410 ldq $5,16($18)
411 ldq $6,24($17)
412 ldq $7,24($18)
413
414 ldq $19,32($17)
415 ldq $20,32($18)
416 ldq $21,40($17)
417 ldq $22,40($18)
418
419 ldq $23,48($17)
420 ldq $24,48($18)
421 ldq $25,56($17)
422 ldq $27,56($18)
423
424 ldq $31,256($17)
425 xor $0,$1,$0 # 8 cycles from $1 load
426 ldq $31,256($18)
427 xor $2,$3,$2
428
429 stq $0,0($17)
430 xor $4,$5,$4
431 stq $2,8($17)
432 xor $6,$7,$6
433
434 stq $4,16($17)
435 xor $19,$20,$19
436 stq $6,24($17)
437 xor $21,$22,$21
438
439 stq $19,32($17)
440 xor $23,$24,$23
441 stq $21,40($17)
442 xor $25,$27,$25
443
444 stq $23,48($17)
445 subq $16,1,$16
446 stq $25,56($17)
447 addq $17,64,$17
448
449 addq $18,64,$18
450 bgt $16,2b
451 ret
452 .end xor_alpha_prefetch_2
453
454 .align 3
455 .ent xor_alpha_prefetch_3
456 xor_alpha_prefetch_3:
457 .prologue 0
458 srl $16, 6, $16
459
460 ldq $31, 0($17)
461 ldq $31, 0($18)
462 ldq $31, 0($19)
463
464 ldq $31, 64($17)
465 ldq $31, 64($18)
466 ldq $31, 64($19)
467
468 ldq $31, 128($17)
469 ldq $31, 128($18)
470 ldq $31, 128($19)
471
472 ldq $31, 192($17)
473 ldq $31, 192($18)
474 ldq $31, 192($19)
475 .align 4
476 3:
477 ldq $0,0($17)
478 ldq $1,0($18)
479 ldq $2,0($19)
480 ldq $3,8($17)
481
482 ldq $4,8($18)
483 ldq $6,16($17)
484 ldq $7,16($18)
485 ldq $21,24($17)
486
487 ldq $22,24($18)
488 ldq $24,32($17)
489 ldq $25,32($18)
490 ldq $5,8($19)
491
492 ldq $20,16($19)
493 ldq $23,24($19)
494 ldq $27,32($19)
495 nop
496
497 xor $0,$1,$1 # 8 cycles from $0 load
498 xor $3,$4,$4 # 7 cycles from $4 load
499 xor $6,$7,$7 # 6 cycles from $7 load
500 xor $21,$22,$22 # 5 cycles from $22 load
501
502 xor $1,$2,$2 # 9 cycles from $2 load
503 xor $24,$25,$25 # 5 cycles from $25 load
504 stq $2,0($17)
505 xor $4,$5,$5 # 6 cycles from $5 load
506
507 stq $5,8($17)
508 xor $7,$20,$20 # 7 cycles from $20 load
509 stq $20,16($17)
510 xor $22,$23,$23 # 7 cycles from $23 load
511
512 stq $23,24($17)
513 xor $25,$27,$27 # 7 cycles from $27 load
514 stq $27,32($17)
515 nop
516
517 ldq $0,40($17)
518 ldq $1,40($18)
519 ldq $3,48($17)
520 ldq $4,48($18)
521
522 ldq $6,56($17)
523 ldq $7,56($18)
524 ldq $2,40($19)
525 ldq $5,48($19)
526
527 ldq $20,56($19)
528 ldq $31,256($17)
529 ldq $31,256($18)
530 ldq $31,256($19)
531
532 xor $0,$1,$1 # 6 cycles from $1 load
533 xor $3,$4,$4 # 5 cycles from $4 load
534 xor $6,$7,$7 # 5 cycles from $7 load
535 xor $1,$2,$2 # 4 cycles from $2 load
536
537 xor $4,$5,$5 # 5 cycles from $5 load
538 xor $7,$20,$20 # 4 cycles from $20 load
539 stq $2,40($17)
540 subq $16,1,$16
541
542 stq $5,48($17)
543 addq $19,64,$19
544 stq $20,56($17)
545 addq $18,64,$18
546
547 addq $17,64,$17
548 bgt $16,3b
549 ret
550 .end xor_alpha_prefetch_3
551
552 .align 3
553 .ent xor_alpha_prefetch_4
554 xor_alpha_prefetch_4:
555 .prologue 0
556 srl $16, 6, $16
557
558 ldq $31, 0($17)
559 ldq $31, 0($18)
560 ldq $31, 0($19)
561 ldq $31, 0($20)
562
563 ldq $31, 64($17)
564 ldq $31, 64($18)
565 ldq $31, 64($19)
566 ldq $31, 64($20)
567
568 ldq $31, 128($17)
569 ldq $31, 128($18)
570 ldq $31, 128($19)
571 ldq $31, 128($20)
572
573 ldq $31, 192($17)
574 ldq $31, 192($18)
575 ldq $31, 192($19)
576 ldq $31, 192($20)
577 .align 4
578 4:
579 ldq $0,0($17)
580 ldq $1,0($18)
581 ldq $2,0($19)
582 ldq $3,0($20)
583
584 ldq $4,8($17)
585 ldq $5,8($18)
586 ldq $6,8($19)
587 ldq $7,8($20)
588
589 ldq $21,16($17)
590 ldq $22,16($18)
591 ldq $23,16($19)
592 ldq $24,16($20)
593
594 ldq $25,24($17)
595 xor $0,$1,$1 # 6 cycles from $1 load
596 ldq $27,24($18)
597 xor $2,$3,$3 # 6 cycles from $3 load
598
599 ldq $0,24($19)
600 xor $1,$3,$3
601 ldq $1,24($20)
602 xor $4,$5,$5 # 7 cycles from $5 load
603
604 stq $3,0($17)
605 xor $6,$7,$7
606 xor $21,$22,$22 # 7 cycles from $22 load
607 xor $5,$7,$7
608
609 stq $7,8($17)
610 xor $23,$24,$24 # 7 cycles from $24 load
611 ldq $2,32($17)
612 xor $22,$24,$24
613
614 ldq $3,32($18)
615 ldq $4,32($19)
616 ldq $5,32($20)
617 xor $25,$27,$27 # 8 cycles from $27 load
618
619 ldq $6,40($17)
620 ldq $7,40($18)
621 ldq $21,40($19)
622 ldq $22,40($20)
623
624 stq $24,16($17)
625 xor $0,$1,$1 # 9 cycles from $1 load
626 xor $2,$3,$3 # 5 cycles from $3 load
627 xor $27,$1,$1
628
629 stq $1,24($17)
630 xor $4,$5,$5 # 5 cycles from $5 load
631 ldq $23,48($17)
632 xor $3,$5,$5
633
634 ldq $24,48($18)
635 ldq $25,48($19)
636 ldq $27,48($20)
637 ldq $0,56($17)
638
639 ldq $1,56($18)
640 ldq $2,56($19)
641 ldq $3,56($20)
642 xor $6,$7,$7 # 8 cycles from $6 load
643
644 ldq $31,256($17)
645 xor $21,$22,$22 # 8 cycles from $22 load
646 ldq $31,256($18)
647 xor $7,$22,$22
648
649 ldq $31,256($19)
650 xor $23,$24,$24 # 6 cycles from $24 load
651 ldq $31,256($20)
652 xor $25,$27,$27 # 6 cycles from $27 load
653
654 stq $5,32($17)
655 xor $24,$27,$27
656 xor $0,$1,$1 # 7 cycles from $1 load
657 xor $2,$3,$3 # 6 cycles from $3 load
658
659 stq $22,40($17)
660 xor $1,$3,$3
661 stq $27,48($17)
662 subq $16,1,$16
663
664 stq $3,56($17)
665 addq $20,64,$20
666 addq $19,64,$19
667 addq $18,64,$18
668
669 addq $17,64,$17
670 bgt $16,4b
671 ret
672 .end xor_alpha_prefetch_4
673
674 .align 3
675 .ent xor_alpha_prefetch_5
676 xor_alpha_prefetch_5:
677 .prologue 0
678 srl $16, 6, $16
679
680 ldq $31, 0($17)
681 ldq $31, 0($18)
682 ldq $31, 0($19)
683 ldq $31, 0($20)
684 ldq $31, 0($21)
685
686 ldq $31, 64($17)
687 ldq $31, 64($18)
688 ldq $31, 64($19)
689 ldq $31, 64($20)
690 ldq $31, 64($21)
691
692 ldq $31, 128($17)
693 ldq $31, 128($18)
694 ldq $31, 128($19)
695 ldq $31, 128($20)
696 ldq $31, 128($21)
697
698 ldq $31, 192($17)
699 ldq $31, 192($18)
700 ldq $31, 192($19)
701 ldq $31, 192($20)
702 ldq $31, 192($21)
703 .align 4
704 5:
705 ldq $0,0($17)
706 ldq $1,0($18)
707 ldq $2,0($19)
708 ldq $3,0($20)
709
710 ldq $4,0($21)
711 ldq $5,8($17)
712 ldq $6,8($18)
713 ldq $7,8($19)
714
715 ldq $22,8($20)
716 ldq $23,8($21)
717 ldq $24,16($17)
718 ldq $25,16($18)
719
720 ldq $27,16($19)
721 xor $0,$1,$1 # 6 cycles from $1 load
722 ldq $28,16($20)
723 xor $2,$3,$3 # 6 cycles from $3 load
724
725 ldq $0,16($21)
726 xor $1,$3,$3
727 ldq $1,24($17)
728 xor $3,$4,$4 # 7 cycles from $4 load
729
730 stq $4,0($17)
731 xor $5,$6,$6 # 7 cycles from $6 load
732 xor $7,$22,$22 # 7 cycles from $22 load
733 xor $6,$23,$23 # 7 cycles from $23 load
734
735 ldq $2,24($18)
736 xor $22,$23,$23
737 ldq $3,24($19)
738 xor $24,$25,$25 # 8 cycles from $25 load
739
740 stq $23,8($17)
741 xor $25,$27,$27 # 8 cycles from $27 load
742 ldq $4,24($20)
743 xor $28,$0,$0 # 7 cycles from $0 load
744
745 ldq $5,24($21)
746 xor $27,$0,$0
747 ldq $6,32($17)
748 ldq $7,32($18)
749
750 stq $0,16($17)
751 xor $1,$2,$2 # 6 cycles from $2 load
752 ldq $22,32($19)
753 xor $3,$4,$4 # 4 cycles from $4 load
754
755 ldq $23,32($20)
756 xor $2,$4,$4
757 ldq $24,32($21)
758 ldq $25,40($17)
759
760 ldq $27,40($18)
761 ldq $28,40($19)
762 ldq $0,40($20)
763 xor $4,$5,$5 # 7 cycles from $5 load
764
765 stq $5,24($17)
766 xor $6,$7,$7 # 7 cycles from $7 load
767 ldq $1,40($21)
768 ldq $2,48($17)
769
770 ldq $3,48($18)
771 xor $7,$22,$22 # 7 cycles from $22 load
772 ldq $4,48($19)
773 xor $23,$24,$24 # 6 cycles from $24 load
774
775 ldq $5,48($20)
776 xor $22,$24,$24
777 ldq $6,48($21)
778 xor $25,$27,$27 # 7 cycles from $27 load
779
780 stq $24,32($17)
781 xor $27,$28,$28 # 8 cycles from $28 load
782 ldq $7,56($17)
783 xor $0,$1,$1 # 6 cycles from $1 load
784
785 ldq $22,56($18)
786 ldq $23,56($19)
787 ldq $24,56($20)
788 ldq $25,56($21)
789
790 ldq $31,256($17)
791 xor $28,$1,$1
792 ldq $31,256($18)
793 xor $2,$3,$3 # 9 cycles from $3 load
794
795 ldq $31,256($19)
796 xor $3,$4,$4 # 9 cycles from $4 load
797 ldq $31,256($20)
798 xor $5,$6,$6 # 8 cycles from $6 load
799
800 stq $1,40($17)
801 xor $4,$6,$6
802 xor $7,$22,$22 # 7 cycles from $22 load
803 xor $23,$24,$24 # 6 cycles from $24 load
804
805 stq $6,48($17)
806 xor $22,$24,$24
807 ldq $31,256($21)
808 xor $24,$25,$25 # 8 cycles from $25 load
809
810 stq $25,56($17)
811 subq $16,1,$16
812 addq $21,64,$21
813 addq $20,64,$20
814
815 addq $19,64,$19
816 addq $18,64,$18
817 addq $17,64,$17
818 bgt $16,5b
819
820 ret
821 .end xor_alpha_prefetch_5
822 ");
823
824 static struct xor_block_template xor_block_alpha = {
825 name: "alpha",
826 do_2: xor_alpha_2,
827 do_3: xor_alpha_3,
828 do_4: xor_alpha_4,
829 do_5: xor_alpha_5,
830 };
831
832 static struct xor_block_template xor_block_alpha_prefetch = {
833 name: "alpha prefetch",
834 do_2: xor_alpha_prefetch_2,
835 do_3: xor_alpha_prefetch_3,
836 do_4: xor_alpha_prefetch_4,
837 do_5: xor_alpha_prefetch_5,
838 };
839
840 /* For grins, also test the generic routines. */
841 #include <asm-generic/xor.h>
842
/*
 * Replace any earlier XOR_TRY_TEMPLATES with one that hands four
 * candidates to xor_speed(), in this order: 8regs, 32regs, alpha,
 * alpha prefetch.
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES					\
	do {							\
		xor_speed(&xor_block_8regs);			\
		xor_speed(&xor_block_32regs);			\
		xor_speed(&xor_block_alpha);			\
		xor_speed(&xor_block_alpha_prefetch);		\
	} while (0)
851
/*
 * Force the use of alpha_prefetch if EV6, as it is significantly
 * faster in the cold cache case.  When implver() reports anything else,
 * the FASTEST template supplied by the caller is returned unchanged.
 *
 * FASTEST is parenthesized so that any expression (for instance an
 * assignment) can be passed without changing how the conditional parses.
 */
#define XOR_SELECT_TEMPLATE(FASTEST) \
	(implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : (FASTEST))
/* Cache object: 53dac27b5dd744f43c9f566ee5bcc15c */