1 /* $FreeBSD$ */
2 /* Do not modify. This file is auto-generated from aesv8-armx.pl. */
3 #include "arm_arch.h"
4
5 #if __ARM_MAX_ARCH__>=7
6 .text
7 .align 5
// .Lrcon: constants for the key-expansion code, which reaches them through x3.
//   row 0: round constant 0x01 in every 32-bit lane; the expansion loops double
//          it with shl after each use
//   row 1: tbl byte indices that rotate the last 32-bit word of a vector and
//          copy it into all four lanes
//   row 2: round constant 0x1b, loaded by the 128-bit path after its loop has
//          run eight times (one further shl then turns it into 0x36)
8 .Lrcon:
9 .long 0x01,0x01,0x01,0x01
10 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
11 .long 0x1b,0x1b,0x1b,0x1b
12
13 .globl aes_v8_set_encrypt_key
14 .type aes_v8_set_encrypt_key,%function
15 .align 5
// aes_v8_set_encrypt_key: expand a user key into the encryption round-key schedule.
//   In:  x0 = user key, w1 = key length in bits, x2 = key schedule to fill
//   Out: x0 = 0 on success, -1 if x0 or x2 is NULL, -2 if w1 is not 128, 192 or 256
//   On success the round count (10, 12 or 14) is stored at schedule+240, x2 is
//   left pointing at that word and x12 holds the same count.
//   aes_v8_set_decrypt_key enters through .Lenc_key and relies on both.
//   Vector roles: v0 = zero, v1 = round constant, v2 = tbl mask,
//   v3/v4 = key words being expanded, v5/v6 = temporaries.
16 aes_v8_set_encrypt_key:
17 .Lenc_key:
18 stp x29,x30,[sp,#-16]!
19 add x29,sp,#0
20 mov x3,#-1 // x3 = pending return value: -1 for a NULL argument
21 cmp x0,#0
22 b.eq .Lenc_key_abort
23 cmp x2,#0
24 b.eq .Lenc_key_abort
25 mov x3,#-2 // -2 for an unsupported key length
26 cmp w1,#128
27 b.lt .Lenc_key_abort
28 cmp w1,#256
29 b.gt .Lenc_key_abort
30 tst w1,#0x3f // must be a multiple of 64, which leaves 128/192/256
31 b.ne .Lenc_key_abort
32
33 adr x3,.Lrcon
34 cmp w1,#192 // flags are consumed by the three branches below
35
36 eor v0.16b,v0.16b,v0.16b // v0 = 0
37 ld1 {v3.16b},[x0],#16 // first 16 key bytes
38 mov w1,#8 // reuse w1 as the loop counter (128- and 192-bit paths)
39 ld1 {v1.4s,v2.4s},[x3],#32 // v1 = round constant 0x01, v2 = rotate-n-splat mask
40
41 b.lt .Loop128
42 b.eq .L192
43 b .L256
44
45 .align 4
// 128-bit key: eight loop iterations, then two unrolled steps that use the
// 0x1b row of .Lrcon. Eleven round keys are written in total.
46 .Loop128:
47 tbl v6.16b,{v3.16b},v2.16b // rotate the last word of v3 and splat it
48 ext v5.16b,v0.16b,v3.16b,#12 // v5 = v3 moved up one word, zero filled
49 st1 {v3.4s},[x2],#16 // emit the current round key
50 aese v6.16b,v0.16b // zero round key: substitutes the splatted bytes
51 subs w1,w1,#1 // flags are consumed by b.ne at the loop end
52
// Three shift-and-xor steps fold every lower word into each word of v3.
53 eor v3.16b,v3.16b,v5.16b
54 ext v5.16b,v0.16b,v5.16b,#12
55 eor v3.16b,v3.16b,v5.16b
56 ext v5.16b,v0.16b,v5.16b,#12
57 eor v6.16b,v6.16b,v1.16b // add the round constant
58 eor v3.16b,v3.16b,v5.16b
59 shl v1.16b,v1.16b,#1 // next round constant
60 eor v3.16b,v3.16b,v6.16b // v3 = next round key
61 b.ne .Loop128
62
63 ld1 {v1.4s},[x3] // x3 now points at the 0x1b row
64
65 tbl v6.16b,{v3.16b},v2.16b
66 ext v5.16b,v0.16b,v3.16b,#12
67 st1 {v3.4s},[x2],#16
68 aese v6.16b,v0.16b
69
70 eor v3.16b,v3.16b,v5.16b
71 ext v5.16b,v0.16b,v5.16b,#12
72 eor v3.16b,v3.16b,v5.16b
73 ext v5.16b,v0.16b,v5.16b,#12
74 eor v6.16b,v6.16b,v1.16b
75 eor v3.16b,v3.16b,v5.16b
76 shl v1.16b,v1.16b,#1 // 0x1b becomes 0x36 for the last step
77 eor v3.16b,v3.16b,v6.16b
78
79 tbl v6.16b,{v3.16b},v2.16b
80 ext v5.16b,v0.16b,v3.16b,#12
81 st1 {v3.4s},[x2],#16
82 aese v6.16b,v0.16b
83
84 eor v3.16b,v3.16b,v5.16b
85 ext v5.16b,v0.16b,v5.16b,#12
86 eor v3.16b,v3.16b,v5.16b
87 ext v5.16b,v0.16b,v5.16b,#12
88 eor v6.16b,v6.16b,v1.16b
89 eor v3.16b,v3.16b,v5.16b
90 eor v3.16b,v3.16b,v6.16b
91 st1 {v3.4s},[x2] // last round key, written at schedule+160
92 add x2,x2,#0x50 // x2 = schedule+240
93
94 mov w12,#10 // round count
95 b .Ldone
96
97 .align 4
// 192-bit key: v3 holds the first 16 key bytes, v4 the remaining 8.
98 .L192:
99 ld1 {v4.8b},[x0],#8
100 movi v6.16b,#8 // borrow v6.16b
101 st1 {v3.4s},[x2],#16
102 sub v2.16b,v2.16b,v6.16b // adjust the mask: indices drop by 8, selecting bytes 4..7
103
// Each of the eight iterations (w1 was set to 8 above) stores 8 + 16 bytes.
104 .Loop192:
105 tbl v6.16b,{v4.16b},v2.16b
106 ext v5.16b,v0.16b,v3.16b,#12
107 #ifdef __ARMEB__
108 st1 {v4.4s},[x2],#16
109 sub x2,x2,#8
110 #else
111 st1 {v4.8b},[x2],#8
112 #endif
113 aese v6.16b,v0.16b
114 subs w1,w1,#1 // flags are consumed by b.ne at the loop end
115
116 eor v3.16b,v3.16b,v5.16b
117 ext v5.16b,v0.16b,v5.16b,#12
118 eor v3.16b,v3.16b,v5.16b
119 ext v5.16b,v0.16b,v5.16b,#12
120 eor v3.16b,v3.16b,v5.16b
121
122 dup v5.4s,v3.s[3]
123 eor v5.16b,v5.16b,v4.16b
124 eor v6.16b,v6.16b,v1.16b
125 ext v4.16b,v0.16b,v4.16b,#12
126 shl v1.16b,v1.16b,#1
127 eor v4.16b,v4.16b,v5.16b
128 eor v3.16b,v3.16b,v6.16b
129 eor v4.16b,v4.16b,v6.16b
130 st1 {v3.4s},[x2],#16
131 b.ne .Loop192
132
133 mov w12,#12 // round count
134 add x2,x2,#0x20 // x2 = schedule+240 (208 bytes were written)
135 b .Ldone
136
137 .align 4
// 256-bit key: v3 and v4 hold the two 16-byte halves. Seven iterations.
138 .L256:
139 ld1 {v4.16b},[x0]
140 mov w1,#7
141 mov w12,#14 // round count
142 st1 {v3.4s},[x2],#16
143
144 .Loop256:
145 tbl v6.16b,{v4.16b},v2.16b
146 ext v5.16b,v0.16b,v3.16b,#12
147 st1 {v4.4s},[x2],#16
148 aese v6.16b,v0.16b
149 subs w1,w1,#1 // flags are consumed by b.eq .Ldone below
150
151 eor v3.16b,v3.16b,v5.16b
152 ext v5.16b,v0.16b,v5.16b,#12
153 eor v3.16b,v3.16b,v5.16b
154 ext v5.16b,v0.16b,v5.16b,#12
155 eor v6.16b,v6.16b,v1.16b
156 eor v3.16b,v3.16b,v5.16b
157 shl v1.16b,v1.16b,#1
158 eor v3.16b,v3.16b,v6.16b
159 st1 {v3.4s},[x2],#16
160 b.eq .Ldone // seventh pass: schedule complete, x2 = schedule+240
161
// Second half of the step: no rotation and no round constant.
162 dup v6.4s,v3.s[3] // just splat
163 ext v5.16b,v0.16b,v4.16b,#12
164 aese v6.16b,v0.16b
165
166 eor v4.16b,v4.16b,v5.16b
167 ext v5.16b,v0.16b,v5.16b,#12
168 eor v4.16b,v4.16b,v5.16b
169 ext v5.16b,v0.16b,v5.16b,#12
170 eor v4.16b,v4.16b,v5.16b
171
172 eor v4.16b,v4.16b,v6.16b
173 b .Loop256
174
175 .Ldone:
176 str w12,[x2] // round count at schedule+240
177 mov x3,#0 // success
178
179 .Lenc_key_abort:
180 mov x0,x3 // return value
181 ldr x29,[sp],#16 // pop the frame; x30 is never written in this function
182 ret
183 .size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
184
185 .globl aes_v8_set_decrypt_key
186 .type aes_v8_set_decrypt_key,%function
187 .align 5
// aes_v8_set_decrypt_key: build the decryption round-key schedule.
//   In:  x0 = user key, w1 = key length in bits, x2 = key schedule to fill
//   Out: x0 = 0 on success, otherwise the error code produced by .Lenc_key
//   Runs the encryption key expansion, then reverses the order of the round
//   keys in place. aesimc is applied to every key except the first and last.
188 aes_v8_set_decrypt_key:
189 .inst 0xd503233f // paciasp
190 stp x29,x30,[sp,#-16]!
191 add x29,sp,#0
192 bl .Lenc_key // returns status in x0, x2 = schedule+240, x12 = rounds
193
194 cmp x0,#0
195 b.ne .Ldec_key_abort // pass the error code through unchanged
196
197 sub x2,x2,#240 // restore original x2
198 mov x4,#-16 // post-index step for walking backwards
199 add x0,x2,x12,lsl#4 // end of key schedule
200
// Swap the first and last round keys without transforming them.
201 ld1 {v0.4s},[x2]
202 ld1 {v1.4s},[x0]
203 st1 {v0.4s},[x0],x4
204 st1 {v1.4s},[x2],#16
205
// Walk inwards from both ends, transforming and swapping one pair per pass.
206 .Loop_imc:
207 ld1 {v0.4s},[x2]
208 ld1 {v1.4s},[x0]
209 aesimc v0.16b,v0.16b
210 aesimc v1.16b,v1.16b
211 st1 {v0.4s},[x0],x4
212 st1 {v1.4s},[x2],#16
213 cmp x0,x2
214 b.hi .Loop_imc // until the two pointers meet
215
// x0 == x2 here: transform the middle round key in place.
216 ld1 {v0.4s},[x2]
217 aesimc v0.16b,v0.16b
218 st1 {v0.4s},[x0]
219
220 eor x0,x0,x0 // return value
221 .Ldec_key_abort:
222 ldp x29,x30,[sp],#16
223 .inst 0xd50323bf // autiasp
224 ret
225 .size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
226 .globl aes_v8_encrypt
227 .type aes_v8_encrypt,%function
228 .align 5
// aes_v8_encrypt: encrypt one 16-byte block.
//   In:  x0 = input block, x1 = output block, x2 = key schedule
//        (round count is read from offset 240)
//   Uses w3 and v0-v2 and advances x2. No stack frame, no return value.
//   v0/v1 alternate as the current pair of round keys while v2 carries the state.
229 aes_v8_encrypt:
230 ldr w3,[x2,#240] // w3 = number of rounds
231 ld1 {v0.4s},[x2],#16 // round key 0
232 ld1 {v2.16b},[x0] // input block
233 sub w3,w3,#2
234 ld1 {v1.4s},[x2],#16 // round key 1
235
// Two rounds per iteration; the next two round keys are fetched meanwhile.
236 .Loop_enc:
237 aese v2.16b,v0.16b
238 aesmc v2.16b,v2.16b
239 ld1 {v0.4s},[x2],#16
240 subs w3,w3,#2 // flags are consumed by b.gt below
241 aese v2.16b,v1.16b
242 aesmc v2.16b,v2.16b
243 ld1 {v1.4s},[x2],#16
244 b.gt .Loop_enc
245
// Last two rounds: the final one has no aesmc and ends with an xor of the last key.
246 aese v2.16b,v0.16b
247 aesmc v2.16b,v2.16b
248 ld1 {v0.4s},[x2] // last round key
249 aese v2.16b,v1.16b
250 eor v2.16b,v2.16b,v0.16b
251
252 st1 {v2.16b},[x1]
253 ret
254 .size aes_v8_encrypt,.-aes_v8_encrypt
255 .globl aes_v8_decrypt
256 .type aes_v8_decrypt,%function
257 .align 5
// aes_v8_decrypt: decrypt one 16-byte block.
//   In:  x0 = input block, x1 = output block, x2 = key schedule
//        (round count is read from offset 240)
//   Uses w3 and v0-v2 and advances x2. No stack frame, no return value.
//   Same structure as aes_v8_encrypt, with aesd/aesimc in place of aese/aesmc.
258 aes_v8_decrypt:
259 ldr w3,[x2,#240] // w3 = number of rounds
260 ld1 {v0.4s},[x2],#16 // round key 0
261 ld1 {v2.16b},[x0] // input block
262 sub w3,w3,#2
263 ld1 {v1.4s},[x2],#16 // round key 1
264
// Two rounds per iteration; the next two round keys are fetched meanwhile.
265 .Loop_dec:
266 aesd v2.16b,v0.16b
267 aesimc v2.16b,v2.16b
268 ld1 {v0.4s},[x2],#16
269 subs w3,w3,#2 // flags are consumed by b.gt below
270 aesd v2.16b,v1.16b
271 aesimc v2.16b,v2.16b
272 ld1 {v1.4s},[x2],#16
273 b.gt .Loop_dec
274
// Last two rounds: the final one has no aesimc and ends with an xor of the last key.
275 aesd v2.16b,v0.16b
276 aesimc v2.16b,v2.16b
277 ld1 {v0.4s},[x2] // last round key
278 aesd v2.16b,v1.16b
279 eor v2.16b,v2.16b,v0.16b
280
281 st1 {v2.16b},[x1]
282 ret
283 .size aes_v8_decrypt,.-aes_v8_decrypt
284 .globl aes_v8_cbc_encrypt
285 .type aes_v8_cbc_encrypt,%function
286 .align 5
// aes_v8_cbc_encrypt: CBC-mode encryption or decryption of whole 16-byte blocks.
//   In:  x0 = input, x1 = output, x2 = length in bytes, x3 = key schedule
//        (round count is read from offset 240), x4 = 16-byte IV,
//        w5 = nonzero to encrypt, zero to decrypt
//   Out: the IV buffer at x4 is overwritten with the last ciphertext block.
//   A length below 16 returns at once without touching the output or the IV.
//   The length is rounded down to a multiple of 16.
//   Register roles: v16/v17 = rolling round keys, v20-v23 = the four round keys
//   before the last, v7 = last round key, v6 = IV / chaining value.
//   v18/v19 hold round keys on the encrypt paths and data on the decrypt path.
287 aes_v8_cbc_encrypt:
288 stp x29,x30,[sp,#-16]!
289 add x29,sp,#0
290 subs x2,x2,#16
291 mov x8,#16 // x8 = input post-index step
292 b.lo .Lcbc_abort // fewer than 16 bytes
293 csel x8,xzr,x8,eq // exactly one block: do not step past it
294
295 cmp w5,#0 // en- or decrypting? (flags live until b.eq .Lcbc_dec)
296 ldr w5,[x3,#240] // w5 = number of rounds
297 and x2,x2,#-16 // ignore any partial trailing block
298 ld1 {v6.16b},[x4] // v6 = IV
299 ld1 {v0.16b},[x0],x8 // v0 = first input block
300
301 ld1 {v16.4s,v17.4s},[x3] // load key schedule...
302 sub w5,w5,#6
303 add x7,x3,x5,lsl#4 // pointer to last 7 round keys
304 sub w5,w5,#2 // w5 = rounds-8
305 ld1 {v18.4s,v19.4s},[x7],#32
306 ld1 {v20.4s,v21.4s},[x7],#32
307 ld1 {v22.4s,v23.4s},[x7],#32
308 ld1 {v7.4s},[x7] // v7 = last round key
309
310 add x7,x3,#32 // x7 -> round key 2
311 mov w6,w5
312 b.eq .Lcbc_dec // the w5 argument was zero
313
314 cmp w5,#2 // rounds-8 == 2: ten rounds
315 eor v0.16b,v0.16b,v6.16b // first plaintext block xor IV
316 eor v5.16b,v16.16b,v7.16b // v5 = round key 0 xor last round key
317 b.eq .Lcbc_enc128
318
// 12- and 14-round encryption. Fixed pointers to round keys 1 and 4..7 are set
// up once, because v16/v17 are reloaded on every block.
319 ld1 {v2.4s,v3.4s},[x7] // v2,v3 = round keys 2,3
320 add x7,x3,#16 // x7 -> round key 1
321 add x6,x3,#16*4
322 add x12,x3,#16*5
323 aese v0.16b,v16.16b
324 aesmc v0.16b,v0.16b
325 add x14,x3,#16*6
326 add x3,x3,#16*7
327 b .Lenter_cbc_enc
328
329 .align 4
330 .Loop_cbc_enc:
// v16 holds next plaintext xor v5. Applied to the not-yet-finalised state in v0
// it acts as the first round of the next block, chained to the previous ciphertext.
331 aese v0.16b,v16.16b
332 aesmc v0.16b,v0.16b
333 st1 {v6.16b},[x1],#16 // store the previous ciphertext block
334 .Lenter_cbc_enc:
335 aese v0.16b,v17.16b
336 aesmc v0.16b,v0.16b
337 aese v0.16b,v2.16b
338 aesmc v0.16b,v0.16b
339 ld1 {v16.4s},[x6] // round key 4
340 cmp w5,#4 // rounds-8 == 4: twelve rounds
341 aese v0.16b,v3.16b
342 aesmc v0.16b,v0.16b
343 ld1 {v17.4s},[x12] // round key 5
344 b.eq .Lcbc_enc192 // skip the two extra rounds
345
346 aese v0.16b,v16.16b
347 aesmc v0.16b,v0.16b
348 ld1 {v16.4s},[x14] // round key 6
349 aese v0.16b,v17.16b
350 aesmc v0.16b,v0.16b
351 ld1 {v17.4s},[x3] // round key 7
352 nop
353
354 .Lcbc_enc192:
355 aese v0.16b,v16.16b
356 aesmc v0.16b,v0.16b
357 subs x2,x2,#16 // flags are consumed by csel and b.hs below
358 aese v0.16b,v17.16b
359 aesmc v0.16b,v0.16b
360 csel x8,xzr,x8,eq // the block loaded next is the last: stop advancing x0
361 aese v0.16b,v18.16b
362 aesmc v0.16b,v0.16b
363 aese v0.16b,v19.16b
364 aesmc v0.16b,v0.16b
365 ld1 {v16.16b},[x0],x8 // next plaintext block
366 aese v0.16b,v20.16b
367 aesmc v0.16b,v0.16b
368 eor v16.16b,v16.16b,v5.16b
369 aese v0.16b,v21.16b
370 aesmc v0.16b,v0.16b
371 ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
372 aese v0.16b,v22.16b
373 aesmc v0.16b,v0.16b
374 aese v0.16b,v23.16b
375 eor v6.16b,v0.16b,v7.16b // v6 = ciphertext block
376 b.hs .Loop_cbc_enc
377
378 st1 {v6.16b},[x1],#16 // last ciphertext block
379 b .Lcbc_done
380
381 .align 5
// Ten-round encryption: every round key stays in registers (v17, v2, v3, v18-v23, v7).
382 .Lcbc_enc128:
383 ld1 {v2.4s,v3.4s},[x7] // v2,v3 = round keys 2,3
384 aese v0.16b,v16.16b
385 aesmc v0.16b,v0.16b
386 b .Lenter_cbc_enc128
387 .Loop_cbc_enc128:
388 aese v0.16b,v16.16b
389 aesmc v0.16b,v0.16b
390 st1 {v6.16b},[x1],#16 // store the previous ciphertext block
391 .Lenter_cbc_enc128:
392 aese v0.16b,v17.16b
393 aesmc v0.16b,v0.16b
394 subs x2,x2,#16 // flags are consumed by csel and b.hs below
395 aese v0.16b,v2.16b
396 aesmc v0.16b,v0.16b
397 csel x8,xzr,x8,eq
398 aese v0.16b,v3.16b
399 aesmc v0.16b,v0.16b
400 aese v0.16b,v18.16b
401 aesmc v0.16b,v0.16b
402 aese v0.16b,v19.16b
403 aesmc v0.16b,v0.16b
404 ld1 {v16.16b},[x0],x8 // next plaintext block
405 aese v0.16b,v20.16b
406 aesmc v0.16b,v0.16b
407 aese v0.16b,v21.16b
408 aesmc v0.16b,v0.16b
409 aese v0.16b,v22.16b
410 aesmc v0.16b,v0.16b
411 eor v16.16b,v16.16b,v5.16b
412 aese v0.16b,v23.16b
413 eor v6.16b,v0.16b,v7.16b // v6 = ciphertext block
414 b.hs .Loop_cbc_enc128
415
416 st1 {v6.16b},[x1],#16 // last ciphertext block
417 b .Lcbc_done
418 .align 5
// Decryption: three blocks per pass in v0, v1, v18, with unmodified copies of
// the ciphertext kept in v2, v3, v19 for chaining.
419 .Lcbc_dec:
420 ld1 {v18.16b},[x0],#16
421 subs x2,x2,#32 // bias
422 add w6,w5,#2 // w6 = rounds-6, loop counter for the middle rounds
423 orr v3.16b,v0.16b,v0.16b
424 orr v1.16b,v0.16b,v0.16b
425 orr v19.16b,v18.16b,v18.16b
426 b.lo .Lcbc_dec_tail // fewer than three blocks in total
427
428 orr v1.16b,v18.16b,v18.16b
429 ld1 {v18.16b},[x0],#16
430 orr v2.16b,v0.16b,v0.16b
431 orr v3.16b,v1.16b,v1.16b
432 orr v19.16b,v18.16b,v18.16b
433
434 .Loop3x_cbc_dec:
435 aesd v0.16b,v16.16b
436 aesimc v0.16b,v0.16b
437 aesd v1.16b,v16.16b
438 aesimc v1.16b,v1.16b
439 aesd v18.16b,v16.16b
440 aesimc v18.16b,v18.16b
441 ld1 {v16.4s},[x7],#16
442 subs w6,w6,#2
443 aesd v0.16b,v17.16b
444 aesimc v0.16b,v0.16b
445 aesd v1.16b,v17.16b
446 aesimc v1.16b,v1.16b
447 aesd v18.16b,v17.16b
448 aesimc v18.16b,v18.16b
449 ld1 {v17.4s},[x7],#16
450 b.gt .Loop3x_cbc_dec
451
452 aesd v0.16b,v16.16b
453 aesimc v0.16b,v0.16b
454 aesd v1.16b,v16.16b
455 aesimc v1.16b,v1.16b
456 aesd v18.16b,v16.16b
457 aesimc v18.16b,v18.16b
458 eor v4.16b,v6.16b,v7.16b // chaining value xor last round key
459 subs x2,x2,#0x30 // flags are consumed by csel and by b.hs at the loop end
460 eor v5.16b,v2.16b,v7.16b
461 csel x6,x2,x6,lo // x6, w6, is zero at this point
462 aesd v0.16b,v17.16b
463 aesimc v0.16b,v0.16b
464 aesd v1.16b,v17.16b
465 aesimc v1.16b,v1.16b
466 aesd v18.16b,v17.16b
467 aesimc v18.16b,v18.16b
468 eor v17.16b,v3.16b,v7.16b
469 add x0,x0,x6 // x0 is adjusted in such way that
470 // at exit from the loop v1.16b-v18.16b
471 // are loaded with last "words"
472 orr v6.16b,v19.16b,v19.16b // new chaining value = last ciphertext of this group
473 mov x7,x3
474 aesd v0.16b,v20.16b
475 aesimc v0.16b,v0.16b
476 aesd v1.16b,v20.16b
477 aesimc v1.16b,v1.16b
478 aesd v18.16b,v20.16b
479 aesimc v18.16b,v18.16b
480 ld1 {v2.16b},[x0],#16
481 aesd v0.16b,v21.16b
482 aesimc v0.16b,v0.16b
483 aesd v1.16b,v21.16b
484 aesimc v1.16b,v1.16b
485 aesd v18.16b,v21.16b
486 aesimc v18.16b,v18.16b
487 ld1 {v3.16b},[x0],#16
488 aesd v0.16b,v22.16b
489 aesimc v0.16b,v0.16b
490 aesd v1.16b,v22.16b
491 aesimc v1.16b,v1.16b
492 aesd v18.16b,v22.16b
493 aesimc v18.16b,v18.16b
494 ld1 {v19.16b},[x0],#16
495 aesd v0.16b,v23.16b
496 aesd v1.16b,v23.16b
497 aesd v18.16b,v23.16b
498 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
499 add w6,w5,#2 // reset the middle-round counter
500 eor v4.16b,v4.16b,v0.16b
501 eor v5.16b,v5.16b,v1.16b
502 eor v18.16b,v18.16b,v17.16b
503 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
504 st1 {v4.16b},[x1],#16
505 orr v0.16b,v2.16b,v2.16b
506 st1 {v5.16b},[x1],#16
507 orr v1.16b,v3.16b,v3.16b
508 st1 {v18.16b},[x1],#16
509 orr v18.16b,v19.16b,v19.16b
510 b.hs .Loop3x_cbc_dec
511
512 cmn x2,#0x30 // x2 == -48: nothing left over
513 b.eq .Lcbc_done
514 nop
515
// One or two blocks remain, held in v1 and v18.
516 .Lcbc_dec_tail:
517 aesd v1.16b,v16.16b
518 aesimc v1.16b,v1.16b
519 aesd v18.16b,v16.16b
520 aesimc v18.16b,v18.16b
521 ld1 {v16.4s},[x7],#16
522 subs w6,w6,#2
523 aesd v1.16b,v17.16b
524 aesimc v1.16b,v1.16b
525 aesd v18.16b,v17.16b
526 aesimc v18.16b,v18.16b
527 ld1 {v17.4s},[x7],#16
528 b.gt .Lcbc_dec_tail
529
530 aesd v1.16b,v16.16b
531 aesimc v1.16b,v1.16b
532 aesd v18.16b,v16.16b
533 aesimc v18.16b,v18.16b
534 aesd v1.16b,v17.16b
535 aesimc v1.16b,v1.16b
536 aesd v18.16b,v17.16b
537 aesimc v18.16b,v18.16b
538 aesd v1.16b,v20.16b
539 aesimc v1.16b,v1.16b
540 aesd v18.16b,v20.16b
541 aesimc v18.16b,v18.16b
542 cmn x2,#0x20 // x2 == -32: exactly one block; flags are consumed by b.eq below
543 aesd v1.16b,v21.16b
544 aesimc v1.16b,v1.16b
545 aesd v18.16b,v21.16b
546 aesimc v18.16b,v18.16b
547 eor v5.16b,v6.16b,v7.16b
548 aesd v1.16b,v22.16b
549 aesimc v1.16b,v1.16b
550 aesd v18.16b,v22.16b
551 aesimc v18.16b,v18.16b
552 eor v17.16b,v3.16b,v7.16b
553 aesd v1.16b,v23.16b
554 aesd v18.16b,v23.16b
555 b.eq .Lcbc_dec_one
556 eor v5.16b,v5.16b,v1.16b
557 eor v17.16b,v17.16b,v18.16b
558 orr v6.16b,v19.16b,v19.16b // chaining value = last ciphertext block
559 st1 {v5.16b},[x1],#16
560 st1 {v17.16b},[x1],#16
561 b .Lcbc_done
562
563 .Lcbc_dec_one:
564 eor v5.16b,v5.16b,v18.16b
565 orr v6.16b,v19.16b,v19.16b // chaining value = last ciphertext block
566 st1 {v5.16b},[x1],#16
567
568 .Lcbc_done:
569 st1 {v6.16b},[x4] // write the chaining value back to the IV buffer
570 .Lcbc_abort:
571 ldr x29,[sp],#16 // pop the frame; x30 is never written in this function
572 ret
573 .size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
574 .globl aes_v8_ctr32_encrypt_blocks
575 .type aes_v8_ctr32_encrypt_blocks,%function
576 .align 5
// aes_v8_ctr32_encrypt_blocks: CTR-mode processing of whole 16-byte blocks.
//   In:  x0 = input, x1 = output, x2 = number of 16-byte blocks,
//        x3 = key schedule (round count is read from offset 240),
//        x4 = 16-byte counter block
//   The counter is the big-endian 32-bit word at byte offset 12 of the counter
//   block. Only that word is incremented (modulo 2^32); the other 12 bytes are
//   reused unchanged. The counter block at x4 is read, never written back.
//   A block count of zero returns immediately. Without the guard that case
//   falls into the tail path, which reads 16 input bytes and stores 32 output bytes.
//   Register roles: v16/v17 = rolling round keys, v20-v23 = the four round keys
//   before the last, v7 = last round key, v6 = counter block template,
//   v0/v1/v18 = counter blocks in flight, w8 = counter in host byte order.
577 aes_v8_ctr32_encrypt_blocks:
578 stp x29,x30,[sp,#-16]!
579 add x29,sp,#0
cbz x2,.Lctr32_done // zero blocks: nothing to read or write
580 ldr w5,[x3,#240] // w5 = number of rounds
581
582 ldr w8, [x4, #12] // 32-bit counter word
583 #ifdef __ARMEB__
584 ld1 {v0.16b},[x4]
585 #else
586 ld1 {v0.4s},[x4]
587 #endif
588 ld1 {v16.4s,v17.4s},[x3] // load key schedule...
589 sub w5,w5,#4
590 mov x12,#16 // x12 = input step used by the tail path
591 cmp x2,#2 // flags are consumed by csel and b.ls below
592 add x7,x3,x5,lsl#4 // pointer to last 5 round keys
593 sub w5,w5,#2 // w5 = rounds-6
594 ld1 {v20.4s,v21.4s},[x7],#32
595 ld1 {v22.4s,v23.4s},[x7],#32
596 ld1 {v7.4s},[x7] // v7 = last round key
597 add x7,x3,#32 // x7 -> round key 2
598 mov w6,w5 // w6 = loop counter for the middle rounds
599 csel x12,xzr,x12,lo // single block: the tail must not step past it
600 #ifndef __ARMEB__
601 rev w8, w8
602 #endif
603 orr v1.16b,v0.16b,v0.16b
604 add w10, w8, #1
605 orr v18.16b,v0.16b,v0.16b
606 add w8, w8, #2
607 orr v6.16b,v0.16b,v0.16b // keep a template of the counter block
608 rev w10, w10
609 mov v1.s[3],w10 // v1 = counter+1
610 b.ls .Lctr32_tail // one or two blocks
611 rev w12, w8 // w12 is scratch here; x12 is set again before the tail
612 sub x2,x2,#3 // bias
613 mov v18.s[3],w12 // v18 = counter+2
614 b .Loop3x_ctr32
615
616 .align 4
// Three counter blocks per pass. The middle rounds run two at a time.
617 .Loop3x_ctr32:
618 aese v0.16b,v16.16b
619 aesmc v0.16b,v0.16b
620 aese v1.16b,v16.16b
621 aesmc v1.16b,v1.16b
622 aese v18.16b,v16.16b
623 aesmc v18.16b,v18.16b
624 ld1 {v16.4s},[x7],#16
625 subs w6,w6,#2
626 aese v0.16b,v17.16b
627 aesmc v0.16b,v0.16b
628 aese v1.16b,v17.16b
629 aesmc v1.16b,v1.16b
630 aese v18.16b,v17.16b
631 aesmc v18.16b,v18.16b
632 ld1 {v17.4s},[x7],#16
633 b.gt .Loop3x_ctr32
634
// Remaining rounds. The three streams move to v4, v5, v17 so that v0, v1, v18
// can be refilled with the next counter blocks while the input is loaded.
635 aese v0.16b,v16.16b
636 aesmc v4.16b,v0.16b
637 aese v1.16b,v16.16b
638 aesmc v5.16b,v1.16b
639 ld1 {v2.16b},[x0],#16
640 orr v0.16b,v6.16b,v6.16b
641 aese v18.16b,v16.16b
642 aesmc v18.16b,v18.16b
643 ld1 {v3.16b},[x0],#16
644 orr v1.16b,v6.16b,v6.16b
645 aese v4.16b,v17.16b
646 aesmc v4.16b,v4.16b
647 aese v5.16b,v17.16b
648 aesmc v5.16b,v5.16b
649 ld1 {v19.16b},[x0],#16
650 mov x7,x3
651 aese v18.16b,v17.16b
652 aesmc v17.16b,v18.16b
653 orr v18.16b,v6.16b,v6.16b
654 add w9,w8,#1
655 aese v4.16b,v20.16b
656 aesmc v4.16b,v4.16b
657 aese v5.16b,v20.16b
658 aesmc v5.16b,v5.16b
659 eor v2.16b,v2.16b,v7.16b // fold the last round key into the input early
660 add w10,w8,#2
661 aese v17.16b,v20.16b
662 aesmc v17.16b,v17.16b
663 eor v3.16b,v3.16b,v7.16b
664 add w8,w8,#3
665 aese v4.16b,v21.16b
666 aesmc v4.16b,v4.16b
667 aese v5.16b,v21.16b
668 aesmc v5.16b,v5.16b
669 eor v19.16b,v19.16b,v7.16b
670 rev w9,w9
671 aese v17.16b,v21.16b
672 aesmc v17.16b,v17.16b
673 mov v0.s[3], w9
674 rev w10,w10
675 aese v4.16b,v22.16b
676 aesmc v4.16b,v4.16b
677 aese v5.16b,v22.16b
678 aesmc v5.16b,v5.16b
679 mov v1.s[3], w10
680 rev w12,w8
681 aese v17.16b,v22.16b
682 aesmc v17.16b,v17.16b
683 mov v18.s[3], w12
684 subs x2,x2,#3 // flags are consumed by b.hs at the loop end
685 aese v4.16b,v23.16b
686 aese v5.16b,v23.16b
687 aese v17.16b,v23.16b
688
689 eor v2.16b,v2.16b,v4.16b
690 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
691 st1 {v2.16b},[x1],#16
692 eor v3.16b,v3.16b,v5.16b
693 mov w6,w5 // reset the middle-round counter
694 st1 {v3.16b},[x1],#16
695 eor v19.16b,v19.16b,v17.16b
696 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
697 st1 {v19.16b},[x1],#16
698 b.hs .Loop3x_ctr32
699
700 adds x2,x2,#3 // undo the bias: x2 = blocks left (0, 1 or 2)
701 b.eq .Lctr32_done
702 cmp x2,#1
703 mov x12,#16
704 csel x12,xzr,x12,eq // one block left: the second load re-reads it
705
// Tail: two streams (v0, v1) are always computed. The second result is
// stored only when two blocks remain.
706 .Lctr32_tail:
707 aese v0.16b,v16.16b
708 aesmc v0.16b,v0.16b
709 aese v1.16b,v16.16b
710 aesmc v1.16b,v1.16b
711 ld1 {v16.4s},[x7],#16
712 subs w6,w6,#2
713 aese v0.16b,v17.16b
714 aesmc v0.16b,v0.16b
715 aese v1.16b,v17.16b
716 aesmc v1.16b,v1.16b
717 ld1 {v17.4s},[x7],#16
718 b.gt .Lctr32_tail
719
720 aese v0.16b,v16.16b
721 aesmc v0.16b,v0.16b
722 aese v1.16b,v16.16b
723 aesmc v1.16b,v1.16b
724 aese v0.16b,v17.16b
725 aesmc v0.16b,v0.16b
726 aese v1.16b,v17.16b
727 aesmc v1.16b,v1.16b
728 ld1 {v2.16b},[x0],x12
729 aese v0.16b,v20.16b
730 aesmc v0.16b,v0.16b
731 aese v1.16b,v20.16b
732 aesmc v1.16b,v1.16b
733 ld1 {v3.16b},[x0]
734 aese v0.16b,v21.16b
735 aesmc v0.16b,v0.16b
736 aese v1.16b,v21.16b
737 aesmc v1.16b,v1.16b
738 eor v2.16b,v2.16b,v7.16b
739 aese v0.16b,v22.16b
740 aesmc v0.16b,v0.16b
741 aese v1.16b,v22.16b
742 aesmc v1.16b,v1.16b
743 eor v3.16b,v3.16b,v7.16b
744 aese v0.16b,v23.16b
745 aese v1.16b,v23.16b
746
747 cmp x2,#1 // flags are consumed by b.eq below
748 eor v2.16b,v2.16b,v0.16b
749 eor v3.16b,v3.16b,v1.16b
750 st1 {v2.16b},[x1],#16
751 b.eq .Lctr32_done // only one block was requested
752 st1 {v3.16b},[x1]
753
754 .Lctr32_done:
755 ldr x29,[sp],#16 // pop the frame; x30 is never written in this function
756 ret
757 .size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
758 #endif
Cache object: 1119f88668cc618ca058a7e939303004
|