sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <machine/asm.h>
28 #include <machine/asmacros.h>
29 __FBSDID("$FreeBSD: releng/6.1/sys/arm/arm/support.S 144967 2005-04-12 22:46:09Z cognet $");
30
31 #include "assym.s"
32
33 /*
34 * memset: Sets a block of memory to the specified value
35 *
36 * On entry:
37 * r0 - dest address
38 * r1 - byte to write
39 * r2 - number of bytes to write
40 *
41 * On exit:
42 * r0 - dest address
43 */
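As a reading aid before the assembly: the strategy below is to widen the fill byte to a full 32-bit word, byte-fill up to a word boundary, store words in bulk (128- and 32-byte blocks, quad-aligned strd pairs on XScale), and byte-fill the tail. A minimal C sketch of that shape, with an invented name, not the kernel routine itself:

    #include <stddef.h>
    #include <stdint.h>

    /* Minimal sketch of the fill strategy; the name is invented. */
    static void *
    memset_sketch(void *dst, int c, size_t len)
    {
    	unsigned char *p = dst;
    	uint32_t val = (uint32_t)c & 0xff;

    	val |= val << 8;			/* extend value to 16 bits */
    	val |= val << 16;			/* extend value to 32 bits */
    	while (len > 0 && ((uintptr_t)p & 3) != 0) {
    		*p++ = (unsigned char)c;	/* byte-fill to a word boundary */
    		len--;
    	}
    	for (; len >= 4; len -= 4, p += 4)
    		*(uint32_t *)p = val;		/* bulk word stores */
    	while (len-- > 0)
    		*p++ = (unsigned char)c;	/* 1-3 trailing bytes */
    	return (dst);
    }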
44 /* LINTSTUB: Func: void bzero(void *, size_t) */
45 ENTRY(bzero)
46 mov r3, #0x00
47 b do_memset
48
49 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
50 ENTRY(memset)
51 and r3, r1, #0xff /* We deal with bytes */
52 mov r1, r2
53 do_memset:
54 	cmp	r1, #0x04		/* Do we have less than 4 bytes? */
55 mov ip, r0
56 blt .Lmemset_lessthanfour
57
58 /* Ok first we will word align the address */
59 ands r2, ip, #0x03 /* Get the bottom two bits */
60 bne .Lmemset_wordunaligned /* The address is not word aligned */
61
62 /* We are now word aligned */
63 .Lmemset_wordaligned:
64 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
65 #ifdef __XSCALE__
66 tst ip, #0x04 /* Quad-align for Xscale */
67 #else
68 cmp r1, #0x10
69 #endif
70 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
71 #ifdef __XSCALE__
72 subne r1, r1, #0x04 /* Quad-align if necessary */
73 strne r3, [ip], #0x04
74 cmp r1, #0x10
75 #endif
76 blt .Lmemset_loop4 /* If less than 16 then use words */
77 mov r2, r3 /* Duplicate data */
78 cmp r1, #0x80 /* If < 128 then skip the big loop */
79 blt .Lmemset_loop32
80
81 /* Do 128 bytes at a time */
82 .Lmemset_loop128:
83 subs r1, r1, #0x80
84 #ifdef __XSCALE__
85 strged r2, [ip], #0x08
86 strged r2, [ip], #0x08
87 strged r2, [ip], #0x08
88 strged r2, [ip], #0x08
89 strged r2, [ip], #0x08
90 strged r2, [ip], #0x08
91 strged r2, [ip], #0x08
92 strged r2, [ip], #0x08
93 strged r2, [ip], #0x08
94 strged r2, [ip], #0x08
95 strged r2, [ip], #0x08
96 strged r2, [ip], #0x08
97 strged r2, [ip], #0x08
98 strged r2, [ip], #0x08
99 strged r2, [ip], #0x08
100 strged r2, [ip], #0x08
101 #else
102 stmgeia ip!, {r2-r3}
103 stmgeia ip!, {r2-r3}
104 stmgeia ip!, {r2-r3}
105 stmgeia ip!, {r2-r3}
106 stmgeia ip!, {r2-r3}
107 stmgeia ip!, {r2-r3}
108 stmgeia ip!, {r2-r3}
109 stmgeia ip!, {r2-r3}
110 stmgeia ip!, {r2-r3}
111 stmgeia ip!, {r2-r3}
112 stmgeia ip!, {r2-r3}
113 stmgeia ip!, {r2-r3}
114 stmgeia ip!, {r2-r3}
115 stmgeia ip!, {r2-r3}
116 stmgeia ip!, {r2-r3}
117 stmgeia ip!, {r2-r3}
118 #endif
119 bgt .Lmemset_loop128
120 RETeq /* Zero length so just exit */
121
122 add r1, r1, #0x80 /* Adjust for extra sub */
123
124 /* Do 32 bytes at a time */
125 .Lmemset_loop32:
126 subs r1, r1, #0x20
127 #ifdef __XSCALE__
128 strged r2, [ip], #0x08
129 strged r2, [ip], #0x08
130 strged r2, [ip], #0x08
131 strged r2, [ip], #0x08
132 #else
133 stmgeia ip!, {r2-r3}
134 stmgeia ip!, {r2-r3}
135 stmgeia ip!, {r2-r3}
136 stmgeia ip!, {r2-r3}
137 #endif
138 bgt .Lmemset_loop32
139 RETeq /* Zero length so just exit */
140
141 adds r1, r1, #0x10 /* Partially adjust for extra sub */
142
143 /* Deal with 16 bytes or more */
144 #ifdef __XSCALE__
145 strged r2, [ip], #0x08
146 strged r2, [ip], #0x08
147 #else
148 stmgeia ip!, {r2-r3}
149 stmgeia ip!, {r2-r3}
150 #endif
151 RETeq /* Zero length so just exit */
152
153 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
154
155 /* We have at least 4 bytes so copy as words */
156 .Lmemset_loop4:
157 subs r1, r1, #0x04
158 strge r3, [ip], #0x04
159 bgt .Lmemset_loop4
160 RETeq /* Zero length so just exit */
161
162 #ifdef __XSCALE__
163 /* Compensate for 64-bit alignment check */
164 adds r1, r1, #0x04
165 RETeq
166 cmp r1, #2
167 #else
168 cmp r1, #-2
169 #endif
170
171 strb r3, [ip], #0x01 /* Set 1 byte */
172 strgeb r3, [ip], #0x01 /* Set another byte */
173 strgtb r3, [ip] /* and a third */
174 RET /* Exit */
175
176 .Lmemset_wordunaligned:
177 rsb r2, r2, #0x004
178 strb r3, [ip], #0x01 /* Set 1 byte */
179 cmp r2, #0x02
180 strgeb r3, [ip], #0x01 /* Set another byte */
181 sub r1, r1, r2
182 strgtb r3, [ip], #0x01 /* and a third */
183 cmp r1, #0x04 /* More than 4 bytes left? */
184 bge .Lmemset_wordaligned /* Yup */
185
186 .Lmemset_lessthanfour:
187 cmp r1, #0x00
188 RETeq /* Zero length so exit */
189 strb r3, [ip], #0x01 /* Set 1 byte */
190 cmp r1, #0x02
191 strgeb r3, [ip], #0x01 /* Set another byte */
192 strgtb r3, [ip] /* and a third */
193 RET /* Exit */
194
195 ENTRY(bcmp)
196 mov ip, r0
197 cmp r2, #0x06
198 beq .Lmemcmp_6bytes
199 mov r0, #0x00
200
201 /* Are both addresses aligned the same way? */
202 cmp r2, #0x00
203 eornes r3, ip, r1
204 RETeq /* len == 0, or same addresses! */
205 tst r3, #0x03
206 subne r2, r2, #0x01
207 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
208
209 /* Word-align the addresses, if necessary */
210 sub r3, r1, #0x05
211 ands r3, r3, #0x03
212 add r3, r3, r3, lsl #1
213 addne pc, pc, r3, lsl #3
214 nop
215
216 /* Compare up to 3 bytes */
217 ldrb r0, [ip], #0x01
218 ldrb r3, [r1], #0x01
219 subs r0, r0, r3
220 RETne
221 subs r2, r2, #0x01
222 RETeq
223
224 /* Compare up to 2 bytes */
225 ldrb r0, [ip], #0x01
226 ldrb r3, [r1], #0x01
227 subs r0, r0, r3
228 RETne
229 subs r2, r2, #0x01
230 RETeq
231
232 /* Compare 1 byte */
233 ldrb r0, [ip], #0x01
234 ldrb r3, [r1], #0x01
235 subs r0, r0, r3
236 RETne
237 subs r2, r2, #0x01
238 RETeq
239
240 /* Compare 4 bytes at a time, if possible */
241 subs r2, r2, #0x04
242 bcc .Lmemcmp_bytewise
243 .Lmemcmp_word_aligned:
244 ldr r0, [ip], #0x04
245 ldr r3, [r1], #0x04
246 subs r2, r2, #0x04
247 cmpcs r0, r3
248 beq .Lmemcmp_word_aligned
249 sub r0, r0, r3
250
251 /* Correct for extra subtraction, and check if done */
252 adds r2, r2, #0x04
253 cmpeq r0, #0x00 /* If done, did all bytes match? */
254 RETeq /* Yup. Just return */
255
256 /* Re-do the final word byte-wise */
257 sub ip, ip, #0x04
258 sub r1, r1, #0x04
259
260 .Lmemcmp_bytewise:
261 add r2, r2, #0x03
262 .Lmemcmp_bytewise2:
263 ldrb r0, [ip], #0x01
264 ldrb r3, [r1], #0x01
265 subs r2, r2, #0x01
266 cmpcs r0, r3
267 beq .Lmemcmp_bytewise2
268 sub r0, r0, r3
269 RET
270
271 /*
272 * 6 byte compares are very common, thanks to the network stack.
273 * This code is hand-scheduled to reduce the number of stalls for
274 * load results. Everything else being equal, this will be ~32%
275 * faster than a byte-wise memcmp.
276 */
277 .align 5
278 .Lmemcmp_6bytes:
279 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
280 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
281 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
282 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
283 ldreqb r3, [ip, #0x01] /* r3 = b1#1 */
284 RETne /* Return if mismatch on #0 */
285 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
286 ldreqb r3, [r1, #0x02] /* r3 = b2#2 */
287 ldreqb r0, [ip, #0x02] /* r0 = b1#2 */
288 RETne /* Return if mismatch on #1 */
289 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
290 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
291 ldreqb r3, [ip, #0x03] /* r3 = b1#3 */
292 RETne /* Return if mismatch on #2 */
293 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
294 ldreqb r3, [r1, #0x04] /* r3 = b2#4 */
295 ldreqb r0, [ip, #0x04] /* r0 = b1#4 */
296 RETne /* Return if mismatch on #3 */
297 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
298 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
299 ldreqb r3, [ip, #0x05] /* r3 = b1#5 */
300 RETne /* Return if mismatch on #4 */
301 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
302 RET
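For contrast, the byte-wise compare that the comment above measures against looks like this in C (a sketch; the hand-scheduled assembly wins by starting each load well before its result is subtracted):

    #include <stddef.h>

    /* Byte-wise reference compare (sketch). */
    static int
    memcmp_bytewise(const void *p1, const void *p2, size_t len)
    {
    	const unsigned char *b1 = p1, *b2 = p2;

    	while (len-- > 0) {
    		int diff = *b1++ - *b2++;

    		if (diff != 0)
    			return (diff);	/* difference at first mismatch */
    	}
    	return (0);
    }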
303
304 ENTRY(bcopy)
305 /* switch the source and destination registers */
306 eor r0, r1, r0
307 eor r1, r0, r1
308 eor r0, r1, r0
309 ENTRY(memmove)
310 /* Do the buffers overlap? */
311 cmp r0, r1
312 RETeq /* Bail now if src/dst are the same */
313 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
314 	subcs	r3, r1, r0	/* if (src > dst) r3 = src - dst */
315 cmp r3, r2 /* if (r3 < len) we have an overlap */
316 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
317
318 /* Determine copy direction */
319 cmp r1, r0
320 bcc .Lmemmove_backwards
321
322 moveq r0, #0 /* Quick abort for len=0 */
323 RETeq
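The intent of the dispatch above is the standard memmove() contract: hand non-overlapping buffers to memcpy(), copy forward when the destination lies below the source, and backward otherwise. A self-contained C sketch of that rule (all names are invented for illustration, not kernel interfaces):

    #include <stddef.h>
    #include <stdint.h>

    static void
    copy_forwards(unsigned char *d, const unsigned char *s, size_t len)
    {
    	while (len-- > 0)
    		*d++ = *s++;			/* low to high */
    }

    static void
    copy_backwards(unsigned char *d, const unsigned char *s, size_t len)
    {
    	d += len;
    	s += len;
    	while (len-- > 0)
    		*--d = *--s;			/* high to low */
    }

    /* Sketch of the overlap dispatch, not the kernel routine itself. */
    static void *
    memmove_sketch(void *dst, const void *src, size_t len)
    {
    	uintptr_t d = (uintptr_t)dst, s = (uintptr_t)src;

    	if (d == s || len == 0)
    		return (dst);
    	/* A distance of at least len means the buffers cannot overlap. */
    	if ((d > s ? d - s : s - d) >= len || d < s)
    		copy_forwards(dst, src, len);	/* reads stay ahead of writes */
    	else
    		copy_backwards(dst, src, len);	/* reads stay behind writes */
    	return (dst);
    }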
324
325 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
326 subs r2, r2, #4
327 blt .Lmemmove_fl4 /* less than 4 bytes */
328 ands r12, r0, #3
329 bne .Lmemmove_fdestul /* oh unaligned destination addr */
330 ands r12, r1, #3
331 bne .Lmemmove_fsrcul /* oh unaligned source addr */
332
333 .Lmemmove_ft8:
334 /* We have aligned source and destination */
335 subs r2, r2, #8
336 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
337 subs r2, r2, #0x14
338 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
339 stmdb sp!, {r4} /* borrow r4 */
340
341 /* blat 32 bytes at a time */
342 /* XXX for really big copies perhaps we should use more registers */
343 .Lmemmove_floop32:
344 ldmia r1!, {r3, r4, r12, lr}
345 stmia r0!, {r3, r4, r12, lr}
346 ldmia r1!, {r3, r4, r12, lr}
347 stmia r0!, {r3, r4, r12, lr}
348 subs r2, r2, #0x20
349 bge .Lmemmove_floop32
350
351 cmn r2, #0x10
352 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
353 stmgeia r0!, {r3, r4, r12, lr}
354 subge r2, r2, #0x10
355 ldmia sp!, {r4} /* return r4 */
356
357 .Lmemmove_fl32:
358 adds r2, r2, #0x14
359
360 /* blat 12 bytes at a time */
361 .Lmemmove_floop12:
362 ldmgeia r1!, {r3, r12, lr}
363 stmgeia r0!, {r3, r12, lr}
364 subges r2, r2, #0x0c
365 bge .Lmemmove_floop12
366
367 .Lmemmove_fl12:
368 adds r2, r2, #8
369 blt .Lmemmove_fl4
370
371 subs r2, r2, #4
372 ldrlt r3, [r1], #4
373 strlt r3, [r0], #4
374 ldmgeia r1!, {r3, r12}
375 stmgeia r0!, {r3, r12}
376 subge r2, r2, #4
377
378 .Lmemmove_fl4:
379 /* less than 4 bytes to go */
380 adds r2, r2, #4
381 ldmeqia sp!, {r0, pc} /* done */
382
383 /* copy the crud byte at a time */
384 cmp r2, #2
385 ldrb r3, [r1], #1
386 strb r3, [r0], #1
387 ldrgeb r3, [r1], #1
388 strgeb r3, [r0], #1
389 ldrgtb r3, [r1], #1
390 strgtb r3, [r0], #1
391 ldmia sp!, {r0, pc}
392
393 /* erg - unaligned destination */
394 .Lmemmove_fdestul:
395 rsb r12, r12, #4
396 cmp r12, #2
397
398 /* align destination with byte copies */
399 ldrb r3, [r1], #1
400 strb r3, [r0], #1
401 ldrgeb r3, [r1], #1
402 strgeb r3, [r0], #1
403 ldrgtb r3, [r1], #1
404 strgtb r3, [r0], #1
405 subs r2, r2, r12
406 	blt	.Lmemmove_fl4		/* less than 4 bytes */
407
408 ands r12, r1, #3
409 beq .Lmemmove_ft8 /* we have an aligned source */
410
411 /* erg - unaligned source */
412 /* This is where it gets nasty ... */
413 .Lmemmove_fsrcul:
414 bic r1, r1, #3
415 ldr lr, [r1], #4
416 cmp r12, #2
417 bgt .Lmemmove_fsrcul3
418 beq .Lmemmove_fsrcul2
419 cmp r2, #0x0c
420 blt .Lmemmove_fsrcul1loop4
421 sub r2, r2, #0x0c
422 stmdb sp!, {r4, r5}
423
424 .Lmemmove_fsrcul1loop16:
425 #ifdef __ARMEB__
426 mov r3, lr, lsl #8
427 #else
428 mov r3, lr, lsr #8
429 #endif
430 ldmia r1!, {r4, r5, r12, lr}
431 #ifdef __ARMEB__
432 orr r3, r3, r4, lsr #24
433 mov r4, r4, lsl #8
434 orr r4, r4, r5, lsr #24
435 mov r5, r5, lsl #8
436 orr r5, r5, r12, lsr #24
437 mov r12, r12, lsl #8
438 orr r12, r12, lr, lsr #24
439 #else
440 orr r3, r3, r4, lsl #24
441 mov r4, r4, lsr #8
442 orr r4, r4, r5, lsl #24
443 mov r5, r5, lsr #8
444 orr r5, r5, r12, lsl #24
445 mov r12, r12, lsr #8
446 orr r12, r12, lr, lsl #24
447 #endif
448 stmia r0!, {r3-r5, r12}
449 subs r2, r2, #0x10
450 bge .Lmemmove_fsrcul1loop16
451 ldmia sp!, {r4, r5}
452 adds r2, r2, #0x0c
453 blt .Lmemmove_fsrcul1l4
454
455 .Lmemmove_fsrcul1loop4:
456 #ifdef __ARMEB__
457 mov r12, lr, lsl #8
458 #else
459 mov r12, lr, lsr #8
460 #endif
461 ldr lr, [r1], #4
462 #ifdef __ARMEB__
463 orr r12, r12, lr, lsr #24
464 #else
465 orr r12, r12, lr, lsl #24
466 #endif
467 str r12, [r0], #4
468 subs r2, r2, #4
469 bge .Lmemmove_fsrcul1loop4
470
471 .Lmemmove_fsrcul1l4:
472 sub r1, r1, #3
473 b .Lmemmove_fl4
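All of the "nasty" unaligned-source paths share one trick: round the source down to a word boundary, then build each output word from the top of one aligned word and the bottom of the next, with shift counts set by the misalignment (8, 16 or 24 bits). A little-endian C sketch of the one-byte-offset case just handled above (the name is invented; the big-endian variants swap the shift directions):

    #include <stddef.h>
    #include <stdint.h>

    /*
     * Sketch: copy len bytes (a multiple of 4) to word-aligned dst from
     * src == word boundary + 1, little-endian.
     */
    static void
    copy_src_off1_sketch(uint32_t *dst, const unsigned char *src, size_t len)
    {
    	const uint32_t *s = (const uint32_t *)((uintptr_t)src & ~(uintptr_t)3);
    	uint32_t lo, hi;

    	lo = *s++;				/* bits 8-31 hold src[0..2] */
    	for (; len >= 4; len -= 4) {
    		hi = *s++;
    		*dst++ = (lo >> 8) | (hi << 24);	/* splice two words */
    		lo = hi;
    	}
    }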
474
475 .Lmemmove_fsrcul2:
476 cmp r2, #0x0c
477 blt .Lmemmove_fsrcul2loop4
478 sub r2, r2, #0x0c
479 stmdb sp!, {r4, r5}
480
481 .Lmemmove_fsrcul2loop16:
482 #ifdef __ARMEB__
483 mov r3, lr, lsl #16
484 #else
485 mov r3, lr, lsr #16
486 #endif
487 ldmia r1!, {r4, r5, r12, lr}
488 #ifdef __ARMEB__
489 orr r3, r3, r4, lsr #16
490 mov r4, r4, lsl #16
491 orr r4, r4, r5, lsr #16
492 mov r5, r5, lsl #16
493 orr r5, r5, r12, lsr #16
494 mov r12, r12, lsl #16
495 orr r12, r12, lr, lsr #16
496 #else
497 orr r3, r3, r4, lsl #16
498 mov r4, r4, lsr #16
499 orr r4, r4, r5, lsl #16
500 mov r5, r5, lsr #16
501 orr r5, r5, r12, lsl #16
502 mov r12, r12, lsr #16
503 orr r12, r12, lr, lsl #16
504 #endif
505 stmia r0!, {r3-r5, r12}
506 subs r2, r2, #0x10
507 bge .Lmemmove_fsrcul2loop16
508 ldmia sp!, {r4, r5}
509 adds r2, r2, #0x0c
510 blt .Lmemmove_fsrcul2l4
511
512 .Lmemmove_fsrcul2loop4:
513 #ifdef __ARMEB__
514 mov r12, lr, lsl #16
515 #else
516 mov r12, lr, lsr #16
517 #endif
518 ldr lr, [r1], #4
519 #ifdef __ARMEB__
520 orr r12, r12, lr, lsr #16
521 #else
522 orr r12, r12, lr, lsl #16
523 #endif
524 str r12, [r0], #4
525 subs r2, r2, #4
526 bge .Lmemmove_fsrcul2loop4
527
528 .Lmemmove_fsrcul2l4:
529 sub r1, r1, #2
530 b .Lmemmove_fl4
531
532 .Lmemmove_fsrcul3:
533 cmp r2, #0x0c
534 blt .Lmemmove_fsrcul3loop4
535 sub r2, r2, #0x0c
536 stmdb sp!, {r4, r5}
537
538 .Lmemmove_fsrcul3loop16:
539 #ifdef __ARMEB__
540 mov r3, lr, lsl #24
541 #else
542 mov r3, lr, lsr #24
543 #endif
544 ldmia r1!, {r4, r5, r12, lr}
545 #ifdef __ARMEB__
546 orr r3, r3, r4, lsr #8
547 mov r4, r4, lsl #24
548 orr r4, r4, r5, lsr #8
549 mov r5, r5, lsl #24
550 orr r5, r5, r12, lsr #8
551 mov r12, r12, lsl #24
552 orr r12, r12, lr, lsr #8
553 #else
554 orr r3, r3, r4, lsl #8
555 mov r4, r4, lsr #24
556 orr r4, r4, r5, lsl #8
557 mov r5, r5, lsr #24
558 orr r5, r5, r12, lsl #8
559 mov r12, r12, lsr #24
560 orr r12, r12, lr, lsl #8
561 #endif
562 stmia r0!, {r3-r5, r12}
563 subs r2, r2, #0x10
564 bge .Lmemmove_fsrcul3loop16
565 ldmia sp!, {r4, r5}
566 adds r2, r2, #0x0c
567 blt .Lmemmove_fsrcul3l4
568
569 .Lmemmove_fsrcul3loop4:
570 #ifdef __ARMEB__
571 mov r12, lr, lsl #24
572 #else
573 mov r12, lr, lsr #24
574 #endif
575 ldr lr, [r1], #4
576 #ifdef __ARMEB__
577 orr r12, r12, lr, lsr #8
578 #else
579 orr r12, r12, lr, lsl #8
580 #endif
581 str r12, [r0], #4
582 subs r2, r2, #4
583 bge .Lmemmove_fsrcul3loop4
584
585 .Lmemmove_fsrcul3l4:
586 sub r1, r1, #1
587 b .Lmemmove_fl4
588
589 .Lmemmove_backwards:
590 add r1, r1, r2
591 add r0, r0, r2
592 subs r2, r2, #4
593 blt .Lmemmove_bl4 /* less than 4 bytes */
594 ands r12, r0, #3
595 bne .Lmemmove_bdestul /* oh unaligned destination addr */
596 ands r12, r1, #3
597 bne .Lmemmove_bsrcul /* oh unaligned source addr */
598
599 .Lmemmove_bt8:
600 /* We have aligned source and destination */
601 subs r2, r2, #8
602 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
603 stmdb sp!, {r4, lr}
604 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
605 blt .Lmemmove_bl32
606
607 /* blat 32 bytes at a time */
608 /* XXX for really big copies perhaps we should use more registers */
609 .Lmemmove_bloop32:
610 ldmdb r1!, {r3, r4, r12, lr}
611 stmdb r0!, {r3, r4, r12, lr}
612 ldmdb r1!, {r3, r4, r12, lr}
613 stmdb r0!, {r3, r4, r12, lr}
614 subs r2, r2, #0x20
615 bge .Lmemmove_bloop32
616
617 .Lmemmove_bl32:
618 cmn r2, #0x10
619 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
620 stmgedb r0!, {r3, r4, r12, lr}
621 subge r2, r2, #0x10
622 adds r2, r2, #0x14
623 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
624 stmgedb r0!, {r3, r12, lr}
625 subge r2, r2, #0x0c
626 ldmia sp!, {r4, lr}
627
628 .Lmemmove_bl12:
629 adds r2, r2, #8
630 blt .Lmemmove_bl4
631 subs r2, r2, #4
632 ldrlt r3, [r1, #-4]!
633 strlt r3, [r0, #-4]!
634 ldmgedb r1!, {r3, r12}
635 stmgedb r0!, {r3, r12}
636 subge r2, r2, #4
637
638 .Lmemmove_bl4:
639 /* less than 4 bytes to go */
640 adds r2, r2, #4
641 RETeq /* done */
642
643 /* copy the crud byte at a time */
644 cmp r2, #2
645 ldrb r3, [r1, #-1]!
646 strb r3, [r0, #-1]!
647 ldrgeb r3, [r1, #-1]!
648 strgeb r3, [r0, #-1]!
649 ldrgtb r3, [r1, #-1]!
650 strgtb r3, [r0, #-1]!
651 RET
652
653 /* erg - unaligned destination */
654 .Lmemmove_bdestul:
655 cmp r12, #2
656
657 /* align destination with byte copies */
658 ldrb r3, [r1, #-1]!
659 strb r3, [r0, #-1]!
660 ldrgeb r3, [r1, #-1]!
661 strgeb r3, [r0, #-1]!
662 ldrgtb r3, [r1, #-1]!
663 strgtb r3, [r0, #-1]!
664 subs r2, r2, r12
665 blt .Lmemmove_bl4 /* less than 4 bytes to go */
666 ands r12, r1, #3
667 beq .Lmemmove_bt8 /* we have an aligned source */
668
669 /* erg - unaligned source */
670 /* This is where it gets nasty ... */
671 .Lmemmove_bsrcul:
672 bic r1, r1, #3
673 ldr r3, [r1, #0]
674 cmp r12, #2
675 blt .Lmemmove_bsrcul1
676 beq .Lmemmove_bsrcul2
677 cmp r2, #0x0c
678 blt .Lmemmove_bsrcul3loop4
679 sub r2, r2, #0x0c
680 stmdb sp!, {r4, r5, lr}
681
682 .Lmemmove_bsrcul3loop16:
683 #ifdef __ARMEB__
684 mov lr, r3, lsr #8
685 #else
686 mov lr, r3, lsl #8
687 #endif
688 ldmdb r1!, {r3-r5, r12}
689 #ifdef __ARMEB__
690 orr lr, lr, r12, lsl #24
691 mov r12, r12, lsr #8
692 orr r12, r12, r5, lsl #24
693 mov r5, r5, lsr #8
694 orr r5, r5, r4, lsl #24
695 mov r4, r4, lsr #8
696 orr r4, r4, r3, lsl #24
697 #else
698 orr lr, lr, r12, lsr #24
699 mov r12, r12, lsl #8
700 orr r12, r12, r5, lsr #24
701 mov r5, r5, lsl #8
702 orr r5, r5, r4, lsr #24
703 mov r4, r4, lsl #8
704 orr r4, r4, r3, lsr #24
705 #endif
706 stmdb r0!, {r4, r5, r12, lr}
707 subs r2, r2, #0x10
708 bge .Lmemmove_bsrcul3loop16
709 ldmia sp!, {r4, r5, lr}
710 adds r2, r2, #0x0c
711 blt .Lmemmove_bsrcul3l4
712
713 .Lmemmove_bsrcul3loop4:
714 #ifdef __ARMEB__
715 mov r12, r3, lsr #8
716 #else
717 mov r12, r3, lsl #8
718 #endif
719 ldr r3, [r1, #-4]!
720 #ifdef __ARMEB__
721 orr r12, r12, r3, lsl #24
722 #else
723 orr r12, r12, r3, lsr #24
724 #endif
725 str r12, [r0, #-4]!
726 subs r2, r2, #4
727 bge .Lmemmove_bsrcul3loop4
728
729 .Lmemmove_bsrcul3l4:
730 add r1, r1, #3
731 b .Lmemmove_bl4
732
733 .Lmemmove_bsrcul2:
734 cmp r2, #0x0c
735 blt .Lmemmove_bsrcul2loop4
736 sub r2, r2, #0x0c
737 stmdb sp!, {r4, r5, lr}
738
739 .Lmemmove_bsrcul2loop16:
740 #ifdef __ARMEB__
741 mov lr, r3, lsr #16
742 #else
743 mov lr, r3, lsl #16
744 #endif
745 ldmdb r1!, {r3-r5, r12}
746 #ifdef __ARMEB__
747 orr lr, lr, r12, lsl #16
748 mov r12, r12, lsr #16
749 orr r12, r12, r5, lsl #16
750 mov r5, r5, lsr #16
751 orr r5, r5, r4, lsl #16
752 mov r4, r4, lsr #16
753 orr r4, r4, r3, lsl #16
754 #else
755 orr lr, lr, r12, lsr #16
756 mov r12, r12, lsl #16
757 orr r12, r12, r5, lsr #16
758 mov r5, r5, lsl #16
759 orr r5, r5, r4, lsr #16
760 mov r4, r4, lsl #16
761 orr r4, r4, r3, lsr #16
762 #endif
763 stmdb r0!, {r4, r5, r12, lr}
764 subs r2, r2, #0x10
765 bge .Lmemmove_bsrcul2loop16
766 ldmia sp!, {r4, r5, lr}
767 adds r2, r2, #0x0c
768 blt .Lmemmove_bsrcul2l4
769
770 .Lmemmove_bsrcul2loop4:
771 #ifdef __ARMEB__
772 mov r12, r3, lsr #16
773 #else
774 mov r12, r3, lsl #16
775 #endif
776 ldr r3, [r1, #-4]!
777 #ifdef __ARMEB__
778 orr r12, r12, r3, lsl #16
779 #else
780 orr r12, r12, r3, lsr #16
781 #endif
782 str r12, [r0, #-4]!
783 subs r2, r2, #4
784 bge .Lmemmove_bsrcul2loop4
785
786 .Lmemmove_bsrcul2l4:
787 add r1, r1, #2
788 b .Lmemmove_bl4
789
790 .Lmemmove_bsrcul1:
791 cmp r2, #0x0c
792 blt .Lmemmove_bsrcul1loop4
793 sub r2, r2, #0x0c
794 stmdb sp!, {r4, r5, lr}
795
796 .Lmemmove_bsrcul1loop32:
797 #ifdef __ARMEB__
798 mov lr, r3, lsr #24
799 #else
800 mov lr, r3, lsl #24
801 #endif
802 ldmdb r1!, {r3-r5, r12}
803 #ifdef __ARMEB__
804 orr lr, lr, r12, lsl #8
805 mov r12, r12, lsr #24
806 orr r12, r12, r5, lsl #8
807 mov r5, r5, lsr #24
808 orr r5, r5, r4, lsl #8
809 mov r4, r4, lsr #24
810 orr r4, r4, r3, lsl #8
811 #else
812 orr lr, lr, r12, lsr #8
813 mov r12, r12, lsl #24
814 orr r12, r12, r5, lsr #8
815 mov r5, r5, lsl #24
816 orr r5, r5, r4, lsr #8
817 mov r4, r4, lsl #24
818 orr r4, r4, r3, lsr #8
819 #endif
820 stmdb r0!, {r4, r5, r12, lr}
821 subs r2, r2, #0x10
822 bge .Lmemmove_bsrcul1loop32
823 ldmia sp!, {r4, r5, lr}
824 adds r2, r2, #0x0c
825 blt .Lmemmove_bsrcul1l4
826
827 .Lmemmove_bsrcul1loop4:
828 #ifdef __ARMEB__
829 mov r12, r3, lsr #24
830 #else
831 mov r12, r3, lsl #24
832 #endif
833 ldr r3, [r1, #-4]!
834 #ifdef __ARMEB__
835 orr r12, r12, r3, lsl #8
836 #else
837 orr r12, r12, r3, lsr #8
838 #endif
839 str r12, [r0, #-4]!
840 subs r2, r2, #4
841 bge .Lmemmove_bsrcul1loop4
842
843 .Lmemmove_bsrcul1l4:
844 add r1, r1, #1
845 b .Lmemmove_bl4
846
847 #if !defined(__XSCALE__)
848 ENTRY(memcpy)
849 /* save leaf functions having to store this away */
850 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
851
852 subs r2, r2, #4
853 blt .Lmemcpy_l4 /* less than 4 bytes */
854 ands r12, r0, #3
855 bne .Lmemcpy_destul /* oh unaligned destination addr */
856 ands r12, r1, #3
857 bne .Lmemcpy_srcul /* oh unaligned source addr */
858
859 .Lmemcpy_t8:
860 /* We have aligned source and destination */
861 subs r2, r2, #8
862 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
863 subs r2, r2, #0x14
864 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
865 stmdb sp!, {r4} /* borrow r4 */
866
867 /* blat 32 bytes at a time */
868 /* XXX for really big copies perhaps we should use more registers */
869 .Lmemcpy_loop32:
870 ldmia r1!, {r3, r4, r12, lr}
871 stmia r0!, {r3, r4, r12, lr}
872 ldmia r1!, {r3, r4, r12, lr}
873 stmia r0!, {r3, r4, r12, lr}
874 subs r2, r2, #0x20
875 bge .Lmemcpy_loop32
876
877 cmn r2, #0x10
878 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
879 stmgeia r0!, {r3, r4, r12, lr}
880 subge r2, r2, #0x10
881 ldmia sp!, {r4} /* return r4 */
882
883 .Lmemcpy_l32:
884 adds r2, r2, #0x14
885
886 /* blat 12 bytes at a time */
887 .Lmemcpy_loop12:
888 ldmgeia r1!, {r3, r12, lr}
889 stmgeia r0!, {r3, r12, lr}
890 subges r2, r2, #0x0c
891 bge .Lmemcpy_loop12
892
893 .Lmemcpy_l12:
894 adds r2, r2, #8
895 blt .Lmemcpy_l4
896
897 subs r2, r2, #4
898 ldrlt r3, [r1], #4
899 strlt r3, [r0], #4
900 ldmgeia r1!, {r3, r12}
901 stmgeia r0!, {r3, r12}
902 subge r2, r2, #4
903
904 .Lmemcpy_l4:
905 /* less than 4 bytes to go */
906 adds r2, r2, #4
907 #ifdef __APCS_26__
908 ldmeqia sp!, {r0, pc}^ /* done */
909 #else
910 ldmeqia sp!, {r0, pc} /* done */
911 #endif
912 /* copy the crud byte at a time */
913 cmp r2, #2
914 ldrb r3, [r1], #1
915 strb r3, [r0], #1
916 ldrgeb r3, [r1], #1
917 strgeb r3, [r0], #1
918 ldrgtb r3, [r1], #1
919 strgtb r3, [r0], #1
920 ldmia sp!, {r0, pc}
921
922 /* erg - unaligned destination */
923 .Lmemcpy_destul:
924 rsb r12, r12, #4
925 cmp r12, #2
926
927 /* align destination with byte copies */
928 ldrb r3, [r1], #1
929 strb r3, [r0], #1
930 ldrgeb r3, [r1], #1
931 strgeb r3, [r0], #1
932 ldrgtb r3, [r1], #1
933 strgtb r3, [r0], #1
934 subs r2, r2, r12
935 	blt	.Lmemcpy_l4		/* less than 4 bytes */
936
937 ands r12, r1, #3
938 beq .Lmemcpy_t8 /* we have an aligned source */
939
940 /* erg - unaligned source */
941 /* This is where it gets nasty ... */
942 .Lmemcpy_srcul:
943 bic r1, r1, #3
944 ldr lr, [r1], #4
945 cmp r12, #2
946 bgt .Lmemcpy_srcul3
947 beq .Lmemcpy_srcul2
948 cmp r2, #0x0c
949 blt .Lmemcpy_srcul1loop4
950 sub r2, r2, #0x0c
951 stmdb sp!, {r4, r5}
952
953 .Lmemcpy_srcul1loop16:
954 mov r3, lr, lsr #8
955 ldmia r1!, {r4, r5, r12, lr}
956 orr r3, r3, r4, lsl #24
957 mov r4, r4, lsr #8
958 orr r4, r4, r5, lsl #24
959 mov r5, r5, lsr #8
960 orr r5, r5, r12, lsl #24
961 mov r12, r12, lsr #8
962 orr r12, r12, lr, lsl #24
963 stmia r0!, {r3-r5, r12}
964 subs r2, r2, #0x10
965 bge .Lmemcpy_srcul1loop16
966 ldmia sp!, {r4, r5}
967 adds r2, r2, #0x0c
968 blt .Lmemcpy_srcul1l4
969
970 .Lmemcpy_srcul1loop4:
971 mov r12, lr, lsr #8
972 ldr lr, [r1], #4
973 orr r12, r12, lr, lsl #24
974 str r12, [r0], #4
975 subs r2, r2, #4
976 bge .Lmemcpy_srcul1loop4
977
978 .Lmemcpy_srcul1l4:
979 sub r1, r1, #3
980 b .Lmemcpy_l4
981
982 .Lmemcpy_srcul2:
983 cmp r2, #0x0c
984 blt .Lmemcpy_srcul2loop4
985 sub r2, r2, #0x0c
986 stmdb sp!, {r4, r5}
987
988 .Lmemcpy_srcul2loop16:
989 mov r3, lr, lsr #16
990 ldmia r1!, {r4, r5, r12, lr}
991 orr r3, r3, r4, lsl #16
992 mov r4, r4, lsr #16
993 orr r4, r4, r5, lsl #16
994 mov r5, r5, lsr #16
995 orr r5, r5, r12, lsl #16
996 mov r12, r12, lsr #16
997 orr r12, r12, lr, lsl #16
998 stmia r0!, {r3-r5, r12}
999 subs r2, r2, #0x10
1000 bge .Lmemcpy_srcul2loop16
1001 ldmia sp!, {r4, r5}
1002 adds r2, r2, #0x0c
1003 blt .Lmemcpy_srcul2l4
1004
1005 .Lmemcpy_srcul2loop4:
1006 mov r12, lr, lsr #16
1007 ldr lr, [r1], #4
1008 orr r12, r12, lr, lsl #16
1009 str r12, [r0], #4
1010 subs r2, r2, #4
1011 bge .Lmemcpy_srcul2loop4
1012
1013 .Lmemcpy_srcul2l4:
1014 sub r1, r1, #2
1015 b .Lmemcpy_l4
1016
1017 .Lmemcpy_srcul3:
1018 cmp r2, #0x0c
1019 blt .Lmemcpy_srcul3loop4
1020 sub r2, r2, #0x0c
1021 stmdb sp!, {r4, r5}
1022
1023 .Lmemcpy_srcul3loop16:
1024 mov r3, lr, lsr #24
1025 ldmia r1!, {r4, r5, r12, lr}
1026 orr r3, r3, r4, lsl #8
1027 mov r4, r4, lsr #24
1028 orr r4, r4, r5, lsl #8
1029 mov r5, r5, lsr #24
1030 orr r5, r5, r12, lsl #8
1031 mov r12, r12, lsr #24
1032 orr r12, r12, lr, lsl #8
1033 stmia r0!, {r3-r5, r12}
1034 subs r2, r2, #0x10
1035 bge .Lmemcpy_srcul3loop16
1036 ldmia sp!, {r4, r5}
1037 adds r2, r2, #0x0c
1038 blt .Lmemcpy_srcul3l4
1039
1040 .Lmemcpy_srcul3loop4:
1041 mov r12, lr, lsr #24
1042 ldr lr, [r1], #4
1043 orr r12, r12, lr, lsl #8
1044 str r12, [r0], #4
1045 subs r2, r2, #4
1046 bge .Lmemcpy_srcul3loop4
1047
1048 .Lmemcpy_srcul3l4:
1049 sub r1, r1, #1
1050 b .Lmemcpy_l4
1051 #else
1052 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1053 ENTRY(memcpy)
1054 pld [r1]
1055 cmp r2, #0x0c
1056 ble .Lmemcpy_short /* <= 12 bytes */
1057 mov r3, r0 /* We must not clobber r0 */
1058
1059 /* Word-align the destination buffer */
1060 ands ip, r3, #0x03 /* Already word aligned? */
1061 beq .Lmemcpy_wordaligned /* Yup */
1062 cmp ip, #0x02
1063 ldrb ip, [r1], #0x01
1064 sub r2, r2, #0x01
1065 strb ip, [r3], #0x01
1066 ldrleb ip, [r1], #0x01
1067 suble r2, r2, #0x01
1068 strleb ip, [r3], #0x01
1069 ldrltb ip, [r1], #0x01
1070 sublt r2, r2, #0x01
1071 strltb ip, [r3], #0x01
1072
1073 /* Destination buffer is now word aligned */
1074 .Lmemcpy_wordaligned:
1075 ands ip, r1, #0x03 /* Is src also word-aligned? */
1076 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
1077
1078 /* Quad-align the destination buffer */
1079 tst r3, #0x07 /* Already quad aligned? */
1080 ldrne ip, [r1], #0x04
1081 stmfd sp!, {r4-r9} /* Free up some registers */
1082 subne r2, r2, #0x04
1083 strne ip, [r3], #0x04
1084
1085 /* Destination buffer quad aligned, source is at least word aligned */
1086 subs r2, r2, #0x80
1087 blt .Lmemcpy_w_lessthan128
1088
1089 /* Copy 128 bytes at a time */
1090 .Lmemcpy_w_loop128:
1091 ldr r4, [r1], #0x04 /* LD:00-03 */
1092 ldr r5, [r1], #0x04 /* LD:04-07 */
1093 pld [r1, #0x18] /* Prefetch 0x20 */
1094 ldr r6, [r1], #0x04 /* LD:08-0b */
1095 ldr r7, [r1], #0x04 /* LD:0c-0f */
1096 ldr r8, [r1], #0x04 /* LD:10-13 */
1097 ldr r9, [r1], #0x04 /* LD:14-17 */
1098 strd r4, [r3], #0x08 /* ST:00-07 */
1099 ldr r4, [r1], #0x04 /* LD:18-1b */
1100 ldr r5, [r1], #0x04 /* LD:1c-1f */
1101 strd r6, [r3], #0x08 /* ST:08-0f */
1102 ldr r6, [r1], #0x04 /* LD:20-23 */
1103 ldr r7, [r1], #0x04 /* LD:24-27 */
1104 pld [r1, #0x18] /* Prefetch 0x40 */
1105 strd r8, [r3], #0x08 /* ST:10-17 */
1106 ldr r8, [r1], #0x04 /* LD:28-2b */
1107 ldr r9, [r1], #0x04 /* LD:2c-2f */
1108 strd r4, [r3], #0x08 /* ST:18-1f */
1109 ldr r4, [r1], #0x04 /* LD:30-33 */
1110 ldr r5, [r1], #0x04 /* LD:34-37 */
1111 strd r6, [r3], #0x08 /* ST:20-27 */
1112 ldr r6, [r1], #0x04 /* LD:38-3b */
1113 ldr r7, [r1], #0x04 /* LD:3c-3f */
1114 strd r8, [r3], #0x08 /* ST:28-2f */
1115 ldr r8, [r1], #0x04 /* LD:40-43 */
1116 ldr r9, [r1], #0x04 /* LD:44-47 */
1117 pld [r1, #0x18] /* Prefetch 0x60 */
1118 strd r4, [r3], #0x08 /* ST:30-37 */
1119 ldr r4, [r1], #0x04 /* LD:48-4b */
1120 ldr r5, [r1], #0x04 /* LD:4c-4f */
1121 strd r6, [r3], #0x08 /* ST:38-3f */
1122 ldr r6, [r1], #0x04 /* LD:50-53 */
1123 ldr r7, [r1], #0x04 /* LD:54-57 */
1124 strd r8, [r3], #0x08 /* ST:40-47 */
1125 ldr r8, [r1], #0x04 /* LD:58-5b */
1126 ldr r9, [r1], #0x04 /* LD:5c-5f */
1127 strd r4, [r3], #0x08 /* ST:48-4f */
1128 ldr r4, [r1], #0x04 /* LD:60-63 */
1129 ldr r5, [r1], #0x04 /* LD:64-67 */
1130 pld [r1, #0x18] /* Prefetch 0x80 */
1131 strd r6, [r3], #0x08 /* ST:50-57 */
1132 ldr r6, [r1], #0x04 /* LD:68-6b */
1133 ldr r7, [r1], #0x04 /* LD:6c-6f */
1134 strd r8, [r3], #0x08 /* ST:58-5f */
1135 ldr r8, [r1], #0x04 /* LD:70-73 */
1136 ldr r9, [r1], #0x04 /* LD:74-77 */
1137 strd r4, [r3], #0x08 /* ST:60-67 */
1138 ldr r4, [r1], #0x04 /* LD:78-7b */
1139 ldr r5, [r1], #0x04 /* LD:7c-7f */
1140 strd r6, [r3], #0x08 /* ST:68-6f */
1141 strd r8, [r3], #0x08 /* ST:70-77 */
1142 subs r2, r2, #0x80
1143 strd r4, [r3], #0x08 /* ST:78-7f */
1144 bge .Lmemcpy_w_loop128
1145
1146 .Lmemcpy_w_lessthan128:
1147 adds r2, r2, #0x80 /* Adjust for extra sub */
1148 ldmeqfd sp!, {r4-r9}
1149 RETeq /* Return now if done */
1150 subs r2, r2, #0x20
1151 blt .Lmemcpy_w_lessthan32
1152
1153 /* Copy 32 bytes at a time */
1154 .Lmemcpy_w_loop32:
1155 ldr r4, [r1], #0x04
1156 ldr r5, [r1], #0x04
1157 pld [r1, #0x18]
1158 ldr r6, [r1], #0x04
1159 ldr r7, [r1], #0x04
1160 ldr r8, [r1], #0x04
1161 ldr r9, [r1], #0x04
1162 strd r4, [r3], #0x08
1163 ldr r4, [r1], #0x04
1164 ldr r5, [r1], #0x04
1165 strd r6, [r3], #0x08
1166 strd r8, [r3], #0x08
1167 subs r2, r2, #0x20
1168 strd r4, [r3], #0x08
1169 bge .Lmemcpy_w_loop32
1170
1171 .Lmemcpy_w_lessthan32:
1172 adds r2, r2, #0x20 /* Adjust for extra sub */
1173 ldmeqfd sp!, {r4-r9}
1174 RETeq /* Return now if done */
1175
1176 and r4, r2, #0x18
1177 rsbs r4, r4, #0x18
1178 addne pc, pc, r4, lsl #1
1179 nop
1180
1181 /* At least 24 bytes remaining */
1182 ldr r4, [r1], #0x04
1183 ldr r5, [r1], #0x04
1184 sub r2, r2, #0x08
1185 strd r4, [r3], #0x08
1186
1187 /* At least 16 bytes remaining */
1188 ldr r4, [r1], #0x04
1189 ldr r5, [r1], #0x04
1190 sub r2, r2, #0x08
1191 strd r4, [r3], #0x08
1192
1193 /* At least 8 bytes remaining */
1194 ldr r4, [r1], #0x04
1195 ldr r5, [r1], #0x04
1196 subs r2, r2, #0x08
1197 strd r4, [r3], #0x08
1198
1199 /* Less than 8 bytes remaining */
1200 ldmfd sp!, {r4-r9}
1201 RETeq /* Return now if done */
1202 subs r2, r2, #0x04
1203 ldrge ip, [r1], #0x04
1204 strge ip, [r3], #0x04
1205 RETeq /* Return now if done */
1206 addlt r2, r2, #0x04
1207 ldrb ip, [r1], #0x01
1208 cmp r2, #0x02
1209 ldrgeb r2, [r1], #0x01
1210 strb ip, [r3], #0x01
1211 ldrgtb ip, [r1]
1212 strgeb r2, [r3], #0x01
1213 strgtb ip, [r3]
1214 RET
1215
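The word-aligned path above is all about scheduling on XScale: a pld runs roughly 32 bytes ahead of the loads, and stores go out as 8-byte strd pairs from a quad-aligned destination. Stripped of that scheduling, the 128-byte loop reduces to this C shape (a sketch using GCC's __builtin_prefetch; 8-byte alignment of both pointers is assumed, as the assembly arranges):

    #include <stddef.h>
    #include <stdint.h>

    /* Rough C shape of the 128-byte loop; both pointers 8-byte aligned. */
    static void
    copy_bulk_sketch(uint64_t *dst, const uint64_t *src, size_t len)
    {
    	while (len >= 128) {
    		__builtin_prefetch(src + 4);	/* ~32 bytes ahead, like pld */
    		for (int i = 0; i < 16; i++)
    			*dst++ = *src++;	/* 16 x 8 bytes, like strd */
    		len -= 128;
    	}
    }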
1216
1217 /*
1218 * At this point, it has not been possible to word align both buffers.
1219 * The destination buffer is word aligned, but the source buffer is not.
1220 */
1221 .Lmemcpy_bad_align:
1222 stmfd sp!, {r4-r7}
1223 bic r1, r1, #0x03
1224 cmp ip, #2
1225 ldr ip, [r1], #0x04
1226 bgt .Lmemcpy_bad3
1227 beq .Lmemcpy_bad2
1228 b .Lmemcpy_bad1
1229
1230 .Lmemcpy_bad1_loop16:
1231 #ifdef __ARMEB__
1232 mov r4, ip, lsl #8
1233 #else
1234 mov r4, ip, lsr #8
1235 #endif
1236 ldr r5, [r1], #0x04
1237 pld [r1, #0x018]
1238 ldr r6, [r1], #0x04
1239 ldr r7, [r1], #0x04
1240 ldr ip, [r1], #0x04
1241 #ifdef __ARMEB__
1242 orr r4, r4, r5, lsr #24
1243 mov r5, r5, lsl #8
1244 orr r5, r5, r6, lsr #24
1245 mov r6, r6, lsl #8
1246 orr r6, r6, r7, lsr #24
1247 mov r7, r7, lsl #8
1248 orr r7, r7, ip, lsr #24
1249 #else
1250 orr r4, r4, r5, lsl #24
1251 mov r5, r5, lsr #8
1252 orr r5, r5, r6, lsl #24
1253 mov r6, r6, lsr #8
1254 orr r6, r6, r7, lsl #24
1255 mov r7, r7, lsr #8
1256 orr r7, r7, ip, lsl #24
1257 #endif
1258 str r4, [r3], #0x04
1259 str r5, [r3], #0x04
1260 str r6, [r3], #0x04
1261 str r7, [r3], #0x04
1262 .Lmemcpy_bad1:
1263 subs r2, r2, #0x10
1264 bge .Lmemcpy_bad1_loop16
1265
1266 adds r2, r2, #0x10
1267 ldmeqfd sp!, {r4-r7}
1268 RETeq /* Return now if done */
1269 subs r2, r2, #0x04
1270 sublt r1, r1, #0x03
1271 blt .Lmemcpy_bad_done
1272
1273 .Lmemcpy_bad1_loop4:
1274 #ifdef __ARMEB__
1275 mov r4, ip, lsl #8
1276 #else
1277 mov r4, ip, lsr #8
1278 #endif
1279 ldr ip, [r1], #0x04
1280 subs r2, r2, #0x04
1281 #ifdef __ARMEB__
1282 orr r4, r4, ip, lsr #24
1283 #else
1284 orr r4, r4, ip, lsl #24
1285 #endif
1286 str r4, [r3], #0x04
1287 bge .Lmemcpy_bad1_loop4
1288 sub r1, r1, #0x03
1289 b .Lmemcpy_bad_done
1290
1291 .Lmemcpy_bad2_loop16:
1292 #ifdef __ARMEB__
1293 mov r4, ip, lsl #16
1294 #else
1295 mov r4, ip, lsr #16
1296 #endif
1297 ldr r5, [r1], #0x04
1298 pld [r1, #0x018]
1299 ldr r6, [r1], #0x04
1300 ldr r7, [r1], #0x04
1301 ldr ip, [r1], #0x04
1302 #ifdef __ARMEB__
1303 orr r4, r4, r5, lsr #16
1304 mov r5, r5, lsl #16
1305 orr r5, r5, r6, lsr #16
1306 mov r6, r6, lsl #16
1307 orr r6, r6, r7, lsr #16
1308 mov r7, r7, lsl #16
1309 orr r7, r7, ip, lsr #16
1310 #else
1311 orr r4, r4, r5, lsl #16
1312 mov r5, r5, lsr #16
1313 orr r5, r5, r6, lsl #16
1314 mov r6, r6, lsr #16
1315 orr r6, r6, r7, lsl #16
1316 mov r7, r7, lsr #16
1317 orr r7, r7, ip, lsl #16
1318 #endif
1319 str r4, [r3], #0x04
1320 str r5, [r3], #0x04
1321 str r6, [r3], #0x04
1322 str r7, [r3], #0x04
1323 .Lmemcpy_bad2:
1324 subs r2, r2, #0x10
1325 bge .Lmemcpy_bad2_loop16
1326
1327 adds r2, r2, #0x10
1328 ldmeqfd sp!, {r4-r7}
1329 RETeq /* Return now if done */
1330 subs r2, r2, #0x04
1331 sublt r1, r1, #0x02
1332 blt .Lmemcpy_bad_done
1333
1334 .Lmemcpy_bad2_loop4:
1335 #ifdef __ARMEB__
1336 mov r4, ip, lsl #16
1337 #else
1338 mov r4, ip, lsr #16
1339 #endif
1340 ldr ip, [r1], #0x04
1341 subs r2, r2, #0x04
1342 #ifdef __ARMEB__
1343 orr r4, r4, ip, lsr #16
1344 #else
1345 orr r4, r4, ip, lsl #16
1346 #endif
1347 str r4, [r3], #0x04
1348 bge .Lmemcpy_bad2_loop4
1349 sub r1, r1, #0x02
1350 b .Lmemcpy_bad_done
1351
1352 .Lmemcpy_bad3_loop16:
1353 #ifdef __ARMEB__
1354 mov r4, ip, lsl #24
1355 #else
1356 mov r4, ip, lsr #24
1357 #endif
1358 ldr r5, [r1], #0x04
1359 pld [r1, #0x018]
1360 ldr r6, [r1], #0x04
1361 ldr r7, [r1], #0x04
1362 ldr ip, [r1], #0x04
1363 #ifdef __ARMEB__
1364 orr r4, r4, r5, lsr #8
1365 mov r5, r5, lsl #24
1366 orr r5, r5, r6, lsr #8
1367 mov r6, r6, lsl #24
1368 orr r6, r6, r7, lsr #8
1369 mov r7, r7, lsl #24
1370 orr r7, r7, ip, lsr #8
1371 #else
1372 orr r4, r4, r5, lsl #8
1373 mov r5, r5, lsr #24
1374 orr r5, r5, r6, lsl #8
1375 mov r6, r6, lsr #24
1376 orr r6, r6, r7, lsl #8
1377 mov r7, r7, lsr #24
1378 orr r7, r7, ip, lsl #8
1379 #endif
1380 str r4, [r3], #0x04
1381 str r5, [r3], #0x04
1382 str r6, [r3], #0x04
1383 str r7, [r3], #0x04
1384 .Lmemcpy_bad3:
1385 subs r2, r2, #0x10
1386 bge .Lmemcpy_bad3_loop16
1387
1388 adds r2, r2, #0x10
1389 ldmeqfd sp!, {r4-r7}
1390 RETeq /* Return now if done */
1391 subs r2, r2, #0x04
1392 sublt r1, r1, #0x01
1393 blt .Lmemcpy_bad_done
1394
1395 .Lmemcpy_bad3_loop4:
1396 #ifdef __ARMEB__
1397 mov r4, ip, lsl #24
1398 #else
1399 mov r4, ip, lsr #24
1400 #endif
1401 ldr ip, [r1], #0x04
1402 subs r2, r2, #0x04
1403 #ifdef __ARMEB__
1404 orr r4, r4, ip, lsr #8
1405 #else
1406 orr r4, r4, ip, lsl #8
1407 #endif
1408 str r4, [r3], #0x04
1409 bge .Lmemcpy_bad3_loop4
1410 sub r1, r1, #0x01
1411
1412 .Lmemcpy_bad_done:
1413 ldmfd sp!, {r4-r7}
1414 adds r2, r2, #0x04
1415 RETeq
1416 ldrb ip, [r1], #0x01
1417 cmp r2, #0x02
1418 ldrgeb r2, [r1], #0x01
1419 strb ip, [r3], #0x01
1420 ldrgtb ip, [r1]
1421 strgeb r2, [r3], #0x01
1422 strgtb ip, [r3]
1423 RET
1424
1425
1426 /*
1427 * Handle short copies (less than 16 bytes), possibly misaligned.
1428 * Some of these are *very* common, thanks to the network stack,
1429 * and so are handled specially.
1430 */
1431 .Lmemcpy_short:
1432 add pc, pc, r2, lsl #2
1433 nop
1434 RET /* 0x00 */
1435 b .Lmemcpy_bytewise /* 0x01 */
1436 b .Lmemcpy_bytewise /* 0x02 */
1437 b .Lmemcpy_bytewise /* 0x03 */
1438 b .Lmemcpy_4 /* 0x04 */
1439 b .Lmemcpy_bytewise /* 0x05 */
1440 b .Lmemcpy_6 /* 0x06 */
1441 b .Lmemcpy_bytewise /* 0x07 */
1442 b .Lmemcpy_8 /* 0x08 */
1443 b .Lmemcpy_bytewise /* 0x09 */
1444 b .Lmemcpy_bytewise /* 0x0a */
1445 b .Lmemcpy_bytewise /* 0x0b */
1446 b .Lmemcpy_c /* 0x0c */
1447 .Lmemcpy_bytewise:
1448 mov r3, r0 /* We must not clobber r0 */
1449 ldrb ip, [r1], #0x01
1450 1: subs r2, r2, #0x01
1451 strb ip, [r3], #0x01
1452 ldrneb ip, [r1], #0x01
1453 bne 1b
1454 RET
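The add pc, pc, r2, lsl #2 above is a branch table indexed by length: each slot is a single 4-byte instruction, so lengths 0 through 12 each get a dedicated target, with the common 4-, 6-, 8- and 12-byte sizes going to specialized routines. The closest C idiom is a dense fall-through switch (a sketch, not the kernel code; the assembly's special cases additionally dispatch on alignment):

    #include <stddef.h>

    /* Sketch of a length-dispatched short copy, 0..12 bytes. */
    static void
    copy_short_sketch(unsigned char *d, const unsigned char *s, size_t len)
    {
    	switch (len) {			/* one jump-table slot per length */
    	case 12: *d++ = *s++;		/* FALLTHROUGH */
    	case 11: *d++ = *s++;		/* FALLTHROUGH */
    	case 10: *d++ = *s++;		/* FALLTHROUGH */
    	case 9:  *d++ = *s++;		/* FALLTHROUGH */
    	case 8:  *d++ = *s++;		/* FALLTHROUGH */
    	case 7:  *d++ = *s++;		/* FALLTHROUGH */
    	case 6:  *d++ = *s++;		/* FALLTHROUGH */
    	case 5:  *d++ = *s++;		/* FALLTHROUGH */
    	case 4:  *d++ = *s++;		/* FALLTHROUGH */
    	case 3:  *d++ = *s++;		/* FALLTHROUGH */
    	case 2:  *d++ = *s++;		/* FALLTHROUGH */
    	case 1:  *d++ = *s++;		/* FALLTHROUGH */
    	case 0:  break;
    	}
    }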
1455
1456 /******************************************************************************
1457 * Special case for 4 byte copies
1458 */
1459 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1460 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1461 LMEMCPY_4_PAD
1462 .Lmemcpy_4:
1463 and r2, r1, #0x03
1464 orr r2, r2, r0, lsl #2
1465 ands r2, r2, #0x0f
1466 sub r3, pc, #0x14
1467 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1468
1469 /*
1470 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1471 */
1472 ldr r2, [r1]
1473 str r2, [r0]
1474 RET
1475 LMEMCPY_4_PAD
1476
1477 /*
1478 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1479 */
1480 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1481 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1482 #ifdef __ARMEB__
1483 mov r3, r3, lsl #8 /* r3 = 012. */
1484 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1485 #else
1486 mov r3, r3, lsr #8 /* r3 = .210 */
1487 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1488 #endif
1489 str r3, [r0]
1490 RET
1491 LMEMCPY_4_PAD
1492
1493 /*
1494 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1495 */
1496 #ifdef __ARMEB__
1497 ldrh r3, [r1]
1498 ldrh r2, [r1, #0x02]
1499 #else
1500 ldrh r3, [r1, #0x02]
1501 ldrh r2, [r1]
1502 #endif
1503 orr r3, r2, r3, lsl #16
1504 str r3, [r0]
1505 RET
1506 LMEMCPY_4_PAD
1507
1508 /*
1509 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1510 */
1511 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1512 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1513 #ifdef __ARMEB__
1514 mov r3, r3, lsl #24 /* r3 = 0... */
1515 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
1516 #else
1517 mov r3, r3, lsr #24 /* r3 = ...0 */
1518 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1519 #endif
1520 str r3, [r0]
1521 RET
1522 LMEMCPY_4_PAD
1523
1524 /*
1525 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1526 */
1527 ldr r2, [r1]
1528 #ifdef __ARMEB__
1529 strb r2, [r0, #0x03]
1530 mov r3, r2, lsr #8
1531 mov r1, r2, lsr #24
1532 strb r1, [r0]
1533 #else
1534 strb r2, [r0]
1535 mov r3, r2, lsr #8
1536 mov r1, r2, lsr #24
1537 strb r1, [r0, #0x03]
1538 #endif
1539 strh r3, [r0, #0x01]
1540 RET
1541 LMEMCPY_4_PAD
1542
1543 /*
1544 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1545 */
1546 ldrb r2, [r1]
1547 ldrh r3, [r1, #0x01]
1548 ldrb r1, [r1, #0x03]
1549 strb r2, [r0]
1550 strh r3, [r0, #0x01]
1551 strb r1, [r0, #0x03]
1552 RET
1553 LMEMCPY_4_PAD
1554
1555 /*
1556 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1557 */
1558 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1559 	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
1560 #ifdef __ARMEB__
1561 mov r1, r2, lsr #8 /* r1 = ...0 */
1562 strb r1, [r0]
1563 mov r2, r2, lsl #8 /* r2 = .01. */
1564 orr r2, r2, r3, lsr #8 /* r2 = .012 */
1565 #else
1566 strb r2, [r0]
1567 mov r2, r2, lsr #8 /* r2 = ...1 */
1568 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1569 mov r3, r3, lsr #8 /* r3 = ...3 */
1570 #endif
1571 strh r2, [r0, #0x01]
1572 strb r3, [r0, #0x03]
1573 RET
1574 LMEMCPY_4_PAD
1575
1576 /*
1577 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1578 */
1579 ldrb r2, [r1]
1580 ldrh r3, [r1, #0x01]
1581 ldrb r1, [r1, #0x03]
1582 strb r2, [r0]
1583 strh r3, [r0, #0x01]
1584 strb r1, [r0, #0x03]
1585 RET
1586 LMEMCPY_4_PAD
1587
1588 /*
1589 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1590 */
1591 ldr r2, [r1]
1592 #ifdef __ARMEB__
1593 strh r2, [r0, #0x02]
1594 mov r3, r2, lsr #16
1595 strh r3, [r0]
1596 #else
1597 strh r2, [r0]
1598 mov r3, r2, lsr #16
1599 strh r3, [r0, #0x02]
1600 #endif
1601 RET
1602 LMEMCPY_4_PAD
1603
1604 /*
1605 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1606 */
1607 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1608 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1609 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1610 strh r1, [r0]
1611 #ifdef __ARMEB__
1612 mov r2, r2, lsl #8 /* r2 = 012. */
1613 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1614 #else
1615 mov r2, r2, lsr #24 /* r2 = ...2 */
1616 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1617 #endif
1618 strh r2, [r0, #0x02]
1619 RET
1620 LMEMCPY_4_PAD
1621
1622 /*
1623 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1624 */
1625 ldrh r2, [r1]
1626 ldrh r3, [r1, #0x02]
1627 strh r2, [r0]
1628 strh r3, [r0, #0x02]
1629 RET
1630 LMEMCPY_4_PAD
1631
1632 /*
1633 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1634 */
1635 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1636 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1637 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1638 strh r1, [r0, #0x02]
1639 #ifdef __ARMEB__
1640 mov r3, r3, lsr #24 /* r3 = ...1 */
1641 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
1642 #else
1643 mov r3, r3, lsl #8 /* r3 = 321. */
1644 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1645 #endif
1646 strh r3, [r0]
1647 RET
1648 LMEMCPY_4_PAD
1649
1650 /*
1651 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1652 */
1653 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1654 #ifdef __ARMEB__
1655 strb r2, [r0, #0x03]
1656 mov r3, r2, lsr #8
1657 mov r1, r2, lsr #24
1658 strh r3, [r0, #0x01]
1659 strb r1, [r0]
1660 #else
1661 strb r2, [r0]
1662 mov r3, r2, lsr #8
1663 mov r1, r2, lsr #24
1664 strh r3, [r0, #0x01]
1665 strb r1, [r0, #0x03]
1666 #endif
1667 RET
1668 LMEMCPY_4_PAD
1669
1670 /*
1671 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1672 */
1673 ldrb r2, [r1]
1674 ldrh r3, [r1, #0x01]
1675 ldrb r1, [r1, #0x03]
1676 strb r2, [r0]
1677 strh r3, [r0, #0x01]
1678 strb r1, [r0, #0x03]
1679 RET
1680 LMEMCPY_4_PAD
1681
1682 /*
1683 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1684 */
1685 #ifdef __ARMEB__
1686 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1687 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1688 strb r3, [r0, #0x03]
1689 mov r3, r3, lsr #8 /* r3 = ...2 */
1690 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
1691 strh r3, [r0, #0x01]
1692 mov r2, r2, lsr #8 /* r2 = ...0 */
1693 strb r2, [r0]
1694 #else
1695 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1696 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1697 strb r2, [r0]
1698 mov r2, r2, lsr #8 /* r2 = ...1 */
1699 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1700 strh r2, [r0, #0x01]
1701 mov r3, r3, lsr #8 /* r3 = ...3 */
1702 strb r3, [r0, #0x03]
1703 #endif
1704 RET
1705 LMEMCPY_4_PAD
1706
1707 /*
1708 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1709 */
1710 ldrb r2, [r1]
1711 ldrh r3, [r1, #0x01]
1712 ldrb r1, [r1, #0x03]
1713 strb r2, [r0]
1714 strh r3, [r0, #0x01]
1715 strb r1, [r0, #0x03]
1716 RET
1717 LMEMCPY_4_PAD
1718
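Every LMEMCPY_4 case above is reached through the same computed branch: the two low bits of the destination and source addresses are packed into a four-bit index, which selects one of sixteen 64-byte-aligned stanzas. The 6- and 8-byte sections below reuse this dispatch. The index computation, in C (a sketch; the function name is invented):

    #include <stdint.h>

    /* Sketch of the 16-way case index used by the fixed-size copies. */
    static unsigned
    lmemcpy_index(const void *dst, const void *src)
    {
    	unsigned idx;

    	idx  = (unsigned)((uintptr_t)src & 0x03);	/* src low bits */
    	idx |= (unsigned)((uintptr_t)dst & 0x03) << 2;	/* dst low bits */
    	return (idx);	/* e.g. 0110: dst 8-bit, src 16-bit aligned */
    }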
1719
1720 /******************************************************************************
1721 * Special case for 6 byte copies
1722 */
1723 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1724 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1725 LMEMCPY_6_PAD
1726 .Lmemcpy_6:
1727 and r2, r1, #0x03
1728 orr r2, r2, r0, lsl #2
1729 ands r2, r2, #0x0f
1730 sub r3, pc, #0x14
1731 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1732
1733 /*
1734 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1735 */
1736 ldr r2, [r1]
1737 ldrh r3, [r1, #0x04]
1738 str r2, [r0]
1739 strh r3, [r0, #0x04]
1740 RET
1741 LMEMCPY_6_PAD
1742
1743 /*
1744 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1745 */
1746 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1747 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1748 #ifdef __ARMEB__
1749 mov r2, r2, lsl #8 /* r2 = 012. */
1750 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1751 #else
1752 mov r2, r2, lsr #8 /* r2 = .210 */
1753 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1754 #endif
1755 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1756 str r2, [r0]
1757 strh r3, [r0, #0x04]
1758 RET
1759 LMEMCPY_6_PAD
1760
1761 /*
1762 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1763 */
1764 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1765 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1766 #ifdef __ARMEB__
1767 mov r1, r3, lsr #16 /* r1 = ..23 */
1768 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
1769 str r1, [r0]
1770 strh r3, [r0, #0x04]
1771 #else
1772 mov r1, r3, lsr #16 /* r1 = ..54 */
1773 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1774 str r2, [r0]
1775 strh r1, [r0, #0x04]
1776 #endif
1777 RET
1778 LMEMCPY_6_PAD
1779
1780 /*
1781 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1782 */
1783 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1784 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
1785 	ldr	r1, [r1, #5]		/* BE:r1 = 5xxx  LE:r1 = xxx5 */
1786 #ifdef __ARMEB__
1787 mov r2, r2, lsl #24 /* r2 = 0... */
1788 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
1789 mov r3, r3, lsl #8 /* r3 = 234. */
1790 orr r1, r3, r1, lsr #24 /* r1 = 2345 */
1791 #else
1792 mov r2, r2, lsr #24 /* r2 = ...0 */
1793 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1794 mov r1, r1, lsl #8 /* r1 = xx5. */
1795 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
1796 #endif
1797 str r2, [r0]
1798 strh r1, [r0, #0x04]
1799 RET
1800 LMEMCPY_6_PAD
1801
1802 /*
1803 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1804 */
1805 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1806 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1807 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1808 strh r1, [r0, #0x01]
1809 #ifdef __ARMEB__
1810 mov r1, r3, lsr #24 /* r1 = ...0 */
1811 strb r1, [r0]
1812 mov r3, r3, lsl #8 /* r3 = 123. */
1813 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
1814 #else
1815 strb r3, [r0]
1816 mov r3, r3, lsr #24 /* r3 = ...3 */
1817 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1818 mov r2, r2, lsr #8 /* r2 = ...5 */
1819 #endif
1820 strh r3, [r0, #0x03]
1821 strb r2, [r0, #0x05]
1822 RET
1823 LMEMCPY_6_PAD
1824
1825 /*
1826 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1827 */
1828 ldrb r2, [r1]
1829 ldrh r3, [r1, #0x01]
1830 ldrh ip, [r1, #0x03]
1831 ldrb r1, [r1, #0x05]
1832 strb r2, [r0]
1833 strh r3, [r0, #0x01]
1834 strh ip, [r0, #0x03]
1835 strb r1, [r0, #0x05]
1836 RET
1837 LMEMCPY_6_PAD
1838
1839 /*
1840 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1841 */
1842 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1843 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1844 #ifdef __ARMEB__
1845 mov r3, r2, lsr #8 /* r3 = ...0 */
1846 strb r3, [r0]
1847 strb r1, [r0, #0x05]
1848 mov r3, r1, lsr #8 /* r3 = .234 */
1849 strh r3, [r0, #0x03]
1850 mov r3, r2, lsl #8 /* r3 = .01. */
1851 orr r3, r3, r1, lsr #24 /* r3 = .012 */
1852 strh r3, [r0, #0x01]
1853 #else
1854 strb r2, [r0]
1855 mov r3, r1, lsr #24
1856 strb r3, [r0, #0x05]
1857 mov r3, r1, lsr #8 /* r3 = .543 */
1858 strh r3, [r0, #0x03]
1859 mov r3, r2, lsr #8 /* r3 = ...1 */
1860 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
1861 strh r3, [r0, #0x01]
1862 #endif
1863 RET
1864 LMEMCPY_6_PAD
1865
1866 /*
1867 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1868 */
1869 ldrb r2, [r1]
1870 ldrh r3, [r1, #0x01]
1871 ldrh ip, [r1, #0x03]
1872 ldrb r1, [r1, #0x05]
1873 strb r2, [r0]
1874 strh r3, [r0, #0x01]
1875 strh ip, [r0, #0x03]
1876 strb r1, [r0, #0x05]
1877 RET
1878 LMEMCPY_6_PAD
1879
1880 /*
1881 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1882 */
1883 #ifdef __ARMEB__
1884 ldr r2, [r1] /* r2 = 0123 */
1885 ldrh r3, [r1, #0x04] /* r3 = ..45 */
1886 mov r1, r2, lsr #16 /* r1 = ..01 */
1887 	orr	r3, r3, r2, lsl #16	/* r3 = 2345 */
1888 strh r1, [r0]
1889 str r3, [r0, #0x02]
1890 #else
1891 ldrh r2, [r1, #0x04] /* r2 = ..54 */
1892 ldr r3, [r1] /* r3 = 3210 */
1893 mov r2, r2, lsl #16 /* r2 = 54.. */
1894 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
1895 strh r3, [r0]
1896 str r2, [r0, #0x02]
1897 #endif
1898 RET
1899 LMEMCPY_6_PAD
1900
1901 /*
1902 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1903 */
1904 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1905 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
1906 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1907 #ifdef __ARMEB__
1908 mov r2, r2, lsr #8 /* r2 = .345 */
1909 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
1910 #else
1911 mov r2, r2, lsl #8 /* r2 = 543. */
1912 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
1913 #endif
1914 strh r1, [r0]
1915 str r2, [r0, #0x02]
1916 RET
1917 LMEMCPY_6_PAD
1918
1919 /*
1920 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1921 */
1922 ldrh r2, [r1]
1923 ldr r3, [r1, #0x02]
1924 strh r2, [r0]
1925 str r3, [r0, #0x02]
1926 RET
1927 LMEMCPY_6_PAD
1928
1929 /*
1930 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1931 */
1932 ldrb r3, [r1] /* r3 = ...0 */
1933 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1934 ldrb r1, [r1, #0x05] /* r1 = ...5 */
1935 #ifdef __ARMEB__
1936 mov r3, r3, lsl #8 /* r3 = ..0. */
1937 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
1938 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
1939 #else
1940 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1941 mov r1, r1, lsl #24 /* r1 = 5... */
1942 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
1943 #endif
1944 strh r3, [r0]
1945 str r1, [r0, #0x02]
1946 RET
1947 LMEMCPY_6_PAD
1948
1949 /*
1950 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1951 */
1952 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1953 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
1954 #ifdef __ARMEB__
1955 mov r3, r2, lsr #24 /* r3 = ...0 */
1956 strb r3, [r0]
1957 mov r2, r2, lsl #8 /* r2 = 123. */
1958 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
1959 #else
1960 strb r2, [r0]
1961 mov r2, r2, lsr #8 /* r2 = .321 */
1962 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
1963 mov r1, r1, lsr #8 /* r1 = ...5 */
1964 #endif
1965 str r2, [r0, #0x01]
1966 strb r1, [r0, #0x05]
1967 RET
1968 LMEMCPY_6_PAD
1969
1970 /*
1971 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1972 */
1973 ldrb r2, [r1]
1974 ldrh r3, [r1, #0x01]
1975 ldrh ip, [r1, #0x03]
1976 ldrb r1, [r1, #0x05]
1977 strb r2, [r0]
1978 strh r3, [r0, #0x01]
1979 strh ip, [r0, #0x03]
1980 strb r1, [r0, #0x05]
1981 RET
1982 LMEMCPY_6_PAD
1983
1984 /*
1985 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1986 */
1987 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1988 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1989 #ifdef __ARMEB__
1990 mov r3, r2, lsr #8 /* r3 = ...0 */
1991 strb r3, [r0]
1992 mov r2, r2, lsl #24 /* r2 = 1... */
1993 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
1994 #else
1995 strb r2, [r0]
1996 mov r2, r2, lsr #8 /* r2 = ...1 */
1997 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
1998 mov r1, r1, lsr #24 /* r1 = ...5 */
1999 #endif
2000 str r2, [r0, #0x01]
2001 strb r1, [r0, #0x05]
2002 RET
2003 LMEMCPY_6_PAD
2004
2005 /*
2006 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2007 */
2008 ldrb r2, [r1]
2009 ldr r3, [r1, #0x01]
2010 ldrb r1, [r1, #0x05]
2011 strb r2, [r0]
2012 str r3, [r0, #0x01]
2013 strb r1, [r0, #0x05]
2014 RET
2015 LMEMCPY_6_PAD
2016
2017
2018 /******************************************************************************
2019 * Special case for 8 byte copies
2020 */
2021 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
2022 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
2023 LMEMCPY_8_PAD
2024 .Lmemcpy_8:
2025 and r2, r1, #0x03
2026 orr r2, r2, r0, lsl #2
2027 ands r2, r2, #0x0f
2028 sub r3, pc, #0x14
2029 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
2030
2031 /*
2032 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2033 */
2034 ldr r2, [r1]
2035 ldr r3, [r1, #0x04]
2036 str r2, [r0]
2037 str r3, [r0, #0x04]
2038 RET
2039 LMEMCPY_8_PAD
2040
2041 /*
2042 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2043 */
2044 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2045 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
2046 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2047 #ifdef __ARMEB__
2048 mov r3, r3, lsl #8 /* r3 = 012. */
2049 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
2050 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
2051 #else
2052 mov r3, r3, lsr #8 /* r3 = .210 */
2053 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
2054 mov r1, r1, lsl #24 /* r1 = 7... */
2055 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
2056 #endif
2057 str r3, [r0]
2058 str r2, [r0, #0x04]
2059 RET
2060 LMEMCPY_8_PAD
2061
2062 /*
2063 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2064 */
2065 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2066 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2067 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2068 #ifdef __ARMEB__
2069 mov r2, r2, lsl #16 /* r2 = 01.. */
2070 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2071 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
2072 #else
2073 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2074 mov r3, r3, lsr #16 /* r3 = ..54 */
2075 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
2076 #endif
2077 str r2, [r0]
2078 str r3, [r0, #0x04]
2079 RET
2080 LMEMCPY_8_PAD
2081
2082 /*
2083 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2084 */
2085 ldrb r3, [r1] /* r3 = ...0 */
2086 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2087 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
2088 #ifdef __ARMEB__
2089 mov r3, r3, lsl #24 /* r3 = 0... */
2090 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
2091 mov r2, r2, lsl #24 /* r2 = 4... */
2092 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
2093 #else
2094 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2095 mov r2, r2, lsr #24 /* r2 = ...4 */
2096 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
2097 #endif
2098 str r3, [r0]
2099 str r2, [r0, #0x04]
2100 RET
2101 LMEMCPY_8_PAD
2102
2103 /*
2104 * 0100: dst is 8-bit aligned, src is 32-bit aligned
2105 */
2106 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
2107 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
2108 #ifdef __ARMEB__
2109 mov r1, r3, lsr #24 /* r1 = ...0 */
2110 strb r1, [r0]
2111 mov r1, r3, lsr #8 /* r1 = .012 */
2112 strb r2, [r0, #0x07]
2113 mov r3, r3, lsl #24 /* r3 = 3... */
2114 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
2115 #else
2116 strb r3, [r0]
2117 mov r1, r2, lsr #24 /* r1 = ...7 */
2118 strb r1, [r0, #0x07]
2119 mov r1, r3, lsr #8 /* r1 = .321 */
2120 mov r3, r3, lsr #24 /* r3 = ...3 */
2121 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
2122 #endif
2123 strh r1, [r0, #0x01]
2124 str r3, [r0, #0x03]
2125 RET
2126 LMEMCPY_8_PAD
2127
2128 /*
2129 * 0101: dst is 8-bit aligned, src is 8-bit aligned
2130 */
2131 ldrb r2, [r1]
2132 ldrh r3, [r1, #0x01]
2133 ldr ip, [r1, #0x03]
2134 ldrb r1, [r1, #0x07]
2135 strb r2, [r0]
2136 strh r3, [r0, #0x01]
2137 str ip, [r0, #0x03]
2138 strb r1, [r0, #0x07]
2139 RET
2140 LMEMCPY_8_PAD
2141
2142 /*
2143 * 0110: dst is 8-bit aligned, src is 16-bit aligned
2144 */
2145 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2146 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2147 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2148 #ifdef __ARMEB__
2149 mov ip, r2, lsr #8 /* ip = ...0 */
2150 strb ip, [r0]
2151 mov ip, r2, lsl #8 /* ip = .01. */
2152 orr ip, ip, r3, lsr #24 /* ip = .012 */
2153 strb r1, [r0, #0x07]
2154 mov r3, r3, lsl #8 /* r3 = 345. */
2155 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
2156 #else
2157 strb r2, [r0] /* 0 */
2158 mov ip, r1, lsr #8 /* ip = ...7 */
2159 strb ip, [r0, #0x07] /* 7 */
2160 mov ip, r2, lsr #8 /* ip = ...1 */
2161 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2162 mov r3, r3, lsr #8 /* r3 = .543 */
2163 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
2164 #endif
2165 strh ip, [r0, #0x01]
2166 str r3, [r0, #0x03]
2167 RET
2168 LMEMCPY_8_PAD
2169
2170 /*
2171 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2172 */
2173 ldrb r3, [r1] /* r3 = ...0 */
2174 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2175 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
2176 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2177 strb r3, [r0]
2178 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
2179 #ifdef __ARMEB__
2180 strh r3, [r0, #0x01]
2181 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
2182 #else
2183 strh ip, [r0, #0x01]
2184 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
2185 #endif
2186 str r2, [r0, #0x03]
2187 strb r1, [r0, #0x07]
2188 RET
2189 LMEMCPY_8_PAD
2190
2191 /*
2192 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2193 */
2194 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2195 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2196 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2197 #ifdef __ARMEB__
2198 strh r1, [r0]
2199 mov r1, r3, lsr #16 /* r1 = ..45 */
2200 	orr	r2, r1, r2, lsl #16	/* r2 = 2345 */
2201 #else
2202 strh r2, [r0]
2203 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
2204 mov r3, r3, lsr #16 /* r3 = ..76 */
2205 #endif
2206 str r2, [r0, #0x02]
2207 strh r3, [r0, #0x06]
2208 RET
2209 LMEMCPY_8_PAD
2210
2211 /*
2212  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2213 */
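/*
 * Note: in this case r1 points one byte past a word boundary, so the
 * word load at r1 - 1 is aligned; it reads one byte before the start
 * of the source, but stays within the same aligned word (and thus the
 * same page) as the first source byte, so it cannot fault.
 */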
2214 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2215 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2216 ldrb ip, [r1, #0x07] /* ip = ...7 */
2217 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2218 strh r1, [r0]
2219 #ifdef __ARMEB__
2220 mov r1, r2, lsl #24 /* r1 = 2... */
2221 orr r1, r1, r3, lsr #8 /* r1 = 2345 */
2222 orr r3, ip, r3, lsl #8 /* r3 = 4567 */
2223 #else
2224 mov r1, r2, lsr #24 /* r1 = ...2 */
2225 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
2226 mov r3, r3, lsr #24 /* r3 = ...6 */
2227 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
2228 #endif
2229 str r1, [r0, #0x02]
2230 strh r3, [r0, #0x06]
2231 RET
2232 LMEMCPY_8_PAD
2233
2234 /*
2235 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2236 */
2237 ldrh r2, [r1]
2238 ldr ip, [r1, #0x02]
2239 ldrh r3, [r1, #0x06]
2240 strh r2, [r0]
2241 str ip, [r0, #0x02]
2242 strh r3, [r0, #0x06]
2243 RET
2244 LMEMCPY_8_PAD
2245
2246 /*
2247  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2248 */
2249 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
2250 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2251 ldrb ip, [r1] /* ip = ...0 */
2252 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
2253 strh r1, [r0, #0x06]
2254 #ifdef __ARMEB__
2255 mov r3, r3, lsr #24 /* r3 = ...5 */
2256 orr r3, r3, r2, lsl #8 /* r3 = 2345 */
2257 mov r2, r2, lsr #24 /* r2 = ...1 */
2258 orr r2, r2, ip, lsl #8 /* r2 = ..01 */
2259 #else
2260 mov r3, r3, lsl #24 /* r3 = 5... */
2261 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
2262 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
2263 #endif
2264 str r3, [r0, #0x02]
2265 strh r2, [r0]
2266 RET
2267 LMEMCPY_8_PAD
2268
2269 /*
2270  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2271 */
2272 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2273 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2274 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
2275 strh r1, [r0, #0x05]
2276 #ifdef __ARMEB__
2277 strb r3, [r0, #0x07]
2278 mov r1, r2, lsr #24 /* r1 = ...0 */
2279 strb r1, [r0]
2280 mov r2, r2, lsl #8 /* r2 = 123. */
2281 orr r2, r2, r3, lsr #24 /* r2 = 1234 */
2282 str r2, [r0, #0x01]
2283 #else
2284 strb r2, [r0]
2285 mov r1, r3, lsr #24 /* r1 = ...7 */
2286 strb r1, [r0, #0x07]
2287 mov r2, r2, lsr #8 /* r2 = .321 */
2288 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
2289 str r2, [r0, #0x01]
2290 #endif
2291 RET
2292 LMEMCPY_8_PAD
2293
2294 /*
2295  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2296 */
2297 ldrb r3, [r1] /* r3 = ...0 */
2298 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
2299 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2300 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2301 strb r3, [r0]
2302 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
2303 #ifdef __ARMEB__
2304 strh ip, [r0, #0x05]
2305 orr r2, r3, r2, lsl #16 /* r2 = 1234 */
2306 #else
2307 strh r3, [r0, #0x05]
2308 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
2309 #endif
2310 str r2, [r0, #0x01]
2311 strb r1, [r0, #0x07]
2312 RET
2313 LMEMCPY_8_PAD
2314
2315 /*
2316  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2317 */
2318 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2319 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2320 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2321 #ifdef __ARMEB__
2322 mov ip, r2, lsr #8 /* ip = ...0 */
2323 strb ip, [r0]
2324 mov ip, r2, lsl #24 /* ip = 1... */
2325 orr ip, ip, r3, lsr #8 /* ip = 1234 */
2326 strb r1, [r0, #0x07]
2327 mov r1, r1, lsr #8 /* r1 = ...6 */
2328 orr r1, r1, r3, lsl #8 /* r1 = 3456 */
2329 #else
2330 strb r2, [r0]
2331 mov ip, r2, lsr #8 /* ip = ...1 */
2332 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2333 mov r2, r1, lsr #8 /* r2 = ...7 */
2334 strb r2, [r0, #0x07]
2335 mov r1, r1, lsl #8 /* r1 = .76. */
2336 orr r1, r1, r3, lsr #24 /* r1 = .765 */
2337 #endif
2338 str ip, [r0, #0x01]
2339 strh r1, [r0, #0x05]
2340 RET
2341 LMEMCPY_8_PAD
2342
2343 /*
2344  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2345 */
2346 ldrb r2, [r1]
2347 ldr ip, [r1, #0x01]
2348 ldrh r3, [r1, #0x05]
2349 ldrb r1, [r1, #0x07]
2350 strb r2, [r0]
2351 str ip, [r0, #0x01]
2352 strh r3, [r0, #0x05]
2353 strb r1, [r0, #0x07]
2354 RET
2355 LMEMCPY_8_PAD
2356
2357 /******************************************************************************
2358 * Special case for 12 byte copies
2359 */
2360 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
2361 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
2362 LMEMCPY_C_PAD
2363 .Lmemcpy_c:
2364 and r2, r1, #0x03
2365 orr r2, r2, r0, lsl #2
2366 ands r2, r2, #0x0f
2367 sub r3, pc, #0x14
2368 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
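/*
 * The four instructions above build a 4-bit case number from the low
 * two bits of dst and src ((dst & 3) << 2 | (src & 3)) and branch into
 * a table of handlers.  Each handler is padded out to
 * 1 << LMEMCPY_C_LOG2 (128) bytes, and r3 is wound back to the start
 * of this dispatch block (the pc reads 8 bytes ahead on ARM, hence the
 * 0x14), so the handler for a non-zero case n lives at r3 + n * 128,
 * while case 0 simply falls through.  Roughly, as an illustrative C
 * sketch (not part of the build; goto_handler is a made-up helper):
 *
 *	unsigned idx = (((uintptr_t)dst & 3) << 2) |
 *	    ((uintptr_t)src & 3);
 *	if (idx != 0)
 *		goto_handler(base + idx * 128);
 *	// idx == 0: fall through to the fully aligned copy below
 */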
2369
2370 /*
2371 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2372 */
2373 ldr r2, [r1]
2374 ldr r3, [r1, #0x04]
2375 ldr r1, [r1, #0x08]
2376 str r2, [r0]
2377 str r3, [r0, #0x04]
2378 str r1, [r0, #0x08]
2379 RET
2380 LMEMCPY_C_PAD
2381
2382 /*
2383  * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1)
2384 */
2385 ldrb r2, [r1, #0xb] /* r2 = ...B */
2386 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2387 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2388 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2389 #ifdef __ARMEB__
2390 orr r2, r2, ip, lsl #8 /* r2 = 89AB */
2391 str r2, [r0, #0x08]
2392 mov r2, ip, lsr #24 /* r2 = ...7 */
2393 orr r2, r2, r3, lsl #8 /* r2 = 4567 */
2394 mov r1, r1, lsl #8 /* r1 = 012. */
2395 orr r1, r1, r3, lsr #24 /* r1 = 0123 */
2396 #else
2397 mov r2, r2, lsl #24 /* r2 = B... */
2398 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
2399 str r2, [r0, #0x08]
2400 mov r2, ip, lsl #24 /* r2 = 7... */
2401 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
2402 mov r1, r1, lsr #8 /* r1 = .210 */
2403 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
2404 #endif
2405 str r2, [r0, #0x04]
2406 str r1, [r0]
2407 RET
2408 LMEMCPY_C_PAD
2409
2410 /*
2411 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2412 */
2413 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2414 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2415 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2416 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2417 #ifdef __ARMEB__
2418 mov r2, r2, lsl #16 /* r2 = 01.. */
2419 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2420 str r2, [r0]
2421 mov r3, r3, lsl #16 /* r3 = 45.. */
2422 orr r3, r3, ip, lsr #16 /* r3 = 4567 */
2423 orr r1, r1, ip, lsl #16 /* r1 = 89AB */
2424 #else
2425 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2426 str r2, [r0]
2427 mov r3, r3, lsr #16 /* r3 = ..54 */
2428 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
2429 mov r1, r1, lsl #16 /* r1 = BA.. */
2430 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
2431 #endif
2432 str r3, [r0, #0x04]
2433 str r1, [r0, #0x08]
2434 RET
2435 LMEMCPY_C_PAD
2436
2437 /*
2438  * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3)
2439 */
2440 ldrb r2, [r1] /* r2 = ...0 */
2441 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2442 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2443 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2444 #ifdef __ARMEB__
2445 mov r2, r2, lsl #24 /* r2 = 0... */
2446 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
2447 str r2, [r0]
2448 mov r3, r3, lsl #24 /* r3 = 4... */
2449 orr r3, r3, ip, lsr #8 /* r3 = 4567 */
2450 mov r1, r1, lsr #8 /* r1 = .9AB */
2451 orr r1, r1, ip, lsl #24 /* r1 = 89AB */
2452 #else
2453 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
2454 str r2, [r0]
2455 mov r3, r3, lsr #24 /* r3 = ...4 */
2456 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
2457 mov r1, r1, lsl #8 /* r1 = BA9. */
2458 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
2459 #endif
2460 str r3, [r0, #0x04]
2461 str r1, [r0, #0x08]
2462 RET
2463 LMEMCPY_C_PAD
2464
2465 /*
2466 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2467 */
2468 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2469 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2470 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
2471 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
2472 strh r1, [r0, #0x01]
2473 #ifdef __ARMEB__
2474 mov r1, r2, lsr #24 /* r1 = ...0 */
2475 strb r1, [r0]
2476 mov r1, r2, lsl #24 /* r1 = 3... */
2477 	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
2478 mov r1, r3, lsl #24 /* r1 = 7... */
2479 orr r1, r1, ip, lsr #8 /* r1 = 789A */
2480 #else
2481 strb r2, [r0]
2482 mov r1, r2, lsr #24 /* r1 = ...3 */
2483 	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
2484 mov r1, r3, lsr #24 /* r1 = ...7 */
2485 orr r1, r1, ip, lsl #8 /* r1 = A987 */
2486 mov ip, ip, lsr #24 /* ip = ...B */
2487 #endif
2488 str r2, [r0, #0x03]
2489 str r1, [r0, #0x07]
2490 strb ip, [r0, #0x0b]
2491 RET
2492 LMEMCPY_C_PAD
2493
2494 /*
2495 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2496 */
2497 ldrb r2, [r1]
2498 ldrh r3, [r1, #0x01]
2499 ldr ip, [r1, #0x03]
2500 strb r2, [r0]
2501 ldr r2, [r1, #0x07]
2502 ldrb r1, [r1, #0x0b]
2503 strh r3, [r0, #0x01]
2504 str ip, [r0, #0x03]
2505 str r2, [r0, #0x07]
2506 strb r1, [r0, #0x0b]
2507 RET
2508 LMEMCPY_C_PAD
2509
2510 /*
2511 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2512 */
2513 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2514 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2515 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2516 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2517 #ifdef __ARMEB__
2518 mov r2, r2, ror #8 /* r2 = 1..0 */
2519 strb r2, [r0]
2520 mov r2, r2, lsr #16 /* r2 = ..1. */
2521 orr r2, r2, r3, lsr #24 /* r2 = ..12 */
2522 strh r2, [r0, #0x01]
2523 mov r2, r3, lsl #8 /* r2 = 345. */
2524 orr r3, r2, ip, lsr #24 /* r3 = 3456 */
2525 mov r2, ip, lsl #8 /* r2 = 789. */
2526 orr r2, r2, r1, lsr #8 /* r2 = 789A */
2527 #else
2528 strb r2, [r0]
2529 mov r2, r2, lsr #8 /* r2 = ...1 */
2530 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2531 strh r2, [r0, #0x01]
2532 mov r2, r3, lsr #8 /* r2 = .543 */
2533 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
2534 mov r2, ip, lsr #8 /* r2 = .987 */
2535 orr r2, r2, r1, lsl #24 /* r2 = A987 */
2536 mov r1, r1, lsr #8 /* r1 = ...B */
2537 #endif
2538 str r3, [r0, #0x03]
2539 str r2, [r0, #0x07]
2540 strb r1, [r0, #0x0b]
2541 RET
2542 LMEMCPY_C_PAD
2543
2544 /*
2545 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2546 */
2547 ldrb r2, [r1]
2548 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2549 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2550 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2551 strb r2, [r0]
2552 #ifdef __ARMEB__
2553 mov r2, r3, lsr #16 /* r2 = ..12 */
2554 strh r2, [r0, #0x01]
2555 mov r3, r3, lsl #16 /* r3 = 34.. */
2556 orr r3, r3, ip, lsr #16 /* r3 = 3456 */
2557 mov ip, ip, lsl #16 /* ip = 78.. */
2558 orr ip, ip, r1, lsr #16 /* ip = 789A */
2559 mov r1, r1, lsr #8 /* r1 = .9AB */
2560 #else
2561 strh r3, [r0, #0x01]
2562 mov r3, r3, lsr #16 /* r3 = ..43 */
2563 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
2564 mov ip, ip, lsr #16 /* ip = ..87 */
2565 orr ip, ip, r1, lsl #16 /* ip = A987 */
2566 mov r1, r1, lsr #16 /* r1 = ..xB */
2567 #endif
2568 str r3, [r0, #0x03]
2569 str ip, [r0, #0x07]
2570 strb r1, [r0, #0x0b]
2571 RET
2572 LMEMCPY_C_PAD
2573
2574 /*
2575 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2576 */
2577 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
2578 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2579 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
2580 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2581 #ifdef __ARMEB__
2582 strh r1, [r0]
2583 mov r1, ip, lsl #16 /* r1 = 23.. */
2584 orr r1, r1, r3, lsr #16 /* r1 = 2345 */
2585 mov r3, r3, lsl #16 /* r3 = 67.. */
2586 orr r3, r3, r2, lsr #16 /* r3 = 6789 */
2587 #else
2588 strh ip, [r0]
2589 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
2590 mov r3, r3, lsr #16 /* r3 = ..76 */
2591 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
2592 mov r2, r2, lsr #16 /* r2 = ..BA */
2593 #endif
2594 str r1, [r0, #0x02]
2595 str r3, [r0, #0x06]
2596 strh r2, [r0, #0x0a]
2597 RET
2598 LMEMCPY_C_PAD
2599
2600 /*
2601 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2602 */
2603 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2604 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2605 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
2606 strh ip, [r0]
2607 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2608 ldrb r1, [r1, #0x0b] /* r1 = ...B */
2609 #ifdef __ARMEB__
2610 mov r2, r2, lsl #24 /* r2 = 2... */
2611 orr r2, r2, r3, lsr #8 /* r2 = 2345 */
2612 mov r3, r3, lsl #24 /* r3 = 6... */
2613 orr r3, r3, ip, lsr #8 /* r3 = 6789 */
2614 orr r1, r1, ip, lsl #8 /* r1 = 89AB */
2615 #else
2616 mov r2, r2, lsr #24 /* r2 = ...2 */
2617 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
2618 mov r3, r3, lsr #24 /* r3 = ...6 */
2619 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
2620 mov r1, r1, lsl #8 /* r1 = ..B. */
2621 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
2622 #endif
2623 str r2, [r0, #0x02]
2624 str r3, [r0, #0x06]
2625 strh r1, [r0, #0x0a]
2626 RET
2627 LMEMCPY_C_PAD
2628
2629 /*
2630 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2631 */
2632 ldrh r2, [r1]
2633 ldr r3, [r1, #0x02]
2634 ldr ip, [r1, #0x06]
2635 ldrh r1, [r1, #0x0a]
2636 strh r2, [r0]
2637 str r3, [r0, #0x02]
2638 str ip, [r0, #0x06]
2639 strh r1, [r0, #0x0a]
2640 RET
2641 LMEMCPY_C_PAD
2642
2643 /*
2644 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2645 */
2646 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
2647 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
2648 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
2649 strh ip, [r0, #0x0a]
2650 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2651 ldrb r1, [r1] /* r1 = ...0 */
2652 #ifdef __ARMEB__
2653 mov r2, r2, lsr #24 /* r2 = ...9 */
2654 orr r2, r2, r3, lsl #8 /* r2 = 6789 */
2655 mov r3, r3, lsr #24 /* r3 = ...5 */
2656 orr r3, r3, ip, lsl #8 /* r3 = 2345 */
2657 mov r1, r1, lsl #8 /* r1 = ..0. */
2658 orr r1, r1, ip, lsr #24 /* r1 = ..01 */
2659 #else
2660 mov r2, r2, lsl #24 /* r2 = 9... */
2661 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
2662 mov r3, r3, lsl #24 /* r3 = 5... */
2663 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
2664 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
2665 #endif
2666 str r2, [r0, #0x06]
2667 str r3, [r0, #0x02]
2668 strh r1, [r0]
2669 RET
2670 LMEMCPY_C_PAD
2671
2672 /*
2673 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2674 */
2675 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2676 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
2677 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
2678 #ifdef __ARMEB__
2679 mov r3, r2, lsr #24 /* r3 = ...0 */
2680 strb r3, [r0]
2681 mov r2, r2, lsl #8 /* r2 = 123. */
2682 orr r2, r2, ip, lsr #24 /* r2 = 1234 */
2683 str r2, [r0, #0x01]
2684 mov r2, ip, lsl #8 /* r2 = 567. */
2685 orr r2, r2, r1, lsr #24 /* r2 = 5678 */
2686 str r2, [r0, #0x05]
2687 mov r2, r1, lsr #8 /* r2 = ..9A */
2688 strh r2, [r0, #0x09]
2689 strb r1, [r0, #0x0b]
2690 #else
2691 strb r2, [r0]
2692 mov r3, r2, lsr #8 /* r3 = .321 */
2693 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
2694 str r3, [r0, #0x01]
2695 mov r3, ip, lsr #8 /* r3 = .765 */
2696 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
2697 str r3, [r0, #0x05]
2698 mov r1, r1, lsr #8 /* r1 = .BA9 */
2699 strh r1, [r0, #0x09]
2700 mov r1, r1, lsr #16 /* r1 = ...B */
2701 strb r1, [r0, #0x0b]
2702 #endif
2703 RET
2704 LMEMCPY_C_PAD
2705
2706 /*
2707 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2708 */
2709 ldrb r2, [r1, #0x0b] /* r2 = ...B */
2710 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
2711 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2712 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2713 strb r2, [r0, #0x0b]
2714 #ifdef __ARMEB__
2715 strh r3, [r0, #0x09]
2716 mov r3, r3, lsr #16 /* r3 = ..78 */
2717 orr r3, r3, ip, lsl #16 /* r3 = 5678 */
2718 mov ip, ip, lsr #16 /* ip = ..34 */
2719 orr ip, ip, r1, lsl #16 /* ip = 1234 */
2720 mov r1, r1, lsr #16 /* r1 = ..x0 */
2721 #else
2722 mov r2, r3, lsr #16 /* r2 = ..A9 */
2723 strh r2, [r0, #0x09]
2724 mov r3, r3, lsl #16 /* r3 = 87.. */
2725 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
2726 mov ip, ip, lsl #16 /* ip = 43.. */
2727 orr ip, ip, r1, lsr #16 /* ip = 4321 */
2728 mov r1, r1, lsr #8 /* r1 = .210 */
2729 #endif
2730 str r3, [r0, #0x05]
2731 str ip, [r0, #0x01]
2732 strb r1, [r0]
2733 RET
2734 LMEMCPY_C_PAD
2735
2736 /*
2737 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2738 */
2739 #ifdef __ARMEB__
2740 ldrh r2, [r1, #0x0a] /* r2 = ..AB */
2741 ldr ip, [r1, #0x06] /* ip = 6789 */
2742 ldr r3, [r1, #0x02] /* r3 = 2345 */
2743 ldrh r1, [r1] /* r1 = ..01 */
2744 strb r2, [r0, #0x0b]
2745 mov r2, r2, lsr #8 /* r2 = ...A */
2746 orr r2, r2, ip, lsl #8 /* r2 = 789A */
2747 mov ip, ip, lsr #8 /* ip = .678 */
2748 orr ip, ip, r3, lsl #24 /* ip = 5678 */
2749 mov r3, r3, lsr #8 /* r3 = .234 */
2750 orr r3, r3, r1, lsl #24 /* r3 = 1234 */
2751 mov r1, r1, lsr #8 /* r1 = ...0 */
2752 strb r1, [r0]
2753 str r3, [r0, #0x01]
2754 str ip, [r0, #0x05]
2755 strh r2, [r0, #0x09]
2756 #else
2757 ldrh r2, [r1] /* r2 = ..10 */
2758 ldr r3, [r1, #0x02] /* r3 = 5432 */
2759 ldr ip, [r1, #0x06] /* ip = 9876 */
2760 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
2761 strb r2, [r0]
2762 mov r2, r2, lsr #8 /* r2 = ...1 */
2763 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2764 mov r3, r3, lsr #24 /* r3 = ...5 */
2765 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
2766 mov ip, ip, lsr #24 /* ip = ...9 */
2767 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
2768 mov r1, r1, lsr #8 /* r1 = ...B */
2769 str r2, [r0, #0x01]
2770 str r3, [r0, #0x05]
2771 strh ip, [r0, #0x09]
2772 strb r1, [r0, #0x0b]
2773 #endif
2774 RET
2775 LMEMCPY_C_PAD
2776
2777 /*
2778 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2779 */
2780 ldrb r2, [r1]
2781 ldr r3, [r1, #0x01]
2782 ldr ip, [r1, #0x05]
2783 strb r2, [r0]
2784 ldrh r2, [r1, #0x09]
2785 ldrb r1, [r1, #0x0b]
2786 str r3, [r0, #0x01]
2787 str ip, [r0, #0x05]
2788 strh r2, [r0, #0x09]
2789 strb r1, [r0, #0x0b]
2790 RET
2791 #endif /* __XSCALE__ */
2792
2793 #ifdef GPROF
2794
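/*
 * Profiling boundary labels: when the kernel is built with GPROF,
 * these symbols appear to be used to classify profiling samples, with
 * btrap/etrap bracketing trap handling and bintr/eintr bracketing
 * interrupt handling (mirroring the other ports); the nops just give
 * each label a distinct address.
 */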
2795 ENTRY(user)
2796 nop
2797 ENTRY(btrap)
2798 nop
2799 ENTRY(etrap)
2800 nop
2801 ENTRY(bintr)
2802 nop
2803 ENTRY(eintr)
2804 nop
2805
2806 #endif