FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 /*
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
29 *
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
46 * written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
59 */
60 /*
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
63 *
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 *
76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86 * POSSIBILITY OF SUCH DAMAGE.
87 */
88
89 #include <machine/asm.h>
90 __FBSDID("$FreeBSD: releng/10.2/sys/arm/arm/support.S 275767 2014-12-14 16:28:53Z andrew $");
91
92 #include "assym.s"
93
94 .syntax unified
95
/*
 * Literal pool: each .word holds the ADDRESS of a kernel variable.
 * _arm_memcpy/_arm_bzero are optional platform-provided accelerated
 * routines (a NULL function pointer means "no hook installed" -- see the
 * cmp #0 checks below); _min_memcpy_size/_min_bzero_size are the minimum
 * byte counts for which using the hook is worthwhile.
 */
96 .L_arm_memcpy:
97 .word _C_LABEL(_arm_memcpy)
98 .L_arm_bzero:
99 .word _C_LABEL(_arm_bzero)
100 .L_min_memcpy_size:
101 .word _C_LABEL(_min_memcpy_size)
102 .L_min_bzero_size:
103 .word _C_LABEL(_min_bzero_size)
104 /*
105 * memset: Sets a block of memory to the specified value
106 *
107 * On entry:
108 * r0 - dest address
109 * r1 - byte to write
110 * r2 - number of bytes to write
111 *
112 * On exit:
113 * r0 - dest address
114 */
115 /* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * void bzero(void *dst /* r0 */, size_t len /* r1 */)
 *
 * If a platform bzero hook is installed (non-NULL *_arm_bzero) and the
 * length is at least *_min_bzero_size, try the hook first; the hook
 * returns 0 in r0 on success.  Otherwise fall through to the shared
 * memset tail (do_memset) with a fill byte of zero in r3.
 */
116 ENTRY(bzero)
117 ldr r3, .L_arm_bzero
118 ldr r3, [r3]
119 cmp r3, #0
120 beq .Lnormal0
121 ldr r2, .L_min_bzero_size
122 ldr r2, [r2]
123 cmp r1, r2
124 blt .Lnormal0
/* Call the hook: r0 = dst, r1 = len, r2 = 0 (flags arg, presumably --
 * TODO confirm against _arm_bzero's prototype). */
125 stmfd sp!, {r0, r1, lr}
126 mov r2, #0
127 mov lr, pc
/* Old-style indirect call; the Z flag set by the cmp below survives the
 * ldmfd (LDM does not alter flags), so RETeq returns iff the hook
 * reported success (returned 0). */
128 mov pc, r3
129 cmp r0, #0
130 ldmfd sp!, {r0, r1, lr}
131 RETeq
132 .Lnormal0:
/* Hook absent/declined: fill value 0, join the common memset path. */
133 mov r3, #0x00
134 b do_memset
135 END(bzero)
136 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
/*
 * void *memset(void *dst /* r0 */, int c /* r1 */, size_t len /* r2 */)
 *
 * At do_memset (shared with bzero): r0 = dst (preserved for return),
 * r3 = fill byte, r1 = byte count, ip = working dest pointer.
 * Strategy: byte-fill to word alignment, replicate the byte across a
 * word (and a register pair), then store in descending chunk sizes
 * (128/32/16/4 bytes, then a byte tail).  On _ARM_ARCH_5E the pointer
 * is additionally 8-byte aligned so strd can be used.
 */
137 ENTRY(memset)
138 and r3, r1, #0xff /* We deal with bytes */
139 mov r1, r2
140 do_memset:
141 cmp r1, #0x04 /* Do we have less than 4 bytes */
142 mov ip, r0
143 blt .Lmemset_lessthanfour
144
145 /* Ok first we will word align the address */
146 ands r2, ip, #0x03 /* Get the bottom two bits */
147 bne .Lmemset_wordunaligned /* The address is not word aligned */
148
149 /* We are now word aligned */
150 .Lmemset_wordaligned:
151 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
152 #ifdef _ARM_ARCH_5E
153 tst ip, #0x04 /* Quad-align for armv5e */
154 #else
155 cmp r1, #0x10
156 #endif
157 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
158 #ifdef _ARM_ARCH_5E
159 subne r1, r1, #0x04 /* Quad-align if necessary */
160 strne r3, [ip], #0x04
161 cmp r1, #0x10
162 #endif
163 blt .Lmemset_loop4 /* If less than 16 then use words */
164 mov r2, r3 /* Duplicate data */
165 cmp r1, #0x80 /* If < 128 then skip the big loop */
166 blt .Lmemset_loop32
167
168 /* Do 128 bytes at a time */
169 .Lmemset_loop128:
170 subs r1, r1, #0x80
171 #ifdef _ARM_ARCH_5E
172 strdge r2, [ip], #0x08
173 strdge r2, [ip], #0x08
174 strdge r2, [ip], #0x08
175 strdge r2, [ip], #0x08
176 strdge r2, [ip], #0x08
177 strdge r2, [ip], #0x08
178 strdge r2, [ip], #0x08
179 strdge r2, [ip], #0x08
180 strdge r2, [ip], #0x08
181 strdge r2, [ip], #0x08
182 strdge r2, [ip], #0x08
183 strdge r2, [ip], #0x08
184 strdge r2, [ip], #0x08
185 strdge r2, [ip], #0x08
186 strdge r2, [ip], #0x08
187 strdge r2, [ip], #0x08
188 #else
189 stmiage ip!, {r2-r3}
190 stmiage ip!, {r2-r3}
191 stmiage ip!, {r2-r3}
192 stmiage ip!, {r2-r3}
193 stmiage ip!, {r2-r3}
194 stmiage ip!, {r2-r3}
195 stmiage ip!, {r2-r3}
196 stmiage ip!, {r2-r3}
197 stmiage ip!, {r2-r3}
198 stmiage ip!, {r2-r3}
199 stmiage ip!, {r2-r3}
200 stmiage ip!, {r2-r3}
201 stmiage ip!, {r2-r3}
202 stmiage ip!, {r2-r3}
203 stmiage ip!, {r2-r3}
204 stmiage ip!, {r2-r3}
205 #endif
206 bgt .Lmemset_loop128
207 RETeq /* Zero length so just exit */
208
209 add r1, r1, #0x80 /* Adjust for extra sub */
210
211 /* Do 32 bytes at a time */
212 .Lmemset_loop32:
213 subs r1, r1, #0x20
214 #ifdef _ARM_ARCH_5E
215 strdge r2, [ip], #0x08
216 strdge r2, [ip], #0x08
217 strdge r2, [ip], #0x08
218 strdge r2, [ip], #0x08
219 #else
220 stmiage ip!, {r2-r3}
221 stmiage ip!, {r2-r3}
222 stmiage ip!, {r2-r3}
223 stmiage ip!, {r2-r3}
224 #endif
225 bgt .Lmemset_loop32
226 RETeq /* Zero length so just exit */
227
228 adds r1, r1, #0x10 /* Partially adjust for extra sub */
229
230 /* Deal with 16 bytes or more */
231 #ifdef _ARM_ARCH_5E
232 strdge r2, [ip], #0x08
233 strdge r2, [ip], #0x08
234 #else
235 stmiage ip!, {r2-r3}
236 stmiage ip!, {r2-r3}
237 #endif
238 RETeq /* Zero length so just exit */
239
240 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
241
242 /* We have at least 4 bytes so copy as words */
243 .Lmemset_loop4:
244 subs r1, r1, #0x04
245 strge r3, [ip], #0x04
246 bgt .Lmemset_loop4
247 RETeq /* Zero length so just exit */
248
249 #ifdef _ARM_ARCH_5E
250 /* Compensate for 64-bit alignment check */
251 adds r1, r1, #0x04
252 RETeq
253 cmp r1, #2
254 #else
255 cmp r1, #-2
256 #endif
257
/* 1-3 trailing bytes: flags from the cmp above select how many. */
258 strb r3, [ip], #0x01 /* Set 1 byte */
259 strbge r3, [ip], #0x01 /* Set another byte */
260 strbgt r3, [ip] /* and a third */
261 RET /* Exit */
262
/* Dest not word aligned: r2 = misalignment (1-3); store 4-r2 bytes to
 * reach a word boundary, then re-enter the aligned path if >= 4 remain. */
263 .Lmemset_wordunaligned:
264 rsb r2, r2, #0x004
265 strb r3, [ip], #0x01 /* Set 1 byte */
266 cmp r2, #0x02
267 strbge r3, [ip], #0x01 /* Set another byte */
268 sub r1, r1, r2
269 strbgt r3, [ip], #0x01 /* and a third */
270 cmp r1, #0x04 /* More than 4 bytes left? */
271 bge .Lmemset_wordaligned /* Yup */
272
273 .Lmemset_lessthanfour:
274 cmp r1, #0x00
275 RETeq /* Zero length so exit */
276 strb r3, [ip], #0x01 /* Set 1 byte */
277 cmp r1, #0x02
278 strbge r3, [ip], #0x01 /* Set another byte */
279 strbgt r3, [ip] /* and a third */
280 RET /* Exit */
281 EEND(memset)
282 END(bzero)
283
/*
 * int bcmp(const void *b1 /* r0 */, const void *b2 /* r2: len; r1 */, size_t len)
 *
 * In:  r0 = b1, r1 = b2, r2 = len.  Out: r0 = 0 if equal, else the
 * (signed) difference of the first mismatching bytes, memcmp-style.
 * b1 is moved into ip so r0 can accumulate the result.  len == 6 gets a
 * dedicated hand-scheduled path (common for MAC-address compares).
 */
284 ENTRY(bcmp)
285 mov ip, r0
286 cmp r2, #0x06
287 beq .Lmemcmp_6bytes
288 mov r0, #0x00
289
290 /* Are both addresses aligned the same way? */
291 cmp r2, #0x00
292 eorsne r3, ip, r1
293 RETeq /* len == 0, or same addresses! */
294 tst r3, #0x03
295 subne r2, r2, #0x01
296 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
297
298 /* Word-align the addresses, if necessary */
/*
 * Computed jump: r3 = 6 * ((b2 - 5) & 3), then pc += r3 * 8 when
 * nonzero.  Each "compare up to N bytes" stanza below is exactly six
 * 4-byte instructions, so the scaled offset (plus the implicit pc+8
 * prefetch, absorbed by the nop) lands on the right stanza.  Do NOT
 * insert or remove instructions in the stanzas below.
 */
299 sub r3, r1, #0x05
300 ands r3, r3, #0x03
301 add r3, r3, r3, lsl #1
302 addne pc, pc, r3, lsl #3
303 nop
304
305 /* Compare up to 3 bytes */
306 ldrb r0, [ip], #0x01
307 ldrb r3, [r1], #0x01
308 subs r0, r0, r3
309 RETne
310 subs r2, r2, #0x01
311 RETeq
312
313 /* Compare up to 2 bytes */
314 ldrb r0, [ip], #0x01
315 ldrb r3, [r1], #0x01
316 subs r0, r0, r3
317 RETne
318 subs r2, r2, #0x01
319 RETeq
320
321 /* Compare 1 byte */
322 ldrb r0, [ip], #0x01
323 ldrb r3, [r1], #0x01
324 subs r0, r0, r3
325 RETne
326 subs r2, r2, #0x01
327 RETeq
328
329 /* Compare 4 bytes at a time, if possible */
330 subs r2, r2, #0x04
331 bcc .Lmemcmp_bytewise
332 .Lmemcmp_word_aligned:
333 ldr r0, [ip], #0x04
334 ldr r3, [r1], #0x04
335 subs r2, r2, #0x04
336 cmpcs r0, r3
337 beq .Lmemcmp_word_aligned
338 sub r0, r0, r3
339
340 /* Correct for extra subtraction, and check if done */
341 adds r2, r2, #0x04
342 cmpeq r0, #0x00 /* If done, did all bytes match? */
343 RETeq /* Yup. Just return */
344
345 /* Re-do the final word byte-wise */
346 sub ip, ip, #0x04
347 sub r1, r1, #0x04
348
349 .Lmemcmp_bytewise:
350 add r2, r2, #0x03
351 .Lmemcmp_bytewise2:
352 ldrb r0, [ip], #0x01
353 ldrb r3, [r1], #0x01
354 subs r2, r2, #0x01
355 cmpcs r0, r3
356 beq .Lmemcmp_bytewise2
357 sub r0, r0, r3
358 RET
359
360 /*
361 * 6 byte compares are very common, thanks to the network stack.
362 * This code is hand-scheduled to reduce the number of stalls for
363 * load results. Everything else being equal, this will be ~32%
364 * faster than a byte-wise memcmp.
365 */
366 .align 5
367 .Lmemcmp_6bytes:
368 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
369 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
370 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
371 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
372 ldrbeq r3, [ip, #0x01] /* r3 = b1#1 */
373 RETne /* Return if mismatch on #0 */
374 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
375 ldrbeq r3, [r1, #0x02] /* r3 = b2#2 */
376 ldrbeq r0, [ip, #0x02] /* r0 = b1#2 */
377 RETne /* Return if mismatch on #1 */
378 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
379 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
380 ldrbeq r3, [ip, #0x03] /* r3 = b1#3 */
381 RETne /* Return if mismatch on #2 */
382 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
383 ldrbeq r3, [r1, #0x04] /* r3 = b2#4 */
384 ldrbeq r0, [ip, #0x04] /* r0 = b1#4 */
385 RETne /* Return if mismatch on #3 */
386 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
387 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
388 ldrbeq r3, [ip, #0x05] /* r3 = b1#5 */
389 RETne /* Return if mismatch on #4 */
390 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
391 RET
392 END(bcmp)
393
/*
 * void bcopy(const void *src, void *dst, size_t len)
 * void *memmove(void *dst, const void *src, size_t len)
 *
 * bcopy(src, dst, len) has its first two arguments in the opposite
 * order from memmove(dst, src, len), so it swaps r0/r1 with the
 * classic three-eor trick and falls into memmove.
 *
 * memmove: if the buffers do not overlap (|dst - src| >= len), tail-call
 * memcpy.  Otherwise copy forwards when dst < src and backwards when
 * dst > src so the overlapping region is never clobbered before it is
 * read.  Both directions: word-copy in 32/12/4-byte chunks once both
 * pointers are word aligned; the .L*srcul* paths handle a source that
 * is misaligned relative to the destination by 1/2/3 bytes, stitching
 * words together with shift/orr pairs (byte order differs under
 * __ARMEB__).
 */
394 ENTRY(bcopy)
395 /* switch the source and destination registers */
396 eor r0, r1, r0
397 eor r1, r0, r1
398 eor r0, r1, r0
399 EENTRY(memmove)
400 /* Do the buffers overlap? */
401 cmp r0, r1
402 RETeq /* Bail now if src/dst are the same */
403 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
404 subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */
405 cmp r3, r2 /* if (r3 < len) we have an overlap */
406 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
407
408 /* Determine copy direction */
409 cmp r1, r0
410 bcc .Lmemmove_backwards
411
412 moveq r0, #0 /* Quick abort for len=0 */
413 RETeq
414
415 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
416 subs r2, r2, #4
417 blt .Lmemmove_fl4 /* less than 4 bytes */
418 ands r12, r0, #3
419 bne .Lmemmove_fdestul /* oh unaligned destination addr */
420 ands r12, r1, #3
421 bne .Lmemmove_fsrcul /* oh unaligned source addr */
422
423 .Lmemmove_ft8:
424 /* We have aligned source and destination */
425 subs r2, r2, #8
426 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
427 subs r2, r2, #0x14
428 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
429 stmdb sp!, {r4} /* borrow r4 */
430
431 /* blat 32 bytes at a time */
432 /* XXX for really big copies perhaps we should use more registers */
433 .Lmemmove_floop32:
434 ldmia r1!, {r3, r4, r12, lr}
435 stmia r0!, {r3, r4, r12, lr}
436 ldmia r1!, {r3, r4, r12, lr}
437 stmia r0!, {r3, r4, r12, lr}
438 subs r2, r2, #0x20
439 bge .Lmemmove_floop32
440
441 cmn r2, #0x10
442 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
443 stmiage r0!, {r3, r4, r12, lr}
444 subge r2, r2, #0x10
445 ldmia sp!, {r4} /* return r4 */
446
447 .Lmemmove_fl32:
448 adds r2, r2, #0x14
449
450 /* blat 12 bytes at a time */
451 .Lmemmove_floop12:
452 ldmiage r1!, {r3, r12, lr}
453 stmiage r0!, {r3, r12, lr}
454 subsge r2, r2, #0x0c
455 bge .Lmemmove_floop12
456
457 .Lmemmove_fl12:
458 adds r2, r2, #8
459 blt .Lmemmove_fl4
460
461 subs r2, r2, #4
462 ldrlt r3, [r1], #4
463 strlt r3, [r0], #4
464 ldmiage r1!, {r3, r12}
465 stmiage r0!, {r3, r12}
466 subge r2, r2, #4
467
468 .Lmemmove_fl4:
469 /* less than 4 bytes to go */
470 adds r2, r2, #4
471 ldmiaeq sp!, {r0, pc} /* done */
472
473 /* copy the crud byte at a time */
474 cmp r2, #2
475 ldrb r3, [r1], #1
476 strb r3, [r0], #1
477 ldrbge r3, [r1], #1
478 strbge r3, [r0], #1
479 ldrbgt r3, [r1], #1
480 strbgt r3, [r0], #1
481 ldmia sp!, {r0, pc}
482
483 /* erg - unaligned destination */
484 .Lmemmove_fdestul:
485 rsb r12, r12, #4
486 cmp r12, #2
487
488 /* align destination with byte copies */
489 ldrb r3, [r1], #1
490 strb r3, [r0], #1
491 ldrbge r3, [r1], #1
492 strbge r3, [r0], #1
493 ldrbgt r3, [r1], #1
494 strbgt r3, [r0], #1
495 subs r2, r2, r12
496 blt .Lmemmove_fl4 /* less the 4 bytes */
497
498 ands r12, r1, #3
499 beq .Lmemmove_ft8 /* we have an aligned source */
500
501 /* erg - unaligned source */
502 /* This is where it gets nasty ... */
503 .Lmemmove_fsrcul:
/* r12 = src misalignment (1-3).  Round src down to a word boundary,
 * pre-load the first word into lr, then dispatch on the shift amount. */
504 bic r1, r1, #3
505 ldr lr, [r1], #4
506 cmp r12, #2
507 bgt .Lmemmove_fsrcul3
508 beq .Lmemmove_fsrcul2
509 cmp r2, #0x0c
510 blt .Lmemmove_fsrcul1loop4
511 sub r2, r2, #0x0c
512 stmdb sp!, {r4, r5}
513
514 .Lmemmove_fsrcul1loop16:
515 #ifdef __ARMEB__
516 mov r3, lr, lsl #8
517 #else
518 mov r3, lr, lsr #8
519 #endif
520 ldmia r1!, {r4, r5, r12, lr}
521 #ifdef __ARMEB__
522 orr r3, r3, r4, lsr #24
523 mov r4, r4, lsl #8
524 orr r4, r4, r5, lsr #24
525 mov r5, r5, lsl #8
526 orr r5, r5, r12, lsr #24
527 mov r12, r12, lsl #8
528 orr r12, r12, lr, lsr #24
529 #else
530 orr r3, r3, r4, lsl #24
531 mov r4, r4, lsr #8
532 orr r4, r4, r5, lsl #24
533 mov r5, r5, lsr #8
534 orr r5, r5, r12, lsl #24
535 mov r12, r12, lsr #8
536 orr r12, r12, lr, lsl #24
537 #endif
538 stmia r0!, {r3-r5, r12}
539 subs r2, r2, #0x10
540 bge .Lmemmove_fsrcul1loop16
541 ldmia sp!, {r4, r5}
542 adds r2, r2, #0x0c
543 blt .Lmemmove_fsrcul1l4
544
545 .Lmemmove_fsrcul1loop4:
546 #ifdef __ARMEB__
547 mov r12, lr, lsl #8
548 #else
549 mov r12, lr, lsr #8
550 #endif
551 ldr lr, [r1], #4
552 #ifdef __ARMEB__
553 orr r12, r12, lr, lsr #24
554 #else
555 orr r12, r12, lr, lsl #24
556 #endif
557 str r12, [r0], #4
558 subs r2, r2, #4
559 bge .Lmemmove_fsrcul1loop4
560
561 .Lmemmove_fsrcul1l4:
562 sub r1, r1, #3
563 b .Lmemmove_fl4
564
565 .Lmemmove_fsrcul2:
566 cmp r2, #0x0c
567 blt .Lmemmove_fsrcul2loop4
568 sub r2, r2, #0x0c
569 stmdb sp!, {r4, r5}
570
571 .Lmemmove_fsrcul2loop16:
572 #ifdef __ARMEB__
573 mov r3, lr, lsl #16
574 #else
575 mov r3, lr, lsr #16
576 #endif
577 ldmia r1!, {r4, r5, r12, lr}
578 #ifdef __ARMEB__
579 orr r3, r3, r4, lsr #16
580 mov r4, r4, lsl #16
581 orr r4, r4, r5, lsr #16
582 mov r5, r5, lsl #16
583 orr r5, r5, r12, lsr #16
584 mov r12, r12, lsl #16
585 orr r12, r12, lr, lsr #16
586 #else
587 orr r3, r3, r4, lsl #16
588 mov r4, r4, lsr #16
589 orr r4, r4, r5, lsl #16
590 mov r5, r5, lsr #16
591 orr r5, r5, r12, lsl #16
592 mov r12, r12, lsr #16
593 orr r12, r12, lr, lsl #16
594 #endif
595 stmia r0!, {r3-r5, r12}
596 subs r2, r2, #0x10
597 bge .Lmemmove_fsrcul2loop16
598 ldmia sp!, {r4, r5}
599 adds r2, r2, #0x0c
600 blt .Lmemmove_fsrcul2l4
601
602 .Lmemmove_fsrcul2loop4:
603 #ifdef __ARMEB__
604 mov r12, lr, lsl #16
605 #else
606 mov r12, lr, lsr #16
607 #endif
608 ldr lr, [r1], #4
609 #ifdef __ARMEB__
610 orr r12, r12, lr, lsr #16
611 #else
612 orr r12, r12, lr, lsl #16
613 #endif
614 str r12, [r0], #4
615 subs r2, r2, #4
616 bge .Lmemmove_fsrcul2loop4
617
618 .Lmemmove_fsrcul2l4:
619 sub r1, r1, #2
620 b .Lmemmove_fl4
621
622 .Lmemmove_fsrcul3:
623 cmp r2, #0x0c
624 blt .Lmemmove_fsrcul3loop4
625 sub r2, r2, #0x0c
626 stmdb sp!, {r4, r5}
627
628 .Lmemmove_fsrcul3loop16:
629 #ifdef __ARMEB__
630 mov r3, lr, lsl #24
631 #else
632 mov r3, lr, lsr #24
633 #endif
634 ldmia r1!, {r4, r5, r12, lr}
635 #ifdef __ARMEB__
636 orr r3, r3, r4, lsr #8
637 mov r4, r4, lsl #24
638 orr r4, r4, r5, lsr #8
639 mov r5, r5, lsl #24
640 orr r5, r5, r12, lsr #8
641 mov r12, r12, lsl #24
642 orr r12, r12, lr, lsr #8
643 #else
644 orr r3, r3, r4, lsl #8
645 mov r4, r4, lsr #24
646 orr r4, r4, r5, lsl #8
647 mov r5, r5, lsr #24
648 orr r5, r5, r12, lsl #8
649 mov r12, r12, lsr #24
650 orr r12, r12, lr, lsl #8
651 #endif
652 stmia r0!, {r3-r5, r12}
653 subs r2, r2, #0x10
654 bge .Lmemmove_fsrcul3loop16
655 ldmia sp!, {r4, r5}
656 adds r2, r2, #0x0c
657 blt .Lmemmove_fsrcul3l4
658
659 .Lmemmove_fsrcul3loop4:
660 #ifdef __ARMEB__
661 mov r12, lr, lsl #24
662 #else
663 mov r12, lr, lsr #24
664 #endif
665 ldr lr, [r1], #4
666 #ifdef __ARMEB__
667 orr r12, r12, lr, lsr #8
668 #else
669 orr r12, r12, lr, lsl #8
670 #endif
671 str r12, [r0], #4
672 subs r2, r2, #4
673 bge .Lmemmove_fsrcul3loop4
674
675 .Lmemmove_fsrcul3l4:
676 sub r1, r1, #1
677 b .Lmemmove_fl4
678
/* Backward copy (dst > src, overlapping): start from the end of both
 * buffers and work down with descending-address loads/stores. */
679 .Lmemmove_backwards:
680 add r1, r1, r2
681 add r0, r0, r2
682 subs r2, r2, #4
683 blt .Lmemmove_bl4 /* less than 4 bytes */
684 ands r12, r0, #3
685 bne .Lmemmove_bdestul /* oh unaligned destination addr */
686 ands r12, r1, #3
687 bne .Lmemmove_bsrcul /* oh unaligned source addr */
688
689 .Lmemmove_bt8:
690 /* We have aligned source and destination */
691 subs r2, r2, #8
692 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
693 stmdb sp!, {r4, lr}
694 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
695 blt .Lmemmove_bl32
696
697 /* blat 32 bytes at a time */
698 /* XXX for really big copies perhaps we should use more registers */
699 .Lmemmove_bloop32:
700 ldmdb r1!, {r3, r4, r12, lr}
701 stmdb r0!, {r3, r4, r12, lr}
702 ldmdb r1!, {r3, r4, r12, lr}
703 stmdb r0!, {r3, r4, r12, lr}
704 subs r2, r2, #0x20
705 bge .Lmemmove_bloop32
706
707 .Lmemmove_bl32:
708 cmn r2, #0x10
709 ldmdbge r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
710 stmdbge r0!, {r3, r4, r12, lr}
711 subge r2, r2, #0x10
712 adds r2, r2, #0x14
713 ldmdbge r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
714 stmdbge r0!, {r3, r12, lr}
715 subge r2, r2, #0x0c
716 ldmia sp!, {r4, lr}
717
718 .Lmemmove_bl12:
719 adds r2, r2, #8
720 blt .Lmemmove_bl4
721 subs r2, r2, #4
722 ldrlt r3, [r1, #-4]!
723 strlt r3, [r0, #-4]!
724 ldmdbge r1!, {r3, r12}
725 stmdbge r0!, {r3, r12}
726 subge r2, r2, #4
727
728 .Lmemmove_bl4:
729 /* less than 4 bytes to go */
730 adds r2, r2, #4
731 RETeq /* done */
732
733 /* copy the crud byte at a time */
734 cmp r2, #2
735 ldrb r3, [r1, #-1]!
736 strb r3, [r0, #-1]!
737 ldrbge r3, [r1, #-1]!
738 strbge r3, [r0, #-1]!
739 ldrbgt r3, [r1, #-1]!
740 strbgt r3, [r0, #-1]!
741 RET
742
743 /* erg - unaligned destination */
744 .Lmemmove_bdestul:
745 cmp r12, #2
746
747 /* align destination with byte copies */
748 ldrb r3, [r1, #-1]!
749 strb r3, [r0, #-1]!
750 ldrbge r3, [r1, #-1]!
751 strbge r3, [r0, #-1]!
752 ldrbgt r3, [r1, #-1]!
753 strbgt r3, [r0, #-1]!
754 subs r2, r2, r12
755 blt .Lmemmove_bl4 /* less than 4 bytes to go */
756 ands r12, r1, #3
757 beq .Lmemmove_bt8 /* we have an aligned source */
758
759 /* erg - unaligned source */
760 /* This is where it gets nasty ... */
761 .Lmemmove_bsrcul:
/* Mirror of the forward fixup: round src down, pre-load the word at the
 * high end into r3, dispatch on misalignment 1/2/3. */
762 bic r1, r1, #3
763 ldr r3, [r1, #0]
764 cmp r12, #2
765 blt .Lmemmove_bsrcul1
766 beq .Lmemmove_bsrcul2
767 cmp r2, #0x0c
768 blt .Lmemmove_bsrcul3loop4
769 sub r2, r2, #0x0c
770 stmdb sp!, {r4, r5, lr}
771
772 .Lmemmove_bsrcul3loop16:
773 #ifdef __ARMEB__
774 mov lr, r3, lsr #8
775 #else
776 mov lr, r3, lsl #8
777 #endif
778 ldmdb r1!, {r3-r5, r12}
779 #ifdef __ARMEB__
780 orr lr, lr, r12, lsl #24
781 mov r12, r12, lsr #8
782 orr r12, r12, r5, lsl #24
783 mov r5, r5, lsr #8
784 orr r5, r5, r4, lsl #24
785 mov r4, r4, lsr #8
786 orr r4, r4, r3, lsl #24
787 #else
788 orr lr, lr, r12, lsr #24
789 mov r12, r12, lsl #8
790 orr r12, r12, r5, lsr #24
791 mov r5, r5, lsl #8
792 orr r5, r5, r4, lsr #24
793 mov r4, r4, lsl #8
794 orr r4, r4, r3, lsr #24
795 #endif
796 stmdb r0!, {r4, r5, r12, lr}
797 subs r2, r2, #0x10
798 bge .Lmemmove_bsrcul3loop16
799 ldmia sp!, {r4, r5, lr}
800 adds r2, r2, #0x0c
801 blt .Lmemmove_bsrcul3l4
802
803 .Lmemmove_bsrcul3loop4:
804 #ifdef __ARMEB__
805 mov r12, r3, lsr #8
806 #else
807 mov r12, r3, lsl #8
808 #endif
809 ldr r3, [r1, #-4]!
810 #ifdef __ARMEB__
811 orr r12, r12, r3, lsl #24
812 #else
813 orr r12, r12, r3, lsr #24
814 #endif
815 str r12, [r0, #-4]!
816 subs r2, r2, #4
817 bge .Lmemmove_bsrcul3loop4
818
819 .Lmemmove_bsrcul3l4:
820 add r1, r1, #3
821 b .Lmemmove_bl4
822
823 .Lmemmove_bsrcul2:
824 cmp r2, #0x0c
825 blt .Lmemmove_bsrcul2loop4
826 sub r2, r2, #0x0c
827 stmdb sp!, {r4, r5, lr}
828
829 .Lmemmove_bsrcul2loop16:
830 #ifdef __ARMEB__
831 mov lr, r3, lsr #16
832 #else
833 mov lr, r3, lsl #16
834 #endif
835 ldmdb r1!, {r3-r5, r12}
836 #ifdef __ARMEB__
837 orr lr, lr, r12, lsl #16
838 mov r12, r12, lsr #16
839 orr r12, r12, r5, lsl #16
840 mov r5, r5, lsr #16
841 orr r5, r5, r4, lsl #16
842 mov r4, r4, lsr #16
843 orr r4, r4, r3, lsl #16
844 #else
845 orr lr, lr, r12, lsr #16
846 mov r12, r12, lsl #16
847 orr r12, r12, r5, lsr #16
848 mov r5, r5, lsl #16
849 orr r5, r5, r4, lsr #16
850 mov r4, r4, lsl #16
851 orr r4, r4, r3, lsr #16
852 #endif
853 stmdb r0!, {r4, r5, r12, lr}
854 subs r2, r2, #0x10
855 bge .Lmemmove_bsrcul2loop16
856 ldmia sp!, {r4, r5, lr}
857 adds r2, r2, #0x0c
858 blt .Lmemmove_bsrcul2l4
859
860 .Lmemmove_bsrcul2loop4:
861 #ifdef __ARMEB__
862 mov r12, r3, lsr #16
863 #else
864 mov r12, r3, lsl #16
865 #endif
866 ldr r3, [r1, #-4]!
867 #ifdef __ARMEB__
868 orr r12, r12, r3, lsl #16
869 #else
870 orr r12, r12, r3, lsr #16
871 #endif
872 str r12, [r0, #-4]!
873 subs r2, r2, #4
874 bge .Lmemmove_bsrcul2loop4
875
876 .Lmemmove_bsrcul2l4:
877 add r1, r1, #2
878 b .Lmemmove_bl4
879
880 .Lmemmove_bsrcul1:
881 cmp r2, #0x0c
882 blt .Lmemmove_bsrcul1loop4
883 sub r2, r2, #0x0c
884 stmdb sp!, {r4, r5, lr}
885
886 .Lmemmove_bsrcul1loop32:
887 #ifdef __ARMEB__
888 mov lr, r3, lsr #24
889 #else
890 mov lr, r3, lsl #24
891 #endif
892 ldmdb r1!, {r3-r5, r12}
893 #ifdef __ARMEB__
894 orr lr, lr, r12, lsl #8
895 mov r12, r12, lsr #24
896 orr r12, r12, r5, lsl #8
897 mov r5, r5, lsr #24
898 orr r5, r5, r4, lsl #8
899 mov r4, r4, lsr #24
900 orr r4, r4, r3, lsl #8
901 #else
902 orr lr, lr, r12, lsr #8
903 mov r12, r12, lsl #24
904 orr r12, r12, r5, lsr #8
905 mov r5, r5, lsl #24
906 orr r5, r5, r4, lsr #8
907 mov r4, r4, lsl #24
908 orr r4, r4, r3, lsr #8
909 #endif
910 stmdb r0!, {r4, r5, r12, lr}
911 subs r2, r2, #0x10
912 bge .Lmemmove_bsrcul1loop32
913 ldmia sp!, {r4, r5, lr}
914 adds r2, r2, #0x0c
915 blt .Lmemmove_bsrcul1l4
916
917 .Lmemmove_bsrcul1loop4:
918 #ifdef __ARMEB__
919 mov r12, r3, lsr #24
920 #else
921 mov r12, r3, lsl #24
922 #endif
923 ldr r3, [r1, #-4]!
924 #ifdef __ARMEB__
925 orr r12, r12, r3, lsl #8
926 #else
927 orr r12, r12, r3, lsr #8
928 #endif
929 str r12, [r0, #-4]!
930 subs r2, r2, #4
931 bge .Lmemmove_bsrcul1loop4
932
933 .Lmemmove_bsrcul1l4:
934 add r1, r1, #1
935 b .Lmemmove_bl4
936 EEND(memmove)
937 END(bcopy)
938
939 #if !defined(_ARM_ARCH_5E)
/*
 * void *memcpy(void *dst /* r0 */, const void *src /* r1 */, size_t len /* r2 */)
 * (non-ARMv5E variant; the ARMv5E version lives in the #else branch)
 *
 * When not executing from flash (FLASHADDR/PHYSADDR window check on pc),
 * an installed _arm_memcpy hook is tried first for copies of at least
 * *_min_memcpy_size bytes; the hook returns 0 in r0 on success.
 * Otherwise: align the destination with byte copies, then move data in
 * 32/12/4-byte chunks; the .Lmemcpy_srcul* paths stitch words together
 * when src is misaligned relative to dst by 1/2/3 bytes.  (Note these
 * paths use little-endian shifts only; the memmove code above carries
 * the __ARMEB__ variants.)
 */
940 ENTRY(memcpy)
941 /* save leaf functions having to store this away */
942 /* Do not check arm_memcpy if we're running from flash */
943 #if defined(FLASHADDR) && defined(PHYSADDR)
944 #if FLASHADDR > PHYSADDR
945 ldr r3, =FLASHADDR
946 cmp r3, pc
947 bls .Lnormal
948 #else
949 ldr r3, =FLASHADDR
950 cmp r3, pc
951 bhi .Lnormal
952 #endif
953 #endif
954 ldr r3, .L_arm_memcpy
955 ldr r3, [r3]
956 cmp r3, #0
957 beq .Lnormal
958 ldr r3, .L_min_memcpy_size
959 ldr r3, [r3]
960 cmp r2, r3
961 blt .Lnormal
962 stmfd sp!, {r0-r2, r4, lr}
963 mov r3, #0
964 ldr r4, .L_arm_memcpy
965 mov lr, pc
/* Indirect call to the hook; flags from "cmp r0, #0" survive the ldmfd
 * below, so RETeq returns iff the hook succeeded (returned 0). */
966 ldr pc, [r4]
967 cmp r0, #0
968 ldmfd sp!, {r0-r2, r4, lr}
969 RETeq
970
971 .Lnormal:
972 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
973
974 subs r2, r2, #4
975 blt .Lmemcpy_l4 /* less than 4 bytes */
976 ands r12, r0, #3
977 bne .Lmemcpy_destul /* oh unaligned destination addr */
978 ands r12, r1, #3
979 bne .Lmemcpy_srcul /* oh unaligned source addr */
980
981 .Lmemcpy_t8:
982 /* We have aligned source and destination */
983 subs r2, r2, #8
984 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
985 subs r2, r2, #0x14
986 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
987 stmdb sp!, {r4} /* borrow r4 */
988
989 /* blat 32 bytes at a time */
990 /* XXX for really big copies perhaps we should use more registers */
991 .Lmemcpy_loop32:
992 ldmia r1!, {r3, r4, r12, lr}
993 stmia r0!, {r3, r4, r12, lr}
994 ldmia r1!, {r3, r4, r12, lr}
995 stmia r0!, {r3, r4, r12, lr}
996 subs r2, r2, #0x20
997 bge .Lmemcpy_loop32
998
999 cmn r2, #0x10
1000 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
1001 stmiage r0!, {r3, r4, r12, lr}
1002 subge r2, r2, #0x10
1003 ldmia sp!, {r4} /* return r4 */
1004
1005 .Lmemcpy_l32:
1006 adds r2, r2, #0x14
1007
1008 /* blat 12 bytes at a time */
1009 .Lmemcpy_loop12:
1010 ldmiage r1!, {r3, r12, lr}
1011 stmiage r0!, {r3, r12, lr}
1012 subsge r2, r2, #0x0c
1013 bge .Lmemcpy_loop12
1014
1015 .Lmemcpy_l12:
1016 adds r2, r2, #8
1017 blt .Lmemcpy_l4
1018
1019 subs r2, r2, #4
1020 ldrlt r3, [r1], #4
1021 strlt r3, [r0], #4
1022 ldmiage r1!, {r3, r12}
1023 stmiage r0!, {r3, r12}
1024 subge r2, r2, #4
1025
1026 .Lmemcpy_l4:
1027 /* less than 4 bytes to go */
1028 adds r2, r2, #4
1029 #ifdef __APCS_26_
1030 ldmiaeq sp!, {r0, pc}^ /* done */
1031 #else
1032 ldmiaeq sp!, {r0, pc} /* done */
1033 #endif
1034 /* copy the crud byte at a time */
1035 cmp r2, #2
1036 ldrb r3, [r1], #1
1037 strb r3, [r0], #1
1038 ldrbge r3, [r1], #1
1039 strbge r3, [r0], #1
1040 ldrbgt r3, [r1], #1
1041 strbgt r3, [r0], #1
1042 ldmia sp!, {r0, pc}
1043
1044 /* erg - unaligned destination */
1045 .Lmemcpy_destul:
1046 rsb r12, r12, #4
1047 cmp r12, #2
1048
1049 /* align destination with byte copies */
1050 ldrb r3, [r1], #1
1051 strb r3, [r0], #1
1052 ldrbge r3, [r1], #1
1053 strbge r3, [r0], #1
1054 ldrbgt r3, [r1], #1
1055 strbgt r3, [r0], #1
1056 subs r2, r2, r12
1057 blt .Lmemcpy_l4 /* less the 4 bytes */
1058
1059 ands r12, r1, #3
1060 beq .Lmemcpy_t8 /* we have an aligned source */
1061
1062 /* erg - unaligned source */
1063 /* This is where it gets nasty ... */
1064 .Lmemcpy_srcul:
/* r12 = src misalignment (1-3): round src down to a word boundary,
 * pre-load the first word into lr, dispatch on the byte offset. */
1065 bic r1, r1, #3
1066 ldr lr, [r1], #4
1067 cmp r12, #2
1068 bgt .Lmemcpy_srcul3
1069 beq .Lmemcpy_srcul2
1070 cmp r2, #0x0c
1071 blt .Lmemcpy_srcul1loop4
1072 sub r2, r2, #0x0c
1073 stmdb sp!, {r4, r5}
1074
1075 .Lmemcpy_srcul1loop16:
1076 mov r3, lr, lsr #8
1077 ldmia r1!, {r4, r5, r12, lr}
1078 orr r3, r3, r4, lsl #24
1079 mov r4, r4, lsr #8
1080 orr r4, r4, r5, lsl #24
1081 mov r5, r5, lsr #8
1082 orr r5, r5, r12, lsl #24
1083 mov r12, r12, lsr #8
1084 orr r12, r12, lr, lsl #24
1085 stmia r0!, {r3-r5, r12}
1086 subs r2, r2, #0x10
1087 bge .Lmemcpy_srcul1loop16
1088 ldmia sp!, {r4, r5}
1089 adds r2, r2, #0x0c
1090 blt .Lmemcpy_srcul1l4
1091
1092 .Lmemcpy_srcul1loop4:
1093 mov r12, lr, lsr #8
1094 ldr lr, [r1], #4
1095 orr r12, r12, lr, lsl #24
1096 str r12, [r0], #4
1097 subs r2, r2, #4
1098 bge .Lmemcpy_srcul1loop4
1099
1100 .Lmemcpy_srcul1l4:
1101 sub r1, r1, #3
1102 b .Lmemcpy_l4
1103
1104 .Lmemcpy_srcul2:
1105 cmp r2, #0x0c
1106 blt .Lmemcpy_srcul2loop4
1107 sub r2, r2, #0x0c
1108 stmdb sp!, {r4, r5}
1109
1110 .Lmemcpy_srcul2loop16:
1111 mov r3, lr, lsr #16
1112 ldmia r1!, {r4, r5, r12, lr}
1113 orr r3, r3, r4, lsl #16
1114 mov r4, r4, lsr #16
1115 orr r4, r4, r5, lsl #16
1116 mov r5, r5, lsr #16
1117 orr r5, r5, r12, lsl #16
1118 mov r12, r12, lsr #16
1119 orr r12, r12, lr, lsl #16
1120 stmia r0!, {r3-r5, r12}
1121 subs r2, r2, #0x10
1122 bge .Lmemcpy_srcul2loop16
1123 ldmia sp!, {r4, r5}
1124 adds r2, r2, #0x0c
1125 blt .Lmemcpy_srcul2l4
1126
1127 .Lmemcpy_srcul2loop4:
1128 mov r12, lr, lsr #16
1129 ldr lr, [r1], #4
1130 orr r12, r12, lr, lsl #16
1131 str r12, [r0], #4
1132 subs r2, r2, #4
1133 bge .Lmemcpy_srcul2loop4
1134
1135 .Lmemcpy_srcul2l4:
1136 sub r1, r1, #2
1137 b .Lmemcpy_l4
1138
1139 .Lmemcpy_srcul3:
1140 cmp r2, #0x0c
1141 blt .Lmemcpy_srcul3loop4
1142 sub r2, r2, #0x0c
1143 stmdb sp!, {r4, r5}
1144
1145 .Lmemcpy_srcul3loop16:
1146 mov r3, lr, lsr #24
1147 ldmia r1!, {r4, r5, r12, lr}
1148 orr r3, r3, r4, lsl #8
1149 mov r4, r4, lsr #24
1150 orr r4, r4, r5, lsl #8
1151 mov r5, r5, lsr #24
1152 orr r5, r5, r12, lsl #8
1153 mov r12, r12, lsr #24
1154 orr r12, r12, lr, lsl #8
1155 stmia r0!, {r3-r5, r12}
1156 subs r2, r2, #0x10
1157 bge .Lmemcpy_srcul3loop16
1158 ldmia sp!, {r4, r5}
1159 adds r2, r2, #0x0c
1160 blt .Lmemcpy_srcul3l4
1161
1162 .Lmemcpy_srcul3loop4:
1163 mov r12, lr, lsr #24
1164 ldr lr, [r1], #4
1165 orr r12, r12, lr, lsl #8
1166 str r12, [r0], #4
1167 subs r2, r2, #4
1168 bge .Lmemcpy_srcul3loop4
1169
1170 .Lmemcpy_srcul3l4:
1171 sub r1, r1, #1
1172 b .Lmemcpy_l4
1173 END(memcpy)
1174
1175 #else
1176 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * In:  r0 = dst, r1 = src, r2 = len
 * Out: r0 = dst (returned unchanged)
 *
 * Forward copy only.  r3 is used as the working destination pointer
 * throughout so that r0 can be returned intact.  ip (r12) is scratch.
 */
ENTRY(memcpy)
        pld     [r1]                    /* Start prefetching the source */
        cmp     r2, #0x0c
        ble     .Lmemcpy_short          /* <= 12 bytes: dedicated cases */
#ifdef FLASHADDR
#if FLASHADDR > PHYSADDR
        ldr     r3, =FLASHADDR
        cmp     r3, pc
        bls     .Lnormal                /* Not executing from flash */
#else
        ldr     r3, =FLASHADDR
        cmp     r3, pc
        bhi     .Lnormal                /* Not executing from flash */
#endif
#endif
        /*
         * If a platform copy helper has been registered (the pointer
         * behind .L_arm_memcpy is non-NULL) and the length is at least
         * the helper's minimum, try it first.  The helper returns 0 in
         * r0 on success.
         * NOTE(review): the .L_arm_memcpy / .L_min_memcpy_size literal
         * pool entries are defined elsewhere in this file — confirm.
         */
        ldr     r3, .L_arm_memcpy
        ldr     r3, [r3]
        cmp     r3, #0                  /* No helper registered? */
        beq     .Lnormal
        ldr     r3, .L_min_memcpy_size
        ldr     r3, [r3]
        cmp     r2, r3                  /* Below the helper's threshold? */
        blt     .Lnormal
        stmfd   sp!, {r0-r2, r4, lr}
        mov     r3, #0                  /* 4th arg to helper; its meaning
                                         * is defined by the helper ABI */
        ldr     r4, .L_arm_memcpy
        mov     lr, pc                  /* Set return address, then ... */
        ldr     pc, [r4]                /* ... call through the pointer */
        cmp     r0, #0                  /* 0 => helper did the copy */
        ldmfd   sp!, {r0-r2, r4, lr}    /* (does not disturb the flags) */
        RETeq
.Lnormal:
        mov     r3, r0                  /* We must not clobber r0 */

        /* Word-align the destination buffer */
        ands    ip, r3, #0x03           /* Already word aligned? */
        beq     .Lmemcpy_wordaligned    /* Yup */
        cmp     ip, #0x02               /* Need 4 - ip lead-in bytes */
        ldrb    ip, [r1], #0x01
        sub     r2, r2, #0x01
        strb    ip, [r3], #0x01         /* 1st byte (always) */
        ldrble  ip, [r1], #0x01
        suble   r2, r2, #0x01
        strble  ip, [r3], #0x01         /* 2nd byte if (dst & 3) <= 2 */
        ldrblt  ip, [r1], #0x01
        sublt   r2, r2, #0x01
        strblt  ip, [r3], #0x01         /* 3rd byte if (dst & 3) == 1 */

        /* Destination buffer is now word aligned */
.Lmemcpy_wordaligned:
        ands    ip, r1, #0x03           /* Is src also word-aligned? */
        bne     .Lmemcpy_bad_align      /* Nope. Things just got bad */

        /* Quad-align the destination buffer (for strd) */
        tst     r3, #0x07               /* Already quad aligned? */
        ldrne   ip, [r1], #0x04
        stmfd   sp!, {r4-r9}            /* Free up some registers */
        subne   r2, r2, #0x04
        strne   ip, [r3], #0x04

        /* Destination buffer quad aligned, source is at least word aligned */
        subs    r2, r2, #0x80
        blt     .Lmemcpy_w_lessthan128

        /*
         * Copy 128 bytes at a time.  Loads and strd stores are
         * interleaved so each strd's register pair is ready well
         * before it issues; pld keeps the source stream ahead.
         */
.Lmemcpy_w_loop128:
        ldr     r4, [r1], #0x04         /* LD:00-03 */
        ldr     r5, [r1], #0x04         /* LD:04-07 */
        pld     [r1, #0x18]             /* Prefetch 0x20 */
        ldr     r6, [r1], #0x04         /* LD:08-0b */
        ldr     r7, [r1], #0x04         /* LD:0c-0f */
        ldr     r8, [r1], #0x04         /* LD:10-13 */
        ldr     r9, [r1], #0x04         /* LD:14-17 */
        strd    r4, [r3], #0x08         /* ST:00-07 */
        ldr     r4, [r1], #0x04         /* LD:18-1b */
        ldr     r5, [r1], #0x04         /* LD:1c-1f */
        strd    r6, [r3], #0x08         /* ST:08-0f */
        ldr     r6, [r1], #0x04         /* LD:20-23 */
        ldr     r7, [r1], #0x04         /* LD:24-27 */
        pld     [r1, #0x18]             /* Prefetch 0x40 */
        strd    r8, [r3], #0x08         /* ST:10-17 */
        ldr     r8, [r1], #0x04         /* LD:28-2b */
        ldr     r9, [r1], #0x04         /* LD:2c-2f */
        strd    r4, [r3], #0x08         /* ST:18-1f */
        ldr     r4, [r1], #0x04         /* LD:30-33 */
        ldr     r5, [r1], #0x04         /* LD:34-37 */
        strd    r6, [r3], #0x08         /* ST:20-27 */
        ldr     r6, [r1], #0x04         /* LD:38-3b */
        ldr     r7, [r1], #0x04         /* LD:3c-3f */
        strd    r8, [r3], #0x08         /* ST:28-2f */
        ldr     r8, [r1], #0x04         /* LD:40-43 */
        ldr     r9, [r1], #0x04         /* LD:44-47 */
        pld     [r1, #0x18]             /* Prefetch 0x60 */
        strd    r4, [r3], #0x08         /* ST:30-37 */
        ldr     r4, [r1], #0x04         /* LD:48-4b */
        ldr     r5, [r1], #0x04         /* LD:4c-4f */
        strd    r6, [r3], #0x08         /* ST:38-3f */
        ldr     r6, [r1], #0x04         /* LD:50-53 */
        ldr     r7, [r1], #0x04         /* LD:54-57 */
        strd    r8, [r3], #0x08         /* ST:40-47 */
        ldr     r8, [r1], #0x04         /* LD:58-5b */
        ldr     r9, [r1], #0x04         /* LD:5c-5f */
        strd    r4, [r3], #0x08         /* ST:48-4f */
        ldr     r4, [r1], #0x04         /* LD:60-63 */
        ldr     r5, [r1], #0x04         /* LD:64-67 */
        pld     [r1, #0x18]             /* Prefetch 0x80 */
        strd    r6, [r3], #0x08         /* ST:50-57 */
        ldr     r6, [r1], #0x04         /* LD:68-6b */
        ldr     r7, [r1], #0x04         /* LD:6c-6f */
        strd    r8, [r3], #0x08         /* ST:58-5f */
        ldr     r8, [r1], #0x04         /* LD:70-73 */
        ldr     r9, [r1], #0x04         /* LD:74-77 */
        strd    r4, [r3], #0x08         /* ST:60-67 */
        ldr     r4, [r1], #0x04         /* LD:78-7b */
        ldr     r5, [r1], #0x04         /* LD:7c-7f */
        strd    r6, [r3], #0x08         /* ST:68-6f */
        strd    r8, [r3], #0x08         /* ST:70-77 */
        subs    r2, r2, #0x80
        strd    r4, [r3], #0x08         /* ST:78-7f */
        bge     .Lmemcpy_w_loop128

.Lmemcpy_w_lessthan128:
        adds    r2, r2, #0x80           /* Adjust for extra sub */
        ldmfdeq sp!, {r4-r9}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x20
        blt     .Lmemcpy_w_lessthan32

        /* Copy 32 bytes at a time */
.Lmemcpy_w_loop32:
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        pld     [r1, #0x18]
        ldr     r6, [r1], #0x04
        ldr     r7, [r1], #0x04
        ldr     r8, [r1], #0x04
        ldr     r9, [r1], #0x04
        strd    r4, [r3], #0x08
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        strd    r6, [r3], #0x08
        strd    r8, [r3], #0x08
        subs    r2, r2, #0x20
        strd    r4, [r3], #0x08
        bge     .Lmemcpy_w_loop32

.Lmemcpy_w_lessthan32:
        adds    r2, r2, #0x20           /* Adjust for extra sub */
        ldmfdeq sp!, {r4-r9}
        RETeq                           /* Return now if done */

        /*
         * 8..31 bytes left.  Jump into the copy ladder below:
         * each 8-byte chunk is 4 instructions (16 bytes), and
         * r4 = (0x18 - (r2 & 0x18)) so "r4, lsl #1" skips exactly
         * the chunks we don't need (pc reads as '.'+8, i.e. the
         * first ladder instruction).  r4 == 0 falls through the nop.
         */
        and     r4, r2, #0x18
        rsbs    r4, r4, #0x18
        addne   pc, pc, r4, lsl #1
        nop                             /* Fall-through filler */

        /* At least 24 bytes remaining */
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        sub     r2, r2, #0x08
        strd    r4, [r3], #0x08

        /* At least 16 bytes remaining */
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        sub     r2, r2, #0x08
        strd    r4, [r3], #0x08

        /* At least 8 bytes remaining */
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        subs    r2, r2, #0x08
        strd    r4, [r3], #0x08

        /* Less than 8 bytes remaining */
        ldmfd   sp!, {r4-r9}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x04
        ldrge   ip, [r1], #0x04         /* One final word, if any */
        strge   ip, [r3], #0x04
        RETeq                           /* Return now if done */
        addlt   r2, r2, #0x04           /* 1..3 trailing bytes */
        ldrb    ip, [r1], #0x01
        cmp     r2, #0x02
        ldrbge  r2, [r1], #0x01
        strb    ip, [r3], #0x01
        ldrbgt  ip, [r1]
        strbge  r2, [r3], #0x01
        strbgt  ip, [r3]
        RET
        /* Place a literal pool here for the above ldr instructions to use */
        .ltorg
1369
1370
/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r3) is word aligned, but the source buffer
 * is not: on entry ip = (src & 3), which is non-zero and selects one
 * of three fix-up paths (bad1/bad2/bad3).  Each path rounds r1 down
 * to a word boundary, keeps the current partially-consumed source
 * word in ip, and re-combines successive words with paired shifts.
 * r2 = bytes remaining.
 */
.Lmemcpy_bad_align:
        stmfd   sp!, {r4-r7}            /* Need more scratch registers */
        bic     r1, r1, #0x03           /* Round src down to a word */
        cmp     ip, #2
        ldr     ip, [r1], #0x04         /* Preload first aligned word */
        bgt     .Lmemcpy_bad3           /* (src & 3) == 3 */
        beq     .Lmemcpy_bad2           /* (src & 3) == 2 */
        b       .Lmemcpy_bad1           /* (src & 3) == 1 */

        /* Source is 1 byte past a word boundary: shift by 8/24 */
.Lmemcpy_bad1_loop16:
#ifdef __ARMEB__
        mov     r4, ip, lsl #8
#else
        mov     r4, ip, lsr #8
#endif
        ldr     r5, [r1], #0x04
        pld     [r1, #0x018]
        ldr     r6, [r1], #0x04
        ldr     r7, [r1], #0x04
        ldr     ip, [r1], #0x04
#ifdef __ARMEB__
        orr     r4, r4, r5, lsr #24
        mov     r5, r5, lsl #8
        orr     r5, r5, r6, lsr #24
        mov     r6, r6, lsl #8
        orr     r6, r6, r7, lsr #24
        mov     r7, r7, lsl #8
        orr     r7, r7, ip, lsr #24
#else
        orr     r4, r4, r5, lsl #24
        mov     r5, r5, lsr #8
        orr     r5, r5, r6, lsl #24
        mov     r6, r6, lsr #8
        orr     r6, r6, r7, lsl #24
        mov     r7, r7, lsr #8
        orr     r7, r7, ip, lsl #24
#endif
        str     r4, [r3], #0x04
        str     r5, [r3], #0x04
        str     r6, [r3], #0x04
        str     r7, [r3], #0x04
.Lmemcpy_bad1:
        subs    r2, r2, #0x10
        bge     .Lmemcpy_bad1_loop16

        adds    r2, r2, #0x10
        ldmfdeq sp!, {r4-r7}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x04
        sublt   r1, r1, #0x03           /* Undo round-down + readahead */
        blt     .Lmemcpy_bad_done

.Lmemcpy_bad1_loop4:
#ifdef __ARMEB__
        mov     r4, ip, lsl #8
#else
        mov     r4, ip, lsr #8
#endif
        ldr     ip, [r1], #0x04
        subs    r2, r2, #0x04
#ifdef __ARMEB__
        orr     r4, r4, ip, lsr #24
#else
        orr     r4, r4, ip, lsl #24
#endif
        str     r4, [r3], #0x04
        bge     .Lmemcpy_bad1_loop4
        sub     r1, r1, #0x03           /* Undo round-down + readahead */
        b       .Lmemcpy_bad_done

        /* Source is 2 bytes past a word boundary: shift by 16/16 */
.Lmemcpy_bad2_loop16:
#ifdef __ARMEB__
        mov     r4, ip, lsl #16
#else
        mov     r4, ip, lsr #16
#endif
        ldr     r5, [r1], #0x04
        pld     [r1, #0x018]
        ldr     r6, [r1], #0x04
        ldr     r7, [r1], #0x04
        ldr     ip, [r1], #0x04
#ifdef __ARMEB__
        orr     r4, r4, r5, lsr #16
        mov     r5, r5, lsl #16
        orr     r5, r5, r6, lsr #16
        mov     r6, r6, lsl #16
        orr     r6, r6, r7, lsr #16
        mov     r7, r7, lsl #16
        orr     r7, r7, ip, lsr #16
#else
        orr     r4, r4, r5, lsl #16
        mov     r5, r5, lsr #16
        orr     r5, r5, r6, lsl #16
        mov     r6, r6, lsr #16
        orr     r6, r6, r7, lsl #16
        mov     r7, r7, lsr #16
        orr     r7, r7, ip, lsl #16
#endif
        str     r4, [r3], #0x04
        str     r5, [r3], #0x04
        str     r6, [r3], #0x04
        str     r7, [r3], #0x04
.Lmemcpy_bad2:
        subs    r2, r2, #0x10
        bge     .Lmemcpy_bad2_loop16

        adds    r2, r2, #0x10
        ldmfdeq sp!, {r4-r7}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x04
        sublt   r1, r1, #0x02           /* Undo round-down + readahead */
        blt     .Lmemcpy_bad_done

.Lmemcpy_bad2_loop4:
#ifdef __ARMEB__
        mov     r4, ip, lsl #16
#else
        mov     r4, ip, lsr #16
#endif
        ldr     ip, [r1], #0x04
        subs    r2, r2, #0x04
#ifdef __ARMEB__
        orr     r4, r4, ip, lsr #16
#else
        orr     r4, r4, ip, lsl #16
#endif
        str     r4, [r3], #0x04
        bge     .Lmemcpy_bad2_loop4
        sub     r1, r1, #0x02           /* Undo round-down + readahead */
        b       .Lmemcpy_bad_done

        /* Source is 3 bytes past a word boundary: shift by 24/8 */
.Lmemcpy_bad3_loop16:
#ifdef __ARMEB__
        mov     r4, ip, lsl #24
#else
        mov     r4, ip, lsr #24
#endif
        ldr     r5, [r1], #0x04
        pld     [r1, #0x018]
        ldr     r6, [r1], #0x04
        ldr     r7, [r1], #0x04
        ldr     ip, [r1], #0x04
#ifdef __ARMEB__
        orr     r4, r4, r5, lsr #8
        mov     r5, r5, lsl #24
        orr     r5, r5, r6, lsr #8
        mov     r6, r6, lsl #24
        orr     r6, r6, r7, lsr #8
        mov     r7, r7, lsl #24
        orr     r7, r7, ip, lsr #8
#else
        orr     r4, r4, r5, lsl #8
        mov     r5, r5, lsr #24
        orr     r5, r5, r6, lsl #8
        mov     r6, r6, lsr #24
        orr     r6, r6, r7, lsl #8
        mov     r7, r7, lsr #24
        orr     r7, r7, ip, lsl #8
#endif
        str     r4, [r3], #0x04
        str     r5, [r3], #0x04
        str     r6, [r3], #0x04
        str     r7, [r3], #0x04
.Lmemcpy_bad3:
        subs    r2, r2, #0x10
        bge     .Lmemcpy_bad3_loop16

        adds    r2, r2, #0x10
        ldmfdeq sp!, {r4-r7}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x04
        sublt   r1, r1, #0x01           /* Undo round-down + readahead */
        blt     .Lmemcpy_bad_done

.Lmemcpy_bad3_loop4:
#ifdef __ARMEB__
        mov     r4, ip, lsl #24
#else
        mov     r4, ip, lsr #24
#endif
        ldr     ip, [r1], #0x04
        subs    r2, r2, #0x04
#ifdef __ARMEB__
        orr     r4, r4, ip, lsr #8
#else
        orr     r4, r4, ip, lsl #8
#endif
        str     r4, [r3], #0x04
        bge     .Lmemcpy_bad3_loop4
        sub     r1, r1, #0x01           /* Undo round-down + readahead */

.Lmemcpy_bad_done:
        ldmfd   sp!, {r4-r7}
        adds    r2, r2, #0x04           /* 0..3 trailing bytes left */
        RETeq
        ldrb    ip, [r1], #0x01
        cmp     r2, #0x02
        ldrbge  r2, [r1], #0x01
        strb    ip, [r3], #0x01
        ldrbgt  ip, [r1]
        strbge  r2, [r3], #0x01
        strbgt  ip, [r3]
        RET
1578
1579
/*
 * Handle short copies (less than 16 bytes), possibly misaligned.
 * Some of these are *very* common, thanks to the network stack,
 * and so are handled specially.
 *
 * Entered with r0 = dst, r1 = src, r2 = len (0..12).  Dispatches
 * through a branch table indexed by length: pc reads as '.'+8, so
 * "add pc, pc, r2, lsl #2" lands on entry r2 of the table after
 * the nop.
 */
.Lmemcpy_short:
        add     pc, pc, r2, lsl #2      /* Jump to table entry len */
        nop                             /* Table alignment filler */
        RET                             /* 0x00 */
        b       .Lmemcpy_bytewise       /* 0x01 */
        b       .Lmemcpy_bytewise       /* 0x02 */
        b       .Lmemcpy_bytewise       /* 0x03 */
        b       .Lmemcpy_4              /* 0x04 */
        b       .Lmemcpy_bytewise       /* 0x05 */
        b       .Lmemcpy_6              /* 0x06 */
        b       .Lmemcpy_bytewise       /* 0x07 */
        b       .Lmemcpy_8              /* 0x08 */
        b       .Lmemcpy_bytewise       /* 0x09 */
        b       .Lmemcpy_bytewise       /* 0x0a */
        b       .Lmemcpy_bytewise       /* 0x0b */
        b       .Lmemcpy_c              /* 0x0c */

        /* Generic byte-at-a-time copy; r2 >= 1 on entry */
.Lmemcpy_bytewise:
        mov     r3, r0                  /* We must not clobber r0 */
        ldrb    ip, [r1], #0x01
1:      subs    r2, r2, #0x01
        strb    ip, [r3], #0x01
        ldrbne  ip, [r1], #0x01
        bne     1b
        RET
1609
/******************************************************************************
 * Special case for 4 byte copies
 *
 * Dispatch index = ((dst & 3) << 2) | (src & 3).  Each of the 16 cases
 * is padded to exactly 64 bytes (LMEMCPY_4_PAD) so the computed branch
 * "r3 + index * 64" lands on the right one; r3 is the address of case
 * 0000 (pc reads '.'+8, and the five dispatcher instructions span
 * 0x14 bytes).  Case 0000 simply falls through.
 */
#define LMEMCPY_4_LOG2  6       /* 64 bytes */
#define LMEMCPY_4_PAD   .align LMEMCPY_4_LOG2
        LMEMCPY_4_PAD
.Lmemcpy_4:
        and     r2, r1, #0x03           /* r2 = src & 3 */
        orr     r2, r2, r0, lsl #2      /* r2 |= (dst & 3) << 2 */
        ands    r2, r2, #0x0f
        sub     r3, pc, #0x14           /* r3 = &case 0000 */
        addne   pc, r3, r2, lsl #LMEMCPY_4_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
        str     r2, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
        ldr     r2, [r1, #3]            /* BE:r2 = 3xxx  LE:r2 = xxx3 */
#ifdef __ARMEB__
        mov     r3, r3, lsl #8          /* r3 = 012. */
        orr     r3, r3, r2, lsr #24     /* r3 = 0123 */
#else
        mov     r3, r3, lsr #8          /* r3 = .210 */
        orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
#endif
        str     r3, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
#ifdef __ARMEB__
        ldrh    r3, [r1]
        ldrh    r2, [r1, #0x02]
#else
        ldrh    r3, [r1, #0x02]
        ldrh    r2, [r1]
#endif
        orr     r3, r2, r3, lsl #16
        str     r3, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #-3]           /* BE:r3 = xxx0  LE:r3 = 0xxx */
        ldr     r2, [r1, #1]            /* BE:r2 = 123x  LE:r2 = x321 */
#ifdef __ARMEB__
        mov     r3, r3, lsl #24         /* r3 = 0... */
        orr     r3, r3, r2, lsr #8      /* r3 = 0123 */
#else
        mov     r3, r3, lsr #24         /* r3 = ...0 */
        orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
#endif
        str     r3, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
#ifdef __ARMEB__
        strb    r2, [r0, #0x03]
        mov     r3, r2, lsr #8
        mov     r1, r2, lsr #24
        strb    r1, [r0]
#else
        strb    r2, [r0]
        mov     r3, r2, lsr #8
        mov     r1, r2, lsr #24
        strb    r1, [r0, #0x03]
#endif
        strh    r3, [r0, #0x01]
        RET
        LMEMCPY_4_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrb    r1, [r1, #0x03]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
        RET
        LMEMCPY_4_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
#ifdef __ARMEB__
        mov     r1, r2, lsr #8          /* r1 = ...0 */
        strb    r1, [r0]
        mov     r2, r2, lsl #8          /* r2 = .01. */
        orr     r2, r2, r3, lsr #8      /* r2 = .012 */
#else
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = ...1 */
        orr     r2, r2, r3, lsl #8      /* r2 = .321 */
        mov     r3, r3, lsr #8          /* r3 = ...3 */
#endif
        strh    r2, [r0, #0x01]
        strb    r3, [r0, #0x03]
        RET
        LMEMCPY_4_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrb    r1, [r1, #0x03]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
        RET
        LMEMCPY_4_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
#ifdef __ARMEB__
        strh    r2, [r0, #0x02]
        mov     r3, r2, lsr #16
        strh    r3, [r0]
#else
        strh    r2, [r0]
        mov     r3, r2, lsr #16
        strh    r3, [r0, #0x02]
#endif
        RET
        LMEMCPY_4_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
        ldr     r3, [r1, #3]            /* BE:r3 = 3xxx  LE:r3 = xxx3 */
        mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
        strh    r1, [r0]
#ifdef __ARMEB__
        mov     r2, r2, lsl #8          /* r2 = 012. */
        orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
#else
        mov     r2, r2, lsr #24         /* r2 = ...2 */
        orr     r2, r2, r3, lsl #8      /* r2 = xx32 */
#endif
        strh    r2, [r0, #0x02]
        RET
        LMEMCPY_4_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]
        ldrh    r3, [r1, #0x02]
        strh    r2, [r0]
        strh    r3, [r0, #0x02]
        RET
        LMEMCPY_4_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #1]            /* BE:r3 = 123x  LE:r3 = x321 */
        ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
        mov     r1, r3, lsr #8          /* BE:r1 = .123  LE:r1 = .x32 */
        strh    r1, [r0, #0x02]
#ifdef __ARMEB__
        mov     r3, r3, lsr #24         /* r3 = ...1 */
        orr     r3, r3, r2, lsl #8      /* r3 = xx01 */
#else
        mov     r3, r3, lsl #8          /* r3 = 321. */
        orr     r3, r3, r2, lsr #24     /* r3 = 3210 */
#endif
        strh    r3, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
#ifdef __ARMEB__
        strb    r2, [r0, #0x03]
        mov     r3, r2, lsr #8
        mov     r1, r2, lsr #24
        strh    r3, [r0, #0x01]
        strb    r1, [r0]
#else
        strb    r2, [r0]
        mov     r3, r2, lsr #8
        mov     r1, r2, lsr #24
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
#endif
        RET
        LMEMCPY_4_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrb    r1, [r1, #0x03]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
        RET
        LMEMCPY_4_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
#ifdef __ARMEB__
        ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        strb    r3, [r0, #0x03]
        mov     r3, r3, lsr #8          /* r3 = ...2 */
        orr     r3, r3, r2, lsl #8      /* r3 = ..12 */
        strh    r3, [r0, #0x01]
        mov     r2, r2, lsr #8          /* r2 = ...0 */
        strb    r2, [r0]
#else
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = ...1 */
        orr     r2, r2, r3, lsl #8      /* r2 = .321 */
        strh    r2, [r0, #0x01]
        mov     r3, r3, lsr #8          /* r3 = ...3 */
        strb    r3, [r0, #0x03]
#endif
        RET
        LMEMCPY_4_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrb    r1, [r1, #0x03]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
        RET
        LMEMCPY_4_PAD
1872
1873
/******************************************************************************
 * Special case for 6 byte copies
 *
 * Same dispatch scheme as the 4-byte table: index =
 * ((dst & 3) << 2) | (src & 3); each case padded to 64 bytes;
 * r3 = address of case 0000 (the 5 dispatcher insns span 0x14 bytes
 * and pc reads '.'+8).  Case 0000 falls through.
 */
#define LMEMCPY_6_LOG2  6       /* 64 bytes */
#define LMEMCPY_6_PAD   .align LMEMCPY_6_LOG2
        LMEMCPY_6_PAD
.Lmemcpy_6:
        and     r2, r1, #0x03           /* r2 = src & 3 */
        orr     r2, r2, r0, lsl #2      /* r2 |= (dst & 3) << 2 */
        ands    r2, r2, #0x0f
        sub     r3, pc, #0x14           /* r3 = &case 0000 */
        addne   pc, r3, r2, lsl #LMEMCPY_6_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
        ldrh    r3, [r1, #0x04]
        str     r2, [r0]
        strh    r3, [r0, #0x04]
        RET
        LMEMCPY_6_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
        ldr     r3, [r1, #0x03]         /* BE:r3 = 345x  LE:r3 = x543 */
#ifdef __ARMEB__
        mov     r2, r2, lsl #8          /* r2 = 012. */
        orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
#else
        mov     r2, r2, lsr #8          /* r2 = .210 */
        orr     r2, r2, r3, lsl #24     /* r2 = 3210 */
#endif
        mov     r3, r3, lsr #8          /* BE:r3 = .345  LE:r3 = .x54 */
        str     r2, [r0]
        strh    r3, [r0, #0x04]
        RET
        LMEMCPY_6_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
        ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
#ifdef __ARMEB__
        mov     r1, r3, lsr #16         /* r1 = ..23 */
        orr     r1, r1, r2, lsl #16     /* r1 = 0123 */
        str     r1, [r0]
        strh    r3, [r0, #0x04]
#else
        mov     r1, r3, lsr #16         /* r1 = ..54 */
        orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
        str     r2, [r0]
        strh    r1, [r0, #0x04]
#endif
        RET
        LMEMCPY_6_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
        ldr     r3, [r1, #1]            /* BE:r3 = 1234  LE:r3 = 4321 */
        ldr     r1, [r1, #5]            /* BE:r1 = 5xxx  LE:r1 = xxx5 */
#ifdef __ARMEB__
        mov     r2, r2, lsl #24         /* r2 = 0... */
        orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
        mov     r3, r3, lsl #8          /* r3 = 234. */
        orr     r1, r3, r1, lsr #24     /* r1 = 2345 */
#else
        mov     r2, r2, lsr #24         /* r2 = ...0 */
        orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
        mov     r1, r1, lsl #8          /* r1 = xx5. */
        orr     r1, r1, r3, lsr #24     /* r1 = xx54 */
#endif
        str     r2, [r0]
        strh    r1, [r0, #0x04]
        RET
        LMEMCPY_6_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
        ldrh    r2, [r1, #0x04]         /* BE:r2 = ..45  LE:r2 = ..54 */
        mov     r1, r3, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
        strh    r1, [r0, #0x01]
#ifdef __ARMEB__
        mov     r1, r3, lsr #24         /* r1 = ...0 */
        strb    r1, [r0]
        mov     r3, r3, lsl #8          /* r3 = 123. */
        orr     r3, r3, r2, lsr #8      /* r3 = 1234 */
#else
        strb    r3, [r0]
        mov     r3, r3, lsr #24         /* r3 = ...3 */
        orr     r3, r3, r2, lsl #8      /* r3 = .543 */
        mov     r2, r2, lsr #8          /* r2 = ...5 */
#endif
        strh    r3, [r0, #0x03]
        strb    r2, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrh    ip, [r1, #0x03]
        ldrb    r1, [r1, #0x05]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strh    ip, [r0, #0x03]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
#ifdef __ARMEB__
        mov     r3, r2, lsr #8          /* r3 = ...0 */
        strb    r3, [r0]
        strb    r1, [r0, #0x05]
        mov     r3, r1, lsr #8          /* r3 = .234 */
        strh    r3, [r0, #0x03]
        mov     r3, r2, lsl #8          /* r3 = .01. */
        orr     r3, r3, r1, lsr #24     /* r3 = .012 */
        strh    r3, [r0, #0x01]
#else
        strb    r2, [r0]
        mov     r3, r1, lsr #24
        strb    r3, [r0, #0x05]
        mov     r3, r1, lsr #8          /* r3 = .543 */
        strh    r3, [r0, #0x03]
        mov     r3, r2, lsr #8          /* r3 = ...1 */
        orr     r3, r3, r1, lsl #8      /* r3 = 4321 */
        strh    r3, [r0, #0x01]
#endif
        RET
        LMEMCPY_6_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrh    ip, [r1, #0x03]
        ldrb    r1, [r1, #0x05]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strh    ip, [r0, #0x03]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
#ifdef __ARMEB__
        ldr     r2, [r1]                /* r2 = 0123 */
        ldrh    r3, [r1, #0x04]         /* r3 = ..45 */
        mov     r1, r2, lsr #16         /* r1 = ..01 */
        orr     r3, r3, r2, lsl#16      /* r3 = 2345 */
        strh    r1, [r0]
        str     r3, [r0, #0x02]
#else
        ldrh    r2, [r1, #0x04]         /* r2 = ..54 */
        ldr     r3, [r1]                /* r3 = 3210 */
        mov     r2, r2, lsl #16         /* r2 = 54.. */
        orr     r2, r2, r3, lsr #16     /* r2 = 5432 */
        strh    r3, [r0]
        str     r2, [r0, #0x02]
#endif
        RET
        LMEMCPY_6_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
        ldr     r2, [r1, #3]            /* BE:r2 = 345x  LE:r2 = x543 */
        mov     r1, r3, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
#ifdef __ARMEB__
        mov     r2, r2, lsr #8          /* r2 = .345 */
        orr     r2, r2, r3, lsl #24     /* r2 = 2345 */
#else
        mov     r2, r2, lsl #8          /* r2 = 543. */
        orr     r2, r2, r3, lsr #24     /* r2 = 5432 */
#endif
        strh    r1, [r0]
        str     r2, [r0, #0x02]
        RET
        LMEMCPY_6_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]
        ldr     r3, [r1, #0x02]
        strh    r2, [r0]
        str     r3, [r0, #0x02]
        RET
        LMEMCPY_6_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldrb    r3, [r1]                /* r3 = ...0 */
        ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
        ldrb    r1, [r1, #0x05]         /* r1 = ...5 */
#ifdef __ARMEB__
        mov     r3, r3, lsl #8          /* r3 = ..0. */
        orr     r3, r3, r2, lsr #24     /* r3 = ..01 */
        orr     r1, r1, r2, lsl #8      /* r1 = 2345 */
#else
        orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
        mov     r1, r1, lsl #24         /* r1 = 5... */
        orr     r1, r1, r2, lsr #8      /* r1 = 5432 */
#endif
        strh    r3, [r0]
        str     r1, [r0, #0x02]
        RET
        LMEMCPY_6_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
        ldrh    r1, [r1, #0x04]         /* BE:r1 = ..45  LE:r1 = ..54 */
#ifdef __ARMEB__
        mov     r3, r2, lsr #24         /* r3 = ...0 */
        strb    r3, [r0]
        mov     r2, r2, lsl #8          /* r2 = 123. */
        orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
#else
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = .321 */
        orr     r2, r2, r1, lsl #24     /* r2 = 4321 */
        mov     r1, r1, lsr #8          /* r1 = ...5 */
#endif
        str     r2, [r0, #0x01]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrh    ip, [r1, #0x03]
        ldrb    r1, [r1, #0x05]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strh    ip, [r0, #0x03]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
#ifdef __ARMEB__
        mov     r3, r2, lsr #8          /* r3 = ...0 */
        strb    r3, [r0]
        mov     r2, r2, lsl #24         /* r2 = 1... */
        orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
#else
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = ...1 */
        orr     r2, r2, r1, lsl #8      /* r2 = 4321 */
        mov     r1, r1, lsr #24         /* r1 = ...5 */
#endif
        str     r2, [r0, #0x01]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldr     r3, [r1, #0x01]
        ldrb    r1, [r1, #0x05]
        strb    r2, [r0]
        str     r3, [r0, #0x01]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD
2170
2171
2172 /******************************************************************************
2173 * Special case for 8 byte copies
2174 */
2175 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
2176 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
2177 LMEMCPY_8_PAD
2178 .Lmemcpy_8:
2179 and r2, r1, #0x03
2180 orr r2, r2, r0, lsl #2
2181 ands r2, r2, #0x0f
2182 sub r3, pc, #0x14
2183 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
2184
2185 /*
2186 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2187 */
2188 ldr r2, [r1]
2189 ldr r3, [r1, #0x04]
2190 str r2, [r0]
2191 str r3, [r0, #0x04]
2192 RET
2193 LMEMCPY_8_PAD
2194
2195 /*
2196 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2197 */
2198 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2199 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
2200 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2201 #ifdef __ARMEB__
2202 mov r3, r3, lsl #8 /* r3 = 012. */
2203 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
2204 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
2205 #else
2206 mov r3, r3, lsr #8 /* r3 = .210 */
2207 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
2208 mov r1, r1, lsl #24 /* r1 = 7... */
2209 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
2210 #endif
2211 str r3, [r0]
2212 str r2, [r0, #0x04]
2213 RET
2214 LMEMCPY_8_PAD
2215
2216 /*
2217 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2218 */
2219 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2220 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2221 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2222 #ifdef __ARMEB__
2223 mov r2, r2, lsl #16 /* r2 = 01.. */
2224 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2225 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
2226 #else
2227 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2228 mov r3, r3, lsr #16 /* r3 = ..54 */
2229 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
2230 #endif
2231 str r2, [r0]
2232 str r3, [r0, #0x04]
2233 RET
2234 LMEMCPY_8_PAD
2235
2236 /*
2237 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2238 */
2239 ldrb r3, [r1] /* r3 = ...0 */
2240 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2241 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
2242 #ifdef __ARMEB__
2243 mov r3, r3, lsl #24 /* r3 = 0... */
2244 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
2245 mov r2, r2, lsl #24 /* r2 = 4... */
2246 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
2247 #else
2248 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2249 mov r2, r2, lsr #24 /* r2 = ...4 */
2250 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
2251 #endif
2252 str r3, [r0]
2253 str r2, [r0, #0x04]
2254 RET
2255 LMEMCPY_8_PAD
2256
2257 /*
2258 * 0100: dst is 8-bit aligned, src is 32-bit aligned
2259 */
2260 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
2261 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
2262 #ifdef __ARMEB__
2263 mov r1, r3, lsr #24 /* r1 = ...0 */
2264 strb r1, [r0]
2265 mov r1, r3, lsr #8 /* r1 = .012 */
2266 strb r2, [r0, #0x07]
2267 mov r3, r3, lsl #24 /* r3 = 3... */
2268 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
2269 #else
2270 strb r3, [r0]
2271 mov r1, r2, lsr #24 /* r1 = ...7 */
2272 strb r1, [r0, #0x07]
2273 mov r1, r3, lsr #8 /* r1 = .321 */
2274 mov r3, r3, lsr #24 /* r3 = ...3 */
2275 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
2276 #endif
2277 strh r1, [r0, #0x01]
2278 str r3, [r0, #0x03]
2279 RET
2280 LMEMCPY_8_PAD
2281
2282 /*
2283 * 0101: dst is 8-bit aligned, src is 8-bit aligned
2284 */
2285 ldrb r2, [r1]
2286 ldrh r3, [r1, #0x01]
2287 ldr ip, [r1, #0x03]
2288 ldrb r1, [r1, #0x07]
2289 strb r2, [r0]
2290 strh r3, [r0, #0x01]
2291 str ip, [r0, #0x03]
2292 strb r1, [r0, #0x07]
2293 RET
2294 LMEMCPY_8_PAD
2295
2296 /*
2297 * 0110: dst is 8-bit aligned, src is 16-bit aligned
2298 */
2299 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2300 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2301 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2302 #ifdef __ARMEB__
2303 mov ip, r2, lsr #8 /* ip = ...0 */
2304 strb ip, [r0]
2305 mov ip, r2, lsl #8 /* ip = .01. */
2306 orr ip, ip, r3, lsr #24 /* ip = .012 */
2307 strb r1, [r0, #0x07]
2308 mov r3, r3, lsl #8 /* r3 = 345. */
2309 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
2310 #else
2311 strb r2, [r0] /* 0 */
2312 mov ip, r1, lsr #8 /* ip = ...7 */
2313 strb ip, [r0, #0x07] /* 7 */
2314 mov ip, r2, lsr #8 /* ip = ...1 */
2315 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2316 mov r3, r3, lsr #8 /* r3 = .543 */
2317 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
2318 #endif
2319 strh ip, [r0, #0x01]
2320 str r3, [r0, #0x03]
2321 RET
2322 LMEMCPY_8_PAD
2323
2324 /*
2325 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2326 */
2327 ldrb r3, [r1] /* r3 = ...0 */
2328 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2329 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
2330 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2331 strb r3, [r0]
2332 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
2333 #ifdef __ARMEB__
2334 strh r3, [r0, #0x01]
2335 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
2336 #else
2337 strh ip, [r0, #0x01]
2338 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
2339 #endif
2340 str r2, [r0, #0x03]
2341 strb r1, [r0, #0x07]
2342 RET
2343 LMEMCPY_8_PAD
2344
/*
 * Remaining 8-byte copy cases: destination is not 32-bit aligned
 * (case indices 1000-1111).  Each case loads all eight source bytes
 * using the widest naturally-aligned accesses the source alignment
 * permits, re-packs bytes with shifts and ORs (shift directions swap
 * between big- and little-endian builds; the "r2 = 3456"-style
 * comments track which source byte occupies which register lane,
 * byte 0 = first source byte), then stores with accesses sized to
 * the destination alignment.
 * NOTE(review): the case index is presumably (dst & 3) << 2 |
 * (src & 3), exactly as computed by .Lmemcpy_c below; the jump-table
 * setup for the 8-byte path lies before this view -- confirm there.
 * Each case must fit in its LMEMCPY_8_PAD slot.
 */
2345 /*
2346 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2347 */
2348 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2349 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2350 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2351 #ifdef __ARMEB__
2352 strh r1, [r0]
2353 mov r1, r3, lsr #16 /* r1 = ..45 */
2354 orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */
2355 #else
2356 strh r2, [r0]
2357 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
2358 mov r3, r3, lsr #16 /* r3 = ..76 */
2359 #endif
2360 str r2, [r0, #0x02]
2361 strh r3, [r0, #0x06]
2362 RET
2363 LMEMCPY_8_PAD
2364
2365 /*
2366 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2367 */
/*
 * src sits at byte offset 1 within a word, so r1 - 1 and r1 + 3 are
 * word-aligned; the extra byte (shown as "x") is shifted out below.
 */
2368 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2369 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2370 ldrb ip, [r1, #0x07] /* ip = ...7 */
2371 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2372 strh r1, [r0]
2373 #ifdef __ARMEB__
2374 mov r1, r2, lsl #24 /* r1 = 2... */
2375 orr r1, r1, r3, lsr #8 /* r1 = 2345 */
2376 orr r3, ip, r3, lsl #8 /* r3 = 4567 */
2377 #else
2378 mov r1, r2, lsr #24 /* r1 = ...2 */
2379 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
2380 mov r3, r3, lsr #24 /* r3 = ...6 */
2381 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
2382 #endif
2383 str r1, [r0, #0x02]
2384 strh r3, [r0, #0x06]
2385 RET
2386 LMEMCPY_8_PAD
2387
2388 /*
2389 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2390 */
/* Matching alignments: plain halfword/word/halfword copy, no shuffling. */
2391 ldrh r2, [r1]
2392 ldr ip, [r1, #0x02]
2393 ldrh r3, [r1, #0x06]
2394 strh r2, [r0]
2395 str ip, [r0, #0x02]
2396 strh r3, [r0, #0x06]
2397 RET
2398 LMEMCPY_8_PAD
2399
2400 /*
2401 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2402 */
/* src offset is 3 mod 4, so r1 + 5 and r1 + 1 are word-aligned loads. */
2403 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
2404 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2405 ldrb ip, [r1] /* ip = ...0 */
2406 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
2407 strh r1, [r0, #0x06]
2408 #ifdef __ARMEB__
2409 mov r3, r3, lsr #24 /* r3 = ...5 */
2410 orr r3, r3, r2, lsl #8 /* r3 = 2345 */
2411 mov r2, r2, lsr #24 /* r2 = ...1 */
2412 orr r2, r2, ip, lsl #8 /* r2 = ..01 */
2413 #else
2414 mov r3, r3, lsl #24 /* r3 = 5... */
2415 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
2416 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
2417 #endif
2418 str r3, [r0, #0x02]
2419 strh r2, [r0]
2420 RET
2421 LMEMCPY_8_PAD
2422
2423 /*
2424 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2425 */
2426 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2427 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2428 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
2429 strh r1, [r0, #0x05]
2430 #ifdef __ARMEB__
2431 strb r3, [r0, #0x07]
2432 mov r1, r2, lsr #24 /* r1 = ...0 */
2433 strb r1, [r0]
2434 mov r2, r2, lsl #8 /* r2 = 123. */
2435 orr r2, r2, r3, lsr #24 /* r2 = 1234 */
2436 str r2, [r0, #0x01]
2437 #else
2438 strb r2, [r0]
2439 mov r1, r3, lsr #24 /* r1 = ...7 */
2440 strb r1, [r0, #0x07]
2441 mov r2, r2, lsr #8 /* r2 = .321 */
2442 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
2443 str r2, [r0, #0x01]
2444 #endif
2445 RET
2446 LMEMCPY_8_PAD
2447
2448 /*
2449 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2450 */
/* dst and src both at offset 1 mod 4 (same phase): byte/half/word loads line up. */
2451 ldrb r3, [r1] /* r3 = ...0 */
2452 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
2453 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2454 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2455 strb r3, [r0]
2456 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
2457 #ifdef __ARMEB__
2458 strh ip, [r0, #0x05]
2459 orr r2, r3, r2, lsl #16 /* r2 = 1234 */
2460 #else
2461 strh r3, [r0, #0x05]
2462 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
2463 #endif
2464 str r2, [r0, #0x01]
2465 strb r1, [r0, #0x07]
2466 RET
2467 LMEMCPY_8_PAD
2468
2469 /*
2470 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2471 */
2472 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2473 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2474 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2475 #ifdef __ARMEB__
2476 mov ip, r2, lsr #8 /* ip = ...0 */
2477 strb ip, [r0]
2478 mov ip, r2, lsl #24 /* ip = 1... */
2479 orr ip, ip, r3, lsr #8 /* ip = 1234 */
2480 strb r1, [r0, #0x07]
2481 mov r1, r1, lsr #8 /* r1 = ...6 */
2482 orr r1, r1, r3, lsl #8 /* r1 = 3456 */
2483 #else
2484 strb r2, [r0]
2485 mov ip, r2, lsr #8 /* ip = ...1 */
2486 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2487 mov r2, r1, lsr #8 /* r2 = ...7 */
2488 strb r2, [r0, #0x07]
2489 mov r1, r1, lsl #8 /* r1 = .76. */
2490 orr r1, r1, r3, lsr #24 /* r1 = .765 */
2491 #endif
2492 str ip, [r0, #0x01]
2493 strh r1, [r0, #0x05]
2494 RET
2495 LMEMCPY_8_PAD
2496
2497 /*
2498 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2499 */
/* Both at offset 3 mod 4, so r1 + 1 is word-aligned and r1 + 5 halfword-aligned. */
2500 ldrb r2, [r1]
2501 ldr ip, [r1, #0x01]
2502 ldrh r3, [r1, #0x05]
2503 ldrb r1, [r1, #0x07]
2504 strb r2, [r0]
2505 str ip, [r0, #0x01]
2506 strh r3, [r0, #0x05]
2507 strb r1, [r0, #0x07]
2508 RET
2509 LMEMCPY_8_PAD
2510
2511 /******************************************************************************
2512 * Special case for 12 byte copies
2513 */
/*
 * Each alignment case below is padded to 2^LMEMCPY_C_LOG2 = 128 bytes
 * so it can be reached by indexed arithmetic on pc (see addne below).
 */
2514 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
2515 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
2516 LMEMCPY_C_PAD
2517 .Lmemcpy_c:
/*
 * On entry: r0 = dst, r1 = src, length is exactly 12.
 * Build a 4-bit dispatch index: (dst & 3) << 2 | (src & 3).
 */
2518 and r2, r1, #0x03
2519 orr r2, r2, r0, lsl #2
2520 ands r2, r2, #0x0f
/*
 * Computed goto: pc reads as (this insn + 8) on ARM, and the five
 * dispatch instructions occupy 0x14 bytes, so r3 = .Lmemcpy_c, which
 * the pad above forced to 128-byte alignment.  Case N starts at
 * .Lmemcpy_c + N * 128; index 0 (Z set, "ne" fails) falls through to
 * the fully-aligned case.  Do not add or remove instructions here
 * without adjusting the #0x14 offset.
 */
2521 sub r3, pc, #0x14
2522 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
2523
/*
 * 12-byte cases with dst 32-bit aligned (plus the matching-offset 0101
 * case).  Same technique as the 8-byte cases: widest aligned loads,
 * endian-dependent shift/OR re-packing, stores sized to dst alignment.
 * Each case must fit within its 128-byte LMEMCPY_C_PAD slot.
 */
2524 /*
2525 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2526 */
2527 ldr r2, [r1]
2528 ldr r3, [r1, #0x04]
2529 ldr r1, [r1, #0x08]
2530 str r2, [r0]
2531 str r3, [r0, #0x04]
2532 str r1, [r0, #0x08]
2533 RET
2534 LMEMCPY_C_PAD
2535
2536 /*
2537 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2538 */
/* src offset 1 mod 4: r1 + 7, r1 + 3 and r1 - 1 are word-aligned; "x" is junk. */
2539 ldrb r2, [r1, #0xb] /* r2 = ...B */
2540 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2541 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2542 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2543 #ifdef __ARMEB__
2544 orr r2, r2, ip, lsl #8 /* r2 = 89AB */
2545 str r2, [r0, #0x08]
2546 mov r2, ip, lsr #24 /* r2 = ...7 */
2547 orr r2, r2, r3, lsl #8 /* r2 = 4567 */
2548 mov r1, r1, lsl #8 /* r1 = 012. */
2549 orr r1, r1, r3, lsr #24 /* r1 = 0123 */
2550 #else
2551 mov r2, r2, lsl #24 /* r2 = B... */
2552 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
2553 str r2, [r0, #0x08]
2554 mov r2, ip, lsl #24 /* r2 = 7... */
2555 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
2556 mov r1, r1, lsr #8 /* r1 = .210 */
2557 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
2558 #endif
2559 str r2, [r0, #0x04]
2560 str r1, [r0]
2561 RET
2562 LMEMCPY_C_PAD
2563
2564 /*
2565 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2566 */
2567 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2568 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2569 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2570 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2571 #ifdef __ARMEB__
2572 mov r2, r2, lsl #16 /* r2 = 01.. */
2573 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2574 str r2, [r0]
2575 mov r3, r3, lsl #16 /* r3 = 45.. */
2576 orr r3, r3, ip, lsr #16 /* r3 = 4567 */
2577 orr r1, r1, ip, lsl #16 /* r1 = 89AB */
2578 #else
2579 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2580 str r2, [r0]
2581 mov r3, r3, lsr #16 /* r3 = ..54 */
2582 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
2583 mov r1, r1, lsl #16 /* r1 = BA.. */
2584 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
2585 #endif
2586 str r3, [r0, #0x04]
2587 str r1, [r0, #0x08]
2588 RET
2589 LMEMCPY_C_PAD
2590
2591 /*
2592 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2593 */
/* src offset 3 mod 4: r1 + 1, r1 + 5, r1 + 9 are word-aligned; "x" is junk. */
2594 ldrb r2, [r1] /* r2 = ...0 */
2595 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2596 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2597 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2598 #ifdef __ARMEB__
2599 mov r2, r2, lsl #24 /* r2 = 0... */
2600 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
2601 str r2, [r0]
2602 mov r3, r3, lsl #24 /* r3 = 4... */
2603 orr r3, r3, ip, lsr #8 /* r3 = 4567 */
2604 mov r1, r1, lsr #8 /* r1 = .9AB */
2605 orr r1, r1, ip, lsl #24 /* r1 = 89AB */
2606 #else
2607 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
2608 str r2, [r0]
2609 mov r3, r3, lsr #24 /* r3 = ...4 */
2610 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
2611 mov r1, r1, lsl #8 /* r1 = BA9. */
2612 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
2613 #endif
2614 str r3, [r0, #0x04]
2615 str r1, [r0, #0x08]
2616 RET
2617 LMEMCPY_C_PAD
2618
2619 /*
2620 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2621 */
2622 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2623 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2624 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
2625 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
2626 strh r1, [r0, #0x01]
2627 #ifdef __ARMEB__
2628 mov r1, r2, lsr #24 /* r1 = ...0 */
2629 strb r1, [r0]
2630 mov r1, r2, lsl #24 /* r1 = 3... */
2631 orr r2, r1, r3, lsr #8 /* r1 = 3456 */
2632 mov r1, r3, lsl #24 /* r1 = 7... */
2633 orr r1, r1, ip, lsr #8 /* r1 = 789A */
2634 #else
2635 strb r2, [r0]
2636 mov r1, r2, lsr #24 /* r1 = ...3 */
2637 orr r2, r1, r3, lsl #8 /* r1 = 6543 */
2638 mov r1, r3, lsr #24 /* r1 = ...7 */
2639 orr r1, r1, ip, lsl #8 /* r1 = A987 */
2640 mov ip, ip, lsr #24 /* ip = ...B */
2641 #endif
2642 str r2, [r0, #0x03]
2643 str r1, [r0, #0x07]
/* On BE, byte B is already the low byte of ip, so no shift was needed above. */
2644 strb ip, [r0, #0x0b]
2645 RET
2646 LMEMCPY_C_PAD
2647
2648 /*
2649 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2650 */
/* Same phase (both offset 1 mod 4): aligned byte/half/word copies, no shuffling. */
2651 ldrb r2, [r1]
2652 ldrh r3, [r1, #0x01]
2653 ldr ip, [r1, #0x03]
2654 strb r2, [r0]
2655 ldr r2, [r1, #0x07]
2656 ldrb r1, [r1, #0x0b]
2657 strh r3, [r0, #0x01]
2658 str ip, [r0, #0x03]
2659 str r2, [r0, #0x07]
2660 strb r1, [r0, #0x0b]
2661 RET
2662 LMEMCPY_C_PAD
2663
/*
 * 12-byte cases 0110-1011: dst at byte offset 1 (continued) and dst
 * 16-bit aligned.  Same load/re-pack/store technique; each case fits
 * in its 128-byte LMEMCPY_C_PAD slot.
 */
2664 /*
2665 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2666 */
2667 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2668 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2669 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2670 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2671 #ifdef __ARMEB__
/* ror #8 puts byte 0 in the low lane for the strb below. */
2672 mov r2, r2, ror #8 /* r2 = 1..0 */
2673 strb r2, [r0]
2674 mov r2, r2, lsr #16 /* r2 = ..1. */
2675 orr r2, r2, r3, lsr #24 /* r2 = ..12 */
2676 strh r2, [r0, #0x01]
2677 mov r2, r3, lsl #8 /* r2 = 345. */
2678 orr r3, r2, ip, lsr #24 /* r3 = 3456 */
2679 mov r2, ip, lsl #8 /* r2 = 789. */
2680 orr r2, r2, r1, lsr #8 /* r2 = 789A */
2681 #else
2682 strb r2, [r0]
2683 mov r2, r2, lsr #8 /* r2 = ...1 */
2684 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2685 strh r2, [r0, #0x01]
2686 mov r2, r3, lsr #8 /* r2 = .543 */
2687 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
2688 mov r2, ip, lsr #8 /* r2 = .987 */
2689 orr r2, r2, r1, lsl #24 /* r2 = A987 */
2690 mov r1, r1, lsr #8 /* r1 = ...B */
2691 #endif
2692 str r3, [r0, #0x03]
2693 str r2, [r0, #0x07]
2694 strb r1, [r0, #0x0b]
2695 RET
2696 LMEMCPY_C_PAD
2697
2698 /*
2699 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2700 */
/* src offset 3 mod 4: r1 + 1, r1 + 5, r1 + 9 are word-aligned; "x" is junk. */
2701 ldrb r2, [r1]
2702 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2703 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2704 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2705 strb r2, [r0]
2706 #ifdef __ARMEB__
2707 mov r2, r3, lsr #16 /* r2 = ..12 */
2708 strh r2, [r0, #0x01]
2709 mov r3, r3, lsl #16 /* r3 = 34.. */
2710 orr r3, r3, ip, lsr #16 /* r3 = 3456 */
2711 mov ip, ip, lsl #16 /* ip = 78.. */
2712 orr ip, ip, r1, lsr #16 /* ip = 789A */
2713 mov r1, r1, lsr #8 /* r1 = .9AB */
2714 #else
2715 strh r3, [r0, #0x01]
2716 mov r3, r3, lsr #16 /* r3 = ..43 */
2717 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
2718 mov ip, ip, lsr #16 /* ip = ..87 */
2719 orr ip, ip, r1, lsl #16 /* ip = A987 */
2720 mov r1, r1, lsr #16 /* r1 = ..xB */
2721 #endif
2722 str r3, [r0, #0x03]
2723 str ip, [r0, #0x07]
/* strb stores the low byte only, so the residual "x"/"9AB" lanes are harmless. */
2724 strb r1, [r0, #0x0b]
2725 RET
2726 LMEMCPY_C_PAD
2727
2728 /*
2729 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2730 */
2731 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
2732 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2733 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
2734 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2735 #ifdef __ARMEB__
2736 strh r1, [r0]
2737 mov r1, ip, lsl #16 /* r1 = 23.. */
2738 orr r1, r1, r3, lsr #16 /* r1 = 2345 */
2739 mov r3, r3, lsl #16 /* r3 = 67.. */
2740 orr r3, r3, r2, lsr #16 /* r3 = 6789 */
2741 #else
2742 strh ip, [r0]
2743 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
2744 mov r3, r3, lsr #16 /* r3 = ..76 */
2745 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
2746 mov r2, r2, lsr #16 /* r2 = ..BA */
2747 #endif
2748 str r1, [r0, #0x02]
2749 str r3, [r0, #0x06]
2750 strh r2, [r0, #0x0a]
2751 RET
2752 LMEMCPY_C_PAD
2753
2754 /*
2755 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2756 */
/* src offset 1 mod 4: r1 - 1, r1 + 3, r1 + 7 are word-aligned; "x" is junk. */
2757 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2758 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2759 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
2760 strh ip, [r0]
2761 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2762 ldrb r1, [r1, #0x0b] /* r1 = ...B */
2763 #ifdef __ARMEB__
2764 mov r2, r2, lsl #24 /* r2 = 2... */
2765 orr r2, r2, r3, lsr #8 /* r2 = 2345 */
2766 mov r3, r3, lsl #24 /* r3 = 6... */
2767 orr r3, r3, ip, lsr #8 /* r3 = 6789 */
2768 orr r1, r1, ip, lsl #8 /* r1 = 89AB */
2769 #else
2770 mov r2, r2, lsr #24 /* r2 = ...2 */
2771 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
2772 mov r3, r3, lsr #24 /* r3 = ...6 */
2773 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
2774 mov r1, r1, lsl #8 /* r1 = ..B. */
2775 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
2776 #endif
2777 str r2, [r0, #0x02]
2778 str r3, [r0, #0x06]
2779 strh r1, [r0, #0x0a]
2780 RET
2781 LMEMCPY_C_PAD
2782
2783 /*
2784 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2785 */
/* Matching alignments: half/word/word/half copy, no shuffling. */
2786 ldrh r2, [r1]
2787 ldr r3, [r1, #0x02]
2788 ldr ip, [r1, #0x06]
2789 ldrh r1, [r1, #0x0a]
2790 strh r2, [r0]
2791 str r3, [r0, #0x02]
2792 str ip, [r0, #0x06]
2793 strh r1, [r0, #0x0a]
2794 RET
2795 LMEMCPY_C_PAD
2796
2797 /*
2798 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2799 */
/* src offset 3 mod 4: r1 + 9, r1 + 5, r1 + 1 are word-aligned; "x" is junk. */
2800 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
2801 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
2802 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
2803 strh ip, [r0, #0x0a]
2804 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2805 ldrb r1, [r1] /* r1 = ...0 */
2806 #ifdef __ARMEB__
2807 mov r2, r2, lsr #24 /* r2 = ...9 */
2808 orr r2, r2, r3, lsl #8 /* r2 = 6789 */
2809 mov r3, r3, lsr #24 /* r3 = ...5 */
2810 orr r3, r3, ip, lsl #8 /* r3 = 2345 */
2811 mov r1, r1, lsl #8 /* r1 = ..0. */
2812 orr r1, r1, ip, lsr #24 /* r1 = ..01 */
2813 #else
2814 mov r2, r2, lsl #24 /* r2 = 9... */
2815 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
2816 mov r3, r3, lsl #24 /* r3 = 5... */
2817 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
2818 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
2819 #endif
2820 str r2, [r0, #0x06]
2821 str r3, [r0, #0x02]
2822 strh r1, [r0]
2823 RET
2824 LMEMCPY_C_PAD
2825
/*
 * Final 12-byte cases: dst at byte offset 3.  Each fits in its
 * 128-byte LMEMCPY_C_PAD slot; the last case needs no trailing pad.
 */
2826 /*
2827 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2828 */
2829 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2830 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
2831 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
2832 #ifdef __ARMEB__
2833 mov r3, r2, lsr #24 /* r3 = ...0 */
2834 strb r3, [r0]
2835 mov r2, r2, lsl #8 /* r2 = 123. */
2836 orr r2, r2, ip, lsr #24 /* r2 = 1234 */
2837 str r2, [r0, #0x01]
2838 mov r2, ip, lsl #8 /* r2 = 567. */
2839 orr r2, r2, r1, lsr #24 /* r2 = 5678 */
2840 str r2, [r0, #0x05]
2841 mov r2, r1, lsr #8 /* r2 = ..9A */
2842 strh r2, [r0, #0x09]
2843 strb r1, [r0, #0x0b]
2844 #else
2845 strb r2, [r0]
2846 mov r3, r2, lsr #8 /* r3 = .321 */
2847 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
2848 str r3, [r0, #0x01]
2849 mov r3, ip, lsr #8 /* r3 = .765 */
2850 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
2851 str r3, [r0, #0x05]
2852 mov r1, r1, lsr #8 /* r1 = .BA9 */
2853 strh r1, [r0, #0x09]
2854 mov r1, r1, lsr #16 /* r1 = ...B */
2855 strb r1, [r0, #0x0b]
2856 #endif
2857 RET
2858 LMEMCPY_C_PAD
2859
2860 /*
2861 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2862 */
/* src offset 1 mod 4: r1 + 7, r1 + 3, r1 - 1 are word-aligned; "x" is junk. */
2863 ldrb r2, [r1, #0x0b] /* r2 = ...B */
2864 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
2865 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2866 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2867 strb r2, [r0, #0x0b]
2868 #ifdef __ARMEB__
2869 strh r3, [r0, #0x09]
2870 mov r3, r3, lsr #16 /* r3 = ..78 */
2871 orr r3, r3, ip, lsl #16 /* r3 = 5678 */
2872 mov ip, ip, lsr #16 /* ip = ..34 */
2873 orr ip, ip, r1, lsl #16 /* ip = 1234 */
2874 mov r1, r1, lsr #16 /* r1 = ..x0 */
2875 #else
2876 mov r2, r3, lsr #16 /* r2 = ..A9 */
2877 strh r2, [r0, #0x09]
2878 mov r3, r3, lsl #16 /* r3 = 87.. */
2879 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
2880 mov ip, ip, lsl #16 /* ip = 43.. */
2881 orr ip, ip, r1, lsr #16 /* ip = 4321 */
2882 mov r1, r1, lsr #8 /* r1 = .210 */
2883 #endif
2884 str r3, [r0, #0x05]
2885 str ip, [r0, #0x01]
/* strb stores only the low byte, so the residual "x" lane is discarded. */
2886 strb r1, [r0]
2887 RET
2888 LMEMCPY_C_PAD
2889
2890 /*
2891 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2892 */
/* This case keeps fully separate BE/LE sequences instead of sharing stores. */
2893 #ifdef __ARMEB__
2894 ldrh r2, [r1, #0x0a] /* r2 = ..AB */
2895 ldr ip, [r1, #0x06] /* ip = 6789 */
2896 ldr r3, [r1, #0x02] /* r3 = 2345 */
2897 ldrh r1, [r1] /* r1 = ..01 */
2898 strb r2, [r0, #0x0b]
2899 mov r2, r2, lsr #8 /* r2 = ...A */
2900 orr r2, r2, ip, lsl #8 /* r2 = 789A */
2901 mov ip, ip, lsr #8 /* ip = .678 */
2902 orr ip, ip, r3, lsl #24 /* ip = 5678 */
2903 mov r3, r3, lsr #8 /* r3 = .234 */
2904 orr r3, r3, r1, lsl #24 /* r3 = 1234 */
2905 mov r1, r1, lsr #8 /* r1 = ...0 */
2906 strb r1, [r0]
2907 str r3, [r0, #0x01]
2908 str ip, [r0, #0x05]
2909 strh r2, [r0, #0x09]
2910 #else
2911 ldrh r2, [r1] /* r2 = ..10 */
2912 ldr r3, [r1, #0x02] /* r3 = 5432 */
2913 ldr ip, [r1, #0x06] /* ip = 9876 */
2914 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
2915 strb r2, [r0]
2916 mov r2, r2, lsr #8 /* r2 = ...1 */
2917 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2918 mov r3, r3, lsr #24 /* r3 = ...5 */
2919 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
2920 mov ip, ip, lsr #24 /* ip = ...9 */
2921 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
2922 mov r1, r1, lsr #8 /* r1 = ...B */
2923 str r2, [r0, #0x01]
2924 str r3, [r0, #0x05]
2925 strh ip, [r0, #0x09]
2926 strb r1, [r0, #0x0b]
2927 #endif
2928 RET
2929 LMEMCPY_C_PAD
2930
2931 /*
2932 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2933 */
/* Same phase (both offset 3 mod 4): aligned byte/word/half copies, no shuffling. */
2934 ldrb r2, [r1]
2935 ldr r3, [r1, #0x01]
2936 ldr ip, [r1, #0x05]
2937 strb r2, [r0]
2938 ldrh r2, [r1, #0x09]
2939 ldrb r1, [r1, #0x0b]
2940 str r3, [r0, #0x01]
2941 str ip, [r0, #0x05]
2942 strh r2, [r0, #0x09]
2943 strb r1, [r0, #0x0b]
2944 RET
2945 END(memcpy)
2946 #endif /* _ARM_ARCH_5E */
2947
2948 #ifdef GPROF
2949
/*
 * Dummy marker symbols for kernel profiling.  Each is a single nop so
 * the symbol has a distinct, non-zero-sized address range.
 * NOTE(review): these mirror the conventional BSD gprof marker symbols
 * (user mode, trap begin/end, interrupt begin/end) used to bucket
 * sampled time outside normal kernel functions -- confirm against the
 * kernel's profiling/clock code before relying on that description.
 */
2950 ENTRY(user)
2951 nop
2952 END(user)
2953 ENTRY(btrap)
2954 nop
2955 END(btrap)
2956 ENTRY(etrap)
2957 nop
2958 END(etrap)
2959 ENTRY(bintr)
2960 nop
2961 END(bintr)
2962 ENTRY(eintr)
2963 nop
2964 END(eintr)
2965 #endif
/* Cache object: c37d6b076d8d2940d0c934d20e4af293 */