FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 /*
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
29 *
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
46 * written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
59 */
60 /*
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
63 *
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 *
76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86 * POSSIBILITY OF SUCH DAMAGE.
87 */
88
89 #include <machine/asm.h>
90 #include <machine/asmacros.h>
91 __FBSDID("$FreeBSD: releng/10.0/sys/arm/arm/support.S 248361 2013-03-16 02:48:49Z andrew $");
92
93 #include "assym.s"
94
/*
 * Literal pool: addresses of kernel variables consulted by bzero/memcpy
 * below.  _arm_memcpy/_arm_bzero, when non-NULL, point at an optional
 * platform-supplied block-copy/zero routine; _min_memcpy_size and
 * _min_bzero_size give the smallest request worth dispatching to it.
 */
95 .L_arm_memcpy:
96 .word _C_LABEL(_arm_memcpy)
97 .L_arm_bzero:
98 .word _C_LABEL(_arm_bzero)
99 .L_min_memcpy_size:
100 .word _C_LABEL(_min_memcpy_size)
101 .L_min_bzero_size:
102 .word _C_LABEL(_min_bzero_size)
103 /*
104 * memset: Sets a block of memory to the specified value
105 *
106 * On entry:
107 * r0 - dest address
108 * r1 - byte to write
109 * r2 - number of bytes to write
110 *
111 * On exit:
112 * r0 - dest address
113 */
114 /* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * bzero: r0 = dest address, r1 = length in bytes.
 * If a platform _arm_bzero hook is registered and the request meets the
 * minimum size threshold, try the hook first; on hook failure (non-zero
 * return) or otherwise, fall into the common memset body with fill
 * byte 0 (do_memset expects r0 = dest, r1 = count, r3 = fill byte).
 */
115 ENTRY(bzero)
116 ldr r3, .L_arm_bzero
117 ldr r3, [r3]
118 cmp r3, #0 /* no hook registered? */
119 beq .Lnormal0
120 ldr r2, .L_min_bzero_size
121 ldr r2, [r2]
122 cmp r1, r2 /* below hook threshold? */
123 blt .Lnormal0
124 stmfd sp!, {r0, r1, lr}
125 mov r2, #0 /* third argument to the hook */
126 mov lr, pc /* pre-ARMv5-style indirect call */
127 mov pc, r3
128 cmp r0, #0 /* hook returns 0 on success */
129 ldmfd sp!, {r0, r1, lr} /* ldm leaves the flags intact */
130 RETeq /* hook handled it */
131 .Lnormal0:
132 mov r3, #0x00 /* fill byte = 0 */
133 b do_memset
134
135 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
/*
 * memset: r0 = dest, r1 = fill value (low byte used), r2 = count.
 * Register contract inside do_memset: r0 is preserved as the return
 * value, ip is the write cursor, r1 is the remaining count, r3 is the
 * fill byte (replicated to 16/32 bits below), r2 is scratch.
 * The loops rely on conditional execution after "subs" rather than
 * branches; "strged"/"stmgeia" are GE-conditional strd/stmia (old
 * pre-UAL spellings).
 */
136 ENTRY(memset)
137 and r3, r1, #0xff /* We deal with bytes */
138 mov r1, r2
139 do_memset:
140 cmp r1, #0x04 /* Do we have less than 4 bytes */
141 mov ip, r0
142 blt .Lmemset_lessthanfour
143
144 /* Ok first we will word align the address */
145 ands r2, ip, #0x03 /* Get the bottom two bits */
146 bne .Lmemset_wordunaligned /* The address is not word aligned */
147
148 /* We are now word aligned */
149 .Lmemset_wordaligned:
150 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
151 #ifdef _ARM_ARCH_5E
152 tst ip, #0x04 /* Quad-align for armv5e */
153 #else
154 cmp r1, #0x10
155 #endif
156 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
157 #ifdef _ARM_ARCH_5E
158 subne r1, r1, #0x04 /* Quad-align if necessary */
159 strne r3, [ip], #0x04
160 cmp r1, #0x10
161 #endif
162 blt .Lmemset_loop4 /* If less than 16 then use words */
163 mov r2, r3 /* Duplicate data */
164 cmp r1, #0x80 /* If < 128 then skip the big loop */
165 blt .Lmemset_loop32
166
167 /* Do 128 bytes at a time */
168 .Lmemset_loop128:
169 subs r1, r1, #0x80
170 #ifdef _ARM_ARCH_5E
171 strged r2, [ip], #0x08
172 strged r2, [ip], #0x08
173 strged r2, [ip], #0x08
174 strged r2, [ip], #0x08
175 strged r2, [ip], #0x08
176 strged r2, [ip], #0x08
177 strged r2, [ip], #0x08
178 strged r2, [ip], #0x08
179 strged r2, [ip], #0x08
180 strged r2, [ip], #0x08
181 strged r2, [ip], #0x08
182 strged r2, [ip], #0x08
183 strged r2, [ip], #0x08
184 strged r2, [ip], #0x08
185 strged r2, [ip], #0x08
186 strged r2, [ip], #0x08
187 #else
188 stmgeia ip!, {r2-r3}
189 stmgeia ip!, {r2-r3}
190 stmgeia ip!, {r2-r3}
191 stmgeia ip!, {r2-r3}
192 stmgeia ip!, {r2-r3}
193 stmgeia ip!, {r2-r3}
194 stmgeia ip!, {r2-r3}
195 stmgeia ip!, {r2-r3}
196 stmgeia ip!, {r2-r3}
197 stmgeia ip!, {r2-r3}
198 stmgeia ip!, {r2-r3}
199 stmgeia ip!, {r2-r3}
200 stmgeia ip!, {r2-r3}
201 stmgeia ip!, {r2-r3}
202 stmgeia ip!, {r2-r3}
203 stmgeia ip!, {r2-r3}
204 #endif
205 bgt .Lmemset_loop128
206 RETeq /* Zero length so just exit */
207
208 add r1, r1, #0x80 /* Adjust for extra sub */
209
210 /* Do 32 bytes at a time */
211 .Lmemset_loop32:
212 subs r1, r1, #0x20
213 #ifdef _ARM_ARCH_5E
214 strged r2, [ip], #0x08
215 strged r2, [ip], #0x08
216 strged r2, [ip], #0x08
217 strged r2, [ip], #0x08
218 #else
219 stmgeia ip!, {r2-r3}
220 stmgeia ip!, {r2-r3}
221 stmgeia ip!, {r2-r3}
222 stmgeia ip!, {r2-r3}
223 #endif
224 bgt .Lmemset_loop32
225 RETeq /* Zero length so just exit */
226
227 adds r1, r1, #0x10 /* Partially adjust for extra sub */
228
229 /* Deal with 16 bytes or more */
230 #ifdef _ARM_ARCH_5E
231 strged r2, [ip], #0x08
232 strged r2, [ip], #0x08
233 #else
234 stmgeia ip!, {r2-r3}
235 stmgeia ip!, {r2-r3}
236 #endif
237 RETeq /* Zero length so just exit */
238
239 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
240
241 /* We have at least 4 bytes so copy as words */
242 .Lmemset_loop4:
243 subs r1, r1, #0x04
244 strge r3, [ip], #0x04
245 bgt .Lmemset_loop4
246 RETeq /* Zero length so just exit */
247
248 #ifdef _ARM_ARCH_5E
249 /* Compensate for 64-bit alignment check */
250 adds r1, r1, #0x04
251 RETeq
252 cmp r1, #2
253 #else
254 cmp r1, #-2
255 #endif
256
257 strb r3, [ip], #0x01 /* Set 1 byte */
258 strgeb r3, [ip], #0x01 /* Set another byte */
259 strgtb r3, [ip] /* and a third */
260 RET /* Exit */
261
262 .Lmemset_wordunaligned:
263 rsb r2, r2, #0x004
264 strb r3, [ip], #0x01 /* Set 1 byte */
265 cmp r2, #0x02
266 strgeb r3, [ip], #0x01 /* Set another byte */
267 sub r1, r1, r2
268 strgtb r3, [ip], #0x01 /* and a third */
269 cmp r1, #0x04 /* More than 4 bytes left? */
270 bge .Lmemset_wordaligned /* Yup */
271
272 .Lmemset_lessthanfour:
273 cmp r1, #0x00
274 RETeq /* Zero length so exit */
275 strb r3, [ip], #0x01 /* Set 1 byte */
276 cmp r1, #0x02
277 strgeb r3, [ip], #0x01 /* Set another byte */
278 strgtb r3, [ip] /* and a third */
279 RET /* Exit */
280 END(bzero)
281 END(memset)
282
/*
 * bcmp: r0 = ptr1, r1 = ptr2, r2 = length.
 * Returns 0 in r0 if the buffers match; otherwise the (signed)
 * difference of the first mismatching byte pair, memcmp-style.
 * ip shadows ptr1 so r0 stays free for the return value.
 */
283 ENTRY(bcmp)
284 mov ip, r0
285 cmp r2, #0x06
286 beq .Lmemcmp_6bytes /* special-cased hot path (network stack) */
287 mov r0, #0x00
288
289 /* Are both addresses aligned the same way? */
290 cmp r2, #0x00
291 eornes r3, ip, r1
292 RETeq /* len == 0, or same addresses! */
293 tst r3, #0x03
294 subne r2, r2, #0x01
295 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
296
297 /* Word-align the addresses, if necessary */
/*
 * Computed jump: r3 = 3 * ((r1 - 5) & 3), then pc += r3 << 3, i.e.
 * skip 24 bytes (one 6-instruction compare chunk, below) per unit,
 * relying on pc reading as the address of this instruction + 8.
 */
298 sub r3, r1, #0x05
299 ands r3, r3, #0x03
300 add r3, r3, r3, lsl #1
301 addne pc, pc, r3, lsl #3
302 nop
303
304 /* Compare up to 3 bytes */
305 ldrb r0, [ip], #0x01
306 ldrb r3, [r1], #0x01
307 subs r0, r0, r3
308 RETne
309 subs r2, r2, #0x01
310 RETeq
311
312 /* Compare up to 2 bytes */
313 ldrb r0, [ip], #0x01
314 ldrb r3, [r1], #0x01
315 subs r0, r0, r3
316 RETne
317 subs r2, r2, #0x01
318 RETeq
319
320 /* Compare 1 byte */
321 ldrb r0, [ip], #0x01
322 ldrb r3, [r1], #0x01
323 subs r0, r0, r3
324 RETne
325 subs r2, r2, #0x01
326 RETeq
327
328 /* Compare 4 bytes at a time, if possible */
329 subs r2, r2, #0x04
330 bcc .Lmemcmp_bytewise
331 .Lmemcmp_word_aligned:
332 ldr r0, [ip], #0x04
333 ldr r3, [r1], #0x04
334 subs r2, r2, #0x04
335 cmpcs r0, r3
336 beq .Lmemcmp_word_aligned
337 sub r0, r0, r3
338
339 /* Correct for extra subtraction, and check if done */
340 adds r2, r2, #0x04
341 cmpeq r0, #0x00 /* If done, did all bytes match? */
342 RETeq /* Yup. Just return */
343
344 /* Re-do the final word byte-wise */
345 sub ip, ip, #0x04
346 sub r1, r1, #0x04
347
348 .Lmemcmp_bytewise:
349 add r2, r2, #0x03
350 .Lmemcmp_bytewise2:
351 ldrb r0, [ip], #0x01
352 ldrb r3, [r1], #0x01
353 subs r2, r2, #0x01
354 cmpcs r0, r3
355 beq .Lmemcmp_bytewise2
356 sub r0, r0, r3
357 RET
358
359 /*
360 * 6 byte compares are very common, thanks to the network stack.
361 * This code is hand-scheduled to reduce the number of stalls for
362 * load results. Everything else being equal, this will be ~32%
363 * faster than a byte-wise memcmp.
364 */
365 .align 5
366 .Lmemcmp_6bytes:
367 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
368 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
369 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
370 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
371 ldreqb r3, [ip, #0x01] /* r3 = b1#1 */
372 RETne /* Return if mismatch on #0 */
373 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
374 ldreqb r3, [r1, #0x02] /* r3 = b2#2 */
375 ldreqb r0, [ip, #0x02] /* r0 = b1#2 */
376 RETne /* Return if mismatch on #1 */
377 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
378 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
379 ldreqb r3, [ip, #0x03] /* r3 = b1#3 */
380 RETne /* Return if mismatch on #2 */
381 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
382 ldreqb r3, [r1, #0x04] /* r3 = b2#4 */
383 ldreqb r0, [ip, #0x04] /* r0 = b1#4 */
384 RETne /* Return if mismatch on #3 */
385 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
386 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
387 ldreqb r3, [ip, #0x05] /* r3 = b1#5 */
388 RETne /* Return if mismatch on #4 */
389 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
390 RET
391 END(bcmp)
392
/*
 * bcopy(src, dst, len) has its first two arguments in the opposite
 * order from memmove(dst, src, len).  Swap r0 and r1 with the
 * classic three-XOR trick (no scratch register needed) and fall
 * straight through into memmove below.
 */
393 ENTRY(bcopy)
394 /* switch the source and destination registers */
395 eor r0, r1, r0
396 eor r1, r0, r1
397 eor r0, r1, r0
/*
 * memmove: r0 = dest, r1 = src, r2 = len; returns dest in r0.
 * Overlap-safe: if the regions do not overlap, tail-call memcpy;
 * if src < dst with overlap, copy backwards (.Lmemmove_backwards),
 * otherwise copy forwards.  The forward path below dispatches on
 * destination and source word alignment.
 */
398 ENTRY(memmove)
399 /* Do the buffers overlap? */
400 cmp r0, r1
401 RETeq /* Bail now if src/dst are the same */
402 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
403 subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */
404 cmp r3, r2 /* if (r3 < len) we have an overlap */
405 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
406
407 /* Determine copy direction */
408 cmp r1, r0
409 bcc .Lmemmove_backwards
410
411 moveq r0, #0 /* Quick abort for len=0 */
412 RETeq
413
414 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
415 subs r2, r2, #4
416 blt .Lmemmove_fl4 /* less than 4 bytes */
417 ands r12, r0, #3
418 bne .Lmemmove_fdestul /* oh unaligned destination addr */
419 ands r12, r1, #3
420 bne .Lmemmove_fsrcul /* oh unaligned source addr */
421
421
422 .Lmemmove_ft8:
423 /* We have aligned source and destination */
424 subs r2, r2, #8
425 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
426 subs r2, r2, #0x14
427 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
428 stmdb sp!, {r4} /* borrow r4 */
429
430 /* blat 32 bytes at a time */
431 /* XXX for really big copies perhaps we should use more registers */
432 .Lmemmove_floop32:
433 ldmia r1!, {r3, r4, r12, lr}
434 stmia r0!, {r3, r4, r12, lr}
435 ldmia r1!, {r3, r4, r12, lr}
436 stmia r0!, {r3, r4, r12, lr}
437 subs r2, r2, #0x20
438 bge .Lmemmove_floop32
439
440 cmn r2, #0x10
441 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
442 stmgeia r0!, {r3, r4, r12, lr}
443 subge r2, r2, #0x10
444 ldmia sp!, {r4} /* return r4 */
445
446 .Lmemmove_fl32:
447 adds r2, r2, #0x14
448
449 /* blat 12 bytes at a time */
450 .Lmemmove_floop12:
451 ldmgeia r1!, {r3, r12, lr}
452 stmgeia r0!, {r3, r12, lr}
453 subges r2, r2, #0x0c
454 bge .Lmemmove_floop12
455
456 .Lmemmove_fl12:
457 adds r2, r2, #8
458 blt .Lmemmove_fl4
459
460 subs r2, r2, #4
461 ldrlt r3, [r1], #4
462 strlt r3, [r0], #4
463 ldmgeia r1!, {r3, r12}
464 stmgeia r0!, {r3, r12}
465 subge r2, r2, #4
466
467 .Lmemmove_fl4:
468 /* less than 4 bytes to go */
469 adds r2, r2, #4
470 ldmeqia sp!, {r0, pc} /* done */
471
472 /* copy the crud byte at a time */
473 cmp r2, #2
474 ldrb r3, [r1], #1
475 strb r3, [r0], #1
476 ldrgeb r3, [r1], #1
477 strgeb r3, [r0], #1
478 ldrgtb r3, [r1], #1
479 strgtb r3, [r0], #1
480 ldmia sp!, {r0, pc}
481
482 /* erg - unaligned destination */
483 .Lmemmove_fdestul:
484 rsb r12, r12, #4
485 cmp r12, #2
486
487 /* align destination with byte copies */
488 ldrb r3, [r1], #1
489 strb r3, [r0], #1
490 ldrgeb r3, [r1], #1
491 strgeb r3, [r0], #1
492 ldrgtb r3, [r1], #1
493 strgtb r3, [r0], #1
494 subs r2, r2, r12
495 blt .Lmemmove_fl4 /* less the 4 bytes */
496
497 ands r12, r1, #3
498 beq .Lmemmove_ft8 /* we have an aligned source */
499
500 /* erg - unaligned source */
501 /* This is where it gets nasty ... */
502 .Lmemmove_fsrcul:
503 bic r1, r1, #3
504 ldr lr, [r1], #4
505 cmp r12, #2
506 bgt .Lmemmove_fsrcul3
507 beq .Lmemmove_fsrcul2
508 cmp r2, #0x0c
509 blt .Lmemmove_fsrcul1loop4
510 sub r2, r2, #0x0c
511 stmdb sp!, {r4, r5}
512
513 .Lmemmove_fsrcul1loop16:
514 #ifdef __ARMEB__
515 mov r3, lr, lsl #8
516 #else
517 mov r3, lr, lsr #8
518 #endif
519 ldmia r1!, {r4, r5, r12, lr}
520 #ifdef __ARMEB__
521 orr r3, r3, r4, lsr #24
522 mov r4, r4, lsl #8
523 orr r4, r4, r5, lsr #24
524 mov r5, r5, lsl #8
525 orr r5, r5, r12, lsr #24
526 mov r12, r12, lsl #8
527 orr r12, r12, lr, lsr #24
528 #else
529 orr r3, r3, r4, lsl #24
530 mov r4, r4, lsr #8
531 orr r4, r4, r5, lsl #24
532 mov r5, r5, lsr #8
533 orr r5, r5, r12, lsl #24
534 mov r12, r12, lsr #8
535 orr r12, r12, lr, lsl #24
536 #endif
537 stmia r0!, {r3-r5, r12}
538 subs r2, r2, #0x10
539 bge .Lmemmove_fsrcul1loop16
540 ldmia sp!, {r4, r5}
541 adds r2, r2, #0x0c
542 blt .Lmemmove_fsrcul1l4
543
544 .Lmemmove_fsrcul1loop4:
545 #ifdef __ARMEB__
546 mov r12, lr, lsl #8
547 #else
548 mov r12, lr, lsr #8
549 #endif
550 ldr lr, [r1], #4
551 #ifdef __ARMEB__
552 orr r12, r12, lr, lsr #24
553 #else
554 orr r12, r12, lr, lsl #24
555 #endif
556 str r12, [r0], #4
557 subs r2, r2, #4
558 bge .Lmemmove_fsrcul1loop4
559
560 .Lmemmove_fsrcul1l4:
561 sub r1, r1, #3
562 b .Lmemmove_fl4
563
564 .Lmemmove_fsrcul2:
565 cmp r2, #0x0c
566 blt .Lmemmove_fsrcul2loop4
567 sub r2, r2, #0x0c
568 stmdb sp!, {r4, r5}
569
570 .Lmemmove_fsrcul2loop16:
571 #ifdef __ARMEB__
572 mov r3, lr, lsl #16
573 #else
574 mov r3, lr, lsr #16
575 #endif
576 ldmia r1!, {r4, r5, r12, lr}
577 #ifdef __ARMEB__
578 orr r3, r3, r4, lsr #16
579 mov r4, r4, lsl #16
580 orr r4, r4, r5, lsr #16
581 mov r5, r5, lsl #16
582 orr r5, r5, r12, lsr #16
583 mov r12, r12, lsl #16
584 orr r12, r12, lr, lsr #16
585 #else
586 orr r3, r3, r4, lsl #16
587 mov r4, r4, lsr #16
588 orr r4, r4, r5, lsl #16
589 mov r5, r5, lsr #16
590 orr r5, r5, r12, lsl #16
591 mov r12, r12, lsr #16
592 orr r12, r12, lr, lsl #16
593 #endif
594 stmia r0!, {r3-r5, r12}
595 subs r2, r2, #0x10
596 bge .Lmemmove_fsrcul2loop16
597 ldmia sp!, {r4, r5}
598 adds r2, r2, #0x0c
599 blt .Lmemmove_fsrcul2l4
600
601 .Lmemmove_fsrcul2loop4:
602 #ifdef __ARMEB__
603 mov r12, lr, lsl #16
604 #else
605 mov r12, lr, lsr #16
606 #endif
607 ldr lr, [r1], #4
608 #ifdef __ARMEB__
609 orr r12, r12, lr, lsr #16
610 #else
611 orr r12, r12, lr, lsl #16
612 #endif
613 str r12, [r0], #4
614 subs r2, r2, #4
615 bge .Lmemmove_fsrcul2loop4
616
617 .Lmemmove_fsrcul2l4:
618 sub r1, r1, #2
619 b .Lmemmove_fl4
620
621 .Lmemmove_fsrcul3:
622 cmp r2, #0x0c
623 blt .Lmemmove_fsrcul3loop4
624 sub r2, r2, #0x0c
625 stmdb sp!, {r4, r5}
626
627 .Lmemmove_fsrcul3loop16:
628 #ifdef __ARMEB__
629 mov r3, lr, lsl #24
630 #else
631 mov r3, lr, lsr #24
632 #endif
633 ldmia r1!, {r4, r5, r12, lr}
634 #ifdef __ARMEB__
635 orr r3, r3, r4, lsr #8
636 mov r4, r4, lsl #24
637 orr r4, r4, r5, lsr #8
638 mov r5, r5, lsl #24
639 orr r5, r5, r12, lsr #8
640 mov r12, r12, lsl #24
641 orr r12, r12, lr, lsr #8
642 #else
643 orr r3, r3, r4, lsl #8
644 mov r4, r4, lsr #24
645 orr r4, r4, r5, lsl #8
646 mov r5, r5, lsr #24
647 orr r5, r5, r12, lsl #8
648 mov r12, r12, lsr #24
649 orr r12, r12, lr, lsl #8
650 #endif
651 stmia r0!, {r3-r5, r12}
652 subs r2, r2, #0x10
653 bge .Lmemmove_fsrcul3loop16
654 ldmia sp!, {r4, r5}
655 adds r2, r2, #0x0c
656 blt .Lmemmove_fsrcul3l4
657
658 .Lmemmove_fsrcul3loop4:
659 #ifdef __ARMEB__
660 mov r12, lr, lsl #24
661 #else
662 mov r12, lr, lsr #24
663 #endif
664 ldr lr, [r1], #4
665 #ifdef __ARMEB__
666 orr r12, r12, lr, lsr #8
667 #else
668 orr r12, r12, lr, lsl #8
669 #endif
670 str r12, [r0], #4
671 subs r2, r2, #4
672 bge .Lmemmove_fsrcul3loop4
673
674 .Lmemmove_fsrcul3l4:
675 sub r1, r1, #1
676 b .Lmemmove_fl4
677
678 .Lmemmove_backwards:
679 add r1, r1, r2
680 add r0, r0, r2
681 subs r2, r2, #4
682 blt .Lmemmove_bl4 /* less than 4 bytes */
683 ands r12, r0, #3
684 bne .Lmemmove_bdestul /* oh unaligned destination addr */
685 ands r12, r1, #3
686 bne .Lmemmove_bsrcul /* oh unaligned source addr */
687
688 .Lmemmove_bt8:
689 /* We have aligned source and destination */
690 subs r2, r2, #8
691 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
692 stmdb sp!, {r4, lr}
693 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
694 blt .Lmemmove_bl32
695
696 /* blat 32 bytes at a time */
697 /* XXX for really big copies perhaps we should use more registers */
698 .Lmemmove_bloop32:
699 ldmdb r1!, {r3, r4, r12, lr}
700 stmdb r0!, {r3, r4, r12, lr}
701 ldmdb r1!, {r3, r4, r12, lr}
702 stmdb r0!, {r3, r4, r12, lr}
703 subs r2, r2, #0x20
704 bge .Lmemmove_bloop32
705
706 .Lmemmove_bl32:
707 cmn r2, #0x10
708 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
709 stmgedb r0!, {r3, r4, r12, lr}
710 subge r2, r2, #0x10
711 adds r2, r2, #0x14
712 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
713 stmgedb r0!, {r3, r12, lr}
714 subge r2, r2, #0x0c
715 ldmia sp!, {r4, lr}
716
717 .Lmemmove_bl12:
718 adds r2, r2, #8
719 blt .Lmemmove_bl4
720 subs r2, r2, #4
721 ldrlt r3, [r1, #-4]!
722 strlt r3, [r0, #-4]!
723 ldmgedb r1!, {r3, r12}
724 stmgedb r0!, {r3, r12}
725 subge r2, r2, #4
726
727 .Lmemmove_bl4:
728 /* less than 4 bytes to go */
729 adds r2, r2, #4
730 RETeq /* done */
731
732 /* copy the crud byte at a time */
733 cmp r2, #2
734 ldrb r3, [r1, #-1]!
735 strb r3, [r0, #-1]!
736 ldrgeb r3, [r1, #-1]!
737 strgeb r3, [r0, #-1]!
738 ldrgtb r3, [r1, #-1]!
739 strgtb r3, [r0, #-1]!
740 RET
741
742 /* erg - unaligned destination */
743 .Lmemmove_bdestul:
744 cmp r12, #2
745
746 /* align destination with byte copies */
747 ldrb r3, [r1, #-1]!
748 strb r3, [r0, #-1]!
749 ldrgeb r3, [r1, #-1]!
750 strgeb r3, [r0, #-1]!
751 ldrgtb r3, [r1, #-1]!
752 strgtb r3, [r0, #-1]!
753 subs r2, r2, r12
754 blt .Lmemmove_bl4 /* less than 4 bytes to go */
755 ands r12, r1, #3
756 beq .Lmemmove_bt8 /* we have an aligned source */
757
758 /* erg - unaligned source */
759 /* This is where it gets nasty ... */
760 .Lmemmove_bsrcul:
761 bic r1, r1, #3
762 ldr r3, [r1, #0]
763 cmp r12, #2
764 blt .Lmemmove_bsrcul1
765 beq .Lmemmove_bsrcul2
766 cmp r2, #0x0c
767 blt .Lmemmove_bsrcul3loop4
768 sub r2, r2, #0x0c
769 stmdb sp!, {r4, r5, lr}
770
771 .Lmemmove_bsrcul3loop16:
772 #ifdef __ARMEB__
773 mov lr, r3, lsr #8
774 #else
775 mov lr, r3, lsl #8
776 #endif
777 ldmdb r1!, {r3-r5, r12}
778 #ifdef __ARMEB__
779 orr lr, lr, r12, lsl #24
780 mov r12, r12, lsr #8
781 orr r12, r12, r5, lsl #24
782 mov r5, r5, lsr #8
783 orr r5, r5, r4, lsl #24
784 mov r4, r4, lsr #8
785 orr r4, r4, r3, lsl #24
786 #else
787 orr lr, lr, r12, lsr #24
788 mov r12, r12, lsl #8
789 orr r12, r12, r5, lsr #24
790 mov r5, r5, lsl #8
791 orr r5, r5, r4, lsr #24
792 mov r4, r4, lsl #8
793 orr r4, r4, r3, lsr #24
794 #endif
795 stmdb r0!, {r4, r5, r12, lr}
796 subs r2, r2, #0x10
797 bge .Lmemmove_bsrcul3loop16
798 ldmia sp!, {r4, r5, lr}
799 adds r2, r2, #0x0c
800 blt .Lmemmove_bsrcul3l4
801
802 .Lmemmove_bsrcul3loop4:
803 #ifdef __ARMEB__
804 mov r12, r3, lsr #8
805 #else
806 mov r12, r3, lsl #8
807 #endif
808 ldr r3, [r1, #-4]!
809 #ifdef __ARMEB__
810 orr r12, r12, r3, lsl #24
811 #else
812 orr r12, r12, r3, lsr #24
813 #endif
814 str r12, [r0, #-4]!
815 subs r2, r2, #4
816 bge .Lmemmove_bsrcul3loop4
817
818 .Lmemmove_bsrcul3l4:
819 add r1, r1, #3
820 b .Lmemmove_bl4
821
822 .Lmemmove_bsrcul2:
823 cmp r2, #0x0c
824 blt .Lmemmove_bsrcul2loop4
825 sub r2, r2, #0x0c
826 stmdb sp!, {r4, r5, lr}
827
828 .Lmemmove_bsrcul2loop16:
829 #ifdef __ARMEB__
830 mov lr, r3, lsr #16
831 #else
832 mov lr, r3, lsl #16
833 #endif
834 ldmdb r1!, {r3-r5, r12}
835 #ifdef __ARMEB__
836 orr lr, lr, r12, lsl #16
837 mov r12, r12, lsr #16
838 orr r12, r12, r5, lsl #16
839 mov r5, r5, lsr #16
840 orr r5, r5, r4, lsl #16
841 mov r4, r4, lsr #16
842 orr r4, r4, r3, lsl #16
843 #else
844 orr lr, lr, r12, lsr #16
845 mov r12, r12, lsl #16
846 orr r12, r12, r5, lsr #16
847 mov r5, r5, lsl #16
848 orr r5, r5, r4, lsr #16
849 mov r4, r4, lsl #16
850 orr r4, r4, r3, lsr #16
851 #endif
852 stmdb r0!, {r4, r5, r12, lr}
853 subs r2, r2, #0x10
854 bge .Lmemmove_bsrcul2loop16
855 ldmia sp!, {r4, r5, lr}
856 adds r2, r2, #0x0c
857 blt .Lmemmove_bsrcul2l4
858
859 .Lmemmove_bsrcul2loop4:
860 #ifdef __ARMEB__
861 mov r12, r3, lsr #16
862 #else
863 mov r12, r3, lsl #16
864 #endif
865 ldr r3, [r1, #-4]!
866 #ifdef __ARMEB__
867 orr r12, r12, r3, lsl #16
868 #else
869 orr r12, r12, r3, lsr #16
870 #endif
871 str r12, [r0, #-4]!
872 subs r2, r2, #4
873 bge .Lmemmove_bsrcul2loop4
874
875 .Lmemmove_bsrcul2l4:
876 add r1, r1, #2
877 b .Lmemmove_bl4
878
879 .Lmemmove_bsrcul1:
880 cmp r2, #0x0c
881 blt .Lmemmove_bsrcul1loop4
882 sub r2, r2, #0x0c
883 stmdb sp!, {r4, r5, lr}
884
885 .Lmemmove_bsrcul1loop32:
886 #ifdef __ARMEB__
887 mov lr, r3, lsr #24
888 #else
889 mov lr, r3, lsl #24
890 #endif
891 ldmdb r1!, {r3-r5, r12}
892 #ifdef __ARMEB__
893 orr lr, lr, r12, lsl #8
894 mov r12, r12, lsr #24
895 orr r12, r12, r5, lsl #8
896 mov r5, r5, lsr #24
897 orr r5, r5, r4, lsl #8
898 mov r4, r4, lsr #24
899 orr r4, r4, r3, lsl #8
900 #else
901 orr lr, lr, r12, lsr #8
902 mov r12, r12, lsl #24
903 orr r12, r12, r5, lsr #8
904 mov r5, r5, lsl #24
905 orr r5, r5, r4, lsr #8
906 mov r4, r4, lsl #24
907 orr r4, r4, r3, lsr #8
908 #endif
909 stmdb r0!, {r4, r5, r12, lr}
910 subs r2, r2, #0x10
911 bge .Lmemmove_bsrcul1loop32
912 ldmia sp!, {r4, r5, lr}
913 adds r2, r2, #0x0c
914 blt .Lmemmove_bsrcul1l4
915
916 .Lmemmove_bsrcul1loop4:
917 #ifdef __ARMEB__
918 mov r12, r3, lsr #24
919 #else
920 mov r12, r3, lsl #24
921 #endif
922 ldr r3, [r1, #-4]!
923 #ifdef __ARMEB__
924 orr r12, r12, r3, lsl #8
925 #else
926 orr r12, r12, r3, lsr #8
927 #endif
928 str r12, [r0, #-4]!
929 subs r2, r2, #4
930 bge .Lmemmove_bsrcul1loop4
931
932 .Lmemmove_bsrcul1l4:
933 add r1, r1, #1
934 b .Lmemmove_bl4
935 END(bcopy)
936 END(memmove)
937
938 #if !defined(_ARM_ARCH_5E)
/*
 * memcpy (pre-ARMv5E variant): r0 = dest, r1 = src, r2 = len;
 * returns dest in r0.  Regions must not overlap.
 * Unless executing from flash (FLASHADDR window check on pc), a
 * registered _arm_memcpy hook is tried first for requests at or above
 * _min_memcpy_size; on hook failure, or for small/unhooked copies,
 * fall through to the software path, which dispatches on destination
 * and source word alignment.
 */
939 ENTRY(memcpy)
940 /* save leaf functions having to store this away */
941 /* Do not check arm_memcpy if we're running from flash */
942 #ifdef FLASHADDR
943 #if FLASHADDR > PHYSADDR
944 ldr r3, =FLASHADDR
945 cmp r3, pc
946 bls .Lnormal
947 #else
948 ldr r3, =FLASHADDR
949 cmp r3, pc
950 bhi .Lnormal
951 #endif
952 #endif
953 ldr r3, .L_arm_memcpy
954 ldr r3, [r3]
955 cmp r3, #0 /* no hook registered? */
956 beq .Lnormal
957 ldr r3, .L_min_memcpy_size
958 ldr r3, [r3]
959 cmp r2, r3 /* below hook threshold? */
960 blt .Lnormal
961 stmfd sp!, {r0-r2, r4, lr}
962 mov r3, #0 /* fourth argument to the hook */
963 ldr r4, .L_arm_memcpy
964 mov lr, pc /* pre-ARMv5-style indirect call */
965 ldr pc, [r4]
966 cmp r0, #0 /* hook returns 0 on success */
967 ldmfd sp!, {r0-r2, r4, lr} /* ldm leaves the flags intact */
968 RETeq
969
970 .Lnormal:
971 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
972
973 subs r2, r2, #4
974 blt .Lmemcpy_l4 /* less than 4 bytes */
975 ands r12, r0, #3
976 bne .Lmemcpy_destul /* oh unaligned destination addr */
977 ands r12, r1, #3
978 bne .Lmemcpy_srcul /* oh unaligned source addr */
979
980 .Lmemcpy_t8:
981 /* We have aligned source and destination */
982 subs r2, r2, #8
983 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
984 subs r2, r2, #0x14
985 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
986 stmdb sp!, {r4} /* borrow r4 */
987
988 /* blat 32 bytes at a time */
989 /* XXX for really big copies perhaps we should use more registers */
990 .Lmemcpy_loop32:
991 ldmia r1!, {r3, r4, r12, lr}
992 stmia r0!, {r3, r4, r12, lr}
993 ldmia r1!, {r3, r4, r12, lr}
994 stmia r0!, {r3, r4, r12, lr}
995 subs r2, r2, #0x20
996 bge .Lmemcpy_loop32
997
998 cmn r2, #0x10
999 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
1000 stmgeia r0!, {r3, r4, r12, lr}
1001 subge r2, r2, #0x10
1002 ldmia sp!, {r4} /* return r4 */
1003
1004 .Lmemcpy_l32:
1005 adds r2, r2, #0x14
1006
1007 /* blat 12 bytes at a time */
1008 .Lmemcpy_loop12:
1009 ldmgeia r1!, {r3, r12, lr}
1010 stmgeia r0!, {r3, r12, lr}
1011 subges r2, r2, #0x0c
1012 bge .Lmemcpy_loop12
1013
1014 .Lmemcpy_l12:
1015 adds r2, r2, #8
1016 blt .Lmemcpy_l4
1017
1018 subs r2, r2, #4
1019 ldrlt r3, [r1], #4
1020 strlt r3, [r0], #4
1021 ldmgeia r1!, {r3, r12}
1022 stmgeia r0!, {r3, r12}
1023 subge r2, r2, #4
1024
1025 .Lmemcpy_l4:
1026 /* less than 4 bytes to go */
1027 adds r2, r2, #4
1028 #ifdef __APCS_26_
1029 ldmeqia sp!, {r0, pc}^ /* done */
1030 #else
1031 ldmeqia sp!, {r0, pc} /* done */
1032 #endif
1033 /* copy the crud byte at a time */
1034 cmp r2, #2
1035 ldrb r3, [r1], #1
1036 strb r3, [r0], #1
1037 ldrgeb r3, [r1], #1
1038 strgeb r3, [r0], #1
1039 ldrgtb r3, [r1], #1
1040 strgtb r3, [r0], #1
1041 ldmia sp!, {r0, pc}
1042
1043 /* erg - unaligned destination */
1044 .Lmemcpy_destul:
1045 rsb r12, r12, #4
1046 cmp r12, #2
1047
1048 /* align destination with byte copies */
1049 ldrb r3, [r1], #1
1050 strb r3, [r0], #1
1051 ldrgeb r3, [r1], #1
1052 strgeb r3, [r0], #1
1053 ldrgtb r3, [r1], #1
1054 strgtb r3, [r0], #1
1055 subs r2, r2, r12
1056 blt .Lmemcpy_l4 /* less the 4 bytes */
1057
1058 ands r12, r1, #3
1059 beq .Lmemcpy_t8 /* we have an aligned source */
1060
1061 /* erg - unaligned source */
1062 /* This is where it gets nasty ... */
1063 .Lmemcpy_srcul:
1064 bic r1, r1, #3
1065 ldr lr, [r1], #4
1066 cmp r12, #2
1067 bgt .Lmemcpy_srcul3
1068 beq .Lmemcpy_srcul2
1069 cmp r2, #0x0c
1070 blt .Lmemcpy_srcul1loop4
1071 sub r2, r2, #0x0c
1072 stmdb sp!, {r4, r5}
1073
1074 .Lmemcpy_srcul1loop16:
1075 mov r3, lr, lsr #8
1076 ldmia r1!, {r4, r5, r12, lr}
1077 orr r3, r3, r4, lsl #24
1078 mov r4, r4, lsr #8
1079 orr r4, r4, r5, lsl #24
1080 mov r5, r5, lsr #8
1081 orr r5, r5, r12, lsl #24
1082 mov r12, r12, lsr #8
1083 orr r12, r12, lr, lsl #24
1084 stmia r0!, {r3-r5, r12}
1085 subs r2, r2, #0x10
1086 bge .Lmemcpy_srcul1loop16
1087 ldmia sp!, {r4, r5}
1088 adds r2, r2, #0x0c
1089 blt .Lmemcpy_srcul1l4
1090
1091 .Lmemcpy_srcul1loop4:
1092 mov r12, lr, lsr #8
1093 ldr lr, [r1], #4
1094 orr r12, r12, lr, lsl #24
1095 str r12, [r0], #4
1096 subs r2, r2, #4
1097 bge .Lmemcpy_srcul1loop4
1098
1099 .Lmemcpy_srcul1l4:
1100 sub r1, r1, #3
1101 b .Lmemcpy_l4
1102
1103 .Lmemcpy_srcul2:
1104 cmp r2, #0x0c
1105 blt .Lmemcpy_srcul2loop4
1106 sub r2, r2, #0x0c
1107 stmdb sp!, {r4, r5}
1108
1109 .Lmemcpy_srcul2loop16:
1110 mov r3, lr, lsr #16
1111 ldmia r1!, {r4, r5, r12, lr}
1112 orr r3, r3, r4, lsl #16
1113 mov r4, r4, lsr #16
1114 orr r4, r4, r5, lsl #16
1115 mov r5, r5, lsr #16
1116 orr r5, r5, r12, lsl #16
1117 mov r12, r12, lsr #16
1118 orr r12, r12, lr, lsl #16
1119 stmia r0!, {r3-r5, r12}
1120 subs r2, r2, #0x10
1121 bge .Lmemcpy_srcul2loop16
1122 ldmia sp!, {r4, r5}
1123 adds r2, r2, #0x0c
1124 blt .Lmemcpy_srcul2l4
1125
1126 .Lmemcpy_srcul2loop4:
1127 mov r12, lr, lsr #16
1128 ldr lr, [r1], #4
1129 orr r12, r12, lr, lsl #16
1130 str r12, [r0], #4
1131 subs r2, r2, #4
1132 bge .Lmemcpy_srcul2loop4
1133
1134 .Lmemcpy_srcul2l4:
1135 sub r1, r1, #2
1136 b .Lmemcpy_l4
1137
1138 .Lmemcpy_srcul3:
1139 cmp r2, #0x0c
1140 blt .Lmemcpy_srcul3loop4
1141 sub r2, r2, #0x0c
1142 stmdb sp!, {r4, r5}
1143
1144 .Lmemcpy_srcul3loop16:
1145 mov r3, lr, lsr #24
1146 ldmia r1!, {r4, r5, r12, lr}
1147 orr r3, r3, r4, lsl #8
1148 mov r4, r4, lsr #24
1149 orr r4, r4, r5, lsl #8
1150 mov r5, r5, lsr #24
1151 orr r5, r5, r12, lsl #8
1152 mov r12, r12, lsr #24
1153 orr r12, r12, lr, lsl #8
1154 stmia r0!, {r3-r5, r12}
1155 subs r2, r2, #0x10
1156 bge .Lmemcpy_srcul3loop16
1157 ldmia sp!, {r4, r5}
1158 adds r2, r2, #0x0c
1159 blt .Lmemcpy_srcul3l4
1160
1161 .Lmemcpy_srcul3loop4:
1162 mov r12, lr, lsr #24
1163 ldr lr, [r1], #4
1164 orr r12, r12, lr, lsl #8
1165 str r12, [r0], #4
1166 subs r2, r2, #4
1167 bge .Lmemcpy_srcul3loop4
1168
1169 .Lmemcpy_srcul3l4:
1170 sub r1, r1, #1
1171 b .Lmemcpy_l4
1172 END(memcpy)
1173
1174 #else
1175 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1176 ENTRY(memcpy)
1177 pld [r1]
1178 cmp r2, #0x0c
1179 ble .Lmemcpy_short /* <= 12 bytes */
1180 #ifdef FLASHADDR
1181 #if FLASHADDR > PHYSADDR
1182 ldr r3, =FLASHADDR
1183 cmp r3, pc
1184 bls .Lnormal
1185 #else
1186 ldr r3, =FLASHADDR
1187 cmp r3, pc
1188 bhi .Lnormal
1189 #endif
1190 #endif
1191 ldr r3, .L_arm_memcpy
1192 ldr r3, [r3]
1193 cmp r3, #0
1194 beq .Lnormal
1195 ldr r3, .L_min_memcpy_size
1196 ldr r3, [r3]
1197 cmp r2, r3
1198 blt .Lnormal
1199 stmfd sp!, {r0-r2, r4, lr}
1200 mov r3, #0
1201 ldr r4, .L_arm_memcpy
1202 mov lr, pc
1203 ldr pc, [r4]
1204 cmp r0, #0
1205 ldmfd sp!, {r0-r2, r4, lr}
1206 RETeq
1207 .Lnormal:
1208 mov r3, r0 /* We must not clobber r0 */
1209
1210 /* Word-align the destination buffer */
1211 ands ip, r3, #0x03 /* Already word aligned? */
1212 beq .Lmemcpy_wordaligned /* Yup */
1213 cmp ip, #0x02
1214 ldrb ip, [r1], #0x01
1215 sub r2, r2, #0x01
1216 strb ip, [r3], #0x01
1217 ldrleb ip, [r1], #0x01
1218 suble r2, r2, #0x01
1219 strleb ip, [r3], #0x01
1220 ldrltb ip, [r1], #0x01
1221 sublt r2, r2, #0x01
1222 strltb ip, [r3], #0x01
1223
1224 /* Destination buffer is now word aligned */
1225 .Lmemcpy_wordaligned:
1226 ands ip, r1, #0x03 /* Is src also word-aligned? */
1227 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
1228
1229 /* Quad-align the destination buffer */
1230 tst r3, #0x07 /* Already quad aligned? */
1231 ldrne ip, [r1], #0x04
1232 stmfd sp!, {r4-r9} /* Free up some registers */
1233 subne r2, r2, #0x04
1234 strne ip, [r3], #0x04
1235
1236 /* Destination buffer quad aligned, source is at least word aligned */
1237 subs r2, r2, #0x80
1238 blt .Lmemcpy_w_lessthan128
1239
1240 /* Copy 128 bytes at a time */
1241 .Lmemcpy_w_loop128:
1242 ldr r4, [r1], #0x04 /* LD:00-03 */
1243 ldr r5, [r1], #0x04 /* LD:04-07 */
1244 pld [r1, #0x18] /* Prefetch 0x20 */
1245 ldr r6, [r1], #0x04 /* LD:08-0b */
1246 ldr r7, [r1], #0x04 /* LD:0c-0f */
1247 ldr r8, [r1], #0x04 /* LD:10-13 */
1248 ldr r9, [r1], #0x04 /* LD:14-17 */
1249 strd r4, [r3], #0x08 /* ST:00-07 */
1250 ldr r4, [r1], #0x04 /* LD:18-1b */
1251 ldr r5, [r1], #0x04 /* LD:1c-1f */
1252 strd r6, [r3], #0x08 /* ST:08-0f */
1253 ldr r6, [r1], #0x04 /* LD:20-23 */
1254 ldr r7, [r1], #0x04 /* LD:24-27 */
1255 pld [r1, #0x18] /* Prefetch 0x40 */
1256 strd r8, [r3], #0x08 /* ST:10-17 */
1257 ldr r8, [r1], #0x04 /* LD:28-2b */
1258 ldr r9, [r1], #0x04 /* LD:2c-2f */
1259 strd r4, [r3], #0x08 /* ST:18-1f */
1260 ldr r4, [r1], #0x04 /* LD:30-33 */
1261 ldr r5, [r1], #0x04 /* LD:34-37 */
1262 strd r6, [r3], #0x08 /* ST:20-27 */
1263 ldr r6, [r1], #0x04 /* LD:38-3b */
1264 ldr r7, [r1], #0x04 /* LD:3c-3f */
1265 strd r8, [r3], #0x08 /* ST:28-2f */
1266 ldr r8, [r1], #0x04 /* LD:40-43 */
1267 ldr r9, [r1], #0x04 /* LD:44-47 */
1268 pld [r1, #0x18] /* Prefetch 0x60 */
1269 strd r4, [r3], #0x08 /* ST:30-37 */
1270 ldr r4, [r1], #0x04 /* LD:48-4b */
1271 ldr r5, [r1], #0x04 /* LD:4c-4f */
1272 strd r6, [r3], #0x08 /* ST:38-3f */
1273 ldr r6, [r1], #0x04 /* LD:50-53 */
1274 ldr r7, [r1], #0x04 /* LD:54-57 */
1275 strd r8, [r3], #0x08 /* ST:40-47 */
1276 ldr r8, [r1], #0x04 /* LD:58-5b */
1277 ldr r9, [r1], #0x04 /* LD:5c-5f */
1278 strd r4, [r3], #0x08 /* ST:48-4f */
1279 ldr r4, [r1], #0x04 /* LD:60-63 */
1280 ldr r5, [r1], #0x04 /* LD:64-67 */
1281 pld [r1, #0x18] /* Prefetch 0x80 */
1282 strd r6, [r3], #0x08 /* ST:50-57 */
1283 ldr r6, [r1], #0x04 /* LD:68-6b */
1284 ldr r7, [r1], #0x04 /* LD:6c-6f */
1285 strd r8, [r3], #0x08 /* ST:58-5f */
1286 ldr r8, [r1], #0x04 /* LD:70-73 */
1287 ldr r9, [r1], #0x04 /* LD:74-77 */
1288 strd r4, [r3], #0x08 /* ST:60-67 */
1289 ldr r4, [r1], #0x04 /* LD:78-7b */
1290 ldr r5, [r1], #0x04 /* LD:7c-7f */
1291 strd r6, [r3], #0x08 /* ST:68-6f */
1292 strd r8, [r3], #0x08 /* ST:70-77 */
1293 subs r2, r2, #0x80
1294 strd r4, [r3], #0x08 /* ST:78-7f */
1295 bge .Lmemcpy_w_loop128
1296
1297 .Lmemcpy_w_lessthan128:
1298 adds r2, r2, #0x80 /* Adjust for extra sub */
1299 ldmeqfd sp!, {r4-r9}
1300 RETeq /* Return now if done */
1301 subs r2, r2, #0x20
1302 blt .Lmemcpy_w_lessthan32
1303
1304 /* Copy 32 bytes at a time */
1305 .Lmemcpy_w_loop32:
1306 ldr r4, [r1], #0x04
1307 ldr r5, [r1], #0x04
1308 pld [r1, #0x18]
1309 ldr r6, [r1], #0x04
1310 ldr r7, [r1], #0x04
1311 ldr r8, [r1], #0x04
1312 ldr r9, [r1], #0x04
1313 strd r4, [r3], #0x08
1314 ldr r4, [r1], #0x04
1315 ldr r5, [r1], #0x04
1316 strd r6, [r3], #0x08
1317 strd r8, [r3], #0x08
1318 subs r2, r2, #0x20
1319 strd r4, [r3], #0x08
1320 bge .Lmemcpy_w_loop32
1321
1322 .Lmemcpy_w_lessthan32:
1323 adds r2, r2, #0x20 /* Adjust for extra sub */
1324 ldmeqfd sp!, {r4-r9}
1325 RETeq /* Return now if done */
1326
1327 and r4, r2, #0x18
1328 rsbs r4, r4, #0x18
1329 addne pc, pc, r4, lsl #1
1330 nop
1331
1332 /* At least 24 bytes remaining */
1333 ldr r4, [r1], #0x04
1334 ldr r5, [r1], #0x04
1335 sub r2, r2, #0x08
1336 strd r4, [r3], #0x08
1337
1338 /* At least 16 bytes remaining */
1339 ldr r4, [r1], #0x04
1340 ldr r5, [r1], #0x04
1341 sub r2, r2, #0x08
1342 strd r4, [r3], #0x08
1343
1344 /* At least 8 bytes remaining */
1345 ldr r4, [r1], #0x04
1346 ldr r5, [r1], #0x04
1347 subs r2, r2, #0x08
1348 strd r4, [r3], #0x08
1349
1350 /* Less than 8 bytes remaining */
1351 ldmfd sp!, {r4-r9}
1352 RETeq /* Return now if done */
1353 subs r2, r2, #0x04
1354 ldrge ip, [r1], #0x04
1355 strge ip, [r3], #0x04
1356 RETeq /* Return now if done */
1357 addlt r2, r2, #0x04
1358 ldrb ip, [r1], #0x01
1359 cmp r2, #0x02
1360 ldrgeb r2, [r1], #0x01
1361 strb ip, [r3], #0x01
1362 ldrgtb ip, [r1]
1363 strgeb r2, [r3], #0x01
1364 strgtb ip, [r3]
1365 RET
1366
1367
1368 /*
1369 * At this point, it has not been possible to word align both buffers.
1370 * The destination buffer is word aligned, but the source buffer is not.
1371 */
1372 .Lmemcpy_bad_align:
1373 stmfd sp!, {r4-r7}
1374 bic r1, r1, #0x03
1375 cmp ip, #2
1376 ldr ip, [r1], #0x04
1377 bgt .Lmemcpy_bad3
1378 beq .Lmemcpy_bad2
1379 b .Lmemcpy_bad1
1380
1381 .Lmemcpy_bad1_loop16:
1382 #ifdef __ARMEB__
1383 mov r4, ip, lsl #8
1384 #else
1385 mov r4, ip, lsr #8
1386 #endif
1387 ldr r5, [r1], #0x04
1388 pld [r1, #0x018]
1389 ldr r6, [r1], #0x04
1390 ldr r7, [r1], #0x04
1391 ldr ip, [r1], #0x04
1392 #ifdef __ARMEB__
1393 orr r4, r4, r5, lsr #24
1394 mov r5, r5, lsl #8
1395 orr r5, r5, r6, lsr #24
1396 mov r6, r6, lsl #8
1397 orr r6, r6, r7, lsr #24
1398 mov r7, r7, lsl #8
1399 orr r7, r7, ip, lsr #24
1400 #else
1401 orr r4, r4, r5, lsl #24
1402 mov r5, r5, lsr #8
1403 orr r5, r5, r6, lsl #24
1404 mov r6, r6, lsr #8
1405 orr r6, r6, r7, lsl #24
1406 mov r7, r7, lsr #8
1407 orr r7, r7, ip, lsl #24
1408 #endif
1409 str r4, [r3], #0x04
1410 str r5, [r3], #0x04
1411 str r6, [r3], #0x04
1412 str r7, [r3], #0x04
1413 .Lmemcpy_bad1:
1414 subs r2, r2, #0x10
1415 bge .Lmemcpy_bad1_loop16
1416
1417 adds r2, r2, #0x10
1418 ldmeqfd sp!, {r4-r7}
1419 RETeq /* Return now if done */
1420 subs r2, r2, #0x04
1421 sublt r1, r1, #0x03
1422 blt .Lmemcpy_bad_done
1423
1424 .Lmemcpy_bad1_loop4:
1425 #ifdef __ARMEB__
1426 mov r4, ip, lsl #8
1427 #else
1428 mov r4, ip, lsr #8
1429 #endif
1430 ldr ip, [r1], #0x04
1431 subs r2, r2, #0x04
1432 #ifdef __ARMEB__
1433 orr r4, r4, ip, lsr #24
1434 #else
1435 orr r4, r4, ip, lsl #24
1436 #endif
1437 str r4, [r3], #0x04
1438 bge .Lmemcpy_bad1_loop4
1439 sub r1, r1, #0x03
1440 b .Lmemcpy_bad_done
1441
1442 .Lmemcpy_bad2_loop16:
1443 #ifdef __ARMEB__
1444 mov r4, ip, lsl #16
1445 #else
1446 mov r4, ip, lsr #16
1447 #endif
1448 ldr r5, [r1], #0x04
1449 pld [r1, #0x018]
1450 ldr r6, [r1], #0x04
1451 ldr r7, [r1], #0x04
1452 ldr ip, [r1], #0x04
1453 #ifdef __ARMEB__
1454 orr r4, r4, r5, lsr #16
1455 mov r5, r5, lsl #16
1456 orr r5, r5, r6, lsr #16
1457 mov r6, r6, lsl #16
1458 orr r6, r6, r7, lsr #16
1459 mov r7, r7, lsl #16
1460 orr r7, r7, ip, lsr #16
1461 #else
1462 orr r4, r4, r5, lsl #16
1463 mov r5, r5, lsr #16
1464 orr r5, r5, r6, lsl #16
1465 mov r6, r6, lsr #16
1466 orr r6, r6, r7, lsl #16
1467 mov r7, r7, lsr #16
1468 orr r7, r7, ip, lsl #16
1469 #endif
1470 str r4, [r3], #0x04
1471 str r5, [r3], #0x04
1472 str r6, [r3], #0x04
1473 str r7, [r3], #0x04
1474 .Lmemcpy_bad2:
1475 subs r2, r2, #0x10
1476 bge .Lmemcpy_bad2_loop16
1477
1478 adds r2, r2, #0x10
1479 ldmeqfd sp!, {r4-r7}
1480 RETeq /* Return now if done */
1481 subs r2, r2, #0x04
1482 sublt r1, r1, #0x02
1483 blt .Lmemcpy_bad_done
1484
1485 .Lmemcpy_bad2_loop4:
1486 #ifdef __ARMEB__
1487 mov r4, ip, lsl #16
1488 #else
1489 mov r4, ip, lsr #16
1490 #endif
1491 ldr ip, [r1], #0x04
1492 subs r2, r2, #0x04
1493 #ifdef __ARMEB__
1494 orr r4, r4, ip, lsr #16
1495 #else
1496 orr r4, r4, ip, lsl #16
1497 #endif
1498 str r4, [r3], #0x04
1499 bge .Lmemcpy_bad2_loop4
1500 sub r1, r1, #0x02
1501 b .Lmemcpy_bad_done
1502
1503 .Lmemcpy_bad3_loop16:
1504 #ifdef __ARMEB__
1505 mov r4, ip, lsl #24
1506 #else
1507 mov r4, ip, lsr #24
1508 #endif
1509 ldr r5, [r1], #0x04
1510 pld [r1, #0x018]
1511 ldr r6, [r1], #0x04
1512 ldr r7, [r1], #0x04
1513 ldr ip, [r1], #0x04
1514 #ifdef __ARMEB__
1515 orr r4, r4, r5, lsr #8
1516 mov r5, r5, lsl #24
1517 orr r5, r5, r6, lsr #8
1518 mov r6, r6, lsl #24
1519 orr r6, r6, r7, lsr #8
1520 mov r7, r7, lsl #24
1521 orr r7, r7, ip, lsr #8
1522 #else
1523 orr r4, r4, r5, lsl #8
1524 mov r5, r5, lsr #24
1525 orr r5, r5, r6, lsl #8
1526 mov r6, r6, lsr #24
1527 orr r6, r6, r7, lsl #8
1528 mov r7, r7, lsr #24
1529 orr r7, r7, ip, lsl #8
1530 #endif
1531 str r4, [r3], #0x04
1532 str r5, [r3], #0x04
1533 str r6, [r3], #0x04
1534 str r7, [r3], #0x04
1535 .Lmemcpy_bad3:
1536 subs r2, r2, #0x10
1537 bge .Lmemcpy_bad3_loop16
1538
1539 adds r2, r2, #0x10
1540 ldmeqfd sp!, {r4-r7}
1541 RETeq /* Return now if done */
1542 subs r2, r2, #0x04
1543 sublt r1, r1, #0x01
1544 blt .Lmemcpy_bad_done
1545
1546 .Lmemcpy_bad3_loop4:
1547 #ifdef __ARMEB__
1548 mov r4, ip, lsl #24
1549 #else
1550 mov r4, ip, lsr #24
1551 #endif
1552 ldr ip, [r1], #0x04
1553 subs r2, r2, #0x04
1554 #ifdef __ARMEB__
1555 orr r4, r4, ip, lsr #8
1556 #else
1557 orr r4, r4, ip, lsl #8
1558 #endif
1559 str r4, [r3], #0x04
1560 bge .Lmemcpy_bad3_loop4
1561 sub r1, r1, #0x01
1562
1563 .Lmemcpy_bad_done:
1564 ldmfd sp!, {r4-r7}
1565 adds r2, r2, #0x04
1566 RETeq
1567 ldrb ip, [r1], #0x01
1568 cmp r2, #0x02
1569 ldrgeb r2, [r1], #0x01
1570 strb ip, [r3], #0x01
1571 ldrgtb ip, [r1]
1572 strgeb r2, [r3], #0x01
1573 strgtb ip, [r3]
1574 RET
1575
1576
1577 /*
1578 * Handle short copies (less than 16 bytes), possibly misaligned.
1579 * Some of these are *very* common, thanks to the network stack,
1580 * and so are handled specially.
1581 */
1582 .Lmemcpy_short:
1583 add pc, pc, r2, lsl #2
1584 nop
1585 RET /* 0x00 */
1586 b .Lmemcpy_bytewise /* 0x01 */
1587 b .Lmemcpy_bytewise /* 0x02 */
1588 b .Lmemcpy_bytewise /* 0x03 */
1589 b .Lmemcpy_4 /* 0x04 */
1590 b .Lmemcpy_bytewise /* 0x05 */
1591 b .Lmemcpy_6 /* 0x06 */
1592 b .Lmemcpy_bytewise /* 0x07 */
1593 b .Lmemcpy_8 /* 0x08 */
1594 b .Lmemcpy_bytewise /* 0x09 */
1595 b .Lmemcpy_bytewise /* 0x0a */
1596 b .Lmemcpy_bytewise /* 0x0b */
1597 b .Lmemcpy_c /* 0x0c */
1598 .Lmemcpy_bytewise:
1599 mov r3, r0 /* We must not clobber r0 */
1600 ldrb ip, [r1], #0x01
1601 1: subs r2, r2, #0x01
1602 strb ip, [r3], #0x01
1603 ldrneb ip, [r1], #0x01
1604 bne 1b
1605 RET
1606
1607 /******************************************************************************
1608 * Special case for 4 byte copies
1609 */
1610 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1611 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1612 LMEMCPY_4_PAD
1613 .Lmemcpy_4:
1614 and r2, r1, #0x03
1615 orr r2, r2, r0, lsl #2
1616 ands r2, r2, #0x0f
1617 sub r3, pc, #0x14
1618 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1619
1620 /*
1621 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1622 */
1623 ldr r2, [r1]
1624 str r2, [r0]
1625 RET
1626 LMEMCPY_4_PAD
1627
1628 /*
1629 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1630 */
1631 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1632 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1633 #ifdef __ARMEB__
1634 mov r3, r3, lsl #8 /* r3 = 012. */
1635 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1636 #else
1637 mov r3, r3, lsr #8 /* r3 = .210 */
1638 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1639 #endif
1640 str r3, [r0]
1641 RET
1642 LMEMCPY_4_PAD
1643
1644 /*
1645 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1646 */
1647 #ifdef __ARMEB__
1648 ldrh r3, [r1]
1649 ldrh r2, [r1, #0x02]
1650 #else
1651 ldrh r3, [r1, #0x02]
1652 ldrh r2, [r1]
1653 #endif
1654 orr r3, r2, r3, lsl #16
1655 str r3, [r0]
1656 RET
1657 LMEMCPY_4_PAD
1658
1659 /*
1660 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1661 */
1662 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1663 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1664 #ifdef __ARMEB__
1665 mov r3, r3, lsl #24 /* r3 = 0... */
1666 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
1667 #else
1668 mov r3, r3, lsr #24 /* r3 = ...0 */
1669 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1670 #endif
1671 str r3, [r0]
1672 RET
1673 LMEMCPY_4_PAD
1674
1675 /*
1676 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1677 */
1678 ldr r2, [r1]
1679 #ifdef __ARMEB__
1680 strb r2, [r0, #0x03]
1681 mov r3, r2, lsr #8
1682 mov r1, r2, lsr #24
1683 strb r1, [r0]
1684 #else
1685 strb r2, [r0]
1686 mov r3, r2, lsr #8
1687 mov r1, r2, lsr #24
1688 strb r1, [r0, #0x03]
1689 #endif
1690 strh r3, [r0, #0x01]
1691 RET
1692 LMEMCPY_4_PAD
1693
1694 /*
1695 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1696 */
1697 ldrb r2, [r1]
1698 ldrh r3, [r1, #0x01]
1699 ldrb r1, [r1, #0x03]
1700 strb r2, [r0]
1701 strh r3, [r0, #0x01]
1702 strb r1, [r0, #0x03]
1703 RET
1704 LMEMCPY_4_PAD
1705
1706 /*
1707 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1708 */
1709 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1710 	ldrh	r3, [r1, #0x02]	/* BE:r3 = ..23 LE:r3 = ..32 */
1711 #ifdef __ARMEB__
1712 mov r1, r2, lsr #8 /* r1 = ...0 */
1713 strb r1, [r0]
1714 mov r2, r2, lsl #8 /* r2 = .01. */
1715 orr r2, r2, r3, lsr #8 /* r2 = .012 */
1716 #else
1717 strb r2, [r0]
1718 mov r2, r2, lsr #8 /* r2 = ...1 */
1719 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1720 mov r3, r3, lsr #8 /* r3 = ...3 */
1721 #endif
1722 strh r2, [r0, #0x01]
1723 strb r3, [r0, #0x03]
1724 RET
1725 LMEMCPY_4_PAD
1726
1727 /*
1728 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1729 */
1730 ldrb r2, [r1]
1731 ldrh r3, [r1, #0x01]
1732 ldrb r1, [r1, #0x03]
1733 strb r2, [r0]
1734 strh r3, [r0, #0x01]
1735 strb r1, [r0, #0x03]
1736 RET
1737 LMEMCPY_4_PAD
1738
1739 /*
1740 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1741 */
1742 ldr r2, [r1]
1743 #ifdef __ARMEB__
1744 strh r2, [r0, #0x02]
1745 mov r3, r2, lsr #16
1746 strh r3, [r0]
1747 #else
1748 strh r2, [r0]
1749 mov r3, r2, lsr #16
1750 strh r3, [r0, #0x02]
1751 #endif
1752 RET
1753 LMEMCPY_4_PAD
1754
1755 /*
1756 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1757 */
1758 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1759 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1760 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1761 strh r1, [r0]
1762 #ifdef __ARMEB__
1763 mov r2, r2, lsl #8 /* r2 = 012. */
1764 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1765 #else
1766 mov r2, r2, lsr #24 /* r2 = ...2 */
1767 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1768 #endif
1769 strh r2, [r0, #0x02]
1770 RET
1771 LMEMCPY_4_PAD
1772
1773 /*
1774 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1775 */
1776 ldrh r2, [r1]
1777 ldrh r3, [r1, #0x02]
1778 strh r2, [r0]
1779 strh r3, [r0, #0x02]
1780 RET
1781 LMEMCPY_4_PAD
1782
1783 /*
1784 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1785 */
1786 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1787 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1788 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1789 strh r1, [r0, #0x02]
1790 #ifdef __ARMEB__
1791 mov r3, r3, lsr #24 /* r3 = ...1 */
1792 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
1793 #else
1794 mov r3, r3, lsl #8 /* r3 = 321. */
1795 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1796 #endif
1797 strh r3, [r0]
1798 RET
1799 LMEMCPY_4_PAD
1800
1801 /*
1802 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1803 */
1804 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1805 #ifdef __ARMEB__
1806 strb r2, [r0, #0x03]
1807 mov r3, r2, lsr #8
1808 mov r1, r2, lsr #24
1809 strh r3, [r0, #0x01]
1810 strb r1, [r0]
1811 #else
1812 strb r2, [r0]
1813 mov r3, r2, lsr #8
1814 mov r1, r2, lsr #24
1815 strh r3, [r0, #0x01]
1816 strb r1, [r0, #0x03]
1817 #endif
1818 RET
1819 LMEMCPY_4_PAD
1820
1821 /*
1822 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1823 */
1824 ldrb r2, [r1]
1825 ldrh r3, [r1, #0x01]
1826 ldrb r1, [r1, #0x03]
1827 strb r2, [r0]
1828 strh r3, [r0, #0x01]
1829 strb r1, [r0, #0x03]
1830 RET
1831 LMEMCPY_4_PAD
1832
1833 /*
1834 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1835 */
1836 #ifdef __ARMEB__
1837 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1838 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1839 strb r3, [r0, #0x03]
1840 mov r3, r3, lsr #8 /* r3 = ...2 */
1841 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
1842 strh r3, [r0, #0x01]
1843 mov r2, r2, lsr #8 /* r2 = ...0 */
1844 strb r2, [r0]
1845 #else
1846 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1847 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1848 strb r2, [r0]
1849 mov r2, r2, lsr #8 /* r2 = ...1 */
1850 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1851 strh r2, [r0, #0x01]
1852 mov r3, r3, lsr #8 /* r3 = ...3 */
1853 strb r3, [r0, #0x03]
1854 #endif
1855 RET
1856 LMEMCPY_4_PAD
1857
1858 /*
1859 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1860 */
1861 ldrb r2, [r1]
1862 ldrh r3, [r1, #0x01]
1863 ldrb r1, [r1, #0x03]
1864 strb r2, [r0]
1865 strh r3, [r0, #0x01]
1866 strb r1, [r0, #0x03]
1867 RET
1868 LMEMCPY_4_PAD
1869
1870
1871 /******************************************************************************
1872 * Special case for 6 byte copies
1873 */
1874 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1875 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1876 LMEMCPY_6_PAD
1877 .Lmemcpy_6:
1878 and r2, r1, #0x03
1879 orr r2, r2, r0, lsl #2
1880 ands r2, r2, #0x0f
1881 sub r3, pc, #0x14
1882 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1883
1884 /*
1885 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1886 */
1887 ldr r2, [r1]
1888 ldrh r3, [r1, #0x04]
1889 str r2, [r0]
1890 strh r3, [r0, #0x04]
1891 RET
1892 LMEMCPY_6_PAD
1893
1894 /*
1895 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1896 */
1897 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1898 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1899 #ifdef __ARMEB__
1900 mov r2, r2, lsl #8 /* r2 = 012. */
1901 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1902 #else
1903 mov r2, r2, lsr #8 /* r2 = .210 */
1904 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1905 #endif
1906 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1907 str r2, [r0]
1908 strh r3, [r0, #0x04]
1909 RET
1910 LMEMCPY_6_PAD
1911
1912 /*
1913 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1914 */
1915 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1916 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1917 #ifdef __ARMEB__
1918 mov r1, r3, lsr #16 /* r1 = ..23 */
1919 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
1920 str r1, [r0]
1921 strh r3, [r0, #0x04]
1922 #else
1923 mov r1, r3, lsr #16 /* r1 = ..54 */
1924 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1925 str r2, [r0]
1926 strh r1, [r0, #0x04]
1927 #endif
1928 RET
1929 LMEMCPY_6_PAD
1930
1931 /*
1932 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1933 */
1934 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1935 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
1936 	ldr	r1, [r1, #5]	/* BE:r1 = 5xxx LE:r1 = xxx5 */
1937 #ifdef __ARMEB__
1938 mov r2, r2, lsl #24 /* r2 = 0... */
1939 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
1940 mov r3, r3, lsl #8 /* r3 = 234. */
1941 orr r1, r3, r1, lsr #24 /* r1 = 2345 */
1942 #else
1943 mov r2, r2, lsr #24 /* r2 = ...0 */
1944 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1945 mov r1, r1, lsl #8 /* r1 = xx5. */
1946 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
1947 #endif
1948 str r2, [r0]
1949 strh r1, [r0, #0x04]
1950 RET
1951 LMEMCPY_6_PAD
1952
1953 /*
1954 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1955 */
1956 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1957 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1958 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1959 strh r1, [r0, #0x01]
1960 #ifdef __ARMEB__
1961 mov r1, r3, lsr #24 /* r1 = ...0 */
1962 strb r1, [r0]
1963 mov r3, r3, lsl #8 /* r3 = 123. */
1964 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
1965 #else
1966 strb r3, [r0]
1967 mov r3, r3, lsr #24 /* r3 = ...3 */
1968 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1969 mov r2, r2, lsr #8 /* r2 = ...5 */
1970 #endif
1971 strh r3, [r0, #0x03]
1972 strb r2, [r0, #0x05]
1973 RET
1974 LMEMCPY_6_PAD
1975
1976 /*
1977 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1978 */
1979 ldrb r2, [r1]
1980 ldrh r3, [r1, #0x01]
1981 ldrh ip, [r1, #0x03]
1982 ldrb r1, [r1, #0x05]
1983 strb r2, [r0]
1984 strh r3, [r0, #0x01]
1985 strh ip, [r0, #0x03]
1986 strb r1, [r0, #0x05]
1987 RET
1988 LMEMCPY_6_PAD
1989
1990 /*
1991 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1992 */
1993 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1994 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1995 #ifdef __ARMEB__
1996 mov r3, r2, lsr #8 /* r3 = ...0 */
1997 strb r3, [r0]
1998 strb r1, [r0, #0x05]
1999 mov r3, r1, lsr #8 /* r3 = .234 */
2000 strh r3, [r0, #0x03]
2001 mov r3, r2, lsl #8 /* r3 = .01. */
2002 orr r3, r3, r1, lsr #24 /* r3 = .012 */
2003 strh r3, [r0, #0x01]
2004 #else
2005 strb r2, [r0]
2006 mov r3, r1, lsr #24
2007 strb r3, [r0, #0x05]
2008 mov r3, r1, lsr #8 /* r3 = .543 */
2009 strh r3, [r0, #0x03]
2010 mov r3, r2, lsr #8 /* r3 = ...1 */
2011 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
2012 strh r3, [r0, #0x01]
2013 #endif
2014 RET
2015 LMEMCPY_6_PAD
2016
2017 /*
2018 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2019 */
2020 ldrb r2, [r1]
2021 ldrh r3, [r1, #0x01]
2022 ldrh ip, [r1, #0x03]
2023 ldrb r1, [r1, #0x05]
2024 strb r2, [r0]
2025 strh r3, [r0, #0x01]
2026 strh ip, [r0, #0x03]
2027 strb r1, [r0, #0x05]
2028 RET
2029 LMEMCPY_6_PAD
2030
2031 /*
2032 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2033 */
2034 #ifdef __ARMEB__
2035 ldr r2, [r1] /* r2 = 0123 */
2036 ldrh r3, [r1, #0x04] /* r3 = ..45 */
2037 mov r1, r2, lsr #16 /* r1 = ..01 */
2038 orr r3, r3, r2, lsl#16 /* r3 = 2345 */
2039 strh r1, [r0]
2040 str r3, [r0, #0x02]
2041 #else
2042 ldrh r2, [r1, #0x04] /* r2 = ..54 */
2043 ldr r3, [r1] /* r3 = 3210 */
2044 mov r2, r2, lsl #16 /* r2 = 54.. */
2045 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
2046 strh r3, [r0]
2047 str r2, [r0, #0x02]
2048 #endif
2049 RET
2050 LMEMCPY_6_PAD
2051
2052 /*
2053 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2054 */
2055 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2056 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
2057 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2058 #ifdef __ARMEB__
2059 mov r2, r2, lsr #8 /* r2 = .345 */
2060 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
2061 #else
2062 mov r2, r2, lsl #8 /* r2 = 543. */
2063 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
2064 #endif
2065 strh r1, [r0]
2066 str r2, [r0, #0x02]
2067 RET
2068 LMEMCPY_6_PAD
2069
2070 /*
2071 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2072 */
2073 ldrh r2, [r1]
2074 ldr r3, [r1, #0x02]
2075 strh r2, [r0]
2076 str r3, [r0, #0x02]
2077 RET
2078 LMEMCPY_6_PAD
2079
2080 /*
2081 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2082 */
2083 ldrb r3, [r1] /* r3 = ...0 */
2084 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2085 ldrb r1, [r1, #0x05] /* r1 = ...5 */
2086 #ifdef __ARMEB__
2087 mov r3, r3, lsl #8 /* r3 = ..0. */
2088 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
2089 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
2090 #else
2091 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2092 mov r1, r1, lsl #24 /* r1 = 5... */
2093 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
2094 #endif
2095 strh r3, [r0]
2096 str r1, [r0, #0x02]
2097 RET
2098 LMEMCPY_6_PAD
2099
2100 /*
2101 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2102 */
2103 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2104 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
2105 #ifdef __ARMEB__
2106 mov r3, r2, lsr #24 /* r3 = ...0 */
2107 strb r3, [r0]
2108 mov r2, r2, lsl #8 /* r2 = 123. */
2109 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2110 #else
2111 strb r2, [r0]
2112 mov r2, r2, lsr #8 /* r2 = .321 */
2113 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
2114 mov r1, r1, lsr #8 /* r1 = ...5 */
2115 #endif
2116 str r2, [r0, #0x01]
2117 strb r1, [r0, #0x05]
2118 RET
2119 LMEMCPY_6_PAD
2120
2121 /*
2122 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2123 */
2124 ldrb r2, [r1]
2125 ldrh r3, [r1, #0x01]
2126 ldrh ip, [r1, #0x03]
2127 ldrb r1, [r1, #0x05]
2128 strb r2, [r0]
2129 strh r3, [r0, #0x01]
2130 strh ip, [r0, #0x03]
2131 strb r1, [r0, #0x05]
2132 RET
2133 LMEMCPY_6_PAD
2134
2135 /*
2136 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2137 */
2138 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2139 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
2140 #ifdef __ARMEB__
2141 mov r3, r2, lsr #8 /* r3 = ...0 */
2142 strb r3, [r0]
2143 mov r2, r2, lsl #24 /* r2 = 1... */
2144 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2145 #else
2146 strb r2, [r0]
2147 mov r2, r2, lsr #8 /* r2 = ...1 */
2148 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
2149 mov r1, r1, lsr #24 /* r1 = ...5 */
2150 #endif
2151 str r2, [r0, #0x01]
2152 strb r1, [r0, #0x05]
2153 RET
2154 LMEMCPY_6_PAD
2155
2156 /*
2157 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2158 */
2159 ldrb r2, [r1]
2160 ldr r3, [r1, #0x01]
2161 ldrb r1, [r1, #0x05]
2162 strb r2, [r0]
2163 str r3, [r0, #0x01]
2164 strb r1, [r0, #0x05]
2165 RET
2166 LMEMCPY_6_PAD
2167
2168
2169 /******************************************************************************
2170 * Special case for 8 byte copies
2171 */
2172 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
2173 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
2174 LMEMCPY_8_PAD
2175 .Lmemcpy_8:
2176 and r2, r1, #0x03
2177 orr r2, r2, r0, lsl #2
2178 ands r2, r2, #0x0f
2179 sub r3, pc, #0x14
2180 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
2181
2182 /*
2183 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2184 */
2185 ldr r2, [r1]
2186 ldr r3, [r1, #0x04]
2187 str r2, [r0]
2188 str r3, [r0, #0x04]
2189 RET
2190 LMEMCPY_8_PAD
2191
2192 /*
2193 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2194 */
2195 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2196 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
2197 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2198 #ifdef __ARMEB__
2199 mov r3, r3, lsl #8 /* r3 = 012. */
2200 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
2201 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
2202 #else
2203 mov r3, r3, lsr #8 /* r3 = .210 */
2204 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
2205 mov r1, r1, lsl #24 /* r1 = 7... */
2206 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
2207 #endif
2208 str r3, [r0]
2209 str r2, [r0, #0x04]
2210 RET
2211 LMEMCPY_8_PAD
2212
2213 /*
2214 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2215 */
2216 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2217 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2218 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2219 #ifdef __ARMEB__
2220 mov r2, r2, lsl #16 /* r2 = 01.. */
2221 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2222 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
2223 #else
2224 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2225 mov r3, r3, lsr #16 /* r3 = ..54 */
2226 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
2227 #endif
2228 str r2, [r0]
2229 str r3, [r0, #0x04]
2230 RET
2231 LMEMCPY_8_PAD
2232
2233 /*
2234 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2235 */
2236 ldrb r3, [r1] /* r3 = ...0 */
2237 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2238 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
2239 #ifdef __ARMEB__
2240 mov r3, r3, lsl #24 /* r3 = 0... */
2241 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
2242 mov r2, r2, lsl #24 /* r2 = 4... */
2243 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
2244 #else
2245 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2246 mov r2, r2, lsr #24 /* r2 = ...4 */
2247 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
2248 #endif
2249 str r3, [r0]
2250 str r2, [r0, #0x04]
2251 RET
2252 LMEMCPY_8_PAD
2253
2254 /*
2255 * 0100: dst is 8-bit aligned, src is 32-bit aligned
2256 */
2257 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
2258 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
2259 #ifdef __ARMEB__
2260 mov r1, r3, lsr #24 /* r1 = ...0 */
2261 strb r1, [r0]
2262 mov r1, r3, lsr #8 /* r1 = .012 */
2263 strb r2, [r0, #0x07]
2264 mov r3, r3, lsl #24 /* r3 = 3... */
2265 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
2266 #else
2267 strb r3, [r0]
2268 mov r1, r2, lsr #24 /* r1 = ...7 */
2269 strb r1, [r0, #0x07]
2270 mov r1, r3, lsr #8 /* r1 = .321 */
2271 mov r3, r3, lsr #24 /* r3 = ...3 */
2272 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
2273 #endif
2274 strh r1, [r0, #0x01]
2275 str r3, [r0, #0x03]
2276 RET
2277 LMEMCPY_8_PAD
2278
2279 /*
2280 * 0101: dst is 8-bit aligned, src is 8-bit aligned
2281 */
2282 ldrb r2, [r1]
2283 ldrh r3, [r1, #0x01]
2284 ldr ip, [r1, #0x03]
2285 ldrb r1, [r1, #0x07]
2286 strb r2, [r0]
2287 strh r3, [r0, #0x01]
2288 str ip, [r0, #0x03]
2289 strb r1, [r0, #0x07]
2290 RET
2291 LMEMCPY_8_PAD
2292
2293 /*
2294 * 0110: dst is 8-bit aligned, src is 16-bit aligned
2295 */
2296 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2297 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2298 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2299 #ifdef __ARMEB__
2300 mov ip, r2, lsr #8 /* ip = ...0 */
2301 strb ip, [r0]
2302 mov ip, r2, lsl #8 /* ip = .01. */
2303 orr ip, ip, r3, lsr #24 /* ip = .012 */
2304 strb r1, [r0, #0x07]
2305 mov r3, r3, lsl #8 /* r3 = 345. */
2306 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
2307 #else
2308 strb r2, [r0] /* 0 */
2309 mov ip, r1, lsr #8 /* ip = ...7 */
2310 strb ip, [r0, #0x07] /* 7 */
2311 mov ip, r2, lsr #8 /* ip = ...1 */
2312 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2313 mov r3, r3, lsr #8 /* r3 = .543 */
2314 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
2315 #endif
2316 strh ip, [r0, #0x01]
2317 str r3, [r0, #0x03]
2318 RET
2319 LMEMCPY_8_PAD
2320
2321 /*
2322 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2323 */
2324 ldrb r3, [r1] /* r3 = ...0 */
2325 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2326 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
2327 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2328 strb r3, [r0]
2329 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
2330 #ifdef __ARMEB__
2331 strh r3, [r0, #0x01]
2332 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
2333 #else
2334 strh ip, [r0, #0x01]
2335 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
2336 #endif
2337 str r2, [r0, #0x03]
2338 strb r1, [r0, #0x07]
2339 RET
2340 LMEMCPY_8_PAD
2341
/*
 * Continuation of the .Lmemcpy_8 (8-byte copy) dispatch table.  Each
 * handler below services one (dst & 3, src & 3) alignment pair, named
 * by the 4-bit case number in its heading ((dst & 3) << 2 | (src & 3)),
 * and is padded to a fixed power-of-two size by LMEMCPY_8_PAD (defined
 * earlier in the file) so the dispatcher can reach it with a single
 * shifted add to pc.  Digits 0-7 in the comments name the eight source
 * bytes; "BE:"/"LE:" show the byte order a register holds on
 * big-/little-endian (__ARMEB__/default) builds.  r0 = dst, r1 = src.
 * Strategy throughout: do the fewest, widest loads the source
 * alignment permits, shuffle bytes in registers with the barrel
 * shifter, then do the fewest, widest stores the dest alignment
 * permits.
 */
2342 	/*
2343 	 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2344 	 */
2345 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2346 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2347 	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2348 #ifdef __ARMEB__
2349 	strh	r1, [r0]
2350 	mov	r1, r3, lsr #16		/* r1 = ..45 */
2351 	orr	r2, r1 ,r2, lsl #16	/* r2 = 2345 */
2352 #else
2353 	strh	r2, [r0]
2354 	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
2355 	mov	r3, r3, lsr #16		/* r3 = ..76 */
2356 #endif
2357 	str	r2, [r0, #0x03]
2358 	strb	r1, [r0, #0x07]
2359 	RET
2360 	LMEMCPY_8_PAD
2361 
2362 	/*
2363 	 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2364 	 */
2365 	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2366 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2367 	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
2368 	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
2369 	strh	r1, [r0]
2370 #ifdef __ARMEB__
2371 	mov	r1, r2, lsl #24		/* r1 = 2... */
2372 	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
2373 	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
2374 #else
2375 	mov	r1, r2, lsr #24		/* r1 = ...2 */
2376 	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
2377 	mov	r3, r3, lsr #24		/* r3 = ...6 */
2378 	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
2379 #endif
2380 	str	r1, [r0, #0x02]
2381 	strh	r3, [r0, #0x06]
2382 	RET
2383 	LMEMCPY_8_PAD
2384 
2385 	/*
2386 	 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2387 	 */
2388 	ldrh	r2, [r1]
2389 	ldr	ip, [r1, #0x02]
2390 	ldrh	r3, [r1, #0x06]
2391 	strh	r2, [r0]
2392 	str	ip, [r0, #0x02]
2393 	strh	r3, [r0, #0x06]
2394 	RET
2395 	LMEMCPY_8_PAD
2396 
2397 	/*
2398 	 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2399 	 */
2400 	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
2401 	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
2402 	ldrb	ip, [r1]		/* ip = ...0 */
2403 	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
2404 	strh	r1, [r0, #0x06]
2405 #ifdef __ARMEB__
2406 	mov	r3, r3, lsr #24		/* r3 = ...5 */
2407 	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
2408 	mov	r2, r2, lsr #24		/* r2 = ...1 */
2409 	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
2410 #else
2411 	mov	r3, r3, lsl #24		/* r3 = 5... */
2412 	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
2413 	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
2414 #endif
2415 	str	r3, [r0, #0x02]
2416 	strh	r2, [r0]
2417 	RET
2418 	LMEMCPY_8_PAD
2419 
2420 	/*
2421 	 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2422 	 */
2423 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2424 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2425 	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
2426 	strh	r1, [r0, #0x05]
2427 #ifdef __ARMEB__
2428 	strb	r3, [r0, #0x07]
2429 	mov	r1, r2, lsr #24		/* r1 = ...0 */
2430 	strb	r1, [r0]
2431 	mov	r2, r2, lsl #8		/* r2 = 123. */
2432 	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
2433 	str	r2, [r0, #0x01]
2434 #else
2435 	strb	r2, [r0]
2436 	mov	r1, r3, lsr #24		/* r1 = ...7 */
2437 	strb	r1, [r0, #0x07]
2438 	mov	r2, r2, lsr #8		/* r2 = .321 */
2439 	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
2440 	str	r2, [r0, #0x01]
2441 #endif
2442 	RET
2443 	LMEMCPY_8_PAD
2444 
2445 	/*
2446 	 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2447 	 */
2448 	ldrb	r3, [r1]		/* r3 = ...0 */
2449 	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
2450 	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2451 	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2452 	strb	r3, [r0]
2453 	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
2454 #ifdef __ARMEB__
2455 	strh	ip, [r0, #0x05]
2456 	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
2457 #else
2458 	strh	r3, [r0, #0x05]
2459 	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
2460 #endif
2461 	str	r2, [r0, #0x01]
2462 	strb	r1, [r0, #0x07]
2463 	RET
2464 	LMEMCPY_8_PAD
2465 
2466 	/*
2467 	 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2468 	 */
2469 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2470 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2471 	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2472 #ifdef __ARMEB__
2473 	mov	ip, r2, lsr #8		/* ip = ...0 */
2474 	strb	ip, [r0]
2475 	mov	ip, r2, lsl #24		/* ip = 1... */
2476 	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
2477 	strb	r1, [r0, #0x07]
2478 	mov	r1, r1, lsr #8		/* r1 = ...6 */
2479 	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
2480 #else
2481 	strb	r2, [r0]
2482 	mov	ip, r2, lsr #8		/* ip = ...1 */
2483 	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
2484 	mov	r2, r1, lsr #8		/* r2 = ...7 */
2485 	strb	r2, [r0, #0x07]
2486 	mov	r1, r1, lsl #8		/* r1 = .76. */
2487 	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
2488 #endif
2489 	str	ip, [r0, #0x01]
2490 	strh	r1, [r0, #0x05]
2491 	RET
2492 	LMEMCPY_8_PAD
2493 
2494 	/*
2495 	 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2496 	 */
2497 	ldrb	r2, [r1]
2498 	ldr	ip, [r1, #0x01]
2499 	ldrh	r3, [r1, #0x05]
2500 	ldrb	r1, [r1, #0x07]
2501 	strb	r2, [r0]
2502 	str	ip, [r0, #0x01]
2503 	strh	r3, [r0, #0x05]
2504 	strb	r1, [r0, #0x07]
2505 	RET
2506 	LMEMCPY_8_PAD
2507 
2508 /******************************************************************************
2509  * Special case for 12 byte copies
2510  */
/*
 * On entry r0 = dst, r1 = src; the copy length is a constant 12 bytes.
 * The dispatcher below indexes one of 16 handlers by
 * ((dst & 3) << 2) | (src & 3); each handler is padded to
 * 1 << LMEMCPY_C_LOG2 (128) bytes by LMEMCPY_C_PAD so it can be
 * reached with a single shifted add to pc.  Digits 0-B in the
 * comments name the twelve source bytes; "BE:"/"LE:" show a
 * register's byte order on big-/little-endian (__ARMEB__/default)
 * builds.  Handlers load as wide as the source alignment allows,
 * shuffle with the barrel shifter, and store as wide as the dest
 * alignment allows.
 */
2511 #define LMEMCPY_C_LOG2	7	/* 128 bytes */
2512 #define LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
2513 	LMEMCPY_C_PAD
2514 .Lmemcpy_c:
2515 	and	r2, r1, #0x03		/* r2 = src & 3 */
2516 	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 (excess bits masked below) */
2517 	ands	r2, r2, #0x0f		/* r2 = ((dst & 3) << 2) | (src & 3); Z set for case 0 */
2518 	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_c (pc reads &insn + 8; dispatch is 0x14 bytes) */
2519 	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2	/* case != 0: jump to .Lmemcpy_c + case * 128 */
2520 
2521 	/*
2522 	 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2523 	 */
2524 	ldr	r2, [r1]
2525 	ldr	r3, [r1, #0x04]
2526 	ldr	r1, [r1, #0x08]
2527 	str	r2, [r0]
2528 	str	r3, [r0, #0x04]
2529 	str	r1, [r0, #0x08]
2530 	RET
2531 	LMEMCPY_C_PAD
2532 
2533 	/*
2534 	 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2535 	 */
2536 	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
2537 	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2538 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2539 	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
2540 #ifdef __ARMEB__
2541 	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
2542 	str	r2, [r0, #0x08]
2543 	mov	r2, ip, lsr #24		/* r2 = ...7 */
2544 	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
2545 	mov	r1, r1, lsl #8		/* r1 = 012. */
2546 	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
2547 #else
2548 	mov	r2, r2, lsl #24		/* r2 = B... */
2549 	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
2550 	str	r2, [r0, #0x08]
2551 	mov	r2, ip, lsl #24		/* r2 = 7... */
2552 	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
2553 	mov	r1, r1, lsr #8		/* r1 = .210 */
2554 	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
2555 #endif
2556 	str	r2, [r0, #0x04]
2557 	str	r1, [r0]
2558 	RET
2559 	LMEMCPY_C_PAD
2560 
2561 	/*
2562 	 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2563 	 */
2564 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2565 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2566 	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2567 	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2568 #ifdef __ARMEB__
2569 	mov	r2, r2, lsl #16		/* r2 = 01.. */
2570 	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
2571 	str	r2, [r0]
2572 	mov	r3, r3, lsl #16		/* r3 = 45.. */
2573 	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
2574 	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
2575 #else
2576 	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
2577 	str	r2, [r0]
2578 	mov	r3, r3, lsr #16		/* r3 = ..54 */
2579 	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
2580 	mov	r1, r1, lsl #16		/* r1 = BA.. */
2581 	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
2582 #endif
2583 	str	r3, [r0, #0x04]
2584 	str	r1, [r0, #0x08]
2585 	RET
2586 	LMEMCPY_C_PAD
2587 
2588 	/*
2589 	 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2590 	 */
2591 	ldrb	r2, [r1]		/* r2 = ...0 */
2592 	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2593 	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2594 	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
2595 #ifdef __ARMEB__
2596 	mov	r2, r2, lsl #24		/* r2 = 0... */
2597 	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
2598 	str	r2, [r0]
2599 	mov	r3, r3, lsl #24		/* r3 = 4... */
2600 	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
2601 	mov	r1, r1, lsr #8		/* r1 = .9AB */
2602 	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
2603 #else
2604 	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
2605 	str	r2, [r0]
2606 	mov	r3, r3, lsr #24		/* r3 = ...4 */
2607 	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
2608 	mov	r1, r1, lsl #8		/* r1 = BA9. */
2609 	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
2610 #endif
2611 	str	r3, [r0, #0x04]
2612 	str	r1, [r0, #0x08]
2613 	RET
2614 	LMEMCPY_C_PAD
2615 
2616 	/*
2617 	 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2618 	 */
2619 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2620 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2621 	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
2622 	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
2623 	strh	r1, [r0, #0x01]
2624 #ifdef __ARMEB__
2625 	mov	r1, r2, lsr #24		/* r1 = ...0 */
2626 	strb	r1, [r0]
2627 	mov	r1, r2, lsl #24		/* r1 = 3... */
2628 	orr	r2, r1, r3, lsr #8	/* r1 = 3456 */
2629 	mov	r1, r3, lsl #24		/* r1 = 7... */
2630 	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
2631 #else
2632 	strb	r2, [r0]
2633 	mov	r1, r2, lsr #24		/* r1 = ...3 */
2634 	orr	r2, r1, r3, lsl #8	/* r1 = 6543 */
2635 	mov	r1, r3, lsr #24		/* r1 = ...7 */
2636 	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
2637 	mov	ip, ip, lsr #24		/* ip = ...B */
2638 #endif
2639 	str	r2, [r0, #0x03]
2640 	str	r1, [r0, #0x07]
2641 	strb	ip, [r0, #0x0b]
2642 	RET
2643 	LMEMCPY_C_PAD
2644 
2645 	/*
2646 	 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2647 	 */
2648 	ldrb	r2, [r1]
2649 	ldrh	r3, [r1, #0x01]
2650 	ldr	ip, [r1, #0x03]
2651 	strb	r2, [r0]
2652 	ldr	r2, [r1, #0x07]
2653 	ldrb	r1, [r1, #0x0b]
2654 	strh	r3, [r0, #0x01]
2655 	str	ip, [r0, #0x03]
2656 	str	r2, [r0, #0x07]
2657 	strb	r1, [r0, #0x0b]
2658 	RET
2659 	LMEMCPY_C_PAD
2660 
2661 	/*
2662 	 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2663 	 */
2664 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2665 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2666 	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2667 	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2668 #ifdef __ARMEB__
2669 	mov	r2, r2, ror #8		/* r2 = 1..0 */
2670 	strb	r2, [r0]
2671 	mov	r2, r2, lsr #16		/* r2 = ..1. */
2672 	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
2673 	strh	r2, [r0, #0x01]
2674 	mov	r2, r3, lsl #8		/* r2 = 345. */
2675 	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
2676 	mov	r2, ip, lsl #8		/* r2 = 789. */
2677 	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
2678 #else
2679 	strb	r2, [r0]
2680 	mov	r2, r2, lsr #8		/* r2 = ...1 */
2681 	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2682 	strh	r2, [r0, #0x01]
2683 	mov	r2, r3, lsr #8		/* r2 = .543 */
2684 	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
2685 	mov	r2, ip, lsr #8		/* r2 = .987 */
2686 	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
2687 	mov	r1, r1, lsr #8		/* r1 = ...B */
2688 #endif
2689 	str	r3, [r0, #0x03]
2690 	str	r2, [r0, #0x07]
2691 	strb	r1, [r0, #0x0b]
2692 	RET
2693 	LMEMCPY_C_PAD
2694 
2695 	/*
2696 	 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2697 	 */
2698 	ldrb	r2, [r1]
2699 	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2700 	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2701 	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
2702 	strb	r2, [r0]
2703 #ifdef __ARMEB__
2704 	mov	r2, r3, lsr #16		/* r2 = ..12 */
2705 	strh	r2, [r0, #0x01]
2706 	mov	r3, r3, lsl #16		/* r3 = 34.. */
2707 	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
2708 	mov	ip, ip, lsl #16		/* ip = 78.. */
2709 	orr	ip, ip, r1, lsr #16	/* ip = 789A */
2710 	mov	r1, r1, lsr #8		/* r1 = .9AB */
2711 #else
2712 	strh	r3, [r0, #0x01]
2713 	mov	r3, r3, lsr #16		/* r3 = ..43 */
2714 	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
2715 	mov	ip, ip, lsr #16		/* ip = ..87 */
2716 	orr	ip, ip, r1, lsl #16	/* ip = A987 */
2717 	mov	r1, r1, lsr #16		/* r1 = ..xB */
2718 #endif
2719 	str	r3, [r0, #0x03]
2720 	str	ip, [r0, #0x07]
2721 	strb	r1, [r0, #0x0b]
2722 	RET
2723 	LMEMCPY_C_PAD
2724 
2725 	/*
2726 	 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2727 	 */
2728 	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
2729 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2730 	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
2731 	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2732 #ifdef __ARMEB__
2733 	strh	r1, [r0]
2734 	mov	r1, ip, lsl #16		/* r1 = 23.. */
2735 	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
2736 	mov	r3, r3, lsl #16		/* r3 = 67.. */
2737 	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
2738 #else
2739 	strh	ip, [r0]
2740 	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
2741 	mov	r3, r3, lsr #16		/* r3 = ..76 */
2742 	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
2743 	mov	r2, r2, lsr #16		/* r2 = ..BA */
2744 #endif
2745 	str	r1, [r0, #0x02]
2746 	str	r3, [r0, #0x06]
2747 	strh	r2, [r0, #0x0a]
2748 	RET
2749 	LMEMCPY_C_PAD
2750 
2751 	/*
2752 	 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2753 	 */
2754 	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2755 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2756 	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
2757 	strh	ip, [r0]
2758 	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2759 	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
2760 #ifdef __ARMEB__
2761 	mov	r2, r2, lsl #24		/* r2 = 2... */
2762 	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
2763 	mov	r3, r3, lsl #24		/* r3 = 6... */
2764 	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
2765 	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
2766 #else
2767 	mov	r2, r2, lsr #24		/* r2 = ...2 */
2768 	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
2769 	mov	r3, r3, lsr #24		/* r3 = ...6 */
2770 	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
2771 	mov	r1, r1, lsl #8		/* r1 = ..B. */
2772 	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
2773 #endif
2774 	str	r2, [r0, #0x02]
2775 	str	r3, [r0, #0x06]
2776 	strh	r1, [r0, #0x0a]
2777 	RET
2778 	LMEMCPY_C_PAD
2779 
2780 	/*
2781 	 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2782 	 */
2783 	ldrh	r2, [r1]
2784 	ldr	r3, [r1, #0x02]
2785 	ldr	ip, [r1, #0x06]
2786 	ldrh	r1, [r1, #0x0a]
2787 	strh	r2, [r0]
2788 	str	r3, [r0, #0x02]
2789 	str	ip, [r0, #0x06]
2790 	strh	r1, [r0, #0x0a]
2791 	RET
2792 	LMEMCPY_C_PAD
2793 
2794 	/*
2795 	 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2796 	 */
2797 	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
2798 	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
2799 	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
2800 	strh	ip, [r0, #0x0a]
2801 	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
2802 	ldrb	r1, [r1]		/* r1 = ...0 */
2803 #ifdef __ARMEB__
2804 	mov	r2, r2, lsr #24		/* r2 = ...9 */
2805 	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
2806 	mov	r3, r3, lsr #24		/* r3 = ...5 */
2807 	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
2808 	mov	r1, r1, lsl #8		/* r1 = ..0. */
2809 	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
2810 #else
2811 	mov	r2, r2, lsl #24		/* r2 = 9... */
2812 	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
2813 	mov	r3, r3, lsl #24		/* r3 = 5... */
2814 	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
2815 	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
2816 #endif
2817 	str	r2, [r0, #0x06]
2818 	str	r3, [r0, #0x02]
2819 	strh	r1, [r0]
2820 	RET
2821 	LMEMCPY_C_PAD
2822 
2823 	/*
2824 	 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2825 	 */
2826 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2827 	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
2828 	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
2829 #ifdef __ARMEB__
2830 	mov	r3, r2, lsr #24		/* r3 = ...0 */
2831 	strb	r3, [r0]
2832 	mov	r2, r2, lsl #8		/* r2 = 123. */
2833 	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
2834 	str	r2, [r0, #0x01]
2835 	mov	r2, ip, lsl #8		/* r2 = 567. */
2836 	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
2837 	str	r2, [r0, #0x05]
2838 	mov	r2, r1, lsr #8		/* r2 = ..9A */
2839 	strh	r2, [r0, #0x09]
2840 	strb	r1, [r0, #0x0b]
2841 #else
2842 	strb	r2, [r0]
2843 	mov	r3, r2, lsr #8		/* r3 = .321 */
2844 	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
2845 	str	r3, [r0, #0x01]
2846 	mov	r3, ip, lsr #8		/* r3 = .765 */
2847 	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
2848 	str	r3, [r0, #0x05]
2849 	mov	r1, r1, lsr #8		/* r1 = .BA9 */
2850 	strh	r1, [r0, #0x09]
2851 	mov	r1, r1, lsr #16		/* r1 = ...B */
2852 	strb	r1, [r0, #0x0b]
2853 #endif
2854 	RET
2855 	LMEMCPY_C_PAD
2856 
2857 	/*
2858 	 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2859 	 */
2860 	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
2861 	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
2862 	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2863 	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
2864 	strb	r2, [r0, #0x0b]
2865 #ifdef __ARMEB__
2866 	strh	r3, [r0, #0x09]
2867 	mov	r3, r3, lsr #16		/* r3 = ..78 */
2868 	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
2869 	mov	ip, ip, lsr #16		/* ip = ..34 */
2870 	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
2871 	mov	r1, r1, lsr #16		/* r1 = ..x0 */
2872 #else
2873 	mov	r2, r3, lsr #16		/* r2 = ..A9 */
2874 	strh	r2, [r0, #0x09]
2875 	mov	r3, r3, lsl #16		/* r3 = 87.. */
2876 	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
2877 	mov	ip, ip, lsl #16		/* ip = 43.. */
2878 	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
2879 	mov	r1, r1, lsr #8		/* r1 = .210 */
2880 #endif
2881 	str	r3, [r0, #0x05]
2882 	str	ip, [r0, #0x01]
2883 	strb	r1, [r0]
2884 	RET
2885 	LMEMCPY_C_PAD
2886 
2887 	/*
2888 	 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2889 	 */
2890 #ifdef __ARMEB__
2891 	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
2892 	ldr	ip, [r1, #0x06]		/* ip = 6789 */
2893 	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
2894 	ldrh	r1, [r1]		/* r1 = ..01 */
2895 	strb	r2, [r0, #0x0b]
2896 	mov	r2, r2, lsr #8		/* r2 = ...A */
2897 	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
2898 	mov	ip, ip, lsr #8		/* ip = .678 */
2899 	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
2900 	mov	r3, r3, lsr #8		/* r3 = .234 */
2901 	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
2902 	mov	r1, r1, lsr #8		/* r1 = ...0 */
2903 	strb	r1, [r0]
2904 	str	r3, [r0, #0x01]
2905 	str	ip, [r0, #0x05]
2906 	strh	r2, [r0, #0x09]
2907 #else
2908 	ldrh	r2, [r1]		/* r2 = ..10 */
2909 	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
2910 	ldr	ip, [r1, #0x06]		/* ip = 9876 */
2911 	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
2912 	strb	r2, [r0]
2913 	mov	r2, r2, lsr #8		/* r2 = ...1 */
2914 	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2915 	mov	r3, r3, lsr #24		/* r3 = ...5 */
2916 	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
2917 	mov	ip, ip, lsr #24		/* ip = ...9 */
2918 	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
2919 	mov	r1, r1, lsr #8		/* r1 = ...B */
2920 	str	r2, [r0, #0x01]
2921 	str	r3, [r0, #0x05]
2922 	strh	ip, [r0, #0x09]
2923 	strb	r1, [r0, #0x0b]
2924 #endif
2925 	RET
2926 	LMEMCPY_C_PAD
2927 
2928 	/*
2929 	 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2930 	 */
2931 	ldrb	r2, [r1]
2932 	ldr	r3, [r1, #0x01]
2933 	ldr	ip, [r1, #0x05]
2934 	strb	r2, [r0]
2935 	ldrh	r2, [r1, #0x09]
2936 	ldrb	r1, [r1, #0x0b]
2937 	str	r3, [r0, #0x01]
2938 	str	ip, [r0, #0x05]
2939 	strh	r2, [r0, #0x09]
2940 	strb	r1, [r0, #0x0b]
2941 	RET
2942 END(memcpy)
2943 #endif /* _ARM_ARCH_5E */
2944
2945 #ifdef GPROF
2946 
/*
 * Profiling marker symbols, built only for GPROF kernels.  Each is a
 * single nop behind an ENTRY() label.
 * NOTE(review): these appear to be the conventional kernel-gprof
 * boundary symbols (user mode / begin-trap / end-trap / begin-intr /
 * end-intr) used to attribute sampled time outside normal functions,
 * as in other BSD ports -- confirm against the kernel profiling code
 * before relying on this description.
 */
2947 ENTRY(user)
2948 	nop
2949 ENTRY(btrap)
2950 	nop
2951 ENTRY(etrap)
2952 	nop
2953 ENTRY(bintr)
2954 	nop
2955 ENTRY(eintr)
2956 	nop
2957 
2958 #endif
Cache object: 7234edb2fa618340722f553ad481a98b
|