FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 /*
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
29 *
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
46 * written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
59 */
60 /*
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
63 *
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 *
76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86 * POSSIBILITY OF SUCH DAMAGE.
87 */
88
89 #include <machine/asm.h>
90 #include <machine/asmacros.h>
91 __FBSDID("$FreeBSD: releng/9.0/sys/arm/arm/support.S 203974 2010-02-16 21:59:17Z imp $");
92
93 #include "assym.s"
94
/*
 * Indirection cells holding the addresses of tunable variables.
 * Platform code may install optimized bulk-copy/zero routines
 * (_arm_memcpy / _arm_bzero) and the minimum request size at which
 * each is worth calling; bzero/memcpy below load these cells and
 * dereference them at run time (a NULL function pointer means "no
 * hook installed, use the generic code").
 */
95 .L_arm_memcpy:
96 .word _C_LABEL(_arm_memcpy)
97 .L_arm_bzero:
98 .word _C_LABEL(_arm_bzero)
99 .L_min_memcpy_size:
100 .word _C_LABEL(_min_memcpy_size)
101 .L_min_bzero_size:
102 .word _C_LABEL(_min_bzero_size)
103 /*
104 * memset: Sets a block of memory to the specified value
105 *
106 * On entry:
107 * r0 - dest address
108 * r1 - byte to write
109 * r2 - number of bytes to write
110 *
111 * On exit:
112 * r0 - dest address
113 */
114 /* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * void bzero(void *dst, size_t len)
 * In:  r0 = dst, r1 = len.
 * If an _arm_bzero hook is installed and len is at least
 * _min_bzero_size, call it as (dst, len, 0); a zero return value
 * means the hook handled the request.  Otherwise (or on hook
 * failure) fall into the shared memset core with the fill byte
 * forced to zero -- do_memset expects r1 = length, r3 = fill byte.
 */
115 ENTRY(bzero)
116 ldr r3, .L_arm_bzero
117 ldr r3, [r3] /* r3 = installed hook, or NULL */
118 cmp r3, #0
119 beq .Lnormal0 /* no hook: use generic path */
120 ldr r2, .L_min_bzero_size
121 ldr r2, [r2]
122 cmp r1, r2 /* NOTE(review): signed compare (blt) -- assumes len < 2^31 */
123 blt .Lnormal0 /* below hook threshold: generic path */
124 stmfd sp!, {r0, r1, lr}
125 mov r2, #0 /* third hook argument = 0 */
126 mov lr, pc
127 mov pc, r3 /* indirect call, ARMv4-compatible */
128 cmp r0, #0 /* hook returned 0 => request handled */
129 ldmfd sp!, {r0, r1, lr}
130 RETeq
131 .Lnormal0:
132 mov r3, #0x00 /* fill byte = 0; r1 already holds len */
133 b do_memset
134
135 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
/*
 * void *memset(void *dst, int c, size_t len)
 * In:  r0 = dst, r1 = c, r2 = len.  Out: r0 = dst (never modified).
 * Shared core 'do_memset' register contract: r0 = dst (kept intact
 * as the return value), r1 = remaining length, r3 = fill byte,
 * ip = write cursor, r2 = scratch / duplicated fill word.
 * bzero branches to do_memset with r3 = 0.
 */
136 ENTRY(memset)
137 and r3, r1, #0xff /* We deal with bytes */
138 mov r1, r2 /* r1 = length from here on */
139 do_memset:
140 cmp r1, #0x04 /* Do we have less than 4 bytes */
141 mov ip, r0
142 blt .Lmemset_lessthanfour
143
144 /* Ok first we will word align the address */
145 ands r2, ip, #0x03 /* Get the bottom two bits */
146 bne .Lmemset_wordunaligned /* The address is not word aligned */
147
148 /* We are now word aligned */
149 .Lmemset_wordaligned:
/* Replicate the fill byte into all four byte lanes of r3. */
150 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
151 #ifdef _ARM_ARCH_5E
152 tst ip, #0x04 /* Quad-align for armv5e */
153 #else
154 cmp r1, #0x10
155 #endif
156 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
157 #ifdef _ARM_ARCH_5E
/* On ARMv5E, 8-byte-align the cursor so strd (64-bit stores) can be used. */
158 subne r1, r1, #0x04 /* Quad-align if necessary */
159 strne r3, [ip], #0x04
160 cmp r1, #0x10
161 #endif
162 blt .Lmemset_loop4 /* If less than 16 then use words */
163 mov r2, r3 /* Duplicate data */
164 cmp r1, #0x80 /* If < 128 then skip the big loop */
165 blt .Lmemset_loop32
166
167 /* Do 128 bytes at a time */
/* subs sets ge while bytes remain; the conditional stores then write 16 x 8 bytes. */
168 .Lmemset_loop128:
169 subs r1, r1, #0x80
170 #ifdef _ARM_ARCH_5E
171 strged r2, [ip], #0x08
172 strged r2, [ip], #0x08
173 strged r2, [ip], #0x08
174 strged r2, [ip], #0x08
175 strged r2, [ip], #0x08
176 strged r2, [ip], #0x08
177 strged r2, [ip], #0x08
178 strged r2, [ip], #0x08
179 strged r2, [ip], #0x08
180 strged r2, [ip], #0x08
181 strged r2, [ip], #0x08
182 strged r2, [ip], #0x08
183 strged r2, [ip], #0x08
184 strged r2, [ip], #0x08
185 strged r2, [ip], #0x08
186 strged r2, [ip], #0x08
187 #else
188 stmgeia ip!, {r2-r3}
189 stmgeia ip!, {r2-r3}
190 stmgeia ip!, {r2-r3}
191 stmgeia ip!, {r2-r3}
192 stmgeia ip!, {r2-r3}
193 stmgeia ip!, {r2-r3}
194 stmgeia ip!, {r2-r3}
195 stmgeia ip!, {r2-r3}
196 stmgeia ip!, {r2-r3}
197 stmgeia ip!, {r2-r3}
198 stmgeia ip!, {r2-r3}
199 stmgeia ip!, {r2-r3}
200 stmgeia ip!, {r2-r3}
201 stmgeia ip!, {r2-r3}
202 stmgeia ip!, {r2-r3}
203 stmgeia ip!, {r2-r3}
204 #endif
205 bgt .Lmemset_loop128
206 RETeq /* Zero length so just exit */
207
208 add r1, r1, #0x80 /* Adjust for extra sub */
209
210 /* Do 32 bytes at a time */
211 .Lmemset_loop32:
212 subs r1, r1, #0x20
213 #ifdef _ARM_ARCH_5E
214 strged r2, [ip], #0x08
215 strged r2, [ip], #0x08
216 strged r2, [ip], #0x08
217 strged r2, [ip], #0x08
218 #else
219 stmgeia ip!, {r2-r3}
220 stmgeia ip!, {r2-r3}
221 stmgeia ip!, {r2-r3}
222 stmgeia ip!, {r2-r3}
223 #endif
224 bgt .Lmemset_loop32
225 RETeq /* Zero length so just exit */
226
227 adds r1, r1, #0x10 /* Partially adjust for extra sub */
228
229 /* Deal with 16 bytes or more */
230 #ifdef _ARM_ARCH_5E
231 strged r2, [ip], #0x08
232 strged r2, [ip], #0x08
233 #else
234 stmgeia ip!, {r2-r3}
235 stmgeia ip!, {r2-r3}
236 #endif
237 RETeq /* Zero length so just exit */
238
239 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
240
241 /* We have at least 4 bytes so copy as words */
242 .Lmemset_loop4:
243 subs r1, r1, #0x04
244 strge r3, [ip], #0x04
245 bgt .Lmemset_loop4
246 RETeq /* Zero length so just exit */
247
248 #ifdef _ARM_ARCH_5E
249 /* Compensate for 64-bit alignment check */
250 adds r1, r1, #0x04
251 RETeq
252 cmp r1, #2
253 #else
254 cmp r1, #-2
255 #endif
256
/* 1-3 trailing bytes remain; flags from the cmp select how many. */
257 strb r3, [ip], #0x01 /* Set 1 byte */
258 strgeb r3, [ip], #0x01 /* Set another byte */
259 strgtb r3, [ip] /* and a third */
260 RET /* Exit */
261
/* Destination not word aligned: r2 = low two address bits on entry. */
262 .Lmemset_wordunaligned:
263 rsb r2, r2, #0x004 /* r2 = bytes needed to reach alignment (1..3) */
264 strb r3, [ip], #0x01 /* Set 1 byte */
265 cmp r2, #0x02
266 strgeb r3, [ip], #0x01 /* Set another byte */
267 sub r1, r1, r2
268 strgtb r3, [ip], #0x01 /* and a third */
269 cmp r1, #0x04 /* More than 4 bytes left? */
270 bge .Lmemset_wordaligned /* Yup */
271
272 .Lmemset_lessthanfour:
273 cmp r1, #0x00
274 RETeq /* Zero length so exit */
275 strb r3, [ip], #0x01 /* Set 1 byte */
276 cmp r1, #0x02
277 strgeb r3, [ip], #0x01 /* Set another byte */
278 strgtb r3, [ip] /* and a third */
279 RET /* Exit */
280
/*
 * int bcmp(const void *b1, const void *b2, size_t len)
 * In:  r0 = b1, r1 = b2, r2 = len.
 * Out: r0 = 0 if the buffers are equal; otherwise the difference of
 *      the first mismatching bytes/words (callers only test for 0).
 * ip is used as the b1 cursor so r0 can accumulate the result.
 * len == 6 is special-cased (common for MAC addresses / net stack).
 */
281 ENTRY(bcmp)
282 mov ip, r0
283 cmp r2, #0x06
284 beq .Lmemcmp_6bytes
285 mov r0, #0x00
286
287 /* Are both addresses aligned the same way? */
288 cmp r2, #0x00
289 eornes r3, ip, r1 /* r3 = b1 ^ b2; low bits zero iff same alignment */
290 RETeq /* len == 0, or same addresses! */
291 tst r3, #0x03
292 subne r2, r2, #0x01
293 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
294
295 /* Word-align the addresses, if necessary */
/*
 * Computed dispatch: r3 = ((b2 - 5) & 3) * 3 = ((align - 1) & 3) * 3,
 * then pc += r3 * 8, i.e. skip N 24-byte (6-instruction) compare
 * groups below.  align 1 falls through (needs 3 lead bytes),
 * align 2 skips one group, align 3 skips two, align 0 skips all
 * three straight to the word loop.  The nop pads so that a zero
 * offset (not taken, ands set eq) falls into the first group.
 */
296 sub r3, r1, #0x05
297 ands r3, r3, #0x03
298 add r3, r3, r3, lsl #1
299 addne pc, pc, r3, lsl #3
300 nop
301
302 /* Compare up to 3 bytes */
303 ldrb r0, [ip], #0x01
304 ldrb r3, [r1], #0x01
305 subs r0, r0, r3
306 RETne
307 subs r2, r2, #0x01
308 RETeq
309
310 /* Compare up to 2 bytes */
311 ldrb r0, [ip], #0x01
312 ldrb r3, [r1], #0x01
313 subs r0, r0, r3
314 RETne
315 subs r2, r2, #0x01
316 RETeq
317
318 /* Compare 1 byte */
319 ldrb r0, [ip], #0x01
320 ldrb r3, [r1], #0x01
321 subs r0, r0, r3
322 RETne
323 subs r2, r2, #0x01
324 RETeq
325
326 /* Compare 4 bytes at a time, if possible */
327 subs r2, r2, #0x04
328 bcc .Lmemcmp_bytewise
329 .Lmemcmp_word_aligned:
330 ldr r0, [ip], #0x04
331 ldr r3, [r1], #0x04
332 subs r2, r2, #0x04 /* cs while at least 4 bytes remained */
333 cmpcs r0, r3
334 beq .Lmemcmp_word_aligned
335 sub r0, r0, r3
336
337 /* Correct for extra subtraction, and check if done */
338 adds r2, r2, #0x04
339 cmpeq r0, #0x00 /* If done, did all bytes match? */
340 RETeq /* Yup. Just return */
341
/* Mismatch inside the last word: back up and redo it bytewise so the
 * returned difference is that of the first differing *byte*. */
342 /* Re-do the final word byte-wise */
343 sub ip, ip, #0x04
344 sub r1, r1, #0x04
345
346 .Lmemcmp_bytewise:
347 add r2, r2, #0x03
348 .Lmemcmp_bytewise2:
349 ldrb r0, [ip], #0x01
350 ldrb r3, [r1], #0x01
351 subs r2, r2, #0x01
352 cmpcs r0, r3
353 beq .Lmemcmp_bytewise2
354 sub r0, r0, r3
355 RET
356
357 /*
358 * 6 byte compares are very common, thanks to the network stack.
359 * This code is hand-scheduled to reduce the number of stalls for
360 * load results. Everything else being equal, this will be ~32%
361 * faster than a byte-wise memcmp.
362 */
363 .align 5
364 .Lmemcmp_6bytes:
365 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
366 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
367 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
368 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
369 ldreqb r3, [ip, #0x01] /* r3 = b1#1 */
370 RETne /* Return if mismatch on #0 */
371 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
372 ldreqb r3, [r1, #0x02] /* r3 = b2#2 */
373 ldreqb r0, [ip, #0x02] /* r0 = b1#2 */
374 RETne /* Return if mismatch on #1 */
375 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
376 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
377 ldreqb r3, [ip, #0x03] /* r3 = b1#3 */
378 RETne /* Return if mismatch on #2 */
379 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
380 ldreqb r3, [r1, #0x04] /* r3 = b2#4 */
381 ldreqb r0, [ip, #0x04] /* r0 = b1#4 */
382 RETne /* Return if mismatch on #3 */
383 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
384 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
385 ldreqb r3, [ip, #0x05] /* r3 = b1#5 */
386 RETne /* Return if mismatch on #4 */
387 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
388 RET
389
/*
 * bcopy(src, dst, len): BSD-argument-order wrapper around memmove.
 * In: r0 = src, r1 = dst, r2 = len.
 * Exchange r0/r1 so they match memmove(dst, src, len), then fall
 * straight through into memmove below.  r3 is free scratch here:
 * memmove never reads r3 before writing it.
 */
ENTRY(bcopy)
	mov	r3, r0			/* r3 = src */
	mov	r0, r1			/* r0 = dst (memmove arg 0) */
	mov	r1, r3			/* r1 = src (memmove arg 1) */
/*
 * void *memmove(void *dst, const void *src, size_t len)
 * In:  r0 = dst, r1 = src, r2 = len.  Out: r0 = dst.
 * Overlap-safe copy: chooses a forward copy when dst < src and a
 * backward copy when dst > src, so source data is never clobbered
 * before it is read.  r3/r12/lr are scratch; r4 (and r5/lr in the
 * misaligned-source paths) are saved on the stack when borrowed.
 * bcopy above falls into this entry point after swapping r0/r1.
 */
395 ENTRY(memmove)
396 /* Do the buffers overlap? */
397 cmp r0, r1
398 RETeq /* Bail now if src/dst are the same */
/*
 * NOTE(review): this "no overlap -> tail-call memcpy" shortcut looks
 * inert.  After 'cmp r0, r1', cc means dst < src and cs means
 * dst > src, so both conditional subtractions below compute the
 * *negated* distance; the unsigned 'cmp r3, r2 / bcc' then (almost)
 * never branches to memcpy.  Behavior remains correct because the
 * code below handles both directions itself -- confirm against
 * upstream before "fixing".
 */
399 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
400 subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */
401 cmp r3, r2 /* if (r3 < len) we have an overlap */
402 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
403
404 /* Determine copy direction */
405 cmp r1, r0
406 bcc .Lmemmove_backwards
407
408 moveq r0, #0 /* Quick abort for len=0 */
409 RETeq
410
411 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
412 subs r2, r2, #4
413 blt .Lmemmove_fl4 /* less than 4 bytes */
414 ands r12, r0, #3
415 bne .Lmemmove_fdestul /* oh unaligned destination addr */
416 ands r12, r1, #3
417 bne .Lmemmove_fsrcul /* oh unaligned source addr */
418
419 .Lmemmove_ft8:
420 /* We have aligned source and destination */
421 subs r2, r2, #8
422 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
423 subs r2, r2, #0x14
424 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
425 stmdb sp!, {r4} /* borrow r4 */
426
427 /* blat 32 bytes at a time */
428 /* XXX for really big copies perhaps we should use more registers */
429 .Lmemmove_floop32:
430 ldmia r1!, {r3, r4, r12, lr}
431 stmia r0!, {r3, r4, r12, lr}
432 ldmia r1!, {r3, r4, r12, lr}
433 stmia r0!, {r3, r4, r12, lr}
434 subs r2, r2, #0x20
435 bge .Lmemmove_floop32
436
437 cmn r2, #0x10
438 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
439 stmgeia r0!, {r3, r4, r12, lr}
440 subge r2, r2, #0x10
441 ldmia sp!, {r4} /* return r4 */
442
443 .Lmemmove_fl32:
444 adds r2, r2, #0x14
445
446 /* blat 12 bytes at a time */
447 .Lmemmove_floop12:
448 ldmgeia r1!, {r3, r12, lr}
449 stmgeia r0!, {r3, r12, lr}
450 subges r2, r2, #0x0c
451 bge .Lmemmove_floop12
452
453 .Lmemmove_fl12:
454 adds r2, r2, #8
455 blt .Lmemmove_fl4
456
457 subs r2, r2, #4
458 ldrlt r3, [r1], #4
459 strlt r3, [r0], #4
460 ldmgeia r1!, {r3, r12}
461 stmgeia r0!, {r3, r12}
462 subge r2, r2, #4
463
464 .Lmemmove_fl4:
465 /* less than 4 bytes to go */
466 adds r2, r2, #4
467 ldmeqia sp!, {r0, pc} /* done */
468
469 /* copy the crud byte at a time */
470 cmp r2, #2
471 ldrb r3, [r1], #1
472 strb r3, [r0], #1
473 ldrgeb r3, [r1], #1
474 strgeb r3, [r0], #1
475 ldrgtb r3, [r1], #1
476 strgtb r3, [r0], #1
477 ldmia sp!, {r0, pc}
478
479 /* erg - unaligned destination */
480 .Lmemmove_fdestul:
481 rsb r12, r12, #4 /* r12 = bytes to reach word alignment (1..3) */
482 cmp r12, #2
483
484 /* align destination with byte copies */
485 ldrb r3, [r1], #1
486 strb r3, [r0], #1
487 ldrgeb r3, [r1], #1
488 strgeb r3, [r0], #1
489 ldrgtb r3, [r1], #1
490 strgtb r3, [r0], #1
491 subs r2, r2, r12
492 blt .Lmemmove_fl4 /* less the 4 bytes */
493
494 ands r12, r1, #3
495 beq .Lmemmove_ft8 /* we have an aligned source */
496
497 /* erg - unaligned source */
498 /* This is where it gets nasty ... */
/*
 * Technique: round the source down to a word boundary, pre-load one
 * aligned word into lr, then in each step read whole aligned words
 * and reassemble the misaligned data with paired lsr/lsl shifts
 * (shift amounts chosen per the 1/2/3-byte offset; __ARMEB__ swaps
 * the shift directions for big-endian byte order).  r12 holds the
 * source misalignment (1, 2 or 3) on entry.
 */
499 .Lmemmove_fsrcul:
500 bic r1, r1, #3
501 ldr lr, [r1], #4
502 cmp r12, #2
503 bgt .Lmemmove_fsrcul3
504 beq .Lmemmove_fsrcul2
505 cmp r2, #0x0c
506 blt .Lmemmove_fsrcul1loop4
507 sub r2, r2, #0x0c
508 stmdb sp!, {r4, r5}
509
510 .Lmemmove_fsrcul1loop16:
511 #ifdef __ARMEB__
512 mov r3, lr, lsl #8
513 #else
514 mov r3, lr, lsr #8
515 #endif
516 ldmia r1!, {r4, r5, r12, lr}
517 #ifdef __ARMEB__
518 orr r3, r3, r4, lsr #24
519 mov r4, r4, lsl #8
520 orr r4, r4, r5, lsr #24
521 mov r5, r5, lsl #8
522 orr r5, r5, r12, lsr #24
523 mov r12, r12, lsl #8
524 orr r12, r12, lr, lsr #24
525 #else
526 orr r3, r3, r4, lsl #24
527 mov r4, r4, lsr #8
528 orr r4, r4, r5, lsl #24
529 mov r5, r5, lsr #8
530 orr r5, r5, r12, lsl #24
531 mov r12, r12, lsr #8
532 orr r12, r12, lr, lsl #24
533 #endif
534 stmia r0!, {r3-r5, r12}
535 subs r2, r2, #0x10
536 bge .Lmemmove_fsrcul1loop16
537 ldmia sp!, {r4, r5}
538 adds r2, r2, #0x0c
539 blt .Lmemmove_fsrcul1l4
540
541 .Lmemmove_fsrcul1loop4:
542 #ifdef __ARMEB__
543 mov r12, lr, lsl #8
544 #else
545 mov r12, lr, lsr #8
546 #endif
547 ldr lr, [r1], #4
548 #ifdef __ARMEB__
549 orr r12, r12, lr, lsr #24
550 #else
551 orr r12, r12, lr, lsl #24
552 #endif
553 str r12, [r0], #4
554 subs r2, r2, #4
555 bge .Lmemmove_fsrcul1loop4
556
557 .Lmemmove_fsrcul1l4:
558 sub r1, r1, #3 /* rewind to the true (misaligned) byte position */
559 b .Lmemmove_fl4
560
561 .Lmemmove_fsrcul2:
562 cmp r2, #0x0c
563 blt .Lmemmove_fsrcul2loop4
564 sub r2, r2, #0x0c
565 stmdb sp!, {r4, r5}
566
567 .Lmemmove_fsrcul2loop16:
568 #ifdef __ARMEB__
569 mov r3, lr, lsl #16
570 #else
571 mov r3, lr, lsr #16
572 #endif
573 ldmia r1!, {r4, r5, r12, lr}
574 #ifdef __ARMEB__
575 orr r3, r3, r4, lsr #16
576 mov r4, r4, lsl #16
577 orr r4, r4, r5, lsr #16
578 mov r5, r5, lsl #16
579 orr r5, r5, r12, lsr #16
580 mov r12, r12, lsl #16
581 orr r12, r12, lr, lsr #16
582 #else
583 orr r3, r3, r4, lsl #16
584 mov r4, r4, lsr #16
585 orr r4, r4, r5, lsl #16
586 mov r5, r5, lsr #16
587 orr r5, r5, r12, lsl #16
588 mov r12, r12, lsr #16
589 orr r12, r12, lr, lsl #16
590 #endif
591 stmia r0!, {r3-r5, r12}
592 subs r2, r2, #0x10
593 bge .Lmemmove_fsrcul2loop16
594 ldmia sp!, {r4, r5}
595 adds r2, r2, #0x0c
596 blt .Lmemmove_fsrcul2l4
597
598 .Lmemmove_fsrcul2loop4:
599 #ifdef __ARMEB__
600 mov r12, lr, lsl #16
601 #else
602 mov r12, lr, lsr #16
603 #endif
604 ldr lr, [r1], #4
605 #ifdef __ARMEB__
606 orr r12, r12, lr, lsr #16
607 #else
608 orr r12, r12, lr, lsl #16
609 #endif
610 str r12, [r0], #4
611 subs r2, r2, #4
612 bge .Lmemmove_fsrcul2loop4
613
614 .Lmemmove_fsrcul2l4:
615 sub r1, r1, #2 /* rewind to the true (misaligned) byte position */
616 b .Lmemmove_fl4
617
618 .Lmemmove_fsrcul3:
619 cmp r2, #0x0c
620 blt .Lmemmove_fsrcul3loop4
621 sub r2, r2, #0x0c
622 stmdb sp!, {r4, r5}
623
624 .Lmemmove_fsrcul3loop16:
625 #ifdef __ARMEB__
626 mov r3, lr, lsl #24
627 #else
628 mov r3, lr, lsr #24
629 #endif
630 ldmia r1!, {r4, r5, r12, lr}
631 #ifdef __ARMEB__
632 orr r3, r3, r4, lsr #8
633 mov r4, r4, lsl #24
634 orr r4, r4, r5, lsr #8
635 mov r5, r5, lsl #24
636 orr r5, r5, r12, lsr #8
637 mov r12, r12, lsl #24
638 orr r12, r12, lr, lsr #8
639 #else
640 orr r3, r3, r4, lsl #8
641 mov r4, r4, lsr #24
642 orr r4, r4, r5, lsl #8
643 mov r5, r5, lsr #24
644 orr r5, r5, r12, lsl #8
645 mov r12, r12, lsr #24
646 orr r12, r12, lr, lsl #8
647 #endif
648 stmia r0!, {r3-r5, r12}
649 subs r2, r2, #0x10
650 bge .Lmemmove_fsrcul3loop16
651 ldmia sp!, {r4, r5}
652 adds r2, r2, #0x0c
653 blt .Lmemmove_fsrcul3l4
654
655 .Lmemmove_fsrcul3loop4:
656 #ifdef __ARMEB__
657 mov r12, lr, lsl #24
658 #else
659 mov r12, lr, lsr #24
660 #endif
661 ldr lr, [r1], #4
662 #ifdef __ARMEB__
663 orr r12, r12, lr, lsr #8
664 #else
665 orr r12, r12, lr, lsl #8
666 #endif
667 str r12, [r0], #4
668 subs r2, r2, #4
669 bge .Lmemmove_fsrcul3loop4
670
671 .Lmemmove_fsrcul3l4:
672 sub r1, r1, #1 /* rewind to the true (misaligned) byte position */
673 b .Lmemmove_fl4
674
/* Backward copy: start from the ends of both buffers and work down
 * (used when dst > src so an overlapping tail is read before it is
 * overwritten).  Mirror image of the forward path above. */
675 .Lmemmove_backwards:
676 add r1, r1, r2
677 add r0, r0, r2
678 subs r2, r2, #4
679 blt .Lmemmove_bl4 /* less than 4 bytes */
680 ands r12, r0, #3
681 bne .Lmemmove_bdestul /* oh unaligned destination addr */
682 ands r12, r1, #3
683 bne .Lmemmove_bsrcul /* oh unaligned source addr */
684
685 .Lmemmove_bt8:
686 /* We have aligned source and destination */
687 subs r2, r2, #8
688 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
689 stmdb sp!, {r4, lr}
690 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
691 blt .Lmemmove_bl32
692
693 /* blat 32 bytes at a time */
694 /* XXX for really big copies perhaps we should use more registers */
695 .Lmemmove_bloop32:
696 ldmdb r1!, {r3, r4, r12, lr}
697 stmdb r0!, {r3, r4, r12, lr}
698 ldmdb r1!, {r3, r4, r12, lr}
699 stmdb r0!, {r3, r4, r12, lr}
700 subs r2, r2, #0x20
701 bge .Lmemmove_bloop32
702
703 .Lmemmove_bl32:
704 cmn r2, #0x10
705 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
706 stmgedb r0!, {r3, r4, r12, lr}
707 subge r2, r2, #0x10
708 adds r2, r2, #0x14
709 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
710 stmgedb r0!, {r3, r12, lr}
711 subge r2, r2, #0x0c
712 ldmia sp!, {r4, lr}
713
714 .Lmemmove_bl12:
715 adds r2, r2, #8
716 blt .Lmemmove_bl4
717 subs r2, r2, #4
718 ldrlt r3, [r1, #-4]!
719 strlt r3, [r0, #-4]!
720 ldmgedb r1!, {r3, r12}
721 stmgedb r0!, {r3, r12}
722 subge r2, r2, #4
723
724 .Lmemmove_bl4:
725 /* less than 4 bytes to go */
726 adds r2, r2, #4
727 RETeq /* done */
728
729 /* copy the crud byte at a time */
730 cmp r2, #2
731 ldrb r3, [r1, #-1]!
732 strb r3, [r0, #-1]!
733 ldrgeb r3, [r1, #-1]!
734 strgeb r3, [r0, #-1]!
735 ldrgtb r3, [r1, #-1]!
736 strgtb r3, [r0, #-1]!
737 RET
738
739 /* erg - unaligned destination */
740 .Lmemmove_bdestul:
741 cmp r12, #2 /* r12 = dst misalignment = bytes to copy down to a boundary */
742
743 /* align destination with byte copies */
744 ldrb r3, [r1, #-1]!
745 strb r3, [r0, #-1]!
746 ldrgeb r3, [r1, #-1]!
747 strgeb r3, [r0, #-1]!
748 ldrgtb r3, [r1, #-1]!
749 strgtb r3, [r0, #-1]!
750 subs r2, r2, r12
751 blt .Lmemmove_bl4 /* less than 4 bytes to go */
752 ands r12, r1, #3
753 beq .Lmemmove_bt8 /* we have an aligned source */
754
755 /* erg - unaligned source */
756 /* This is where it gets nasty ... */
/* Same word-reassembly technique as the forward path, but reading
 * downwards; r3 holds the pre-loaded aligned word here. */
757 .Lmemmove_bsrcul:
758 bic r1, r1, #3
759 ldr r3, [r1, #0]
760 cmp r12, #2
761 blt .Lmemmove_bsrcul1
762 beq .Lmemmove_bsrcul2
763 cmp r2, #0x0c
764 blt .Lmemmove_bsrcul3loop4
765 sub r2, r2, #0x0c
766 stmdb sp!, {r4, r5, lr}
767
768 .Lmemmove_bsrcul3loop16:
769 #ifdef __ARMEB__
770 mov lr, r3, lsr #8
771 #else
772 mov lr, r3, lsl #8
773 #endif
774 ldmdb r1!, {r3-r5, r12}
775 #ifdef __ARMEB__
776 orr lr, lr, r12, lsl #24
777 mov r12, r12, lsr #8
778 orr r12, r12, r5, lsl #24
779 mov r5, r5, lsr #8
780 orr r5, r5, r4, lsl #24
781 mov r4, r4, lsr #8
782 orr r4, r4, r3, lsl #24
783 #else
784 orr lr, lr, r12, lsr #24
785 mov r12, r12, lsl #8
786 orr r12, r12, r5, lsr #24
787 mov r5, r5, lsl #8
788 orr r5, r5, r4, lsr #24
789 mov r4, r4, lsl #8
790 orr r4, r4, r3, lsr #24
791 #endif
792 stmdb r0!, {r4, r5, r12, lr}
793 subs r2, r2, #0x10
794 bge .Lmemmove_bsrcul3loop16
795 ldmia sp!, {r4, r5, lr}
796 adds r2, r2, #0x0c
797 blt .Lmemmove_bsrcul3l4
798
799 .Lmemmove_bsrcul3loop4:
800 #ifdef __ARMEB__
801 mov r12, r3, lsr #8
802 #else
803 mov r12, r3, lsl #8
804 #endif
805 ldr r3, [r1, #-4]!
806 #ifdef __ARMEB__
807 orr r12, r12, r3, lsl #24
808 #else
809 orr r12, r12, r3, lsr #24
810 #endif
811 str r12, [r0, #-4]!
812 subs r2, r2, #4
813 bge .Lmemmove_bsrcul3loop4
814
815 .Lmemmove_bsrcul3l4:
816 add r1, r1, #3 /* restore the true (misaligned) byte position */
817 b .Lmemmove_bl4
818
819 .Lmemmove_bsrcul2:
820 cmp r2, #0x0c
821 blt .Lmemmove_bsrcul2loop4
822 sub r2, r2, #0x0c
823 stmdb sp!, {r4, r5, lr}
824
825 .Lmemmove_bsrcul2loop16:
826 #ifdef __ARMEB__
827 mov lr, r3, lsr #16
828 #else
829 mov lr, r3, lsl #16
830 #endif
831 ldmdb r1!, {r3-r5, r12}
832 #ifdef __ARMEB__
833 orr lr, lr, r12, lsl #16
834 mov r12, r12, lsr #16
835 orr r12, r12, r5, lsl #16
836 mov r5, r5, lsr #16
837 orr r5, r5, r4, lsl #16
838 mov r4, r4, lsr #16
839 orr r4, r4, r3, lsl #16
840 #else
841 orr lr, lr, r12, lsr #16
842 mov r12, r12, lsl #16
843 orr r12, r12, r5, lsr #16
844 mov r5, r5, lsl #16
845 orr r5, r5, r4, lsr #16
846 mov r4, r4, lsl #16
847 orr r4, r4, r3, lsr #16
848 #endif
849 stmdb r0!, {r4, r5, r12, lr}
850 subs r2, r2, #0x10
851 bge .Lmemmove_bsrcul2loop16
852 ldmia sp!, {r4, r5, lr}
853 adds r2, r2, #0x0c
854 blt .Lmemmove_bsrcul2l4
855
856 .Lmemmove_bsrcul2loop4:
857 #ifdef __ARMEB__
858 mov r12, r3, lsr #16
859 #else
860 mov r12, r3, lsl #16
861 #endif
862 ldr r3, [r1, #-4]!
863 #ifdef __ARMEB__
864 orr r12, r12, r3, lsl #16
865 #else
866 orr r12, r12, r3, lsr #16
867 #endif
868 str r12, [r0, #-4]!
869 subs r2, r2, #4
870 bge .Lmemmove_bsrcul2loop4
871
872 .Lmemmove_bsrcul2l4:
873 add r1, r1, #2 /* restore the true (misaligned) byte position */
874 b .Lmemmove_bl4
875
876 .Lmemmove_bsrcul1:
877 cmp r2, #0x0c
878 blt .Lmemmove_bsrcul1loop4
879 sub r2, r2, #0x0c
880 stmdb sp!, {r4, r5, lr}
881
882 .Lmemmove_bsrcul1loop32:
883 #ifdef __ARMEB__
884 mov lr, r3, lsr #24
885 #else
886 mov lr, r3, lsl #24
887 #endif
888 ldmdb r1!, {r3-r5, r12}
889 #ifdef __ARMEB__
890 orr lr, lr, r12, lsl #8
891 mov r12, r12, lsr #24
892 orr r12, r12, r5, lsl #8
893 mov r5, r5, lsr #24
894 orr r5, r5, r4, lsl #8
895 mov r4, r4, lsr #24
896 orr r4, r4, r3, lsl #8
897 #else
898 orr lr, lr, r12, lsr #8
899 mov r12, r12, lsl #24
900 orr r12, r12, r5, lsr #8
901 mov r5, r5, lsl #24
902 orr r5, r5, r4, lsr #8
903 mov r4, r4, lsl #24
904 orr r4, r4, r3, lsr #8
905 #endif
906 stmdb r0!, {r4, r5, r12, lr}
907 subs r2, r2, #0x10
908 bge .Lmemmove_bsrcul1loop32
909 ldmia sp!, {r4, r5, lr}
910 adds r2, r2, #0x0c
911 blt .Lmemmove_bsrcul1l4
912
913 .Lmemmove_bsrcul1loop4:
914 #ifdef __ARMEB__
915 mov r12, r3, lsr #24
916 #else
917 mov r12, r3, lsl #24
918 #endif
919 ldr r3, [r1, #-4]!
920 #ifdef __ARMEB__
921 orr r12, r12, r3, lsl #8
922 #else
923 orr r12, r12, r3, lsr #8
924 #endif
925 str r12, [r0, #-4]!
926 subs r2, r2, #4
927 bge .Lmemmove_bsrcul1loop4
928
929 .Lmemmove_bsrcul1l4:
930 add r1, r1, #1 /* restore the true (misaligned) byte position */
931 b .Lmemmove_bl4
932
933 #if !defined(_ARM_ARCH_5E)
/*
 * void *memcpy(void *dst, const void *src, size_t len)  -- ARMv4 path
 * In:  r0 = dst, r1 = src, r2 = len.  Out: r0 = dst.
 * Standard memcpy contract: regions must not overlap (memmove above
 * handles overlap).  For large copies an installed _arm_memcpy hook
 * is tried first -- except when the pc shows we are executing from
 * flash (FLASHADDR window test).  Generic path: lr is saved because
 * it is used as a copy register; r4 (and r5) are borrowed and saved
 * when the wider loops need them.
 */
934 ENTRY(memcpy)
935 /* save leaf functions having to store this away */
936 /* Do not check arm_memcpy if we're running from flash */
937 #ifdef FLASHADDR
938 #if FLASHADDR > PHYSADDR
939 ldr r3, =FLASHADDR
940 cmp r3, pc
941 bls .Lnormal
942 #else
943 ldr r3, =FLASHADDR
944 cmp r3, pc
945 bhi .Lnormal
946 #endif
947 #endif
948 ldr r3, .L_arm_memcpy
949 ldr r3, [r3] /* r3 = installed hook, or NULL */
950 cmp r3, #0
951 beq .Lnormal
952 ldr r3, .L_min_memcpy_size
953 ldr r3, [r3]
954 cmp r2, r3 /* NOTE(review): signed compare (blt) -- assumes len < 2^31 */
955 blt .Lnormal
956 stmfd sp!, {r0-r2, r4, lr}
957 mov r3, #0 /* fourth hook argument = 0 */
958 ldr r4, .L_arm_memcpy
959 mov lr, pc
960 ldr pc, [r4] /* call hook; 0 return => request handled */
961 cmp r0, #0
962 ldmfd sp!, {r0-r2, r4, lr}
963 RETeq
964
965 .Lnormal:
966 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
967
968 subs r2, r2, #4
969 blt .Lmemcpy_l4 /* less than 4 bytes */
970 ands r12, r0, #3
971 bne .Lmemcpy_destul /* oh unaligned destination addr */
972 ands r12, r1, #3
973 bne .Lmemcpy_srcul /* oh unaligned source addr */
974
975 .Lmemcpy_t8:
976 /* We have aligned source and destination */
977 subs r2, r2, #8
978 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
979 subs r2, r2, #0x14
980 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
981 stmdb sp!, {r4} /* borrow r4 */
982
983 /* blat 32 bytes at a time */
984 /* XXX for really big copies perhaps we should use more registers */
985 .Lmemcpy_loop32:
986 ldmia r1!, {r3, r4, r12, lr}
987 stmia r0!, {r3, r4, r12, lr}
988 ldmia r1!, {r3, r4, r12, lr}
989 stmia r0!, {r3, r4, r12, lr}
990 subs r2, r2, #0x20
991 bge .Lmemcpy_loop32
992
993 cmn r2, #0x10
994 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
995 stmgeia r0!, {r3, r4, r12, lr}
996 subge r2, r2, #0x10
997 ldmia sp!, {r4} /* return r4 */
998
999 .Lmemcpy_l32:
1000 adds r2, r2, #0x14
1001
1002 /* blat 12 bytes at a time */
1003 .Lmemcpy_loop12:
1004 ldmgeia r1!, {r3, r12, lr}
1005 stmgeia r0!, {r3, r12, lr}
1006 subges r2, r2, #0x0c
1007 bge .Lmemcpy_loop12
1008
1009 .Lmemcpy_l12:
1010 adds r2, r2, #8
1011 blt .Lmemcpy_l4
1012
1013 subs r2, r2, #4
1014 ldrlt r3, [r1], #4
1015 strlt r3, [r0], #4
1016 ldmgeia r1!, {r3, r12}
1017 stmgeia r0!, {r3, r12}
1018 subge r2, r2, #4
1019
1020 .Lmemcpy_l4:
1021 /* less than 4 bytes to go */
1022 adds r2, r2, #4
1023 #ifdef __APCS_26_
1024 ldmeqia sp!, {r0, pc}^ /* done */
1025 #else
1026 ldmeqia sp!, {r0, pc} /* done */
1027 #endif
1028 /* copy the crud byte at a time */
1029 cmp r2, #2
1030 ldrb r3, [r1], #1
1031 strb r3, [r0], #1
1032 ldrgeb r3, [r1], #1
1033 strgeb r3, [r0], #1
1034 ldrgtb r3, [r1], #1
1035 strgtb r3, [r0], #1
1036 ldmia sp!, {r0, pc}
1037
1038 /* erg - unaligned destination */
1039 .Lmemcpy_destul:
1040 rsb r12, r12, #4 /* r12 = bytes to reach word alignment (1..3) */
1041 cmp r12, #2
1042
1043 /* align destination with byte copies */
1044 ldrb r3, [r1], #1
1045 strb r3, [r0], #1
1046 ldrgeb r3, [r1], #1
1047 strgeb r3, [r0], #1
1048 ldrgtb r3, [r1], #1
1049 strgtb r3, [r0], #1
1050 subs r2, r2, r12
1051 blt .Lmemcpy_l4 /* less the 4 bytes */
1052
1053 ands r12, r1, #3
1054 beq .Lmemcpy_t8 /* we have an aligned source */
1055
1056 /* erg - unaligned source */
1057 /* This is where it gets nasty ... */
/*
 * Word-reassembly for a misaligned source: round r1 down, pre-load
 * one aligned word into lr, then merge successive aligned words with
 * lsr/lsl pairs chosen by the 1/2/3-byte offset (r12 on entry).
 * NOTE(review): unlike memmove above, this path has no __ARMEB__
 * variants -- it appears to assume little-endian byte order; confirm.
 */
1058 .Lmemcpy_srcul:
1059 bic r1, r1, #3
1060 ldr lr, [r1], #4
1061 cmp r12, #2
1062 bgt .Lmemcpy_srcul3
1063 beq .Lmemcpy_srcul2
1064 cmp r2, #0x0c
1065 blt .Lmemcpy_srcul1loop4
1066 sub r2, r2, #0x0c
1067 stmdb sp!, {r4, r5}
1068
1069 .Lmemcpy_srcul1loop16:
1070 mov r3, lr, lsr #8
1071 ldmia r1!, {r4, r5, r12, lr}
1072 orr r3, r3, r4, lsl #24
1073 mov r4, r4, lsr #8
1074 orr r4, r4, r5, lsl #24
1075 mov r5, r5, lsr #8
1076 orr r5, r5, r12, lsl #24
1077 mov r12, r12, lsr #8
1078 orr r12, r12, lr, lsl #24
1079 stmia r0!, {r3-r5, r12}
1080 subs r2, r2, #0x10
1081 bge .Lmemcpy_srcul1loop16
1082 ldmia sp!, {r4, r5}
1083 adds r2, r2, #0x0c
1084 blt .Lmemcpy_srcul1l4
1085
1086 .Lmemcpy_srcul1loop4:
1087 mov r12, lr, lsr #8
1088 ldr lr, [r1], #4
1089 orr r12, r12, lr, lsl #24
1090 str r12, [r0], #4
1091 subs r2, r2, #4
1092 bge .Lmemcpy_srcul1loop4
1093
1094 .Lmemcpy_srcul1l4:
1095 sub r1, r1, #3 /* rewind to the true (misaligned) byte position */
1096 b .Lmemcpy_l4
1097
1098 .Lmemcpy_srcul2:
1099 cmp r2, #0x0c
1100 blt .Lmemcpy_srcul2loop4
1101 sub r2, r2, #0x0c
1102 stmdb sp!, {r4, r5}
1103
1104 .Lmemcpy_srcul2loop16:
1105 mov r3, lr, lsr #16
1106 ldmia r1!, {r4, r5, r12, lr}
1107 orr r3, r3, r4, lsl #16
1108 mov r4, r4, lsr #16
1109 orr r4, r4, r5, lsl #16
1110 mov r5, r5, lsr #16
1111 orr r5, r5, r12, lsl #16
1112 mov r12, r12, lsr #16
1113 orr r12, r12, lr, lsl #16
1114 stmia r0!, {r3-r5, r12}
1115 subs r2, r2, #0x10
1116 bge .Lmemcpy_srcul2loop16
1117 ldmia sp!, {r4, r5}
1118 adds r2, r2, #0x0c
1119 blt .Lmemcpy_srcul2l4
1120
1121 .Lmemcpy_srcul2loop4:
1122 mov r12, lr, lsr #16
1123 ldr lr, [r1], #4
1124 orr r12, r12, lr, lsl #16
1125 str r12, [r0], #4
1126 subs r2, r2, #4
1127 bge .Lmemcpy_srcul2loop4
1128
1129 .Lmemcpy_srcul2l4:
1130 sub r1, r1, #2 /* rewind to the true (misaligned) byte position */
1131 b .Lmemcpy_l4
1132
1133 .Lmemcpy_srcul3:
1134 cmp r2, #0x0c
1135 blt .Lmemcpy_srcul3loop4
1136 sub r2, r2, #0x0c
1137 stmdb sp!, {r4, r5}
1138
1139 .Lmemcpy_srcul3loop16:
1140 mov r3, lr, lsr #24
1141 ldmia r1!, {r4, r5, r12, lr}
1142 orr r3, r3, r4, lsl #8
1143 mov r4, r4, lsr #24
1144 orr r4, r4, r5, lsl #8
1145 mov r5, r5, lsr #24
1146 orr r5, r5, r12, lsl #8
1147 mov r12, r12, lsr #24
1148 orr r12, r12, lr, lsl #8
1149 stmia r0!, {r3-r5, r12}
1150 subs r2, r2, #0x10
1151 bge .Lmemcpy_srcul3loop16
1152 ldmia sp!, {r4, r5}
1153 adds r2, r2, #0x0c
1154 blt .Lmemcpy_srcul3l4
1155
1156 .Lmemcpy_srcul3loop4:
1157 mov r12, lr, lsr #24
1158 ldr lr, [r1], #4
1159 orr r12, r12, lr, lsl #8
1160 str r12, [r0], #4
1161 subs r2, r2, #4
1162 bge .Lmemcpy_srcul3loop4
1163
1164 .Lmemcpy_srcul3l4:
1165 sub r1, r1, #1 /* rewind to the true (misaligned) byte position */
1166 b .Lmemcpy_l4
1167 #else
1168 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1169 ENTRY(memcpy)
1170 pld [r1]
1171 cmp r2, #0x0c
1172 ble .Lmemcpy_short /* <= 12 bytes */
1173 #ifdef FLASHADDR
1174 #if FLASHADDR > PHYSADDR
1175 ldr r3, =FLASHADDR
1176 cmp r3, pc
1177 bls .Lnormal
1178 #else
1179 ldr r3, =FLASHADDR
1180 cmp r3, pc
1181 bhi .Lnormal
1182 #endif
1183 #endif
1184 ldr r3, .L_arm_memcpy
1185 ldr r3, [r3]
1186 cmp r3, #0
1187 beq .Lnormal
1188 ldr r3, .L_min_memcpy_size
1189 ldr r3, [r3]
1190 cmp r2, r3
1191 blt .Lnormal
1192 stmfd sp!, {r0-r2, r4, lr}
1193 mov r3, #0
1194 ldr r4, .L_arm_memcpy
1195 mov lr, pc
1196 ldr pc, [r4]
1197 cmp r0, #0
1198 ldmfd sp!, {r0-r2, r4, lr}
1199 RETeq
1200 .Lnormal:
1201 mov r3, r0 /* We must not clobber r0 */
1202
1203 /* Word-align the destination buffer */
1204 ands ip, r3, #0x03 /* Already word aligned? */
1205 beq .Lmemcpy_wordaligned /* Yup */
1206 cmp ip, #0x02
1207 ldrb ip, [r1], #0x01
1208 sub r2, r2, #0x01
1209 strb ip, [r3], #0x01
1210 ldrleb ip, [r1], #0x01
1211 suble r2, r2, #0x01
1212 strleb ip, [r3], #0x01
1213 ldrltb ip, [r1], #0x01
1214 sublt r2, r2, #0x01
1215 strltb ip, [r3], #0x01
1216
1217 /* Destination buffer is now word aligned */
1218 .Lmemcpy_wordaligned:
1219 ands ip, r1, #0x03 /* Is src also word-aligned? */
1220 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
1221
1222 /* Quad-align the destination buffer */
1223 tst r3, #0x07 /* Already quad aligned? */
1224 ldrne ip, [r1], #0x04
1225 stmfd sp!, {r4-r9} /* Free up some registers */
1226 subne r2, r2, #0x04
1227 strne ip, [r3], #0x04
1228
1229 /* Destination buffer quad aligned, source is at least word aligned */
1230 subs r2, r2, #0x80
1231 blt .Lmemcpy_w_lessthan128
1232
1233 /* Copy 128 bytes at a time */
1234 .Lmemcpy_w_loop128:
1235 ldr r4, [r1], #0x04 /* LD:00-03 */
1236 ldr r5, [r1], #0x04 /* LD:04-07 */
1237 pld [r1, #0x18] /* Prefetch 0x20 */
1238 ldr r6, [r1], #0x04 /* LD:08-0b */
1239 ldr r7, [r1], #0x04 /* LD:0c-0f */
1240 ldr r8, [r1], #0x04 /* LD:10-13 */
1241 ldr r9, [r1], #0x04 /* LD:14-17 */
1242 strd r4, [r3], #0x08 /* ST:00-07 */
1243 ldr r4, [r1], #0x04 /* LD:18-1b */
1244 ldr r5, [r1], #0x04 /* LD:1c-1f */
1245 strd r6, [r3], #0x08 /* ST:08-0f */
1246 ldr r6, [r1], #0x04 /* LD:20-23 */
1247 ldr r7, [r1], #0x04 /* LD:24-27 */
1248 pld [r1, #0x18] /* Prefetch 0x40 */
1249 strd r8, [r3], #0x08 /* ST:10-17 */
1250 ldr r8, [r1], #0x04 /* LD:28-2b */
1251 ldr r9, [r1], #0x04 /* LD:2c-2f */
1252 strd r4, [r3], #0x08 /* ST:18-1f */
1253 ldr r4, [r1], #0x04 /* LD:30-33 */
1254 ldr r5, [r1], #0x04 /* LD:34-37 */
1255 strd r6, [r3], #0x08 /* ST:20-27 */
1256 ldr r6, [r1], #0x04 /* LD:38-3b */
1257 ldr r7, [r1], #0x04 /* LD:3c-3f */
1258 strd r8, [r3], #0x08 /* ST:28-2f */
1259 ldr r8, [r1], #0x04 /* LD:40-43 */
1260 ldr r9, [r1], #0x04 /* LD:44-47 */
1261 pld [r1, #0x18] /* Prefetch 0x60 */
1262 strd r4, [r3], #0x08 /* ST:30-37 */
1263 ldr r4, [r1], #0x04 /* LD:48-4b */
1264 ldr r5, [r1], #0x04 /* LD:4c-4f */
1265 strd r6, [r3], #0x08 /* ST:38-3f */
1266 ldr r6, [r1], #0x04 /* LD:50-53 */
1267 ldr r7, [r1], #0x04 /* LD:54-57 */
1268 strd r8, [r3], #0x08 /* ST:40-47 */
1269 ldr r8, [r1], #0x04 /* LD:58-5b */
1270 ldr r9, [r1], #0x04 /* LD:5c-5f */
1271 strd r4, [r3], #0x08 /* ST:48-4f */
1272 ldr r4, [r1], #0x04 /* LD:60-63 */
1273 ldr r5, [r1], #0x04 /* LD:64-67 */
1274 pld [r1, #0x18] /* Prefetch 0x80 */
1275 strd r6, [r3], #0x08 /* ST:50-57 */
1276 ldr r6, [r1], #0x04 /* LD:68-6b */
1277 ldr r7, [r1], #0x04 /* LD:6c-6f */
1278 strd r8, [r3], #0x08 /* ST:58-5f */
1279 ldr r8, [r1], #0x04 /* LD:70-73 */
1280 ldr r9, [r1], #0x04 /* LD:74-77 */
1281 strd r4, [r3], #0x08 /* ST:60-67 */
1282 ldr r4, [r1], #0x04 /* LD:78-7b */
1283 ldr r5, [r1], #0x04 /* LD:7c-7f */
1284 strd r6, [r3], #0x08 /* ST:68-6f */
1285 strd r8, [r3], #0x08 /* ST:70-77 */
1286 subs r2, r2, #0x80
1287 strd r4, [r3], #0x08 /* ST:78-7f */
1288 bge .Lmemcpy_w_loop128
1289
1290 .Lmemcpy_w_lessthan128:
1291 adds r2, r2, #0x80 /* Adjust for extra sub */
1292 ldmeqfd sp!, {r4-r9}
1293 RETeq /* Return now if done */
1294 subs r2, r2, #0x20
1295 blt .Lmemcpy_w_lessthan32
1296
1297 /* Copy 32 bytes at a time */
1298 .Lmemcpy_w_loop32:
1299 ldr r4, [r1], #0x04
1300 ldr r5, [r1], #0x04
1301 pld [r1, #0x18]
1302 ldr r6, [r1], #0x04
1303 ldr r7, [r1], #0x04
1304 ldr r8, [r1], #0x04
1305 ldr r9, [r1], #0x04
1306 strd r4, [r3], #0x08
1307 ldr r4, [r1], #0x04
1308 ldr r5, [r1], #0x04
1309 strd r6, [r3], #0x08
1310 strd r8, [r3], #0x08
1311 subs r2, r2, #0x20
1312 strd r4, [r3], #0x08
1313 bge .Lmemcpy_w_loop32
1314
1315 .Lmemcpy_w_lessthan32:
1316         adds    r2, r2, #0x20           /* Adjust for extra sub */
1317         ldmeqfd sp!, {r4-r9}            /* restore saved regs if nothing left */
1318         RETeq                           /* Return now if done */
1319 
1320         and     r4, r2, #0x18           /* r4 = remaining, rounded down to 8 (0..24) */
1321         rsbs    r4, r4, #0x18           /* r4 = 24 - that; Z set when 24+ bytes remain */
1322         addne   pc, pc, r4, lsl #1      /* pc reads as '.'+8: skip one 4-insn (16-byte) group per missing 8 bytes */
1323         nop
1324 
1325         /* At least 24 bytes remaining */
1326         ldr     r4, [r1], #0x04
1327         ldr     r5, [r1], #0x04
1328         sub     r2, r2, #0x08
1329         strd    r4, [r3], #0x08         /* store the r4/r5 pair */
1330 
1331         /* At least 16 bytes remaining */
1332         ldr     r4, [r1], #0x04
1333         ldr     r5, [r1], #0x04
1334         sub     r2, r2, #0x08
1335         strd    r4, [r3], #0x08
1336 
1337         /* At least 8 bytes remaining */
1338         ldr     r4, [r1], #0x04
1339         ldr     r5, [r1], #0x04
1340         subs    r2, r2, #0x08
1341         strd    r4, [r3], #0x08
1342 
1343         /* Less than 8 bytes remaining */
1344         ldmfd   sp!, {r4-r9}
1345         RETeq                           /* Return now if done */
1346         subs    r2, r2, #0x04
1347         ldrge   ip, [r1], #0x04         /* copy one more word if >= 4 bytes left */
1348         strge   ip, [r3], #0x04
1349         RETeq                           /* Return now if done */
1350         addlt   r2, r2, #0x04           /* r2 = trailing byte count (1..3) */
1351         ldrb    ip, [r1], #0x01
1352         cmp     r2, #0x02
1353         ldrgeb  r2, [r1], #0x01         /* count no longer needed: reuse r2 as data */
1354         strb    ip, [r3], #0x01
1355         ldrgtb  ip, [r1]
1356         strgeb  r2, [r3], #0x01
1357         strgtb  ip, [r3]
1358         RET
1359
1360
1361 /*
1362  * At this point, it has not been possible to word align both buffers.
1363  * The destination buffer is word aligned, but the source buffer is not.
1364  */
1365 .Lmemcpy_bad_align:                     /* on entry: ip = src & 3 (1, 2 or 3) */
1366         stmfd   sp!, {r4-r7}
1367         bic     r1, r1, #0x03           /* round src down to a word boundary */
1368         cmp     ip, #2
1369         ldr     ip, [r1], #0x04         /* preload first aligned word; cmp flags survive */
1370         bgt     .Lmemcpy_bad3           /* src & 3 == 3 */
1371         beq     .Lmemcpy_bad2           /* src & 3 == 2 */
1372         b       .Lmemcpy_bad1           /* src & 3 == 1 */
1373
1374 .Lmemcpy_bad1_loop16:
1375 #ifdef __ARMEB__
1376 mov r4, ip, lsl #8
1377 #else
1378 mov r4, ip, lsr #8
1379 #endif
1380 ldr r5, [r1], #0x04
1381 pld [r1, #0x018]
1382 ldr r6, [r1], #0x04
1383 ldr r7, [r1], #0x04
1384 ldr ip, [r1], #0x04
1385 #ifdef __ARMEB__
1386 orr r4, r4, r5, lsr #24
1387 mov r5, r5, lsl #8
1388 orr r5, r5, r6, lsr #24
1389 mov r6, r6, lsl #8
1390 orr r6, r6, r7, lsr #24
1391 mov r7, r7, lsl #8
1392 orr r7, r7, ip, lsr #24
1393 #else
1394 orr r4, r4, r5, lsl #24
1395 mov r5, r5, lsr #8
1396 orr r5, r5, r6, lsl #24
1397 mov r6, r6, lsr #8
1398 orr r6, r6, r7, lsl #24
1399 mov r7, r7, lsr #8
1400 orr r7, r7, ip, lsl #24
1401 #endif
1402 str r4, [r3], #0x04
1403 str r5, [r3], #0x04
1404 str r6, [r3], #0x04
1405 str r7, [r3], #0x04
1406 .Lmemcpy_bad1:
1407 subs r2, r2, #0x10
1408 bge .Lmemcpy_bad1_loop16
1409
1410 adds r2, r2, #0x10
1411 ldmeqfd sp!, {r4-r7}
1412 RETeq /* Return now if done */
1413 subs r2, r2, #0x04
1414 sublt r1, r1, #0x03
1415 blt .Lmemcpy_bad_done
1416
1417 .Lmemcpy_bad1_loop4:
1418 #ifdef __ARMEB__
1419 mov r4, ip, lsl #8
1420 #else
1421 mov r4, ip, lsr #8
1422 #endif
1423 ldr ip, [r1], #0x04
1424 subs r2, r2, #0x04
1425 #ifdef __ARMEB__
1426 orr r4, r4, ip, lsr #24
1427 #else
1428 orr r4, r4, ip, lsl #24
1429 #endif
1430 str r4, [r3], #0x04
1431 bge .Lmemcpy_bad1_loop4
1432 sub r1, r1, #0x03
1433 b .Lmemcpy_bad_done
1434
1435 .Lmemcpy_bad2_loop16:
1436 #ifdef __ARMEB__
1437 mov r4, ip, lsl #16
1438 #else
1439 mov r4, ip, lsr #16
1440 #endif
1441 ldr r5, [r1], #0x04
1442 pld [r1, #0x018]
1443 ldr r6, [r1], #0x04
1444 ldr r7, [r1], #0x04
1445 ldr ip, [r1], #0x04
1446 #ifdef __ARMEB__
1447 orr r4, r4, r5, lsr #16
1448 mov r5, r5, lsl #16
1449 orr r5, r5, r6, lsr #16
1450 mov r6, r6, lsl #16
1451 orr r6, r6, r7, lsr #16
1452 mov r7, r7, lsl #16
1453 orr r7, r7, ip, lsr #16
1454 #else
1455 orr r4, r4, r5, lsl #16
1456 mov r5, r5, lsr #16
1457 orr r5, r5, r6, lsl #16
1458 mov r6, r6, lsr #16
1459 orr r6, r6, r7, lsl #16
1460 mov r7, r7, lsr #16
1461 orr r7, r7, ip, lsl #16
1462 #endif
1463 str r4, [r3], #0x04
1464 str r5, [r3], #0x04
1465 str r6, [r3], #0x04
1466 str r7, [r3], #0x04
1467 .Lmemcpy_bad2:
1468 subs r2, r2, #0x10
1469 bge .Lmemcpy_bad2_loop16
1470
1471 adds r2, r2, #0x10
1472 ldmeqfd sp!, {r4-r7}
1473 RETeq /* Return now if done */
1474 subs r2, r2, #0x04
1475 sublt r1, r1, #0x02
1476 blt .Lmemcpy_bad_done
1477
1478 .Lmemcpy_bad2_loop4:
1479 #ifdef __ARMEB__
1480 mov r4, ip, lsl #16
1481 #else
1482 mov r4, ip, lsr #16
1483 #endif
1484 ldr ip, [r1], #0x04
1485 subs r2, r2, #0x04
1486 #ifdef __ARMEB__
1487 orr r4, r4, ip, lsr #16
1488 #else
1489 orr r4, r4, ip, lsl #16
1490 #endif
1491 str r4, [r3], #0x04
1492 bge .Lmemcpy_bad2_loop4
1493 sub r1, r1, #0x02
1494 b .Lmemcpy_bad_done
1495
1496 .Lmemcpy_bad3_loop16:
1497 #ifdef __ARMEB__
1498 mov r4, ip, lsl #24
1499 #else
1500 mov r4, ip, lsr #24
1501 #endif
1502 ldr r5, [r1], #0x04
1503 pld [r1, #0x018]
1504 ldr r6, [r1], #0x04
1505 ldr r7, [r1], #0x04
1506 ldr ip, [r1], #0x04
1507 #ifdef __ARMEB__
1508 orr r4, r4, r5, lsr #8
1509 mov r5, r5, lsl #24
1510 orr r5, r5, r6, lsr #8
1511 mov r6, r6, lsl #24
1512 orr r6, r6, r7, lsr #8
1513 mov r7, r7, lsl #24
1514 orr r7, r7, ip, lsr #8
1515 #else
1516 orr r4, r4, r5, lsl #8
1517 mov r5, r5, lsr #24
1518 orr r5, r5, r6, lsl #8
1519 mov r6, r6, lsr #24
1520 orr r6, r6, r7, lsl #8
1521 mov r7, r7, lsr #24
1522 orr r7, r7, ip, lsl #8
1523 #endif
1524 str r4, [r3], #0x04
1525 str r5, [r3], #0x04
1526 str r6, [r3], #0x04
1527 str r7, [r3], #0x04
1528 .Lmemcpy_bad3:
1529 subs r2, r2, #0x10
1530 bge .Lmemcpy_bad3_loop16
1531
1532 adds r2, r2, #0x10
1533 ldmeqfd sp!, {r4-r7}
1534 RETeq /* Return now if done */
1535 subs r2, r2, #0x04
1536 sublt r1, r1, #0x01
1537 blt .Lmemcpy_bad_done
1538
1539 .Lmemcpy_bad3_loop4:
1540 #ifdef __ARMEB__
1541 mov r4, ip, lsl #24
1542 #else
1543 mov r4, ip, lsr #24
1544 #endif
1545 ldr ip, [r1], #0x04
1546 subs r2, r2, #0x04
1547 #ifdef __ARMEB__
1548 orr r4, r4, ip, lsr #8
1549 #else
1550 orr r4, r4, ip, lsl #8
1551 #endif
1552 str r4, [r3], #0x04
1553 bge .Lmemcpy_bad3_loop4
1554 sub r1, r1, #0x01
1555
1556 .Lmemcpy_bad_done:
1557 ldmfd sp!, {r4-r7}
1558 adds r2, r2, #0x04
1559 RETeq
1560 ldrb ip, [r1], #0x01
1561 cmp r2, #0x02
1562 ldrgeb r2, [r1], #0x01
1563 strb ip, [r3], #0x01
1564 ldrgtb ip, [r1]
1565 strgeb r2, [r3], #0x01
1566 strgtb ip, [r3]
1567 RET
1568
1569
1570 /*
1571 * Handle short copies (less than 16 bytes), possibly misaligned.
1572 * Some of these are *very* common, thanks to the network stack,
1573 * and so are handled specially.
1574 */
1575 .Lmemcpy_short:                         /* dispatch on byte count, 0 <= r2 <= 12 */
1576         add     pc, pc, r2, lsl #2      /* computed goto: pc reads as '.'+8, so len 0 lands on the RET below */
1577         nop
1578         RET                     /* 0x00 */
1579         b       .Lmemcpy_bytewise       /* 0x01 */
1580         b       .Lmemcpy_bytewise       /* 0x02 */
1581         b       .Lmemcpy_bytewise       /* 0x03 */
1582         b       .Lmemcpy_4              /* 0x04 */
1583         b       .Lmemcpy_bytewise       /* 0x05 */
1584         b       .Lmemcpy_6              /* 0x06 */
1585         b       .Lmemcpy_bytewise       /* 0x07 */
1586         b       .Lmemcpy_8              /* 0x08 */
1587         b       .Lmemcpy_bytewise       /* 0x09 */
1588         b       .Lmemcpy_bytewise       /* 0x0a */
1589         b       .Lmemcpy_bytewise       /* 0x0b */
1590         b       .Lmemcpy_c              /* 0x0c */
1591 .Lmemcpy_bytewise:                      /* generic short copy, one byte per iteration */
1592         mov     r3, r0                  /* We must not clobber r0 */
1593         ldrb    ip, [r1], #0x01
1594 1:      subs    r2, r2, #0x01           /* r2 > 0 on entry; loop until count reaches zero */
1595         strb    ip, [r3], #0x01
1596         ldrneb  ip, [r1], #0x01         /* fetch next byte only if more remain */
1597         bne     1b
1598         RET
1599
1600 /******************************************************************************
1601 * Special case for 4 byte copies
1602 */
1603 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1604 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1605 LMEMCPY_4_PAD
1606 .Lmemcpy_4:
1607 and r2, r1, #0x03
1608 orr r2, r2, r0, lsl #2
1609 ands r2, r2, #0x0f
1610 sub r3, pc, #0x14
1611 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1612
1613 /*
1614 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1615 */
1616 ldr r2, [r1]
1617 str r2, [r0]
1618 RET
1619 LMEMCPY_4_PAD
1620
1621 /*
1622 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1623 */
1624 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1625 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1626 #ifdef __ARMEB__
1627 mov r3, r3, lsl #8 /* r3 = 012. */
1628 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1629 #else
1630 mov r3, r3, lsr #8 /* r3 = .210 */
1631 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1632 #endif
1633 str r3, [r0]
1634 RET
1635 LMEMCPY_4_PAD
1636
1637 /*
1638 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1639 */
1640 #ifdef __ARMEB__
1641 ldrh r3, [r1]
1642 ldrh r2, [r1, #0x02]
1643 #else
1644 ldrh r3, [r1, #0x02]
1645 ldrh r2, [r1]
1646 #endif
1647 orr r3, r2, r3, lsl #16
1648 str r3, [r0]
1649 RET
1650 LMEMCPY_4_PAD
1651
1652 /*
1653 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1654 */
1655 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1656 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1657 #ifdef __ARMEB__
1658 mov r3, r3, lsl #24 /* r3 = 0... */
1659 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
1660 #else
1661 mov r3, r3, lsr #24 /* r3 = ...0 */
1662 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1663 #endif
1664 str r3, [r0]
1665 RET
1666 LMEMCPY_4_PAD
1667
1668 /*
1669 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1670 */
1671 ldr r2, [r1]
1672 #ifdef __ARMEB__
1673 strb r2, [r0, #0x03]
1674 mov r3, r2, lsr #8
1675 mov r1, r2, lsr #24
1676 strb r1, [r0]
1677 #else
1678 strb r2, [r0]
1679 mov r3, r2, lsr #8
1680 mov r1, r2, lsr #24
1681 strb r1, [r0, #0x03]
1682 #endif
1683 strh r3, [r0, #0x01]
1684 RET
1685 LMEMCPY_4_PAD
1686
1687 /*
1688 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1689 */
1690 ldrb r2, [r1]
1691 ldrh r3, [r1, #0x01]
1692 ldrb r1, [r1, #0x03]
1693 strb r2, [r0]
1694 strh r3, [r0, #0x01]
1695 strb r1, [r0, #0x03]
1696 RET
1697 LMEMCPY_4_PAD
1698
1699 /*
1700  * 0110: dst is 8-bit aligned, src is 16-bit aligned
1701  */
1702         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1703         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
1704 #ifdef __ARMEB__
1705         mov     r1, r2, lsr #8          /* r1 = ...0 */
1706         strb    r1, [r0]
1707         mov     r2, r2, lsl #8          /* r2 = .01. */
1708         orr     r2, r2, r3, lsr #8      /* r2 = .012 */
1709 #else
1710         strb    r2, [r0]
1711         mov     r2, r2, lsr #8          /* r2 = ...1 */
1712         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
1713         mov     r3, r3, lsr #8          /* r3 = ...3 */
1714 #endif
1715         strh    r2, [r0, #0x01]
1716         strb    r3, [r0, #0x03]
1717         RET
1718 LMEMCPY_4_PAD
1719
1720 /*
1721 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1722 */
1723 ldrb r2, [r1]
1724 ldrh r3, [r1, #0x01]
1725 ldrb r1, [r1, #0x03]
1726 strb r2, [r0]
1727 strh r3, [r0, #0x01]
1728 strb r1, [r0, #0x03]
1729 RET
1730 LMEMCPY_4_PAD
1731
1732 /*
1733 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1734 */
1735 ldr r2, [r1]
1736 #ifdef __ARMEB__
1737 strh r2, [r0, #0x02]
1738 mov r3, r2, lsr #16
1739 strh r3, [r0]
1740 #else
1741 strh r2, [r0]
1742 mov r3, r2, lsr #16
1743 strh r3, [r0, #0x02]
1744 #endif
1745 RET
1746 LMEMCPY_4_PAD
1747
1748 /*
1749 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1750 */
1751 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1752 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1753 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1754 strh r1, [r0]
1755 #ifdef __ARMEB__
1756 mov r2, r2, lsl #8 /* r2 = 012. */
1757 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1758 #else
1759 mov r2, r2, lsr #24 /* r2 = ...2 */
1760 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1761 #endif
1762 strh r2, [r0, #0x02]
1763 RET
1764 LMEMCPY_4_PAD
1765
1766 /*
1767 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1768 */
1769 ldrh r2, [r1]
1770 ldrh r3, [r1, #0x02]
1771 strh r2, [r0]
1772 strh r3, [r0, #0x02]
1773 RET
1774 LMEMCPY_4_PAD
1775
1776 /*
1777 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1778 */
1779 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1780 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1781 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1782 strh r1, [r0, #0x02]
1783 #ifdef __ARMEB__
1784 mov r3, r3, lsr #24 /* r3 = ...1 */
1785 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
1786 #else
1787 mov r3, r3, lsl #8 /* r3 = 321. */
1788 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1789 #endif
1790 strh r3, [r0]
1791 RET
1792 LMEMCPY_4_PAD
1793
1794 /*
1795 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1796 */
1797 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1798 #ifdef __ARMEB__
1799 strb r2, [r0, #0x03]
1800 mov r3, r2, lsr #8
1801 mov r1, r2, lsr #24
1802 strh r3, [r0, #0x01]
1803 strb r1, [r0]
1804 #else
1805 strb r2, [r0]
1806 mov r3, r2, lsr #8
1807 mov r1, r2, lsr #24
1808 strh r3, [r0, #0x01]
1809 strb r1, [r0, #0x03]
1810 #endif
1811 RET
1812 LMEMCPY_4_PAD
1813
1814 /*
1815 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1816 */
1817 ldrb r2, [r1]
1818 ldrh r3, [r1, #0x01]
1819 ldrb r1, [r1, #0x03]
1820 strb r2, [r0]
1821 strh r3, [r0, #0x01]
1822 strb r1, [r0, #0x03]
1823 RET
1824 LMEMCPY_4_PAD
1825
1826 /*
1827 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1828 */
1829 #ifdef __ARMEB__
1830 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1831 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1832 strb r3, [r0, #0x03]
1833 mov r3, r3, lsr #8 /* r3 = ...2 */
1834 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
1835 strh r3, [r0, #0x01]
1836 mov r2, r2, lsr #8 /* r2 = ...0 */
1837 strb r2, [r0]
1838 #else
1839 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1840 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1841 strb r2, [r0]
1842 mov r2, r2, lsr #8 /* r2 = ...1 */
1843 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1844 strh r2, [r0, #0x01]
1845 mov r3, r3, lsr #8 /* r3 = ...3 */
1846 strb r3, [r0, #0x03]
1847 #endif
1848 RET
1849 LMEMCPY_4_PAD
1850
1851 /*
1852 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1853 */
1854 ldrb r2, [r1]
1855 ldrh r3, [r1, #0x01]
1856 ldrb r1, [r1, #0x03]
1857 strb r2, [r0]
1858 strh r3, [r0, #0x01]
1859 strb r1, [r0, #0x03]
1860 RET
1861 LMEMCPY_4_PAD
1862
1863
1864 /******************************************************************************
1865 * Special case for 6 byte copies
1866 */
1867 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1868 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1869 LMEMCPY_6_PAD
1870 .Lmemcpy_6:
1871 and r2, r1, #0x03
1872 orr r2, r2, r0, lsl #2
1873 ands r2, r2, #0x0f
1874 sub r3, pc, #0x14
1875 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1876
1877 /*
1878 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1879 */
1880 ldr r2, [r1]
1881 ldrh r3, [r1, #0x04]
1882 str r2, [r0]
1883 strh r3, [r0, #0x04]
1884 RET
1885 LMEMCPY_6_PAD
1886
1887 /*
1888 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1889 */
1890 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1891 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1892 #ifdef __ARMEB__
1893 mov r2, r2, lsl #8 /* r2 = 012. */
1894 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1895 #else
1896 mov r2, r2, lsr #8 /* r2 = .210 */
1897 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1898 #endif
1899 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1900 str r2, [r0]
1901 strh r3, [r0, #0x04]
1902 RET
1903 LMEMCPY_6_PAD
1904
1905 /*
1906 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1907 */
1908 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1909 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1910 #ifdef __ARMEB__
1911 mov r1, r3, lsr #16 /* r1 = ..23 */
1912 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
1913 str r1, [r0]
1914 strh r3, [r0, #0x04]
1915 #else
1916 mov r1, r3, lsr #16 /* r1 = ..54 */
1917 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1918 str r2, [r0]
1919 strh r1, [r0, #0x04]
1920 #endif
1921 RET
1922 LMEMCPY_6_PAD
1923
1924 /*
1925  * 0011: dst is 32-bit aligned, src is 8-bit aligned
1926  */
1927         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
1928         ldr     r3, [r1, #1]            /* BE:r3 = 1234  LE:r3 = 4321 */
1929         ldr     r1, [r1, #5]            /* BE:r1 = 5xxx  LE:r1 = xxx5 */
1930 #ifdef __ARMEB__
1931         mov     r2, r2, lsl #24         /* r2 = 0... */
1932         orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
1933         mov     r3, r3, lsl #8          /* r3 = 234. */
1934         orr     r1, r3, r1, lsr #24     /* r1 = 2345 */
1935 #else
1936         mov     r2, r2, lsr #24         /* r2 = ...0 */
1937         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
1938         mov     r1, r1, lsl #8          /* r1 = xx5. */
1939         orr     r1, r1, r3, lsr #24     /* r1 = xx54 */
1940 #endif
1941         str     r2, [r0]
1942         strh    r1, [r0, #0x04]
1943         RET
1944 LMEMCPY_6_PAD
1945
1946 /*
1947 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1948 */
1949 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1950 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1951 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1952 strh r1, [r0, #0x01]
1953 #ifdef __ARMEB__
1954 mov r1, r3, lsr #24 /* r1 = ...0 */
1955 strb r1, [r0]
1956 mov r3, r3, lsl #8 /* r3 = 123. */
1957 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
1958 #else
1959 strb r3, [r0]
1960 mov r3, r3, lsr #24 /* r3 = ...3 */
1961 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1962 mov r2, r2, lsr #8 /* r2 = ...5 */
1963 #endif
1964 strh r3, [r0, #0x03]
1965 strb r2, [r0, #0x05]
1966 RET
1967 LMEMCPY_6_PAD
1968
1969 /*
1970 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1971 */
1972 ldrb r2, [r1]
1973 ldrh r3, [r1, #0x01]
1974 ldrh ip, [r1, #0x03]
1975 ldrb r1, [r1, #0x05]
1976 strb r2, [r0]
1977 strh r3, [r0, #0x01]
1978 strh ip, [r0, #0x03]
1979 strb r1, [r0, #0x05]
1980 RET
1981 LMEMCPY_6_PAD
1982
1983 /*
1984 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1985 */
1986 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1987 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1988 #ifdef __ARMEB__
1989 mov r3, r2, lsr #8 /* r3 = ...0 */
1990 strb r3, [r0]
1991 strb r1, [r0, #0x05]
1992 mov r3, r1, lsr #8 /* r3 = .234 */
1993 strh r3, [r0, #0x03]
1994 mov r3, r2, lsl #8 /* r3 = .01. */
1995 orr r3, r3, r1, lsr #24 /* r3 = .012 */
1996 strh r3, [r0, #0x01]
1997 #else
1998 strb r2, [r0]
1999 mov r3, r1, lsr #24
2000 strb r3, [r0, #0x05]
2001 mov r3, r1, lsr #8 /* r3 = .543 */
2002 strh r3, [r0, #0x03]
2003 mov r3, r2, lsr #8 /* r3 = ...1 */
2004 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
2005 strh r3, [r0, #0x01]
2006 #endif
2007 RET
2008 LMEMCPY_6_PAD
2009
2010 /*
2011 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2012 */
2013 ldrb r2, [r1]
2014 ldrh r3, [r1, #0x01]
2015 ldrh ip, [r1, #0x03]
2016 ldrb r1, [r1, #0x05]
2017 strb r2, [r0]
2018 strh r3, [r0, #0x01]
2019 strh ip, [r0, #0x03]
2020 strb r1, [r0, #0x05]
2021 RET
2022 LMEMCPY_6_PAD
2023
2024 /*
2025 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2026 */
2027 #ifdef __ARMEB__
2028 ldr r2, [r1] /* r2 = 0123 */
2029 ldrh r3, [r1, #0x04] /* r3 = ..45 */
2030 mov r1, r2, lsr #16 /* r1 = ..01 */
2031 orr r3, r3, r2, lsl#16 /* r3 = 2345 */
2032 strh r1, [r0]
2033 str r3, [r0, #0x02]
2034 #else
2035 ldrh r2, [r1, #0x04] /* r2 = ..54 */
2036 ldr r3, [r1] /* r3 = 3210 */
2037 mov r2, r2, lsl #16 /* r2 = 54.. */
2038 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
2039 strh r3, [r0]
2040 str r2, [r0, #0x02]
2041 #endif
2042 RET
2043 LMEMCPY_6_PAD
2044
2045 /*
2046 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2047 */
2048 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2049 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
2050 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2051 #ifdef __ARMEB__
2052 mov r2, r2, lsr #8 /* r2 = .345 */
2053 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
2054 #else
2055 mov r2, r2, lsl #8 /* r2 = 543. */
2056 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
2057 #endif
2058 strh r1, [r0]
2059 str r2, [r0, #0x02]
2060 RET
2061 LMEMCPY_6_PAD
2062
2063 /*
2064 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2065 */
2066 ldrh r2, [r1]
2067 ldr r3, [r1, #0x02]
2068 strh r2, [r0]
2069 str r3, [r0, #0x02]
2070 RET
2071 LMEMCPY_6_PAD
2072
2073 /*
2074 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2075 */
2076 ldrb r3, [r1] /* r3 = ...0 */
2077 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2078 ldrb r1, [r1, #0x05] /* r1 = ...5 */
2079 #ifdef __ARMEB__
2080 mov r3, r3, lsl #8 /* r3 = ..0. */
2081 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
2082 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
2083 #else
2084 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2085 mov r1, r1, lsl #24 /* r1 = 5... */
2086 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
2087 #endif
2088 strh r3, [r0]
2089 str r1, [r0, #0x02]
2090 RET
2091 LMEMCPY_6_PAD
2092
2093 /*
2094 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2095 */
2096 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2097 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
2098 #ifdef __ARMEB__
2099 mov r3, r2, lsr #24 /* r3 = ...0 */
2100 strb r3, [r0]
2101 mov r2, r2, lsl #8 /* r2 = 123. */
2102 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2103 #else
2104 strb r2, [r0]
2105 mov r2, r2, lsr #8 /* r2 = .321 */
2106 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
2107 mov r1, r1, lsr #8 /* r1 = ...5 */
2108 #endif
2109 str r2, [r0, #0x01]
2110 strb r1, [r0, #0x05]
2111 RET
2112 LMEMCPY_6_PAD
2113
2114 /*
2115 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2116 */
2117 ldrb r2, [r1]
2118 ldrh r3, [r1, #0x01]
2119 ldrh ip, [r1, #0x03]
2120 ldrb r1, [r1, #0x05]
2121 strb r2, [r0]
2122 strh r3, [r0, #0x01]
2123 strh ip, [r0, #0x03]
2124 strb r1, [r0, #0x05]
2125 RET
2126 LMEMCPY_6_PAD
2127
2128 /*
2129 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2130 */
2131 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2132 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
2133 #ifdef __ARMEB__
2134 mov r3, r2, lsr #8 /* r3 = ...0 */
2135 strb r3, [r0]
2136 mov r2, r2, lsl #24 /* r2 = 1... */
2137 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2138 #else
2139 strb r2, [r0]
2140 mov r2, r2, lsr #8 /* r2 = ...1 */
2141 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
2142 mov r1, r1, lsr #24 /* r1 = ...5 */
2143 #endif
2144 str r2, [r0, #0x01]
2145 strb r1, [r0, #0x05]
2146 RET
2147 LMEMCPY_6_PAD
2148
2149 /*
2150 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2151 */
2152 ldrb r2, [r1]
2153 ldr r3, [r1, #0x01]
2154 ldrb r1, [r1, #0x05]
2155 strb r2, [r0]
2156 str r3, [r0, #0x01]
2157 strb r1, [r0, #0x05]
2158 RET
2159 LMEMCPY_6_PAD
2160
2161
2162 /******************************************************************************
2163 * Special case for 8 byte copies
2164 */
2165 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
2166 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
2167 LMEMCPY_8_PAD
2168 .Lmemcpy_8:
2169 and r2, r1, #0x03
2170 orr r2, r2, r0, lsl #2
2171 ands r2, r2, #0x0f
2172 sub r3, pc, #0x14
2173 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
2174
2175 /*
2176 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2177 */
2178 ldr r2, [r1]
2179 ldr r3, [r1, #0x04]
2180 str r2, [r0]
2181 str r3, [r0, #0x04]
2182 RET
2183 LMEMCPY_8_PAD
2184
2185 /*
2186 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2187 */
2188 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2189 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
2190 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2191 #ifdef __ARMEB__
2192 mov r3, r3, lsl #8 /* r3 = 012. */
2193 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
2194 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
2195 #else
2196 mov r3, r3, lsr #8 /* r3 = .210 */
2197 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
2198 mov r1, r1, lsl #24 /* r1 = 7... */
2199 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
2200 #endif
2201 str r3, [r0]
2202 str r2, [r0, #0x04]
2203 RET
2204 LMEMCPY_8_PAD
2205
2206 /*
2207 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2208 */
2209 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2210 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2211 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2212 #ifdef __ARMEB__
2213 mov r2, r2, lsl #16 /* r2 = 01.. */
2214 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2215 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
2216 #else
2217 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2218 mov r3, r3, lsr #16 /* r3 = ..54 */
2219 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
2220 #endif
2221 str r2, [r0]
2222 str r3, [r0, #0x04]
2223 RET
2224 LMEMCPY_8_PAD
2225
2226 /*
2227 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2228 */
2229 ldrb r3, [r1] /* r3 = ...0 */
2230 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2231 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
2232 #ifdef __ARMEB__
2233 mov r3, r3, lsl #24 /* r3 = 0... */
2234 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
2235 mov r2, r2, lsl #24 /* r2 = 4... */
2236 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
2237 #else
2238 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2239 mov r2, r2, lsr #24 /* r2 = ...4 */
2240 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
2241 #endif
2242 str r3, [r0]
2243 str r2, [r0, #0x04]
2244 RET
2245 LMEMCPY_8_PAD
2246
2247 /*
2248 * 0100: dst is 8-bit aligned, src is 32-bit aligned
2249 */
2250 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
2251 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
2252 #ifdef __ARMEB__
2253 mov r1, r3, lsr #24 /* r1 = ...0 */
2254 strb r1, [r0]
2255 mov r1, r3, lsr #8 /* r1 = .012 */
2256 strb r2, [r0, #0x07]
2257 mov r3, r3, lsl #24 /* r3 = 3... */
2258 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
2259 #else
2260 strb r3, [r0]
2261 mov r1, r2, lsr #24 /* r1 = ...7 */
2262 strb r1, [r0, #0x07]
2263 mov r1, r3, lsr #8 /* r1 = .321 */
2264 mov r3, r3, lsr #24 /* r3 = ...3 */
2265 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
2266 #endif
2267 strh r1, [r0, #0x01]
2268 str r3, [r0, #0x03]
2269 RET
2270 LMEMCPY_8_PAD
2271
2272 /*
2273 * 0101: dst is 8-bit aligned, src is 8-bit aligned
2274 */
2275 ldrb r2, [r1]
2276 ldrh r3, [r1, #0x01]
2277 ldr ip, [r1, #0x03]
2278 ldrb r1, [r1, #0x07]
2279 strb r2, [r0]
2280 strh r3, [r0, #0x01]
2281 str ip, [r0, #0x03]
2282 strb r1, [r0, #0x07]
2283 RET
2284 LMEMCPY_8_PAD
2285
2286 /*
2287 * 0110: dst is 8-bit aligned, src is 16-bit aligned
2288 */
2289 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2290 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2291 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2292 #ifdef __ARMEB__
2293 mov ip, r2, lsr #8 /* ip = ...0 */
2294 strb ip, [r0]
2295 mov ip, r2, lsl #8 /* ip = .01. */
2296 orr ip, ip, r3, lsr #24 /* ip = .012 */
2297 strb r1, [r0, #0x07]
2298 mov r3, r3, lsl #8 /* r3 = 345. */
2299 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
2300 #else
2301 strb r2, [r0] /* 0 */
2302 mov ip, r1, lsr #8 /* ip = ...7 */
2303 strb ip, [r0, #0x07] /* 7 */
2304 mov ip, r2, lsr #8 /* ip = ...1 */
2305 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2306 mov r3, r3, lsr #8 /* r3 = .543 */
2307 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
2308 #endif
2309 strh ip, [r0, #0x01]
2310 str r3, [r0, #0x03]
2311 RET
2312 LMEMCPY_8_PAD
2313
2314 /*
2315 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2316 */
2317 ldrb r3, [r1] /* r3 = ...0 */
2318 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2319 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
2320 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2321 strb r3, [r0]
2322 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
2323 #ifdef __ARMEB__
2324 strh r3, [r0, #0x01]
2325 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
2326 #else
2327 strh ip, [r0, #0x01]
2328 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
2329 #endif
2330 str r2, [r0, #0x03]
2331 strb r1, [r0, #0x07]
2332 RET
2333 LMEMCPY_8_PAD
2334
/*
 * NOTE(review): tail of the 8-byte-copy jump table (.Lmemcpy_8; the label
 * and entries 0000-0111 are above this chunk).  Each entry handles one
 * dst/src alignment combination; the 4-bit case number is
 * (dst & 3) << 2 | (src & 3).  Digits in the comments are SOURCE byte
 * indices (0 = first byte), so "r2 = 3210" means r2 holds bytes 3..0 with
 * byte 0 in the least-significant position (little-endian view).
 * Every entry must fit in 2^LMEMCPY_8_LOG2 bytes; LMEMCPY_8_PAD pads to
 * that boundary so the computed branch lands on the right entry.  Do not
 * add or remove instructions without rechecking the entry size.
 */
2335 /*
2336  * 1000: dst is 16-bit aligned, src is 32-bit aligned
2337  */
2338 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2339 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2340 	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2341 #ifdef __ARMEB__
2342 	strh	r1, [r0]
2343 	mov	r1, r3, lsr #16		/* r1 = ..45 */
2344 	orr	r2, r1 ,r2, lsl #16	/* r2 = 2345 */
2345 #else
2346 	strh	r2, [r0]
2347 	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
2348 	mov	r3, r3, lsr #16		/* r3 = ..76 */
2349 #endif
2350 	str	r2, [r0, #0x02]
2351 	strh	r3, [r0, #0x06]
2352 	RET
2353 	LMEMCPY_8_PAD
2354 
2355 /*
2356  * 1001: dst is 16-bit aligned, src is 8-bit aligned
2357  */
2358 	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2359 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2360 	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
2361 	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
2362 	strh	r1, [r0]
2363 #ifdef __ARMEB__
2364 	mov	r1, r2, lsl #24		/* r1 = 2... */
2365 	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
2366 	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
2367 #else
2368 	mov	r1, r2, lsr #24		/* r1 = ...2 */
2369 	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
2370 	mov	r3, r3, lsr #24		/* r3 = ...6 */
2371 	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
2372 #endif
2373 	str	r1, [r0, #0x02]
2374 	strh	r3, [r0, #0x06]
2375 	RET
2376 	LMEMCPY_8_PAD
2377 
2378 /*
2379  * 1010: dst is 16-bit aligned, src is 16-bit aligned
2380  */
2381 	ldrh	r2, [r1]
2382 	ldr	ip, [r1, #0x02]
2383 	ldrh	r3, [r1, #0x06]
2384 	strh	r2, [r0]
2385 	str	ip, [r0, #0x02]
2386 	strh	r3, [r0, #0x06]
2387 	RET
2388 	LMEMCPY_8_PAD
2389 
2390 /*
2391  * 1011: dst is 16-bit aligned, src is 8-bit aligned
2392  */
2393 	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
2394 	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
2395 	ldrb	ip, [r1]		/* ip = ...0 */
2396 	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
2397 	strh	r1, [r0, #0x06]
2398 #ifdef __ARMEB__
2399 	mov	r3, r3, lsr #24		/* r3 = ...5 */
2400 	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
2401 	mov	r2, r2, lsr #24		/* r2 = ...1 */
2402 	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
2403 #else
2404 	mov	r3, r3, lsl #24		/* r3 = 5... */
2405 	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
2406 	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
2407 #endif
2408 	str	r3, [r0, #0x02]
2409 	strh	r2, [r0]
2410 	RET
2411 	LMEMCPY_8_PAD
2412 
2413 /*
2414  * 1100: dst is 8-bit aligned, src is 32-bit aligned
2415  */
2416 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2417 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2418 	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
2419 	strh	r1, [r0, #0x05]
2420 #ifdef __ARMEB__
2421 	strb	r3, [r0, #0x07]
2422 	mov	r1, r2, lsr #24		/* r1 = ...0 */
2423 	strb	r1, [r0]
2424 	mov	r2, r2, lsl #8		/* r2 = 123. */
2425 	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
2426 	str	r2, [r0, #0x01]
2427 #else
2428 	strb	r2, [r0]
2429 	mov	r1, r3, lsr #24		/* r1 = ...7 */
2430 	strb	r1, [r0, #0x07]
2431 	mov	r2, r2, lsr #8		/* r2 = .321 */
2432 	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
2433 	str	r2, [r0, #0x01]
2434 #endif
2435 	RET
2436 	LMEMCPY_8_PAD
2437 
2438 /*
2439  * 1101: dst is 8-bit aligned, src is 8-bit aligned
2440  */
2441 	ldrb	r3, [r1]		/* r3 = ...0 */
2442 	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
2443 	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2444 	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2445 	strb	r3, [r0]
2446 	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
2447 #ifdef __ARMEB__
2448 	strh	ip, [r0, #0x05]
2449 	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
2450 #else
2451 	strh	r3, [r0, #0x05]
2452 	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
2453 #endif
2454 	str	r2, [r0, #0x01]
2455 	strb	r1, [r0, #0x07]
2456 	RET
2457 	LMEMCPY_8_PAD
2458 
2459 /*
2460  * 1110: dst is 8-bit aligned, src is 16-bit aligned
2461  */
2462 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2463 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2464 	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2465 #ifdef __ARMEB__
2466 	mov	ip, r2, lsr #8		/* ip = ...0 */
2467 	strb	ip, [r0]
2468 	mov	ip, r2, lsl #24		/* ip = 1... */
2469 	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
2470 	strb	r1, [r0, #0x07]
2471 	mov	r1, r1, lsr #8		/* r1 = ...6 */
2472 	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
2473 #else
2474 	strb	r2, [r0]
2475 	mov	ip, r2, lsr #8		/* ip = ...1 */
2476 	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
2477 	mov	r2, r1, lsr #8		/* r2 = ...7 */
2478 	strb	r2, [r0, #0x07]
2479 	mov	r1, r1, lsl #8		/* r1 = .76. */
2480 	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
2481 #endif
2482 	str	ip, [r0, #0x01]
2483 	strh	r1, [r0, #0x05]
2484 	RET
2485 	LMEMCPY_8_PAD
2486 
2487 /*
2488  * 1111: dst is 8-bit aligned, src is 8-bit aligned
2489  */
2490 	ldrb	r2, [r1]
2491 	ldr	ip, [r1, #0x01]
2492 	ldrh	r3, [r1, #0x05]
2493 	ldrb	r1, [r1, #0x07]
2494 	strb	r2, [r0]
2495 	str	ip, [r0, #0x01]
2496 	strh	r3, [r0, #0x05]
2497 	strb	r1, [r0, #0x07]
2498 	RET
2499 	LMEMCPY_8_PAD
2500 
2500
2501 /******************************************************************************
2502  * Special case for 12 byte copies
2503  */
2504 #define LMEMCPY_C_LOG2	7	/* 128 bytes */
2505 #define LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
2506 LMEMCPY_C_PAD
2507 .Lmemcpy_c:
/*
 * Dispatch on the dst/src low address bits:
 *   case = ((dst & 3) << 2) | (src & 3)
 * Each jump-table entry below is padded to exactly 2^LMEMCPY_C_LOG2
 * (128) bytes, so the target is base + (case << LMEMCPY_C_LOG2).
 * NOTE(review): the #0x14 constant encodes the distance from the pc
 * value read here (this instruction + 8 on ARM) back to the table
 * base; it is layout-sensitive, so do not insert or remove
 * instructions in this dispatch sequence.
 */
2508 	and	r2, r1, #0x03		/* r2 = src & 3 */
2509 	orr	r2, r2, r0, lsl #2	/* fold in dst bits (masked next) */
2510 	ands	r2, r2, #0x0f		/* r2 = case; Z set for case 0 */
2511 	sub	r3, pc, #0x14		/* r3 = jump-table base */
2512 	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2	/* case 0 falls through */
2513 
2513
/*
 * NOTE(review): 12-byte-copy jump table.  Digits in the comments are
 * SOURCE byte indices 0-B (hex), so "r2 = 3210" means r2 holds source
 * bytes 3..0 with byte 0 least significant (little-endian view).
 * Each entry must fit in 2^LMEMCPY_C_LOG2 bytes; LMEMCPY_C_PAD pads to
 * that boundary so the computed branch in .Lmemcpy_c lands correctly.
 * Do not add or remove instructions without rechecking entry sizes.
 */
2514 /*
2515  * 0000: dst is 32-bit aligned, src is 32-bit aligned
2516  */
2517 	ldr	r2, [r1]
2518 	ldr	r3, [r1, #0x04]
2519 	ldr	r1, [r1, #0x08]
2520 	str	r2, [r0]
2521 	str	r3, [r0, #0x04]
2522 	str	r1, [r0, #0x08]
2523 	RET
2524 	LMEMCPY_C_PAD
2525 
2526 /*
2527  * 0001: dst is 32-bit aligned, src is 8-bit aligned
2528  */
2529 	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
2530 	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2531 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2532 	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
2533 #ifdef __ARMEB__
2534 	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
2535 	str	r2, [r0, #0x08]
2536 	mov	r2, ip, lsr #24		/* r2 = ...7 */
2537 	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
2538 	mov	r1, r1, lsl #8		/* r1 = 012. */
2539 	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
2540 #else
2541 	mov	r2, r2, lsl #24		/* r2 = B... */
2542 	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
2543 	str	r2, [r0, #0x08]
2544 	mov	r2, ip, lsl #24		/* r2 = 7... */
2545 	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
2546 	mov	r1, r1, lsr #8		/* r1 = .210 */
2547 	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
2548 #endif
2549 	str	r2, [r0, #0x04]
2550 	str	r1, [r0]
2551 	RET
2552 	LMEMCPY_C_PAD
2553 
2554 /*
2555  * 0010: dst is 32-bit aligned, src is 16-bit aligned
2556  */
2557 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2558 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2559 	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2560 	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2561 #ifdef __ARMEB__
2562 	mov	r2, r2, lsl #16		/* r2 = 01.. */
2563 	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
2564 	str	r2, [r0]
2565 	mov	r3, r3, lsl #16		/* r3 = 45.. */
2566 	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
2567 	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
2568 #else
2569 	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
2570 	str	r2, [r0]
2571 	mov	r3, r3, lsr #16		/* r3 = ..54 */
2572 	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
2573 	mov	r1, r1, lsl #16		/* r1 = BA.. */
2574 	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
2575 #endif
2576 	str	r3, [r0, #0x04]
2577 	str	r1, [r0, #0x08]
2578 	RET
2579 	LMEMCPY_C_PAD
2580 
2581 /*
2582  * 0011: dst is 32-bit aligned, src is 8-bit aligned
2583  */
2584 	ldrb	r2, [r1]		/* r2 = ...0 */
2585 	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2586 	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2587 	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
2588 #ifdef __ARMEB__
2589 	mov	r2, r2, lsl #24		/* r2 = 0... */
2590 	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
2591 	str	r2, [r0]
2592 	mov	r3, r3, lsl #24		/* r3 = 4... */
2593 	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
2594 	mov	r1, r1, lsr #8		/* r1 = .9AB */
2595 	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
2596 #else
2597 	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
2598 	str	r2, [r0]
2599 	mov	r3, r3, lsr #24		/* r3 = ...4 */
2600 	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
2601 	mov	r1, r1, lsl #8		/* r1 = BA9. */
2602 	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
2603 #endif
2604 	str	r3, [r0, #0x04]
2605 	str	r1, [r0, #0x08]
2606 	RET
2607 	LMEMCPY_C_PAD
2608 
2609 /*
2610  * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2611  */
2612 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2613 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2614 	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
2615 	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
2616 	strh	r1, [r0, #0x01]
2617 #ifdef __ARMEB__
2618 	mov	r1, r2, lsr #24		/* r1 = ...0 */
2619 	strb	r1, [r0]
2620 	mov	r1, r2, lsl #24		/* r1 = 3... */
2621 	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
2622 	mov	r1, r3, lsl #24		/* r1 = 7... */
2623 	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
2624 #else
2625 	strb	r2, [r0]
2626 	mov	r1, r2, lsr #24		/* r1 = ...3 */
2627 	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
2628 	mov	r1, r3, lsr #24		/* r1 = ...7 */
2629 	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
2630 	mov	ip, ip, lsr #24		/* ip = ...B */
2631 #endif
2632 	str	r2, [r0, #0x03]
2633 	str	r1, [r0, #0x07]
2634 	strb	ip, [r0, #0x0b]
2635 	RET
2636 	LMEMCPY_C_PAD
2637 
2638 /*
2639  * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2640  */
2641 	ldrb	r2, [r1]
2642 	ldrh	r3, [r1, #0x01]
2643 	ldr	ip, [r1, #0x03]
2644 	strb	r2, [r0]
2645 	ldr	r2, [r1, #0x07]
2646 	ldrb	r1, [r1, #0x0b]
2647 	strh	r3, [r0, #0x01]
2648 	str	ip, [r0, #0x03]
2649 	str	r2, [r0, #0x07]
2650 	strb	r1, [r0, #0x0b]
2651 	RET
2652 	LMEMCPY_C_PAD
2653 
2654 /*
2655  * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2656  */
2657 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2658 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2659 	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2660 	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2661 #ifdef __ARMEB__
2662 	mov	r2, r2, ror #8		/* r2 = 1..0 */
2663 	strb	r2, [r0]
2664 	mov	r2, r2, lsr #16		/* r2 = ..1. */
2665 	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
2666 	strh	r2, [r0, #0x01]
2667 	mov	r2, r3, lsl #8		/* r2 = 345. */
2668 	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
2669 	mov	r2, ip, lsl #8		/* r2 = 789. */
2670 	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
2671 #else
2672 	strb	r2, [r0]
2673 	mov	r2, r2, lsr #8		/* r2 = ...1 */
2674 	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2675 	strh	r2, [r0, #0x01]
2676 	mov	r2, r3, lsr #8		/* r2 = .543 */
2677 	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
2678 	mov	r2, ip, lsr #8		/* r2 = .987 */
2679 	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
2680 	mov	r1, r1, lsr #8		/* r1 = ...B */
2681 #endif
2682 	str	r3, [r0, #0x03]
2683 	str	r2, [r0, #0x07]
2684 	strb	r1, [r0, #0x0b]
2685 	RET
2686 	LMEMCPY_C_PAD
2687 
2688 /*
2689  * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2690  */
2691 	ldrb	r2, [r1]
2692 	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2693 	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2694 	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
2695 	strb	r2, [r0]
2696 #ifdef __ARMEB__
2697 	mov	r2, r3, lsr #16		/* r2 = ..12 */
2698 	strh	r2, [r0, #0x01]
2699 	mov	r3, r3, lsl #16		/* r3 = 34.. */
2700 	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
2701 	mov	ip, ip, lsl #16		/* ip = 78.. */
2702 	orr	ip, ip, r1, lsr #16	/* ip = 789A */
2703 	mov	r1, r1, lsr #8		/* r1 = .9AB */
2704 #else
2705 	strh	r3, [r0, #0x01]
2706 	mov	r3, r3, lsr #16		/* r3 = ..43 */
2707 	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
2708 	mov	ip, ip, lsr #16		/* ip = ..87 */
2709 	orr	ip, ip, r1, lsl #16	/* ip = A987 */
2710 	mov	r1, r1, lsr #16		/* r1 = ..xB */
2711 #endif
2712 	str	r3, [r0, #0x03]
2713 	str	ip, [r0, #0x07]
2714 	strb	r1, [r0, #0x0b]
2715 	RET
2716 	LMEMCPY_C_PAD
2717 
2718 /*
2719  * 1000: dst is 16-bit aligned, src is 32-bit aligned
2720  */
2721 	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
2722 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2723 	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
2724 	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2725 #ifdef __ARMEB__
2726 	strh	r1, [r0]
2727 	mov	r1, ip, lsl #16		/* r1 = 23.. */
2728 	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
2729 	mov	r3, r3, lsl #16		/* r3 = 67.. */
2730 	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
2731 #else
2732 	strh	ip, [r0]
2733 	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
2734 	mov	r3, r3, lsr #16		/* r3 = ..76 */
2735 	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
2736 	mov	r2, r2, lsr #16		/* r2 = ..BA */
2737 #endif
2738 	str	r1, [r0, #0x02]
2739 	str	r3, [r0, #0x06]
2740 	strh	r2, [r0, #0x0a]
2741 	RET
2742 	LMEMCPY_C_PAD
2743 
2744 /*
2745  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2746  */
2747 	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2748 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2749 	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
2750 	strh	ip, [r0]
2751 	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2752 	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
2753 #ifdef __ARMEB__
2754 	mov	r2, r2, lsl #24		/* r2 = 2... */
2755 	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
2756 	mov	r3, r3, lsl #24		/* r3 = 6... */
2757 	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
2758 	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
2759 #else
2760 	mov	r2, r2, lsr #24		/* r2 = ...2 */
2761 	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
2762 	mov	r3, r3, lsr #24		/* r3 = ...6 */
2763 	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
2764 	mov	r1, r1, lsl #8		/* r1 = ..B. */
2765 	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
2766 #endif
2767 	str	r2, [r0, #0x02]
2768 	str	r3, [r0, #0x06]
2769 	strh	r1, [r0, #0x0a]
2770 	RET
2771 	LMEMCPY_C_PAD
2772 
2773 /*
2774  * 1010: dst is 16-bit aligned, src is 16-bit aligned
2775  */
2776 	ldrh	r2, [r1]
2777 	ldr	r3, [r1, #0x02]
2778 	ldr	ip, [r1, #0x06]
2779 	ldrh	r1, [r1, #0x0a]
2780 	strh	r2, [r0]
2781 	str	r3, [r0, #0x02]
2782 	str	ip, [r0, #0x06]
2783 	strh	r1, [r0, #0x0a]
2784 	RET
2785 	LMEMCPY_C_PAD
2786 
2787 /*
2788  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2789  */
2790 	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
2791 	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
2792 	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
2793 	strh	ip, [r0, #0x0a]
2794 	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
2795 	ldrb	r1, [r1]		/* r1 = ...0 */
2796 #ifdef __ARMEB__
2797 	mov	r2, r2, lsr #24		/* r2 = ...9 */
2798 	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
2799 	mov	r3, r3, lsr #24		/* r3 = ...5 */
2800 	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
2801 	mov	r1, r1, lsl #8		/* r1 = ..0. */
2802 	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
2803 #else
2804 	mov	r2, r2, lsl #24		/* r2 = 9... */
2805 	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
2806 	mov	r3, r3, lsl #24		/* r3 = 5... */
2807 	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
2808 	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
2809 #endif
2810 	str	r2, [r0, #0x06]
2811 	str	r3, [r0, #0x02]
2812 	strh	r1, [r0]
2813 	RET
2814 	LMEMCPY_C_PAD
2815 
2816 /*
2817  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2818  */
2819 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2820 	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
2821 	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
2822 #ifdef __ARMEB__
2823 	mov	r3, r2, lsr #24		/* r3 = ...0 */
2824 	strb	r3, [r0]
2825 	mov	r2, r2, lsl #8		/* r2 = 123. */
2826 	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
2827 	str	r2, [r0, #0x01]
2828 	mov	r2, ip, lsl #8		/* r2 = 567. */
2829 	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
2830 	str	r2, [r0, #0x05]
2831 	mov	r2, r1, lsr #8		/* r2 = ..9A */
2832 	strh	r2, [r0, #0x09]
2833 	strb	r1, [r0, #0x0b]
2834 #else
2835 	strb	r2, [r0]
2836 	mov	r3, r2, lsr #8		/* r3 = .321 */
2837 	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
2838 	str	r3, [r0, #0x01]
2839 	mov	r3, ip, lsr #8		/* r3 = .765 */
2840 	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
2841 	str	r3, [r0, #0x05]
2842 	mov	r1, r1, lsr #8		/* r1 = .BA9 */
2843 	strh	r1, [r0, #0x09]
2844 	mov	r1, r1, lsr #16		/* r1 = ...B */
2845 	strb	r1, [r0, #0x0b]
2846 #endif
2847 	RET
2848 	LMEMCPY_C_PAD
2849 
2850 /*
2851  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2852  */
2853 	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
2854 	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
2855 	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2856 	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
2857 	strb	r2, [r0, #0x0b]
2858 #ifdef __ARMEB__
2859 	strh	r3, [r0, #0x09]
2860 	mov	r3, r3, lsr #16		/* r3 = ..78 */
2861 	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
2862 	mov	ip, ip, lsr #16		/* ip = ..34 */
2863 	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
2864 	mov	r1, r1, lsr #16		/* r1 = ..x0 */
2865 #else
2866 	mov	r2, r3, lsr #16		/* r2 = ..A9 */
2867 	strh	r2, [r0, #0x09]
2868 	mov	r3, r3, lsl #16		/* r3 = 87.. */
2869 	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
2870 	mov	ip, ip, lsl #16		/* ip = 43.. */
2871 	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
2872 	mov	r1, r1, lsr #8		/* r1 = .210 */
2873 #endif
2874 	str	r3, [r0, #0x05]
2875 	str	ip, [r0, #0x01]
2876 	strb	r1, [r0]
2877 	RET
2878 	LMEMCPY_C_PAD
2879 
2880 /*
2881  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2882  */
2883 #ifdef __ARMEB__
2884 	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
2885 	ldr	ip, [r1, #0x06]		/* ip = 6789 */
2886 	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
2887 	ldrh	r1, [r1]		/* r1 = ..01 */
2888 	strb	r2, [r0, #0x0b]
2889 	mov	r2, r2, lsr #8		/* r2 = ...A */
2890 	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
2891 	mov	ip, ip, lsr #8		/* ip = .678 */
2892 	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
2893 	mov	r3, r3, lsr #8		/* r3 = .234 */
2894 	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
2895 	mov	r1, r1, lsr #8		/* r1 = ...0 */
2896 	strb	r1, [r0]
2897 	str	r3, [r0, #0x01]
2898 	str	ip, [r0, #0x05]
2899 	strh	r2, [r0, #0x09]
2900 #else
2901 	ldrh	r2, [r1]		/* r2 = ..10 */
2902 	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
2903 	ldr	ip, [r1, #0x06]		/* ip = 9876 */
2904 	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
2905 	strb	r2, [r0]
2906 	mov	r2, r2, lsr #8		/* r2 = ...1 */
2907 	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2908 	mov	r3, r3, lsr #24		/* r3 = ...5 */
2909 	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
2910 	mov	ip, ip, lsr #24		/* ip = ...9 */
2911 	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
2912 	mov	r1, r1, lsr #8		/* r1 = ...B */
2913 	str	r2, [r0, #0x01]
2914 	str	r3, [r0, #0x05]
2915 	strh	ip, [r0, #0x09]
2916 	strb	r1, [r0, #0x0b]
2917 #endif
2918 	RET
2919 	LMEMCPY_C_PAD
2920 
2921 /*
2922  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2923  */
2924 	ldrb	r2, [r1]
2925 	ldr	r3, [r1, #0x01]
2926 	ldr	ip, [r1, #0x05]
2927 	strb	r2, [r0]
2928 	ldrh	r2, [r1, #0x09]
2929 	ldrb	r1, [r1, #0x0b]
2930 	str	r3, [r0, #0x01]
2931 	str	ip, [r0, #0x05]
2932 	strh	r2, [r0, #0x09]
2933 	strb	r1, [r0, #0x0b]
2934 	RET
2935 #endif /* _ARM_ARCH_5E */
2936
2937 #ifdef GPROF
2938 
/*
 * NOTE(review): one-nop placeholder entry points, built only with
 * profiling (GPROF) enabled.  Only the symbol addresses appear to
 * matter here; nothing visible in this file calls them.  They look
 * like the traditional kernel-profiling marker symbols
 * (user/btrap/etrap/bintr/eintr) used to bucket profiler samples that
 * fall outside normal functions -- confirm against the kernel's gprof
 * support code before relying on that interpretation.
 */
2939 ENTRY(user)
2940 	nop
2941 ENTRY(btrap)
2942 	nop
2943 ENTRY(etrap)
2944 	nop
2945 ENTRY(bintr)
2946 	nop
2947 ENTRY(eintr)
2948 	nop
2949 
2950 #endif
/* Cache object: c57a09b203bf749691ffe98d08926ecc */