FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 /*
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
29 *
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
46 * written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
59 */
60 /*
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
63 *
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 *
76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86 * POSSIBILITY OF SUCH DAMAGE.
87 */
88
89 #include <machine/asm.h>
90 __FBSDID("$FreeBSD$");
91
92 #include "assym.inc"
93
94 .syntax unified
95
/*
 * Literal pool: addresses of kernel variables holding optional
 * platform-accelerated bulk-copy/zero hooks and their thresholds.
 * _arm_memcpy / _arm_bzero are function pointers (NULL when no
 * accelerated implementation is installed); _min_memcpy_size /
 * _min_bzero_size are the minimum lengths for which the hooks are
 * worth calling.  NOTE(review): the hooks are presumably installed
 * by platform init code elsewhere — not visible in this file.
 */
.L_arm_memcpy:
.word _C_LABEL(_arm_memcpy)
.L_arm_bzero:
.word _C_LABEL(_arm_bzero)
.L_min_memcpy_size:
.word _C_LABEL(_min_memcpy_size)
.L_min_bzero_size:
.word _C_LABEL(_min_bzero_size)
104 /*
105 * memset: Sets a block of memory to the specified value
106 *
107 * On entry:
108 * r0 - dest address
109 * r1 - byte to write
110 * r2 - number of bytes to write
111 *
112 * On exit:
113 * r0 - dest address
114 */
115 /* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * void bzero(void *b, size_t len)
 * In:   r0 = buffer, r1 = length in bytes
 * If an accelerated hook (_arm_bzero) is installed and len >= the
 * configured minimum, call it; if the hook reports success (returns 0)
 * we are done, otherwise fall back to the software path.  The software
 * path loads the fill value 0 into r3 and joins memset's do_memset.
 */
116 ENTRY(bzero)
117 ldr r3, .L_arm_bzero
118 ldr r3, [r3]
119 cmp r3, #0
120 beq .Lnormal0 /* no hook installed -> software path */
121 ldr r2, .L_min_bzero_size
122 ldr r2, [r2]
123 cmp r1, r2
124 blt .Lnormal0 /* below threshold -> software path */
125 stmfd sp!, {r0, r1, lr}
126 mov r2, #0 /* third hook arg = 0; TODO confirm hook signature */
127 mov lr, pc /* old-style indirect call: lr = return address */
128 mov pc, r3 /* ... jump to _arm_bzero */
129 cmp r0, #0 /* hook result: 0 = handled */
130 ldmfd sp!, {r0, r1, lr} /* restore args (ldm does not touch flags) */
131 RETeq /* done if the hook succeeded */
/* Hook failed (or absent): zero it in software via memset's core. */
132 .Lnormal0:
133 mov r3, #0x00 /* fill byte = 0 */
134 b do_memset /* r0 = dst, r1 = len, r3 = value */
135 END(bzero)
136 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
137 ENTRY(memset)
138 and r3, r1, #0xff /* We deal with bytes */
139 mov r1, r2
140 do_memset:
141 cmp r1, #0x04 /* Do we have less than 4 bytes */
142 mov ip, r0
143 blt .Lmemset_lessthanfour
144
145 /* Ok first we will word align the address */
146 ands r2, ip, #0x03 /* Get the bottom two bits */
147 bne .Lmemset_wordunaligned /* The address is not word aligned */
148
149 /* We are now word aligned */
150 .Lmemset_wordaligned:
151 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
152 tst ip, #0x04 /* Quad-align for armv5e */
153 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
154 subne r1, r1, #0x04 /* Quad-align if necessary */
155 strne r3, [ip], #0x04
156 cmp r1, #0x10
157 blt .Lmemset_loop4 /* If less than 16 then use words */
158 mov r2, r3 /* Duplicate data */
159 cmp r1, #0x80 /* If < 128 then skip the big loop */
160 blt .Lmemset_loop32
161
162 /* Do 128 bytes at a time */
163 .Lmemset_loop128:
164 subs r1, r1, #0x80
165 strdge r2, [ip], #0x08
166 strdge r2, [ip], #0x08
167 strdge r2, [ip], #0x08
168 strdge r2, [ip], #0x08
169 strdge r2, [ip], #0x08
170 strdge r2, [ip], #0x08
171 strdge r2, [ip], #0x08
172 strdge r2, [ip], #0x08
173 strdge r2, [ip], #0x08
174 strdge r2, [ip], #0x08
175 strdge r2, [ip], #0x08
176 strdge r2, [ip], #0x08
177 strdge r2, [ip], #0x08
178 strdge r2, [ip], #0x08
179 strdge r2, [ip], #0x08
180 strdge r2, [ip], #0x08
181 bgt .Lmemset_loop128
182 RETeq /* Zero length so just exit */
183
184 add r1, r1, #0x80 /* Adjust for extra sub */
185
186 /* Do 32 bytes at a time */
187 .Lmemset_loop32:
188 subs r1, r1, #0x20
189 strdge r2, [ip], #0x08
190 strdge r2, [ip], #0x08
191 strdge r2, [ip], #0x08
192 strdge r2, [ip], #0x08
193 bgt .Lmemset_loop32
194 RETeq /* Zero length so just exit */
195
196 adds r1, r1, #0x10 /* Partially adjust for extra sub */
197
198 /* Deal with 16 bytes or more */
199 strdge r2, [ip], #0x08
200 strdge r2, [ip], #0x08
201 RETeq /* Zero length so just exit */
202
203 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
204
205 /* We have at least 4 bytes so copy as words */
206 .Lmemset_loop4:
207 subs r1, r1, #0x04
208 strge r3, [ip], #0x04
209 bgt .Lmemset_loop4
210 RETeq /* Zero length so just exit */
211
212 /* Compensate for 64-bit alignment check */
213 adds r1, r1, #0x04
214 RETeq
215 cmp r1, #2
216
217 strb r3, [ip], #0x01 /* Set 1 byte */
218 strbge r3, [ip], #0x01 /* Set another byte */
219 strbgt r3, [ip] /* and a third */
220 RET /* Exit */
221
222 .Lmemset_wordunaligned:
223 rsb r2, r2, #0x004
224 strb r3, [ip], #0x01 /* Set 1 byte */
225 cmp r2, #0x02
226 strbge r3, [ip], #0x01 /* Set another byte */
227 sub r1, r1, r2
228 strbgt r3, [ip], #0x01 /* and a third */
229 cmp r1, #0x04 /* More than 4 bytes left? */
230 bge .Lmemset_wordaligned /* Yup */
231
232 .Lmemset_lessthanfour:
233 cmp r1, #0x00
234 RETeq /* Zero length so exit */
235 strb r3, [ip], #0x01 /* Set 1 byte */
236 cmp r1, #0x02
237 strbge r3, [ip], #0x01 /* Set another byte */
238 strbgt r3, [ip] /* and a third */
239 RET /* Exit */
240 EEND(memset)
241 END(bzero)
242
/*
 * int bcmp(const void *b1, const void *b2, size_t len)
 * In:   r0 = b1, r1 = b2, r2 = len
 * Out:  r0 = 0 if equal; otherwise (first differing byte of b1) minus
 *       (corresponding byte of b2), memcmp-style.
 * len == 6 takes a dedicated hand-scheduled path (common for MAC
 * addresses in the network stack — see comment below).  Buffers with
 * matching low address bits are aligned and compared a word at a time;
 * otherwise the comparison is byte-wise.  ip shadows b1 so r0 is free
 * to hold the result.
 */
243 ENTRY(bcmp)
244 mov ip, r0 /* ip = b1 cursor; r0 becomes the result reg */
245 cmp r2, #0x06
246 beq .Lmemcmp_6bytes
247 mov r0, #0x00 /* default result: equal */
248
249 /* Are both addresses aligned the same way? */
250 cmp r2, #0x00
251 eorsne r3, ip, r1 /* r3 = b1 ^ b2 (skipped when len == 0) */
252 RETeq /* len == 0, or same addresses! */
253 tst r3, #0x03 /* do the low 2 address bits agree? */
254 subne r2, r2, #0x01
255 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
256
257 /* Word-align the addresses, if necessary */
/*
 * Computed jump: r3 = 3 * ((b2 - 5) & 3), then pc += r3 << 3, i.e.
 * skip 0/24/48 bytes into the three 6-instruction "compare up to N
 * bytes" groups below.  The layout of those groups is load-bearing —
 * each must remain exactly 6 instructions (24 bytes).
 */
258 sub r3, r1, #0x05
259 ands r3, r3, #0x03
260 add r3, r3, r3, lsl #1
261 addne pc, pc, r3, lsl #3
262 nop
263
264 /* Compare up to 3 bytes */
265 ldrb r0, [ip], #0x01
266 ldrb r3, [r1], #0x01
267 subs r0, r0, r3
268 RETne
269 subs r2, r2, #0x01
270 RETeq
271
272 /* Compare up to 2 bytes */
273 ldrb r0, [ip], #0x01
274 ldrb r3, [r1], #0x01
275 subs r0, r0, r3
276 RETne
277 subs r2, r2, #0x01
278 RETeq
279
280 /* Compare 1 byte */
281 ldrb r0, [ip], #0x01
282 ldrb r3, [r1], #0x01
283 subs r0, r0, r3
284 RETne
285 subs r2, r2, #0x01
286 RETeq
287
288 /* Compare 4 bytes at a time, if possible */
289 subs r2, r2, #0x04
290 bcc .Lmemcmp_bytewise
291 .Lmemcmp_word_aligned:
292 ldr r0, [ip], #0x04
293 ldr r3, [r1], #0x04
294 subs r2, r2, #0x04
295 cmpcs r0, r3 /* only compare while count has not underflowed */
296 beq .Lmemcmp_word_aligned
297 sub r0, r0, r3
298
299 /* Correct for extra subtraction, and check if done */
300 adds r2, r2, #0x04
301 cmpeq r0, #0x00 /* If done, did all bytes match? */
302 RETeq /* Yup. Just return */
303
304 /* Re-do the final word byte-wise (word diff != byte-wise result) */
305 sub ip, ip, #0x04
306 sub r1, r1, #0x04
307
308 .Lmemcmp_bytewise:
309 add r2, r2, #0x03 /* undo the -4 bias, loop consumes 1/iter */
310 .Lmemcmp_bytewise2:
311 ldrb r0, [ip], #0x01
312 ldrb r3, [r1], #0x01
313 subs r2, r2, #0x01
314 cmpcs r0, r3 /* stop on exhausted count or mismatch */
315 beq .Lmemcmp_bytewise2
316 sub r0, r0, r3
317 RET
318
319 /*
320 * 6 byte compares are very common, thanks to the network stack.
321 * This code is hand-scheduled to reduce the number of stalls for
322 * load results. Everything else being equal, this will be ~32%
323 * faster than a byte-wise memcmp.
324 */
325 .align 5
326 .Lmemcmp_6bytes:
327 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
328 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
329 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
330 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
331 ldrbeq r3, [ip, #0x01] /* r3 = b1#1 */
332 RETne /* Return if mismatch on #0 */
333 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
334 ldrbeq r3, [r1, #0x02] /* r3 = b2#2 */
335 ldrbeq r0, [ip, #0x02] /* r0 = b1#2 */
336 RETne /* Return if mismatch on #1 */
337 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
338 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
339 ldrbeq r3, [ip, #0x03] /* r3 = b1#3 */
340 RETne /* Return if mismatch on #2 */
341 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
342 ldrbeq r3, [r1, #0x04] /* r3 = b2#4 */
343 ldrbeq r0, [ip, #0x04] /* r0 = b1#4 */
344 RETne /* Return if mismatch on #3 */
345 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
346 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
347 ldrbeq r3, [ip, #0x05] /* r3 = b1#5 */
348 RETne /* Return if mismatch on #4 */
349 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
350 RET
351 END(bcmp)
352
/*
 * void bcopy(const void *src, void *dst, size_t len)
 * void *memmove(void *dst, const void *src, size_t len)
 * In (memmove): r0 = dst, r1 = src, r2 = len
 * bcopy has (src, dst) argument order, so its entry swaps r0/r1 with
 * the 3-eor trick and falls into memmove.
 * Non-overlapping buffers are handed off to memcpy.  Overlapping
 * buffers are copied forwards when dst < src, backwards when dst > src.
 * Unaligned sources are handled by loading aligned words and merging
 * adjacent words with lsr/lsl shifts (separate paths for source offsets
 * of 1, 2 and 3 bytes, both directions).
 * NOTE(review): memmove's return value — bcopy's forward path pushes
 * {r0, lr} and pops {r0, pc}, restoring dst in r0 on return.
 */
353 ENTRY(bcopy)
354 /* switch the source and destination registers */
355 eor r0, r1, r0
356 eor r1, r0, r1
357 eor r0, r1, r0
358 EENTRY(memmove)
359 /* Do the buffers overlap? */
360 cmp r0, r1
361 RETeq /* Bail now if src/dst are the same */
362 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
363 subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */
364 cmp r3, r2 /* if (r3 < len) we have an overlap */
365 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
366
367 /* Determine copy direction */
368 cmp r1, r0
369 bcc .Lmemmove_backwards
370
371 moveq r0, #0 /* Quick abort for len=0 */
372 RETeq
373
/* ---- Forward copy (dst < src) ---- */
374 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
375 subs r2, r2, #4
376 blt .Lmemmove_fl4 /* less than 4 bytes */
377 ands r12, r0, #3
378 bne .Lmemmove_fdestul /* oh unaligned destination addr */
379 ands r12, r1, #3
380 bne .Lmemmove_fsrcul /* oh unaligned source addr */
381
382 .Lmemmove_ft8:
383 /* We have aligned source and destination */
384 subs r2, r2, #8
385 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
386 subs r2, r2, #0x14
387 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
388 stmdb sp!, {r4} /* borrow r4 */
389
390 /* blat 32 bytes at a time */
391 /* XXX for really big copies perhaps we should use more registers */
392 .Lmemmove_floop32:
393 ldmia r1!, {r3, r4, r12, lr}
394 stmia r0!, {r3, r4, r12, lr}
395 ldmia r1!, {r3, r4, r12, lr}
396 stmia r0!, {r3, r4, r12, lr}
397 subs r2, r2, #0x20
398 bge .Lmemmove_floop32
399
400 cmn r2, #0x10
401 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
402 stmiage r0!, {r3, r4, r12, lr}
403 subge r2, r2, #0x10
404 ldmia sp!, {r4} /* return r4 */
405
406 .Lmemmove_fl32:
407 adds r2, r2, #0x14
408
409 /* blat 12 bytes at a time */
410 .Lmemmove_floop12:
411 ldmiage r1!, {r3, r12, lr}
412 stmiage r0!, {r3, r12, lr}
413 subsge r2, r2, #0x0c
414 bge .Lmemmove_floop12
415
416 .Lmemmove_fl12:
417 adds r2, r2, #8
418 blt .Lmemmove_fl4
419
420 subs r2, r2, #4
421 ldrlt r3, [r1], #4
422 strlt r3, [r0], #4
423 ldmiage r1!, {r3, r12}
424 stmiage r0!, {r3, r12}
425 subge r2, r2, #4
426
427 .Lmemmove_fl4:
428 /* less than 4 bytes to go */
429 adds r2, r2, #4
430 ldmiaeq sp!, {r0, pc} /* done (restores dest addr into r0) */
431
432 /* copy the crud byte at a time */
433 cmp r2, #2
434 ldrb r3, [r1], #1
435 strb r3, [r0], #1
436 ldrbge r3, [r1], #1
437 strbge r3, [r0], #1
438 ldrbgt r3, [r1], #1
439 strbgt r3, [r0], #1
440 ldmia sp!, {r0, pc}
441
442 /* erg - unaligned destination */
443 .Lmemmove_fdestul:
444 rsb r12, r12, #4 /* r12 = bytes to reach dst alignment */
445 cmp r12, #2
446
447 /* align destination with byte copies */
448 ldrb r3, [r1], #1
449 strb r3, [r0], #1
450 ldrbge r3, [r1], #1
451 strbge r3, [r0], #1
452 ldrbgt r3, [r1], #1
453 strbgt r3, [r0], #1
454 subs r2, r2, r12
455 blt .Lmemmove_fl4 /* less the 4 bytes */
456
457 ands r12, r1, #3
458 beq .Lmemmove_ft8 /* we have an aligned source */
459
460 /* erg - unaligned source */
461 /* This is where it gets nasty ... */
/*
 * Forward, source misaligned by r12 (1..3) bytes: round src down to a
 * word boundary, preload one word into lr, then merge consecutive
 * aligned words with lsr/lsl to synthesize the unaligned stream.
 * One path per misalignment amount; each fixes r1 back up before
 * joining the byte-tail code at .Lmemmove_fl4.
 */
462 .Lmemmove_fsrcul:
463 bic r1, r1, #3
464 ldr lr, [r1], #4
465 cmp r12, #2
466 bgt .Lmemmove_fsrcul3
467 beq .Lmemmove_fsrcul2
468 cmp r2, #0x0c
469 blt .Lmemmove_fsrcul1loop4
470 sub r2, r2, #0x0c
471 stmdb sp!, {r4, r5}
472
473 .Lmemmove_fsrcul1loop16:
474 mov r3, lr, lsr #8
475 ldmia r1!, {r4, r5, r12, lr}
476 orr r3, r3, r4, lsl #24
477 mov r4, r4, lsr #8
478 orr r4, r4, r5, lsl #24
479 mov r5, r5, lsr #8
480 orr r5, r5, r12, lsl #24
481 mov r12, r12, lsr #8
482 orr r12, r12, lr, lsl #24
483 stmia r0!, {r3-r5, r12}
484 subs r2, r2, #0x10
485 bge .Lmemmove_fsrcul1loop16
486 ldmia sp!, {r4, r5}
487 adds r2, r2, #0x0c
488 blt .Lmemmove_fsrcul1l4
489
490 .Lmemmove_fsrcul1loop4:
491 mov r12, lr, lsr #8
492 ldr lr, [r1], #4
493 orr r12, r12, lr, lsl #24
494 str r12, [r0], #4
495 subs r2, r2, #4
496 bge .Lmemmove_fsrcul1loop4
497
498 .Lmemmove_fsrcul1l4:
499 sub r1, r1, #3 /* restore true (unaligned) src pointer */
500 b .Lmemmove_fl4
501
502 .Lmemmove_fsrcul2:
503 cmp r2, #0x0c
504 blt .Lmemmove_fsrcul2loop4
505 sub r2, r2, #0x0c
506 stmdb sp!, {r4, r5}
507
508 .Lmemmove_fsrcul2loop16:
509 mov r3, lr, lsr #16
510 ldmia r1!, {r4, r5, r12, lr}
511 orr r3, r3, r4, lsl #16
512 mov r4, r4, lsr #16
513 orr r4, r4, r5, lsl #16
514 mov r5, r5, lsr #16
515 orr r5, r5, r12, lsl #16
516 mov r12, r12, lsr #16
517 orr r12, r12, lr, lsl #16
518 stmia r0!, {r3-r5, r12}
519 subs r2, r2, #0x10
520 bge .Lmemmove_fsrcul2loop16
521 ldmia sp!, {r4, r5}
522 adds r2, r2, #0x0c
523 blt .Lmemmove_fsrcul2l4
524
525 .Lmemmove_fsrcul2loop4:
526 mov r12, lr, lsr #16
527 ldr lr, [r1], #4
528 orr r12, r12, lr, lsl #16
529 str r12, [r0], #4
530 subs r2, r2, #4
531 bge .Lmemmove_fsrcul2loop4
532
533 .Lmemmove_fsrcul2l4:
534 sub r1, r1, #2 /* restore true (unaligned) src pointer */
535 b .Lmemmove_fl4
536
537 .Lmemmove_fsrcul3:
538 cmp r2, #0x0c
539 blt .Lmemmove_fsrcul3loop4
540 sub r2, r2, #0x0c
541 stmdb sp!, {r4, r5}
542
543 .Lmemmove_fsrcul3loop16:
544 mov r3, lr, lsr #24
545 ldmia r1!, {r4, r5, r12, lr}
546 orr r3, r3, r4, lsl #8
547 mov r4, r4, lsr #24
548 orr r4, r4, r5, lsl #8
549 mov r5, r5, lsr #24
550 orr r5, r5, r12, lsl #8
551 mov r12, r12, lsr #24
552 orr r12, r12, lr, lsl #8
553 stmia r0!, {r3-r5, r12}
554 subs r2, r2, #0x10
555 bge .Lmemmove_fsrcul3loop16
556 ldmia sp!, {r4, r5}
557 adds r2, r2, #0x0c
558 blt .Lmemmove_fsrcul3l4
559
560 .Lmemmove_fsrcul3loop4:
561 mov r12, lr, lsr #24
562 ldr lr, [r1], #4
563 orr r12, r12, lr, lsl #8
564 str r12, [r0], #4
565 subs r2, r2, #4
566 bge .Lmemmove_fsrcul3loop4
567
568 .Lmemmove_fsrcul3l4:
569 sub r1, r1, #1 /* restore true (unaligned) src pointer */
570 b .Lmemmove_fl4
571
/*
 * ---- Backward copy (dst > src, overlapping) ----
 * Pointers are moved to the end of both buffers and the copy proceeds
 * with pre-decrement (ldmdb/stmdb, [rN, #-x]!) addressing.  r0 is
 * decremented back to dst by the end, so the plain RETs return dst.
 */
572 .Lmemmove_backwards:
573 add r1, r1, r2
574 add r0, r0, r2
575 subs r2, r2, #4
576 blt .Lmemmove_bl4 /* less than 4 bytes */
577 ands r12, r0, #3
578 bne .Lmemmove_bdestul /* oh unaligned destination addr */
579 ands r12, r1, #3
580 bne .Lmemmove_bsrcul /* oh unaligned source addr */
581
582 .Lmemmove_bt8:
583 /* We have aligned source and destination */
584 subs r2, r2, #8
585 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
586 stmdb sp!, {r4, lr}
587 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
588 blt .Lmemmove_bl32
589
590 /* blat 32 bytes at a time */
591 /* XXX for really big copies perhaps we should use more registers */
592 .Lmemmove_bloop32:
593 ldmdb r1!, {r3, r4, r12, lr}
594 stmdb r0!, {r3, r4, r12, lr}
595 ldmdb r1!, {r3, r4, r12, lr}
596 stmdb r0!, {r3, r4, r12, lr}
597 subs r2, r2, #0x20
598 bge .Lmemmove_bloop32
599
600 .Lmemmove_bl32:
601 cmn r2, #0x10
602 ldmdbge r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
603 stmdbge r0!, {r3, r4, r12, lr}
604 subge r2, r2, #0x10
605 adds r2, r2, #0x14
606 ldmdbge r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
607 stmdbge r0!, {r3, r12, lr}
608 subge r2, r2, #0x0c
609 ldmia sp!, {r4, lr}
610
611 .Lmemmove_bl12:
612 adds r2, r2, #8
613 blt .Lmemmove_bl4
614 subs r2, r2, #4
615 ldrlt r3, [r1, #-4]!
616 strlt r3, [r0, #-4]!
617 ldmdbge r1!, {r3, r12}
618 stmdbge r0!, {r3, r12}
619 subge r2, r2, #4
620
621 .Lmemmove_bl4:
622 /* less than 4 bytes to go */
623 adds r2, r2, #4
624 RETeq /* done */
625
626 /* copy the crud byte at a time */
627 cmp r2, #2
628 ldrb r3, [r1, #-1]!
629 strb r3, [r0, #-1]!
630 ldrbge r3, [r1, #-1]!
631 strbge r3, [r0, #-1]!
632 ldrbgt r3, [r1, #-1]!
633 strbgt r3, [r0, #-1]!
634 RET
635
636 /* erg - unaligned destination */
637 .Lmemmove_bdestul:
638 cmp r12, #2
639
640 /* align destination with byte copies */
641 ldrb r3, [r1, #-1]!
642 strb r3, [r0, #-1]!
643 ldrbge r3, [r1, #-1]!
644 strbge r3, [r0, #-1]!
645 ldrbgt r3, [r1, #-1]!
646 strbgt r3, [r0, #-1]!
647 subs r2, r2, r12
648 blt .Lmemmove_bl4 /* less than 4 bytes to go */
649 ands r12, r1, #3
650 beq .Lmemmove_bt8 /* we have an aligned source */
651
652 /* erg - unaligned source */
653 /* This is where it gets nasty ... */
/*
 * Backward counterpart of the fsrcul paths: round src down to a word
 * boundary, preload the word at the boundary into r3, then merge
 * downward-adjacent aligned words with lsl/lsr shifts.  Each path
 * fixes r1 forward by its misalignment before joining .Lmemmove_bl4.
 */
654 .Lmemmove_bsrcul:
655 bic r1, r1, #3
656 ldr r3, [r1, #0]
657 cmp r12, #2
658 blt .Lmemmove_bsrcul1
659 beq .Lmemmove_bsrcul2
660 cmp r2, #0x0c
661 blt .Lmemmove_bsrcul3loop4
662 sub r2, r2, #0x0c
663 stmdb sp!, {r4, r5, lr}
664
665 .Lmemmove_bsrcul3loop16:
666 mov lr, r3, lsl #8
667 ldmdb r1!, {r3-r5, r12}
668 orr lr, lr, r12, lsr #24
669 mov r12, r12, lsl #8
670 orr r12, r12, r5, lsr #24
671 mov r5, r5, lsl #8
672 orr r5, r5, r4, lsr #24
673 mov r4, r4, lsl #8
674 orr r4, r4, r3, lsr #24
675 stmdb r0!, {r4, r5, r12, lr}
676 subs r2, r2, #0x10
677 bge .Lmemmove_bsrcul3loop16
678 ldmia sp!, {r4, r5, lr}
679 adds r2, r2, #0x0c
680 blt .Lmemmove_bsrcul3l4
681
682 .Lmemmove_bsrcul3loop4:
683 mov r12, r3, lsl #8
684 ldr r3, [r1, #-4]!
685 orr r12, r12, r3, lsr #24
686 str r12, [r0, #-4]!
687 subs r2, r2, #4
688 bge .Lmemmove_bsrcul3loop4
689
690 .Lmemmove_bsrcul3l4:
691 add r1, r1, #3 /* restore true (unaligned) src pointer */
692 b .Lmemmove_bl4
693
694 .Lmemmove_bsrcul2:
695 cmp r2, #0x0c
696 blt .Lmemmove_bsrcul2loop4
697 sub r2, r2, #0x0c
698 stmdb sp!, {r4, r5, lr}
699
700 .Lmemmove_bsrcul2loop16:
701 mov lr, r3, lsl #16
702 ldmdb r1!, {r3-r5, r12}
703 orr lr, lr, r12, lsr #16
704 mov r12, r12, lsl #16
705 orr r12, r12, r5, lsr #16
706 mov r5, r5, lsl #16
707 orr r5, r5, r4, lsr #16
708 mov r4, r4, lsl #16
709 orr r4, r4, r3, lsr #16
710 stmdb r0!, {r4, r5, r12, lr}
711 subs r2, r2, #0x10
712 bge .Lmemmove_bsrcul2loop16
713 ldmia sp!, {r4, r5, lr}
714 adds r2, r2, #0x0c
715 blt .Lmemmove_bsrcul2l4
716
717 .Lmemmove_bsrcul2loop4:
718 mov r12, r3, lsl #16
719 ldr r3, [r1, #-4]!
720 orr r12, r12, r3, lsr #16
721 str r12, [r0, #-4]!
722 subs r2, r2, #4
723 bge .Lmemmove_bsrcul2loop4
724
725 .Lmemmove_bsrcul2l4:
726 add r1, r1, #2 /* restore true (unaligned) src pointer */
727 b .Lmemmove_bl4
728
729 .Lmemmove_bsrcul1:
730 cmp r2, #0x0c
731 blt .Lmemmove_bsrcul1loop4
732 sub r2, r2, #0x0c
733 stmdb sp!, {r4, r5, lr}
734
735 .Lmemmove_bsrcul1loop32:
736 mov lr, r3, lsl #24
737 ldmdb r1!, {r3-r5, r12}
738 orr lr, lr, r12, lsr #8
739 mov r12, r12, lsl #24
740 orr r12, r12, r5, lsr #8
741 mov r5, r5, lsl #24
742 orr r5, r5, r4, lsr #8
743 mov r4, r4, lsl #24
744 orr r4, r4, r3, lsr #8
745 stmdb r0!, {r4, r5, r12, lr}
746 subs r2, r2, #0x10
747 bge .Lmemmove_bsrcul1loop32
748 ldmia sp!, {r4, r5, lr}
749 adds r2, r2, #0x0c
750 blt .Lmemmove_bsrcul1l4
751
752 .Lmemmove_bsrcul1loop4:
753 mov r12, r3, lsl #24
754 ldr r3, [r1, #-4]!
755 orr r12, r12, r3, lsr #8
756 str r12, [r0, #-4]!
757 subs r2, r2, #4
758 bge .Lmemmove_bsrcul1loop4
759
760 .Lmemmove_bsrcul1l4:
761 add r1, r1, #1 /* restore true (unaligned) src pointer */
762 b .Lmemmove_bl4
763 EEND(memmove)
764 END(bcopy)
765
766 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
767 ENTRY(memcpy)
768 pld [r1]
769 cmp r2, #0x0c
770 ble .Lmemcpy_short /* <= 12 bytes */
771 #ifdef FLASHADDR
772 #if FLASHADDR > PHYSADDR
773 ldr r3, =FLASHADDR
774 cmp r3, pc
775 bls .Lnormal
776 #else
777 ldr r3, =FLASHADDR
778 cmp r3, pc
779 bhi .Lnormal
780 #endif
781 #endif
782 ldr r3, .L_arm_memcpy
783 ldr r3, [r3]
784 cmp r3, #0
785 beq .Lnormal
786 ldr r3, .L_min_memcpy_size
787 ldr r3, [r3]
788 cmp r2, r3
789 blt .Lnormal
790 stmfd sp!, {r0-r2, r4, lr}
791 mov r3, #0
792 ldr r4, .L_arm_memcpy
793 mov lr, pc
794 ldr pc, [r4]
795 cmp r0, #0
796 ldmfd sp!, {r0-r2, r4, lr}
797 RETeq
798 .Lnormal:
799 mov r3, r0 /* We must not clobber r0 */
800
801 /* Word-align the destination buffer */
802 ands ip, r3, #0x03 /* Already word aligned? */
803 beq .Lmemcpy_wordaligned /* Yup */
804 cmp ip, #0x02
805 ldrb ip, [r1], #0x01
806 sub r2, r2, #0x01
807 strb ip, [r3], #0x01
808 ldrble ip, [r1], #0x01
809 suble r2, r2, #0x01
810 strble ip, [r3], #0x01
811 ldrblt ip, [r1], #0x01
812 sublt r2, r2, #0x01
813 strblt ip, [r3], #0x01
814
815 /* Destination buffer is now word aligned */
816 .Lmemcpy_wordaligned:
817 ands ip, r1, #0x03 /* Is src also word-aligned? */
818 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
819
820 /* Quad-align the destination buffer */
821 tst r3, #0x07 /* Already quad aligned? */
822 ldrne ip, [r1], #0x04
823 stmfd sp!, {r4-r9} /* Free up some registers */
824 subne r2, r2, #0x04
825 strne ip, [r3], #0x04
826
827 /* Destination buffer quad aligned, source is at least word aligned */
828 subs r2, r2, #0x80
829 blt .Lmemcpy_w_lessthan128
830
831 /* Copy 128 bytes at a time */
832 .Lmemcpy_w_loop128:
833 ldr r4, [r1], #0x04 /* LD:00-03 */
834 ldr r5, [r1], #0x04 /* LD:04-07 */
835 pld [r1, #0x18] /* Prefetch 0x20 */
836 ldr r6, [r1], #0x04 /* LD:08-0b */
837 ldr r7, [r1], #0x04 /* LD:0c-0f */
838 ldr r8, [r1], #0x04 /* LD:10-13 */
839 ldr r9, [r1], #0x04 /* LD:14-17 */
840 strd r4, [r3], #0x08 /* ST:00-07 */
841 ldr r4, [r1], #0x04 /* LD:18-1b */
842 ldr r5, [r1], #0x04 /* LD:1c-1f */
843 strd r6, [r3], #0x08 /* ST:08-0f */
844 ldr r6, [r1], #0x04 /* LD:20-23 */
845 ldr r7, [r1], #0x04 /* LD:24-27 */
846 pld [r1, #0x18] /* Prefetch 0x40 */
847 strd r8, [r3], #0x08 /* ST:10-17 */
848 ldr r8, [r1], #0x04 /* LD:28-2b */
849 ldr r9, [r1], #0x04 /* LD:2c-2f */
850 strd r4, [r3], #0x08 /* ST:18-1f */
851 ldr r4, [r1], #0x04 /* LD:30-33 */
852 ldr r5, [r1], #0x04 /* LD:34-37 */
853 strd r6, [r3], #0x08 /* ST:20-27 */
854 ldr r6, [r1], #0x04 /* LD:38-3b */
855 ldr r7, [r1], #0x04 /* LD:3c-3f */
856 strd r8, [r3], #0x08 /* ST:28-2f */
857 ldr r8, [r1], #0x04 /* LD:40-43 */
858 ldr r9, [r1], #0x04 /* LD:44-47 */
859 pld [r1, #0x18] /* Prefetch 0x60 */
860 strd r4, [r3], #0x08 /* ST:30-37 */
861 ldr r4, [r1], #0x04 /* LD:48-4b */
862 ldr r5, [r1], #0x04 /* LD:4c-4f */
863 strd r6, [r3], #0x08 /* ST:38-3f */
864 ldr r6, [r1], #0x04 /* LD:50-53 */
865 ldr r7, [r1], #0x04 /* LD:54-57 */
866 strd r8, [r3], #0x08 /* ST:40-47 */
867 ldr r8, [r1], #0x04 /* LD:58-5b */
868 ldr r9, [r1], #0x04 /* LD:5c-5f */
869 strd r4, [r3], #0x08 /* ST:48-4f */
870 ldr r4, [r1], #0x04 /* LD:60-63 */
871 ldr r5, [r1], #0x04 /* LD:64-67 */
872 pld [r1, #0x18] /* Prefetch 0x80 */
873 strd r6, [r3], #0x08 /* ST:50-57 */
874 ldr r6, [r1], #0x04 /* LD:68-6b */
875 ldr r7, [r1], #0x04 /* LD:6c-6f */
876 strd r8, [r3], #0x08 /* ST:58-5f */
877 ldr r8, [r1], #0x04 /* LD:70-73 */
878 ldr r9, [r1], #0x04 /* LD:74-77 */
879 strd r4, [r3], #0x08 /* ST:60-67 */
880 ldr r4, [r1], #0x04 /* LD:78-7b */
881 ldr r5, [r1], #0x04 /* LD:7c-7f */
882 strd r6, [r3], #0x08 /* ST:68-6f */
883 strd r8, [r3], #0x08 /* ST:70-77 */
884 subs r2, r2, #0x80
885 strd r4, [r3], #0x08 /* ST:78-7f */
886 bge .Lmemcpy_w_loop128
887
888 .Lmemcpy_w_lessthan128:
889 adds r2, r2, #0x80 /* Adjust for extra sub */
890 ldmfdeq sp!, {r4-r9}
891 RETeq /* Return now if done */
892 subs r2, r2, #0x20
893 blt .Lmemcpy_w_lessthan32
894
895 /* Copy 32 bytes at a time */
896 .Lmemcpy_w_loop32:
897 ldr r4, [r1], #0x04
898 ldr r5, [r1], #0x04
899 pld [r1, #0x18]
900 ldr r6, [r1], #0x04
901 ldr r7, [r1], #0x04
902 ldr r8, [r1], #0x04
903 ldr r9, [r1], #0x04
904 strd r4, [r3], #0x08
905 ldr r4, [r1], #0x04
906 ldr r5, [r1], #0x04
907 strd r6, [r3], #0x08
908 strd r8, [r3], #0x08
909 subs r2, r2, #0x20
910 strd r4, [r3], #0x08
911 bge .Lmemcpy_w_loop32
912
913 .Lmemcpy_w_lessthan32:
914 adds r2, r2, #0x20 /* Adjust for extra sub */
915 ldmfdeq sp!, {r4-r9}
916 RETeq /* Return now if done */
917
918 and r4, r2, #0x18
919 rsbs r4, r4, #0x18
920 addne pc, pc, r4, lsl #1
921 nop
922
923 /* At least 24 bytes remaining */
924 ldr r4, [r1], #0x04
925 ldr r5, [r1], #0x04
926 sub r2, r2, #0x08
927 strd r4, [r3], #0x08
928
929 /* At least 16 bytes remaining */
930 ldr r4, [r1], #0x04
931 ldr r5, [r1], #0x04
932 sub r2, r2, #0x08
933 strd r4, [r3], #0x08
934
935 /* At least 8 bytes remaining */
936 ldr r4, [r1], #0x04
937 ldr r5, [r1], #0x04
938 subs r2, r2, #0x08
939 strd r4, [r3], #0x08
940
941 /* Less than 8 bytes remaining */
942 ldmfd sp!, {r4-r9}
943 RETeq /* Return now if done */
944 subs r2, r2, #0x04
945 ldrge ip, [r1], #0x04
946 strge ip, [r3], #0x04
947 RETeq /* Return now if done */
948 addlt r2, r2, #0x04
949 ldrb ip, [r1], #0x01
950 cmp r2, #0x02
951 ldrbge r2, [r1], #0x01
952 strb ip, [r3], #0x01
953 ldrbgt ip, [r1]
954 strbge r2, [r3], #0x01
955 strbgt ip, [r3]
956 RET
957 /* Place a literal pool here for the above ldr instructions to use */
958 .ltorg
959
960
961 /*
962 * At this point, it has not been possible to word align both buffers.
963 * The destination buffer is word aligned, but the source buffer is not.
964 */
965 .Lmemcpy_bad_align:
966 stmfd sp!, {r4-r7}
967 bic r1, r1, #0x03
968 cmp ip, #2
969 ldr ip, [r1], #0x04
970 bgt .Lmemcpy_bad3
971 beq .Lmemcpy_bad2
972 b .Lmemcpy_bad1
973
974 .Lmemcpy_bad1_loop16:
975 mov r4, ip, lsr #8
976 ldr r5, [r1], #0x04
977 pld [r1, #0x018]
978 ldr r6, [r1], #0x04
979 ldr r7, [r1], #0x04
980 ldr ip, [r1], #0x04
981 orr r4, r4, r5, lsl #24
982 mov r5, r5, lsr #8
983 orr r5, r5, r6, lsl #24
984 mov r6, r6, lsr #8
985 orr r6, r6, r7, lsl #24
986 mov r7, r7, lsr #8
987 orr r7, r7, ip, lsl #24
988 str r4, [r3], #0x04
989 str r5, [r3], #0x04
990 str r6, [r3], #0x04
991 str r7, [r3], #0x04
992 .Lmemcpy_bad1:
993 subs r2, r2, #0x10
994 bge .Lmemcpy_bad1_loop16
995
996 adds r2, r2, #0x10
997 ldmfdeq sp!, {r4-r7}
998 RETeq /* Return now if done */
999 subs r2, r2, #0x04
1000 sublt r1, r1, #0x03
1001 blt .Lmemcpy_bad_done
1002
1003 .Lmemcpy_bad1_loop4:
1004 mov r4, ip, lsr #8
1005 ldr ip, [r1], #0x04
1006 subs r2, r2, #0x04
1007 orr r4, r4, ip, lsl #24
1008 str r4, [r3], #0x04
1009 bge .Lmemcpy_bad1_loop4
1010 sub r1, r1, #0x03
1011 b .Lmemcpy_bad_done
1012
1013 .Lmemcpy_bad2_loop16:
1014 mov r4, ip, lsr #16
1015 ldr r5, [r1], #0x04
1016 pld [r1, #0x018]
1017 ldr r6, [r1], #0x04
1018 ldr r7, [r1], #0x04
1019 ldr ip, [r1], #0x04
1020 orr r4, r4, r5, lsl #16
1021 mov r5, r5, lsr #16
1022 orr r5, r5, r6, lsl #16
1023 mov r6, r6, lsr #16
1024 orr r6, r6, r7, lsl #16
1025 mov r7, r7, lsr #16
1026 orr r7, r7, ip, lsl #16
1027 str r4, [r3], #0x04
1028 str r5, [r3], #0x04
1029 str r6, [r3], #0x04
1030 str r7, [r3], #0x04
1031 .Lmemcpy_bad2:
1032 subs r2, r2, #0x10
1033 bge .Lmemcpy_bad2_loop16
1034
1035 adds r2, r2, #0x10
1036 ldmfdeq sp!, {r4-r7}
1037 RETeq /* Return now if done */
1038 subs r2, r2, #0x04
1039 sublt r1, r1, #0x02
1040 blt .Lmemcpy_bad_done
1041
1042 .Lmemcpy_bad2_loop4:
1043 mov r4, ip, lsr #16
1044 ldr ip, [r1], #0x04
1045 subs r2, r2, #0x04
1046 orr r4, r4, ip, lsl #16
1047 str r4, [r3], #0x04
1048 bge .Lmemcpy_bad2_loop4
1049 sub r1, r1, #0x02
1050 b .Lmemcpy_bad_done
1051
1052 .Lmemcpy_bad3_loop16:
1053 mov r4, ip, lsr #24
1054 ldr r5, [r1], #0x04
1055 pld [r1, #0x018]
1056 ldr r6, [r1], #0x04
1057 ldr r7, [r1], #0x04
1058 ldr ip, [r1], #0x04
1059 orr r4, r4, r5, lsl #8
1060 mov r5, r5, lsr #24
1061 orr r5, r5, r6, lsl #8
1062 mov r6, r6, lsr #24
1063 orr r6, r6, r7, lsl #8
1064 mov r7, r7, lsr #24
1065 orr r7, r7, ip, lsl #8
1066 str r4, [r3], #0x04
1067 str r5, [r3], #0x04
1068 str r6, [r3], #0x04
1069 str r7, [r3], #0x04
1070 .Lmemcpy_bad3:
1071 subs r2, r2, #0x10
1072 bge .Lmemcpy_bad3_loop16
1073
1074 adds r2, r2, #0x10
1075 ldmfdeq sp!, {r4-r7}
1076 RETeq /* Return now if done */
1077 subs r2, r2, #0x04
1078 sublt r1, r1, #0x01
1079 blt .Lmemcpy_bad_done
1080
1081 .Lmemcpy_bad3_loop4:
1082 mov r4, ip, lsr #24
1083 ldr ip, [r1], #0x04
1084 subs r2, r2, #0x04
1085 orr r4, r4, ip, lsl #8
1086 str r4, [r3], #0x04
1087 bge .Lmemcpy_bad3_loop4
1088 sub r1, r1, #0x01
1089
1090 .Lmemcpy_bad_done:
1091 ldmfd sp!, {r4-r7}
1092 adds r2, r2, #0x04
1093 RETeq
1094 ldrb ip, [r1], #0x01
1095 cmp r2, #0x02
1096 ldrbge r2, [r1], #0x01
1097 strb ip, [r3], #0x01
1098 ldrbgt ip, [r1]
1099 strbge r2, [r3], #0x01
1100 strbgt ip, [r3]
1101 RET
1102
1103
1104 /*
1105 * Handle short copies (less than 16 bytes), possibly misaligned.
1106 * Some of these are *very* common, thanks to the network stack,
1107 * and so are handled specially.
1108 */
1109 .Lmemcpy_short:
/*
 * Computed jump on the byte count in r2.  In ARM state the pc reads
 * as the current instruction's address + 8, so with the nop below
 * this lands on table entry r2.  The table covers counts 0x00-0x0c;
 * NOTE(review): the dispatch test that enters here is above this
 * chunk — confirm it only comes here for counts the table covers.
 */
1110 add pc, pc, r2, lsl #2
1111 nop
1112 RET /* 0x00 */
1113 b .Lmemcpy_bytewise /* 0x01 */
1114 b .Lmemcpy_bytewise /* 0x02 */
1115 b .Lmemcpy_bytewise /* 0x03 */
1116 b .Lmemcpy_4 /* 0x04 */
1117 b .Lmemcpy_bytewise /* 0x05 */
1118 b .Lmemcpy_6 /* 0x06 */
1119 b .Lmemcpy_bytewise /* 0x07 */
1120 b .Lmemcpy_8 /* 0x08 */
1121 b .Lmemcpy_bytewise /* 0x09 */
1122 b .Lmemcpy_bytewise /* 0x0a */
1123 b .Lmemcpy_bytewise /* 0x0b */
1124 b .Lmemcpy_c /* 0x0c */
1125 .Lmemcpy_bytewise:
/* Simple byte loop; r2 >= 1 here (count 0 returned from the table). */
1126 mov r3, r0 /* We must not clobber r0 (the return value) */
1127 ldrb ip, [r1], #0x01
1128 1: subs r2, r2, #0x01
1129 strb ip, [r3], #0x01
1130 ldrbne ip, [r1], #0x01
1131 bne 1b
1132 RET
1133
1134 /******************************************************************************
1135 * Special case for 4 byte copies
1136 */
1137 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1138 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1139 LMEMCPY_4_PAD
1140 .Lmemcpy_4:
/*
 * Dispatch on the alignment tuple r2 = ((dst & 3) << 2) | (src & 3).
 * The pc reads as instruction address + 8, so `sub r3, pc, #0x14`
 * yields the address of .Lmemcpy_4 itself; case N then lives at
 * .Lmemcpy_4 + (N << LMEMCPY_4_LOG2) thanks to the 64-byte padding
 * between cases.  Case 0000 falls through (ands set the Z flag).
 * The byte-diagram comments ("r3 = 3210" etc.) number the bytes of
 * the 4-byte datum 0-3, byte 0 being the lowest-addressed; the
 * arithmetic annotations track the little-endian register layout.
 */
1141 and r2, r1, #0x03
1142 orr r2, r2, r0, lsl #2
1143 ands r2, r2, #0x0f
1144 sub r3, pc, #0x14
1145 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1146
1147 /*
1148 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1149 */
1150 ldr r2, [r1]
1151 str r2, [r0]
1152 RET
1153 LMEMCPY_4_PAD
1154
1155 /*
1156 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1157 */
1158 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1159 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1160 mov r3, r3, lsr #8 /* r3 = .210 */
1161 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1162 str r3, [r0]
1163 RET
1164 LMEMCPY_4_PAD
1165
1166 /*
1167 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1168 */
1169 ldrh r3, [r1, #0x02]
1170 ldrh r2, [r1]
1171 orr r3, r2, r3, lsl #16
1172 str r3, [r0]
1173 RET
1174 LMEMCPY_4_PAD
1175
1176 /*
1177 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1178 */
1179 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1180 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1181 mov r3, r3, lsr #24 /* r3 = ...0 */
1182 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1183 str r3, [r0]
1184 RET
1185 LMEMCPY_4_PAD
1186
1187 /*
1188 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1189 */
1190 ldr r2, [r1]
1191 strb r2, [r0]
1192 mov r3, r2, lsr #8
1193 mov r1, r2, lsr #24
1194 strb r1, [r0, #0x03]
1195 strh r3, [r0, #0x01]
1196 RET
1197 LMEMCPY_4_PAD
1198
1199 /*
1200 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1201 */
1202 ldrb r2, [r1]
1203 ldrh r3, [r1, #0x01]
1204 ldrb r1, [r1, #0x03]
1205 strb r2, [r0]
1206 strh r3, [r0, #0x01]
1207 strb r1, [r0, #0x03]
1208 RET
1209 LMEMCPY_4_PAD
1210
1211 /*
1212 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1213 */
1214 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1215 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1216 strb r2, [r0]
1217 mov r2, r2, lsr #8 /* r2 = ...1 */
1218 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1219 mov r3, r3, lsr #8 /* r3 = ...3 */
1220 strh r2, [r0, #0x01]
1221 strb r3, [r0, #0x03]
1222 RET
1223 LMEMCPY_4_PAD
1224
1225 /*
1226 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1227 */
1228 ldrb r2, [r1]
1229 ldrh r3, [r1, #0x01]
1230 ldrb r1, [r1, #0x03]
1231 strb r2, [r0]
1232 strh r3, [r0, #0x01]
1233 strb r1, [r0, #0x03]
1234 RET
1235 LMEMCPY_4_PAD
1236
1237 /*
1238 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1239 */
1240 ldr r2, [r1]
1241 strh r2, [r0]
1242 mov r3, r2, lsr #16
1243 strh r3, [r0, #0x02]
1244 RET
1245 LMEMCPY_4_PAD
1246
1247 /*
1248 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1249 */
1250 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1251 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1252 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1253 strh r1, [r0]
1254 mov r2, r2, lsr #24 /* r2 = ...2 */
1255 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1256 strh r2, [r0, #0x02]
1257 RET
1258 LMEMCPY_4_PAD
1259
1260 /*
1261 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1262 */
1263 ldrh r2, [r1]
1264 ldrh r3, [r1, #0x02]
1265 strh r2, [r0]
1266 strh r3, [r0, #0x02]
1267 RET
1268 LMEMCPY_4_PAD
1269
1270 /*
1271 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1272 */
1273 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1274 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1275 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1276 strh r1, [r0, #0x02]
1277 mov r3, r3, lsl #8 /* r3 = 321. */
1278 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1279 strh r3, [r0]
1280 RET
1281 LMEMCPY_4_PAD
1282
1283 /*
1284 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1285 */
1286 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1287 strb r2, [r0]
1288 mov r3, r2, lsr #8
1289 mov r1, r2, lsr #24
1290 strh r3, [r0, #0x01]
1291 strb r1, [r0, #0x03]
1292 RET
1293 LMEMCPY_4_PAD
1294
1295 /*
1296 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1297 */
1298 ldrb r2, [r1]
1299 ldrh r3, [r1, #0x01]
1300 ldrb r1, [r1, #0x03]
1301 strb r2, [r0]
1302 strh r3, [r0, #0x01]
1303 strb r1, [r0, #0x03]
1304 RET
1305 LMEMCPY_4_PAD
1306
1307 /*
1308 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1309 */
1310 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1311 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1312 strb r2, [r0]
1313 mov r2, r2, lsr #8 /* r2 = ...1 */
1314 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1315 strh r2, [r0, #0x01]
1316 mov r3, r3, lsr #8 /* r3 = ...3 */
1317 strb r3, [r0, #0x03]
1318 RET
1319 LMEMCPY_4_PAD
1320
1321 /*
1322 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1323 */
1324 ldrb r2, [r1]
1325 ldrh r3, [r1, #0x01]
1326 ldrb r1, [r1, #0x03]
1327 strb r2, [r0]
1328 strh r3, [r0, #0x01]
1329 strb r1, [r0, #0x03]
1330 RET
1331 LMEMCPY_4_PAD
1332
1333
1334 /******************************************************************************
1335 * Special case for 6 byte copies
1336 */
1337 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1338 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1339 LMEMCPY_6_PAD
1340 .Lmemcpy_6:
/*
 * Dispatch on r2 = ((dst & 3) << 2) | (src & 3); same mechanism as
 * .Lmemcpy_4: pc reads as instruction address + 8, so the sub yields
 * the address of .Lmemcpy_6 and case N lives at .Lmemcpy_6 + N * 64.
 * Case 0000 falls through.  Bytes of the 6-byte datum are numbered
 * 0-5 in the diagram comments, 0 = lowest-addressed.
 */
1341 and r2, r1, #0x03
1342 orr r2, r2, r0, lsl #2
1343 ands r2, r2, #0x0f
1344 sub r3, pc, #0x14
1345 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1346
1347 /*
1348 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1349 */
1350 ldr r2, [r1]
1351 ldrh r3, [r1, #0x04]
1352 str r2, [r0]
1353 strh r3, [r0, #0x04]
1354 RET
1355 LMEMCPY_6_PAD
1356
1357 /*
1358 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1359 */
1360 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1361 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1362 mov r2, r2, lsr #8 /* r2 = .210 */
1363 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1364 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1365 str r2, [r0]
1366 strh r3, [r0, #0x04]
1367 RET
1368 LMEMCPY_6_PAD
1369
1370 /*
1371 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1372 */
1373 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1374 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1375 mov r1, r3, lsr #16 /* r1 = ..54 */
1376 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1377 str r2, [r0]
1378 strh r1, [r0, #0x04]
1379 RET
1380 LMEMCPY_6_PAD
1381
1382 /*
1383 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1384 */
1385 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1386 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
1387 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */
1388 mov r2, r2, lsr #24 /* r2 = ...0 */
1389 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1390 mov r1, r1, lsl #8 /* r1 = xx5. */
1391 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
1392 str r2, [r0]
1393 strh r1, [r0, #0x04]
1394 RET
1395 LMEMCPY_6_PAD
1396
1397 /*
1398 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1399 */
1400 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1401 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1402 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1403 strh r1, [r0, #0x01]
1404 strb r3, [r0]
1405 mov r3, r3, lsr #24 /* r3 = ...3 */
1406 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1407 mov r2, r2, lsr #8 /* r2 = ...5 */
1408 strh r3, [r0, #0x03]
1409 strb r2, [r0, #0x05]
1410 RET
1411 LMEMCPY_6_PAD
1412
1413 /*
1414 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1415 */
1416 ldrb r2, [r1]
1417 ldrh r3, [r1, #0x01]
1418 ldrh ip, [r1, #0x03]
1419 ldrb r1, [r1, #0x05]
1420 strb r2, [r0]
1421 strh r3, [r0, #0x01]
1422 strh ip, [r0, #0x03]
1423 strb r1, [r0, #0x05]
1424 RET
1425 LMEMCPY_6_PAD
1426
1427 /*
1428 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1429 */
1430 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1431 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1432 strb r2, [r0]
1433 mov r3, r1, lsr #24
1434 strb r3, [r0, #0x05]
1435 mov r3, r1, lsr #8 /* r3 = .543 */
1436 strh r3, [r0, #0x03]
1437 mov r3, r2, lsr #8 /* r3 = ...1 */
1438 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
1439 strh r3, [r0, #0x01]
1440 RET
1441 LMEMCPY_6_PAD
1442
1443 /*
1444 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1445 */
1446 ldrb r2, [r1]
1447 ldrh r3, [r1, #0x01]
1448 ldrh ip, [r1, #0x03]
1449 ldrb r1, [r1, #0x05]
1450 strb r2, [r0]
1451 strh r3, [r0, #0x01]
1452 strh ip, [r0, #0x03]
1453 strb r1, [r0, #0x05]
1454 RET
1455 LMEMCPY_6_PAD
1456
1457 /*
1458 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1459 */
1460 ldrh r2, [r1, #0x04] /* r2 = ..54 */
1461 ldr r3, [r1] /* r3 = 3210 */
1462 mov r2, r2, lsl #16 /* r2 = 54.. */
1463 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
1464 strh r3, [r0]
1465 str r2, [r0, #0x02]
1466 RET
1467 LMEMCPY_6_PAD
1468
1469 /*
1470 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1471 */
1472 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1473 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
1474 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1475 mov r2, r2, lsl #8 /* r2 = 543. */
1476 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
1477 strh r1, [r0]
1478 str r2, [r0, #0x02]
1479 RET
1480 LMEMCPY_6_PAD
1481
1482 /*
1483 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1484 */
1485 ldrh r2, [r1]
1486 ldr r3, [r1, #0x02]
1487 strh r2, [r0]
1488 str r3, [r0, #0x02]
1489 RET
1490 LMEMCPY_6_PAD
1491
1492 /*
1493 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1494 */
1495 ldrb r3, [r1] /* r3 = ...0 */
1496 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1497 ldrb r1, [r1, #0x05] /* r1 = ...5 */
1498 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1499 mov r1, r1, lsl #24 /* r1 = 5... */
1500 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
1501 strh r3, [r0]
1502 str r1, [r0, #0x02]
1503 RET
1504 LMEMCPY_6_PAD
1505
1506 /*
1507 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1508 */
1509 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1510 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
1511 strb r2, [r0]
1512 mov r2, r2, lsr #8 /* r2 = .321 */
1513 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
1514 mov r1, r1, lsr #8 /* r1 = ...5 */
1515 str r2, [r0, #0x01]
1516 strb r1, [r0, #0x05]
1517 RET
1518 LMEMCPY_6_PAD
1519
1520 /*
1521 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1522 */
1523 ldrb r2, [r1]
1524 ldrh r3, [r1, #0x01]
1525 ldrh ip, [r1, #0x03]
1526 ldrb r1, [r1, #0x05]
1527 strb r2, [r0]
1528 strh r3, [r0, #0x01]
1529 strh ip, [r0, #0x03]
1530 strb r1, [r0, #0x05]
1531 RET
1532 LMEMCPY_6_PAD
1533
1534 /*
1535 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1536 */
1537 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1538 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1539 strb r2, [r0]
1540 mov r2, r2, lsr #8 /* r2 = ...1 */
1541 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
1542 mov r1, r1, lsr #24 /* r1 = ...5 */
1543 str r2, [r0, #0x01]
1544 strb r1, [r0, #0x05]
1545 RET
1546 LMEMCPY_6_PAD
1547
1548 /*
1549 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1550 */
1551 ldrb r2, [r1]
1552 ldr r3, [r1, #0x01]
1553 ldrb r1, [r1, #0x05]
1554 strb r2, [r0]
1555 str r3, [r0, #0x01]
1556 strb r1, [r0, #0x05]
1557 RET
1558 LMEMCPY_6_PAD
1559
1560
1561 /******************************************************************************
1562 * Special case for 8 byte copies
1563 */
1564 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
1565 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
1566 LMEMCPY_8_PAD
1567 .Lmemcpy_8:
/*
 * Dispatch on r2 = ((dst & 3) << 2) | (src & 3); same mechanism as
 * .Lmemcpy_4: pc reads as instruction address + 8, so the sub yields
 * the address of .Lmemcpy_8 and case N lives at .Lmemcpy_8 + N * 64.
 * Case 0000 falls through.  Bytes of the 8-byte datum are numbered
 * 0-7 in the diagram comments, 0 = lowest-addressed.
 */
1568 and r2, r1, #0x03
1569 orr r2, r2, r0, lsl #2
1570 ands r2, r2, #0x0f
1571 sub r3, pc, #0x14
1572 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
1573
1574 /*
1575 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1576 */
1577 ldr r2, [r1]
1578 ldr r3, [r1, #0x04]
1579 str r2, [r0]
1580 str r3, [r0, #0x04]
1581 RET
1582 LMEMCPY_8_PAD
1583
1584 /*
1585 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1586 */
1587 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1588 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
1589 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1590 mov r3, r3, lsr #8 /* r3 = .210 */
1591 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1592 mov r1, r1, lsl #24 /* r1 = 7... */
1593 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
1594 str r3, [r0]
1595 str r2, [r0, #0x04]
1596 RET
1597 LMEMCPY_8_PAD
1598
1599 /*
1600 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1601 */
1602 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1603 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1604 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1605 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1606 mov r3, r3, lsr #16 /* r3 = ..54 */
1607 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
1608 str r2, [r0]
1609 str r3, [r0, #0x04]
1610 RET
1611 LMEMCPY_8_PAD
1612
1613 /*
1614 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1615 */
1616 ldrb r3, [r1] /* r3 = ...0 */
1617 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1618 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
1619 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1620 mov r2, r2, lsr #24 /* r2 = ...4 */
1621 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
1622 str r3, [r0]
1623 str r2, [r0, #0x04]
1624 RET
1625 LMEMCPY_8_PAD
1626
1627 /*
1628 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1629 */
1630 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1631 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
1632 strb r3, [r0]
1633 mov r1, r2, lsr #24 /* r1 = ...7 */
1634 strb r1, [r0, #0x07]
1635 mov r1, r3, lsr #8 /* r1 = .321 */
1636 mov r3, r3, lsr #24 /* r3 = ...3 */
1637 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
1638 strh r1, [r0, #0x01]
1639 str r3, [r0, #0x03]
1640 RET
1641 LMEMCPY_8_PAD
1642
1643 /*
1644 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1645 */
1646 ldrb r2, [r1]
1647 ldrh r3, [r1, #0x01]
1648 ldr ip, [r1, #0x03]
1649 ldrb r1, [r1, #0x07]
1650 strb r2, [r0]
1651 strh r3, [r0, #0x01]
1652 str ip, [r0, #0x03]
1653 strb r1, [r0, #0x07]
1654 RET
1655 LMEMCPY_8_PAD
1656
1657 /*
1658 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1659 */
1660 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1661 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1662 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1663 strb r2, [r0] /* 0 */
1664 mov ip, r1, lsr #8 /* ip = ...7 */
1665 strb ip, [r0, #0x07] /* 7 */
1666 mov ip, r2, lsr #8 /* ip = ...1 */
1667 orr ip, ip, r3, lsl #8 /* ip = 4321 */
1668 mov r3, r3, lsr #8 /* r3 = .543 */
1669 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
1670 strh ip, [r0, #0x01]
1671 str r3, [r0, #0x03]
1672 RET
1673 LMEMCPY_8_PAD
1674
1675 /*
1676 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1677 */
1678 ldrb r3, [r1] /* r3 = ...0 */
1679 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
1680 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
1681 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1682 strb r3, [r0]
1683 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
1684 strh ip, [r0, #0x01]
1685 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
1686 str r2, [r0, #0x03]
1687 strb r1, [r0, #0x07]
1688 RET
1689 LMEMCPY_8_PAD
1690
1691 /*
1692 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1693 */
1694 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1695 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1696 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1697 strh r2, [r0]
1698 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
1699 mov r3, r3, lsr #16 /* r3 = ..76 */
1700 str r2, [r0, #0x02]
1701 strh r3, [r0, #0x06]
1702 RET
1703 LMEMCPY_8_PAD
1704
1705 /*
1706 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1707 */
1708 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1709 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1710 ldrb ip, [r1, #0x07] /* ip = ...7 */
1711 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1712 strh r1, [r0]
1713 mov r1, r2, lsr #24 /* r1 = ...2 */
1714 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
1715 mov r3, r3, lsr #24 /* r3 = ...6 */
1716 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
1717 str r1, [r0, #0x02]
1718 strh r3, [r0, #0x06]
1719 RET
1720 LMEMCPY_8_PAD
1721
1722 /*
1723 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1724 */
1725 ldrh r2, [r1]
1726 ldr ip, [r1, #0x02]
1727 ldrh r3, [r1, #0x06]
1728 strh r2, [r0]
1729 str ip, [r0, #0x02]
1730 strh r3, [r0, #0x06]
1731 RET
1732 LMEMCPY_8_PAD
1733
1734 /*
1735 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1736 */
1737 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
1738 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1739 ldrb ip, [r1] /* ip = ...0 */
1740 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
1741 strh r1, [r0, #0x06]
1742 mov r3, r3, lsl #24 /* r3 = 5... */
1743 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
1744 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
1745 str r3, [r0, #0x02]
1746 strh r2, [r0]
1747 RET
1748 LMEMCPY_8_PAD
1749
1750 /*
1751 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1752 */
1753 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1754 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1755 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
1756 strh r1, [r0, #0x05]
1757 strb r2, [r0]
1758 mov r1, r3, lsr #24 /* r1 = ...7 */
1759 strb r1, [r0, #0x07]
1760 mov r2, r2, lsr #8 /* r2 = .321 */
1761 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
1762 str r2, [r0, #0x01]
1763 RET
1764 LMEMCPY_8_PAD
1765
1766 /*
1767 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1768 */
1769 ldrb r3, [r1] /* r3 = ...0 */
1770 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
1771 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
1772 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1773 strb r3, [r0]
1774 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
1775 strh r3, [r0, #0x05]
1776 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
1777 str r2, [r0, #0x01]
1778 strb r1, [r0, #0x07]
1779 RET
1780 LMEMCPY_8_PAD
1781
1782 /*
1783 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1784 */
1785 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1786 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1787 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1788 strb r2, [r0]
1789 mov ip, r2, lsr #8 /* ip = ...1 */
1790 orr ip, ip, r3, lsl #8 /* ip = 4321 */
1791 mov r2, r1, lsr #8 /* r2 = ...7 */
1792 strb r2, [r0, #0x07]
1793 mov r1, r1, lsl #8 /* r1 = .76. */
1794 orr r1, r1, r3, lsr #24 /* r1 = .765 */
1795 str ip, [r0, #0x01]
1796 strh r1, [r0, #0x05]
1797 RET
1798 LMEMCPY_8_PAD
1799
1800 /*
1801 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1802 */
1803 ldrb r2, [r1]
1804 ldr ip, [r1, #0x01]
1805 ldrh r3, [r1, #0x05]
1806 ldrb r1, [r1, #0x07]
1807 strb r2, [r0]
1808 str ip, [r0, #0x01]
1809 strh r3, [r0, #0x05]
1810 strb r1, [r0, #0x07]
1811 RET
1812 LMEMCPY_8_PAD
1813
1814 /******************************************************************************
1815 * Special case for 12 byte copies
1816 */
1817 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
1818 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
1819 LMEMCPY_C_PAD
1820 .Lmemcpy_c:
/*
 * Dispatch on r2 = ((dst & 3) << 2) | (src & 3); same mechanism as
 * .Lmemcpy_4, but with 128-byte case slots since the 12-byte cases
 * need more instructions.  pc reads as instruction address + 8, so
 * the sub yields the address of .Lmemcpy_c and case N lives at
 * .Lmemcpy_c + N * 128.  Case 0000 falls through.  Bytes of the
 * 12-byte datum are numbered 0-B (hex) in the diagram comments,
 * 0 = lowest-addressed.
 */
1821 and r2, r1, #0x03
1822 orr r2, r2, r0, lsl #2
1823 ands r2, r2, #0x0f
1824 sub r3, pc, #0x14
1825 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
1826
1827 /*
1828 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1829 */
1830 ldr r2, [r1]
1831 ldr r3, [r1, #0x04]
1832 ldr r1, [r1, #0x08]
1833 str r2, [r0]
1834 str r3, [r0, #0x04]
1835 str r1, [r0, #0x08]
1836 RET
1837 LMEMCPY_C_PAD
1838
1839 /*
1840 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1841 */
1842 ldrb r2, [r1, #0xb] /* r2 = ...B */
1843 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1844 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1845 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
1846 mov r2, r2, lsl #24 /* r2 = B... */
1847 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
1848 str r2, [r0, #0x08]
1849 mov r2, ip, lsl #24 /* r2 = 7... */
1850 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
1851 mov r1, r1, lsr #8 /* r1 = .210 */
1852 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
1853 str r2, [r0, #0x04]
1854 str r1, [r0]
1855 RET
1856 LMEMCPY_C_PAD
1857
1858 /*
1859 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1860 */
1861 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1862 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1863 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1864 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1865 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1866 str r2, [r0]
1867 mov r3, r3, lsr #16 /* r3 = ..54 */
1868 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
1869 mov r1, r1, lsl #16 /* r1 = BA.. */
1870 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
1871 str r3, [r0, #0x04]
1872 str r1, [r0, #0x08]
1873 RET
1874 LMEMCPY_C_PAD
1875
1876 /*
1877 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1878 */
1879 ldrb r2, [r1] /* r2 = ...0 */
1880 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1881 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1882 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1883 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1884 str r2, [r0]
1885 mov r3, r3, lsr #24 /* r3 = ...4 */
1886 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
1887 mov r1, r1, lsl #8 /* r1 = BA9. */
1888 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
1889 str r3, [r0, #0x04]
1890 str r1, [r0, #0x08]
1891 RET
1892 LMEMCPY_C_PAD
1893
1894 /*
1895 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1896 */
1897 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1898 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1899 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
1900 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1901 strh r1, [r0, #0x01]
1902 strb r2, [r0]
1903 mov r1, r2, lsr #24 /* r1 = ...3 */
1904 orr r2, r1, r3, lsl #8 /* r2 = 6543 */
1905 mov r1, r3, lsr #24 /* r1 = ...7 */
1906 orr r1, r1, ip, lsl #8 /* r1 = A987 */
1907 mov ip, ip, lsr #24 /* ip = ...B */
1908 str r2, [r0, #0x03]
1909 str r1, [r0, #0x07]
1910 strb ip, [r0, #0x0b]
1911 RET
1912 LMEMCPY_C_PAD
1913
1914 /*
1915 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
1916 */
1917 ldrb r2, [r1]
1918 ldrh r3, [r1, #0x01]
1919 ldr ip, [r1, #0x03]
1920 strb r2, [r0]
1921 ldr r2, [r1, #0x07]
1922 ldrb r1, [r1, #0x0b]
1923 strh r3, [r0, #0x01]
1924 str ip, [r0, #0x03]
1925 str r2, [r0, #0x07]
1926 strb r1, [r0, #0x0b]
1927 RET
1928 LMEMCPY_C_PAD
1929
1930 /*
1931 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
1932 */
1933 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1934 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1935 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1936 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1937 strb r2, [r0]
1938 mov r2, r2, lsr #8 /* r2 = ...1 */
1939 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
1940 strh r2, [r0, #0x01]
1941 mov r2, r3, lsr #8 /* r2 = .543 */
1942 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
1943 mov r2, ip, lsr #8 /* r2 = .987 */
1944 orr r2, r2, r1, lsl #24 /* r2 = A987 */
1945 mov r1, r1, lsr #8 /* r1 = ...B */
1946 str r3, [r0, #0x03]
1947 str r2, [r0, #0x07]
1948 strb r1, [r0, #0x0b]
1949 RET
1950 LMEMCPY_C_PAD
1951
1952 /*
1953 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
1954 */
1955 ldrb r2, [r1]
1956 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1957 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1958 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1959 strb r2, [r0]
1960 strh r3, [r0, #0x01]
1961 mov r3, r3, lsr #16 /* r3 = ..43 */
1962 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
1963 mov ip, ip, lsr #16 /* ip = ..87 */
1964 orr ip, ip, r1, lsl #16 /* ip = A987 */
1965 mov r1, r1, lsr #16 /* r1 = ..xB */
1966 str r3, [r0, #0x03]
1967 str ip, [r0, #0x07]
1968 strb r1, [r0, #0x0b]
1969 RET
1970 LMEMCPY_C_PAD
1971
1972 /*
1973 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1974 */
1975 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
1976 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1977 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
1978 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1979 strh ip, [r0]
1980 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
1981 mov r3, r3, lsr #16 /* r3 = ..76 */
1982 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
1983 mov r2, r2, lsr #16 /* r2 = ..BA */
1984 str r1, [r0, #0x02]
1985 str r3, [r0, #0x06]
1986 strh r2, [r0, #0x0a]
1987 RET
1988 LMEMCPY_C_PAD
1989
1990 /*
1991 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
1992 */
1993 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1994 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1995 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
1996 strh ip, [r0]
1997 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1998 ldrb r1, [r1, #0x0b] /* r1 = ...B */
1999 mov r2, r2, lsr #24 /* r2 = ...2 */
2000 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
2001 mov r3, r3, lsr #24 /* r3 = ...6 */
2002 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
2003 mov r1, r1, lsl #8 /* r1 = ..B. */
2004 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
2005 str r2, [r0, #0x02]
2006 str r3, [r0, #0x06]
2007 strh r1, [r0, #0x0a]
2008 RET
2009 LMEMCPY_C_PAD
2010
2011 /*
2012 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2013 */
2014 ldrh r2, [r1]
2015 ldr r3, [r1, #0x02]
2016 ldr ip, [r1, #0x06]
2017 ldrh r1, [r1, #0x0a]
2018 strh r2, [r0]
2019 str r3, [r0, #0x02]
2020 str ip, [r0, #0x06]
2021 strh r1, [r0, #0x0a]
2022 RET
2023 LMEMCPY_C_PAD
2024
2025 /*
2026 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2027 */
2028 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
2029 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
2030 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
2031 strh ip, [r0, #0x0a]
2032 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2033 ldrb r1, [r1] /* r1 = ...0 */
2034 mov r2, r2, lsl #24 /* r2 = 9... */
2035 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
2036 mov r3, r3, lsl #24 /* r3 = 5... */
2037 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
2038 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
2039 str r2, [r0, #0x06]
2040 str r3, [r0, #0x02]
2041 strh r1, [r0]
2042 RET
2043 LMEMCPY_C_PAD
2044
2045 /*
2046 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2047 */
2048 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2049 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
2050 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
2051 strb r2, [r0]
2052 mov r3, r2, lsr #8 /* r3 = .321 */
2053 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
2054 str r3, [r0, #0x01]
2055 mov r3, ip, lsr #8 /* r3 = .765 */
2056 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
2057 str r3, [r0, #0x05]
2058 mov r1, r1, lsr #8 /* r1 = .BA9 */
2059 strh r1, [r0, #0x09]
2060 mov r1, r1, lsr #16 /* r1 = ...B */
2061 strb r1, [r0, #0x0b]
2062 RET
2063 LMEMCPY_C_PAD
2064
2065 /*
2066 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2067 */
2068 ldrb r2, [r1, #0x0b] /* r2 = ...B */
2069 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
2070 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2071 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2072 strb r2, [r0, #0x0b]
2073 mov r2, r3, lsr #16 /* r2 = ..A9 */
2074 strh r2, [r0, #0x09]
2075 mov r3, r3, lsl #16 /* r3 = 87.. */
2076 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
2077 mov ip, ip, lsl #16 /* ip = 43.. */
2078 orr ip, ip, r1, lsr #16 /* ip = 4321 */
2079 mov r1, r1, lsr #8 /* r1 = .210 */
2080 str r3, [r0, #0x05]
2081 str ip, [r0, #0x01]
2082 strb r1, [r0]
2083 RET
2084 LMEMCPY_C_PAD
2085
2086 /*
2087 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2088 */
2089 ldrh r2, [r1] /* r2 = ..10 */
2090 ldr r3, [r1, #0x02] /* r3 = 5432 */
2091 ldr ip, [r1, #0x06] /* ip = 9876 */
2092 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
2093 strb r2, [r0]
2094 mov r2, r2, lsr #8 /* r2 = ...1 */
2095 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2096 mov r3, r3, lsr #24 /* r3 = ...5 */
2097 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
2098 mov ip, ip, lsr #24 /* ip = ...9 */
2099 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
2100 mov r1, r1, lsr #8 /* r1 = ...B */
2101 str r2, [r0, #0x01]
2102 str r3, [r0, #0x05]
2103 strh ip, [r0, #0x09]
2104 strb r1, [r0, #0x0b]
2105 RET
2106 LMEMCPY_C_PAD
2107
2108 /*
2109 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2110 */
2111 ldrb r2, [r1]
2112 ldr r3, [r1, #0x01]
2113 ldr ip, [r1, #0x05]
2114 strb r2, [r0]
2115 ldrh r2, [r1, #0x09]
2116 ldrb r1, [r1, #0x0b]
2117 str r3, [r0, #0x01]
2118 str ip, [r0, #0x05]
2119 strh r2, [r0, #0x09]
2120 strb r1, [r0, #0x0b]
2121 RET
2122 END(memcpy)
Cache object: 8b03d036167bbfec4105c97f415d4e04
|