FreeBSD/Linux Kernel Cross Reference
sys/arm64/arm64/memset.S

/* Copyright (c) 2012, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Unaligned accesses are supported
 *
 */

#include <machine/asm.h>

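/* Register aliases: each xN name is the 64-bit view of a register and
 * the matching wN name is its low 32 bits (a write to a wN register
 * zeroes the upper half of the corresponding xN register).  */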
#define dstin           x0
#define val             w1
#define count           x2
#define tmp1            x3
#define tmp1w           w3
#define tmp2            x4
#define tmp2w           w4
#define zva_len_x       x5
#define zva_len         w5
#define zva_bits_x      x6

#define A_l             x7
#define A_lw            w7
#define dst             x8
#define tmp3w           w9
ENTRY(memset)

        mov     dst, dstin              /* Preserve return value.  */
        ands    A_lw, val, #255
#ifndef DONT_USE_DC
        b.eq    .Lzero_mem
#endif
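        /* The ands above also set Z when the fill byte is zero, which
         * is what steers zeroing requests to .Lzero_mem.  The orr
         * chain below replicates the byte across all 64 bits of A_l
         * (e.g. 0xab becomes 0xabababababababab), so every store below
         * writes the fill pattern.  */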
        orr     A_lw, A_lw, A_lw, lsl #8
        orr     A_lw, A_lw, A_lw, lsl #16
        orr     A_l, A_l, A_l, lsl #32
.Ltail_maybe_long:
        cmp     count, #64
        b.ge    .Lnot_short
.Ltail_maybe_tiny:
        cmp     count, #15
        b.le    .Ltail15tiny
.Ltail63:
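        /* At most 63 bytes remain (only the low six bits of count are
         * meaningful here).  Bits 5:4 of count select how many 16-byte
         * stores are still needed; enter the store ladder at the
         * matching rung and fall through.  */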
        ands    tmp1, count, #0x30
        b.eq    .Ltail15
        add     dst, dst, tmp1
        cmp     tmp1w, #0x20
        b.eq    1f
        b.lt    2f
        stp     A_l, A_l, [dst, #-48]
1:
        stp     A_l, A_l, [dst, #-32]
2:
        stp     A_l, A_l, [dst, #-16]

.Ltail15:
        and     count, count, #15
        add     dst, dst, count
        /* Overlapping store: repeats some/all of the last store so the
         * fill ends exactly at the final byte of the buffer.  */
        stp     A_l, A_l, [dst, #-16]
        ret

.Ltail15tiny:
        /* Set up to 15 bytes.  Does not assume earlier memory
           being set.  */
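        /* Each set bit of count selects one store: bit 3 stores 8
         * bytes, bit 2 stores 4, bit 1 stores 2, bit 0 stores 1.  tbz
         * skips the store when the bit is clear.  */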
        tbz     count, #3, 1f
        str     A_l, [dst], #8
1:
        tbz     count, #2, 1f
        str     A_lw, [dst], #4
1:
        tbz     count, #1, 1f
        strh    A_lw, [dst], #2
1:
        tbz     count, #0, 1f
        strb    A_lw, [dst]
1:
        ret

        /* Critical loop.  Start at a new cache line boundary.  Assuming
         * 64 bytes per line, this ensures the entire loop is in one line.  */
        .p2align 6
.Lnot_short:
        neg     tmp2, dst
        ands    tmp2, tmp2, #15
        b.eq    2f
        /* Bring DST to 128-bit (16-byte) alignment.  We know that there's
         * more than that to set, so we simply store 16 bytes and advance by
         * the amount required to reach alignment.  */
        sub     count, count, tmp2
        stp     A_l, A_l, [dst]
        add     dst, dst, tmp2
        /* There may be less than 63 bytes to go now.  */
        cmp     count, #63
        b.le    .Ltail63
2:
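        /* Pre-bias dst by -16 and count by -64 so the unrolled loop
         * can end with a single pre-indexed store ([dst, #64]!) that
         * writes the final pair and advances the pointer in one
         * instruction.  */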
        sub     dst, dst, #16           /* Pre-bias.  */
        sub     count, count, #64
1:
        stp     A_l, A_l, [dst, #16]
        stp     A_l, A_l, [dst, #32]
        stp     A_l, A_l, [dst, #48]
        stp     A_l, A_l, [dst, #64]!
        subs    count, count, #64
        b.ge    1b
        tst     count, #0x3f
        add     dst, dst, #16
        b.ne    .Ltail63
        ret

        /* For zeroing memory, check to see if we can use the ZVA feature to
         * zero entire 'cache' lines.  */
.Lzero_mem:
        mov     A_l, #0
        cmp     count, #63
        b.le    .Ltail_maybe_tiny
        neg     tmp2, dst
        ands    tmp2, tmp2, #15
        b.eq    1f
        sub     count, count, tmp2
        stp     A_l, A_l, [dst]
        add     dst, dst, tmp2
        cmp     count, #63
        b.le    .Ltail63
1:
        /* For zeroing small amounts of memory, it's not worth setting up
         * the line-clear code.  */
        cmp     count, #128
        b.lt    .Lnot_short

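        /* dczva_line_size is a kernel global holding the DC ZVA block
         * size in bytes; it is zero when ZVA must not be used (for
         * example when DCZID_EL0 reports that zeroing is prohibited).  */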
        adrp    tmp2, dczva_line_size
        add     tmp2, tmp2, :lo12:dczva_line_size
        ldr     zva_len, [tmp2]
        cbz     zva_len, .Lnot_short

.Lzero_by_line:
        /* Compute how far we need to go to become suitably aligned.  We're
         * already at quad-word alignment.  */
        cmp     count, zva_len_x
        b.lt    .Lnot_short             /* Not enough to reach alignment.  */
        sub     zva_bits_x, zva_len_x, #1
        neg     tmp2, dst
        ands    tmp2, tmp2, zva_bits_x
        b.eq    1f                      /* Already aligned.  */
        /* Not aligned, check that there's enough to copy after alignment.  */
        sub     tmp1, count, tmp2
        cmp     tmp1, #64
        ccmp    tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */
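        /* ccmp: when the previous compare was ge (tmp1 >= 64), compare
         * tmp1 against zva_len_x; otherwise force NZCV to 0b1000 (N
         * set) so the b.lt below is taken.  Net effect: fall back to
         * .Lnot_short unless at least 64 bytes remain after alignment
         * and they still cover a full ZVA block.  */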
        b.lt    .Lnot_short
        /* We know that there's at least 64 bytes to zero and that it's safe
         * to overrun by 64 bytes.  */
        mov     count, tmp1
2:
        stp     A_l, A_l, [dst]
        stp     A_l, A_l, [dst, #16]
        stp     A_l, A_l, [dst, #32]
        subs    tmp2, tmp2, #64
        stp     A_l, A_l, [dst, #48]
        add     dst, dst, #64
        b.ge    2b
        /* We've overrun a bit, so adjust dst downwards.  */
        add     dst, dst, tmp2
1:
        sub     count, count, zva_len_x
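        /* count is biased down by one block so the loop below can test
         * with subs/b.ge.  Each dc zva zeroes an entire zva_len_x-byte
         * block of memory in a single instruction.  */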
3:
        dc      zva, dst
        add     dst, dst, zva_len_x
        subs    count, count, zva_len_x
        b.ge    3b
        ands    count, count, zva_bits_x
        b.ne    .Ltail_maybe_long
        ret
END(memset)
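
For readers following the control flow above, here is a minimal C model of
the two core tricks the routine uses: replicating the fill byte across a
64-bit word (the three orr instructions), and finishing the tail with one
overlapping word store instead of a byte loop (.Ltail15).  This is an
illustrative sketch only, not FreeBSD code: the function name toy_memset is
hypothetical, it works in 8-byte units rather than the 16-byte pairs used
above, and it omits the alignment and DC ZVA paths.

        #include <stddef.h>
        #include <stdint.h>
        #include <string.h>

        /* Hypothetical model of the assembly's fill strategy. */
        static void *toy_memset(void *dstin, int val, size_t count)
        {
                unsigned char *dst = dstin;
                /* Splat the low byte across all eight bytes, as the
                 * orr chain does: 0xab -> 0xabababababababab. */
                uint64_t a = (val & 0xff) * 0x0101010101010101ULL;

                if (count < 8) {
                        /* Tiny case, like .Ltail15tiny: there are no
                         * earlier stores to overlap with. */
                        while (count--)
                                *dst++ = (unsigned char)val;
                        return dstin;
                }
                while (count > 8) {
                        memcpy(dst, &a, 8);     /* one 64-bit store */
                        dst += 8;
                        count -= 8;
                }
                /* 1..8 bytes remain and the buffer is at least 8 bytes
                 * long, so a final 8-byte store ending exactly at the
                 * end of the buffer may safely overlap bytes already
                 * set -- the .Ltail15 trick. */
                memcpy(dst + count - 8, &a, 8);
                return dstin;
        }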
