FreeBSD/Linux Kernel Cross Reference
sys/osfmk/ppc/movc.s


    1 /*
    2  * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
    3  *
    4  * @APPLE_LICENSE_HEADER_START@
    5  * 
    6  * The contents of this file constitute Original Code as defined in and
    7  * are subject to the Apple Public Source License Version 1.1 (the
    8  * "License").  You may not use this file except in compliance with the
    9  * License.  Please obtain a copy of the License at
   10  * http://www.apple.com/publicsource and read it before using this file.
   11  * 
   12  * This Original Code and all software distributed under the License are
   13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
   16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
   17  * License for the specific language governing rights and limitations
   18  * under the License.
   19  * 
   20  * @APPLE_LICENSE_HEADER_END@
   21  */
   22 /*
   23  * @OSF_COPYRIGHT@
   24  */
   25 #include <debug.h>
   26 #include <ppc/asm.h>
   27 #include <ppc/proc_reg.h>
   28 #include <mach/ppc/vm_param.h>
   29 #include <assym.s>
   30 #include <sys/errno.h>
   31 
   32 #define INSTRUMENT 0
   33 
   34 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
   35 /*
   36  * void pmap_zero_page(vm_offset_t pa)
   37  *
   38  * Zero a page of physical memory.  This routine runs in 32- or 64-bit mode,
   39  * and handles 32- and 128-byte cache lines.
   40  */
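// In C terms, the routine amounts to this hedged sketch (hypothetical name, not part
// of the build; the asm uses dcbz/dcbz128 rather than stores to zero each line):
//
//      void pmap_zero_page_sketch(char *page, size_t linesize)   // linesize is 32 or 128
//      {
//          for (size_t off = 0; off < PPC_PGBYTES; off += linesize)
//              memset(page + off, 0, linesize);
//      }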
   41 
   42 
   43                 .align  5
   44                 .globl  EXT(pmap_zero_page)
   45 
   46 LEXT(pmap_zero_page)
   47 
   48         mflr    r12                                                             // save return address
   49         bl              EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
   50         mtlr    r12                                                             // restore return address
   51         andi.   r9,r10,pf32Byte+pf128Byte               // r9 <- cache line size
   52 
   53         subfic  r4,r9,PPC_PGBYTES                               // r4 <- starting offset in page
   54                 
   55                 bt++    pf64Bitb,page0S4                                // Go do the big guys...
   56                 
   57                 slwi    r3,r3,12                                                // get page address from page num
   58                 b               page_zero_1                                             // Jump to line aligned loop...
   59 
   60         .align  5
   61 
   62                 nop
   63                 nop
   64                 nop
   65                 nop
   66                 nop
   67                 nop
   68                 nop
   69                 
   70 page0S4:
   71                 sldi    r3,r3,12                                                // get page address from page num
   72 
   73 page_zero_1:                                                                    // loop zeroing cache lines
   74         sub.    r5,r4,r9                                                // more to go?
   75         dcbz128 r3,r4                                                   // zero either 32 or 128 bytes
   76         sub             r4,r5,r9                                                // generate next offset
   77         dcbz128 r3,r5
   78         bne--   page_zero_1
   79         
   80         b               EXT(ml_restore)                                 // restore MSR and do the isync
   81 
   82 
   83 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
   84 /* void
   85  * phys_copy(src, dst, bytecount)
   86  *      addr64_t            src;
   87  *      addr64_t            dst;
   88  *      int             bytecount
   89  *
   90  * This routine will copy bytecount bytes from physical address src to physical
   91  * address dst.  It runs in 64-bit mode if necessary, but does not handle
   92  * overlap or make any attempt to be optimal.  Length must be a signed word.
   93  * Not performance critical.
   94  */
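// A hedged C sketch of the copy below (hypothetical name; the real code runs with DR
// off, so src and dst stand for physical addresses):
//
//      void phys_copy_sketch(unsigned char *src, unsigned char *dst, int bytecount)
//      {
//          while ((bytecount -= 4) >= 0) {             // word loop
//              memcpy(dst, src, 4);
//              src += 4;  dst += 4;
//          }
//          bytecount += 4;                             // restore count, as the asm does
//          while (bytecount-- > 0)                     // byte loop for the leftovers
//              *dst++ = *src++;
//      }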
   95 
   96 
   97                 .align  5
   98                 .globl  EXT(phys_copy)
   99 
  100 LEXT(phys_copy)
  101 
  102                 rlwinm  r3,r3,0,1,0                                     ; Duplicate high half of long long paddr into top of reg
  103         mflr    r12                                                             // get return address
  104                 rlwimi  r3,r4,0,0,31                            ; Combine bottom of long long to full 64-bits
  105                 rlwinm  r4,r5,0,1,0                                     ; Duplicate high half of long long paddr into top of reg
  106         bl              EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
  107                 rlwimi  r4,r6,0,0,31                            ; Combine bottom of long long to full 64-bits
  108         mtlr    r12                                                             // restore return address
  109         subic.  r5,r7,4                                                 // a word to copy?
  110         b               phys_copy_2
  111         
  112                 .align  5
  113          
  114 phys_copy_1:                                                                    // loop copying words
  115         subic.  r5,r5,4                                                 // more to go?
  116         lwz             r0,0(r3)
  117         addi    r3,r3,4
  118         stw             r0,0(r4)
  119         addi    r4,r4,4
  120 phys_copy_2:
  121         bge             phys_copy_1
  122         addic.  r5,r5,4                                                 // restore count
  123         ble             phys_copy_4                                             // no more
  124         
  125                                                                                         // Loop is aligned here
  126         
  127 phys_copy_3:                                                                    // loop copying bytes
  128         subic.  r5,r5,1                                                 // more to go?
  129         lbz             r0,0(r3)
  130         addi    r3,r3,1
  131         stb             r0,0(r4)
  132         addi    r4,r4,1
  133         bgt             phys_copy_3
  134 phys_copy_4:        
  135         b               EXT(ml_restore)                                 // restore MSR and do the isync
  136 
  137 
  138 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
  139 /* void
  140  * pmap_copy_page(src, dst)
  141  *      ppnum_t     src;
  142  *      ppnum_t     dst;
  143  *
  144  * This routine will copy the physical page src to physical page dst
  145  * 
  146  * This routine assumes that the src and dst are page numbers and that the
  147  * destination is cached.  It runs on 32- and 64-bit processors, with and
  148  * without altivec, and with 32- and 128-byte cache lines.
  149  * We also must assume that no one will be executing within the destination
  150  * page, and that this will be used for paging.  Because this
  151  * is a common routine, we have tuned loops for each processor class.
  152  *
  153  */
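// All three tuned paths below share the same shape, shown here as a hedged C sketch
// (hypothetical name; the asm zeroes each dest line with dcbz, copies via FPRs, VRs,
// or GPRs, pushes it out with dcbst/dcbf, then invalidates the icache over the page):
//
//      void pmap_copy_page_sketch(char *src, char *dst, size_t linesize)
//      {
//          for (size_t off = 0; off < PPC_PGBYTES; off += linesize)
//              memcpy(dst + off, src + off, linesize);
//          __builtin___clear_cache(dst, dst + PPC_PGBYTES);  // stands in for sync/icbi loop
//      }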
  154 #define kSFSize (FM_SIZE+160)
  155 
  156 ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)
  157 
  158                 lis             r2,hi16(MASK(MSR_VEC))                  ; Get the vector flag
  159         mflr    r0                                                              // get return
  160                 ori             r2,r2,lo16(MASK(MSR_FP))                ; Add the FP flag
  161                 stw             r0,8(r1)                                                // save
  162         stwu    r1,-kSFSize(r1)                                 // set up a stack frame for VRs or FPRs
  163         mfmsr   r11                                                             // save MSR at entry
  164         mfsprg  r10,2                                                   // get feature flags
  165         andc    r11,r11,r2                                              // Clear out vec and fp
  166         ori             r2,r2,lo16(MASK(MSR_EE))                // Get EE on also
  167         andc    r2,r11,r2                                               // Clear out EE as well
  168         mtcrf   0x02,r10                                                // we need to test pf64Bit
  169         ori             r2,r2,MASK(MSR_FP)                              // must enable FP for G3...
  170         mtcrf   0x80,r10                                                // we need to test pfAltivec too
  171         oris    r2,r2,hi16(MASK(MSR_VEC))               // enable altivec for G4 (ignored if G3)
  172         mtmsr   r2                                                              // turn EE off, FP and VEC on
  173         isync
  174         bt++    pf64Bitb,pmap_copy_64                   // skip if 64-bit processor (only they take hint)
  175                 slwi    r3,r3,12                                                // get page address from page num
  176                 slwi    r4,r4,12                                                // get page address from page num
  177         rlwinm  r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1      // get ready to turn off DR
  178         bt              pfAltivecb,pmap_copy_g4                 // altivec but not 64-bit means G4
  179         
  180         
  181         // G3 -- copy using FPRs
  182         
  183         stfd    f0,FM_SIZE+0(r1)                                // save the 4 FPRs we use to copy
  184         stfd    f1,FM_SIZE+8(r1)
  185         li              r5,PPC_PGBYTES/32                               // count of cache lines in a page
  186         stfd    f2,FM_SIZE+16(r1)
  187         mtctr   r5
  188         stfd    f3,FM_SIZE+24(r1)
  189         mtmsr   r12                                                             // turn off DR after saving FPRs on stack
  190         isync
  191         
  192 pmap_g3_copy_loop:                                                              // loop over 32-byte cache lines
  193         dcbz    0,r4                                                    // avoid read of dest line
  194         lfd             f0,0(r3)
  195         lfd             f1,8(r3)
  196         lfd             f2,16(r3)
  197         lfd             f3,24(r3)
  198         addi    r3,r3,32
  199         stfd    f0,0(r4)
  200         stfd    f1,8(r4)
  201         stfd    f2,16(r4)
  202         stfd    f3,24(r4)
  203         dcbst   0,r4                                                    // flush dest line to RAM
  204         addi    r4,r4,32
  205         bdnz    pmap_g3_copy_loop
  206         
  207         sync                                                                    // wait for stores to take
  208         subi    r4,r4,PPC_PGBYTES                               // restore ptr to destination page
  209         li              r6,PPC_PGBYTES-32                               // point to last line in page
  210 pmap_g3_icache_flush:
  211         subic.  r5,r6,32                                                // more to go?
  212         icbi    r4,r6                                                   // flush another line in icache
  213         subi    r6,r5,32                                                // get offset to next line
  214         icbi    r4,r5
  215         bne             pmap_g3_icache_flush
  216         
  217         sync
  218         mtmsr   r2                                                              // turn DR back on
  219         isync
  220         lfd             f0,FM_SIZE+0(r1)                                // restore the FPRs
  221         lfd             f1,FM_SIZE+8(r1)
  222         lfd             f2,FM_SIZE+16(r1)
  223         lfd             f3,FM_SIZE+24(r1)        
  224         
  225         b               pmap_g4_restore                                 // restore MSR and done
  226 
  227         
  228         // G4 -- copy using VRs
  229 
  230 pmap_copy_g4:                                                                   // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
  231                 la              r9,FM_SIZE+16(r1)                               // r9 <- base of VR save area
  232         li              r5,16                                                   // load x-form offsets into r5-r9
  233         li              r6,32                                                   // another offset
  234         stvx    v0,0,r9                                                 // save some VRs so we can use to copy
  235         li              r7,48                                                   // another offset
  236         stvx    v1,r5,r9
  237         li              r0,PPC_PGBYTES/64                               // we loop over 64-byte chunks
  238         stvx    v2,r6,r9
  239         mtctr   r0
  240         li              r8,96                                                   // get look-ahead for touch
  241         stvx    v3,r7,r9
  242         li              r9,128
  243         mtmsr   r12                                                             // now we've saved VRs on stack, turn off DR
  244         isync                                                                   // wait for it to happen
  245         b               pmap_g4_copy_loop
  246         
  247         .align  5                                                               // align inner loops
  248 pmap_g4_copy_loop:                                                              // loop over 64-byte chunks
  249         dcbt    r3,r8                                                   // touch 3 lines ahead
  250         nop                                                                             // avoid a 17-word loop...
  251         dcbt    r3,r9                                                   // touch 4 lines ahead
  252         nop                                                                             // more padding
  253         dcba    0,r4                                                    // avoid pre-fetch of 1st dest line
  254         lvx             v0,0,r3                                                 // offset 0
  255         lvx             v1,r5,r3                                                // offset 16
  256         lvx             v2,r6,r3                                                // offset 32
  257         lvx             v3,r7,r3                                                // offset 48
  258         addi    r3,r3,64
  259         dcba    r6,r4                                                   // avoid pre-fetch of 2nd line
  260         stvx    v0,0,r4                                                 // offset 0
  261         stvx    v1,r5,r4                                                // offset 16
  262         stvx    v2,r6,r4                                                // offset 32
  263         stvx    v3,r7,r4                                                // offset 48
  264         dcbf    0,r4                                                    // push line 1
  265         dcbf    r6,r4                                                   // and line 2
  266         addi    r4,r4,64
  267         bdnz    pmap_g4_copy_loop
  268 
  269         sync                                                                    // wait for stores to take
  270         subi    r4,r4,PPC_PGBYTES                               // restore ptr to destination page
  271         li              r8,PPC_PGBYTES-32                               // point to last line in page
  272 pmap_g4_icache_flush:
  273         subic.  r9,r8,32                                                // more to go?
  274         icbi    r4,r8                                                   // flush from icache
  275         subi    r8,r9,32                                                // get offset to next line
  276         icbi    r4,r9
  277         bne             pmap_g4_icache_flush
  278         
  279         sync
  280         mtmsr   r2                                                              // turn DR back on
  281         isync
  282         la              r9,FM_SIZE+16(r1)                               // get base of VR save area
  283         lvx             v0,0,r9                                                 // restore the VRs
  284         lvx             v1,r5,r9
  285         lvx             v2,r6,r9
  286         lvx             v3,r7,r9        
  287         
  288 pmap_g4_restore:                                                                // r11=MSR
  289         mtmsr   r11                                                             // turn EE on, VEC and FP off
  290         isync                                                                   // wait for it to happen
  291         addi    r1,r1,kSFSize                                   // pop off our stack frame
  292         lwz             r0,8(r1)                                                // restore return address
  293         mtlr    r0
  294         blr
  295         
  296         
  297         // 64-bit/128-byte processor: copy using VRs
  298         
  299 pmap_copy_64:                                                                   // r10=features, r11=old MSR
  300                 sldi    r3,r3,12                                                // get page address from page num
  301                 sldi    r4,r4,12                                                // get page address from page num
  302                 la              r9,FM_SIZE+16(r1)                               // get base of VR save area
  303         li              r5,16                                                   // load x-form offsets into r5-r9
  304         li              r6,32                                                   // another offset
  305         bf              pfAltivecb,pmap_novmx_copy              // altivec suppressed...
  306         stvx    v0,0,r9                                                 // save 8 VRs so we can copy without bubbles
  307         stvx    v1,r5,r9
  308         li              r7,48                                                   // another offset
  309         li              r0,PPC_PGBYTES/128                              // we loop over 128-byte chunks
  310         stvx    v2,r6,r9
  311         stvx    v3,r7,r9
  312         addi    r9,r9,64                                                // advance base ptr so we can store another 4
  313         mtctr   r0
  314         li              r0,MASK(MSR_DR)                                 // get DR bit
  315         stvx    v4,0,r9
  316         stvx    v5,r5,r9
  317         andc    r12,r2,r0                                               // turn off DR bit
  318         li              r0,1                                                    // get a 1 to slam into SF
  319         stvx    v6,r6,r9
  320         stvx    v7,r7,r9
  321         rldimi  r12,r0,63,MSR_SF_BIT                    // set SF bit (bit 0)
  322         li              r8,-128                                                 // offset so we can reach back one line
  323         mtmsrd  r12                                                             // now we've saved VRs, turn DR off and SF on
  324         isync                                                                   // wait for it to happen
  325         dcbt128 0,r3,1                                                  // start a forward stream
  326         b               pmap_64_copy_loop
  327         
  328         .align  5                                                               // align inner loops
  329 pmap_64_copy_loop:                                                              // loop over 128-byte chunks
  330         dcbz128 0,r4                                                    // avoid read of destination line
  331         lvx             v0,0,r3                                                 // offset 0
  332         lvx             v1,r5,r3                                                // offset 16
  333         lvx             v2,r6,r3                                                // offset 32
  334         lvx             v3,r7,r3                                                // offset 48
  335         addi    r3,r3,64                                                // don't have enough GPRs so add 64 2x
  336         lvx             v4,0,r3                                                 // offset 64
  337         lvx             v5,r5,r3                                                // offset 80
  338         lvx             v6,r6,r3                                                // offset 96
  339         lvx             v7,r7,r3                                                // offset 112
  340         addi    r3,r3,64
  341         stvx    v0,0,r4                                                 // offset 0
  342         stvx    v1,r5,r4                                                // offset 16
  343         stvx    v2,r6,r4                                                // offset 32
  344         stvx    v3,r7,r4                                                // offset 48
  345         addi    r4,r4,64
  346         stvx    v4,0,r4                                                 // offset 64
  347         stvx    v5,r5,r4                                                // offset 80
  348         stvx    v6,r6,r4                                                // offset 96
  349         stvx    v7,r7,r4                                                // offset 112
  350         addi    r4,r4,64
  351         dcbf    r8,r4                                                   // flush the line we just wrote
  352         bdnz    pmap_64_copy_loop
  353 
  354         sync                                                                    // wait for stores to take
  355         subi    r4,r4,PPC_PGBYTES                               // restore ptr to destination page
  356         li              r8,PPC_PGBYTES-128                              // point to last line in page
  357 pmap_64_icache_flush:
  358         subic.  r9,r8,128                                               // more to go?
  359         icbi    r4,r8                                                   // flush from icache
  360         subi    r8,r9,128                                               // get offset to next line
  361         icbi    r4,r9
  362         bne             pmap_64_icache_flush
  363         
  364         sync
  365         mtmsrd  r2                                                              // turn DR back on, SF off
  366         isync
  367         la              r9,FM_SIZE+16(r1)                               // get base address of VR save area on stack
  368         lvx             v0,0,r9                                                 // restore the VRs
  369         lvx             v1,r5,r9
  370         lvx             v2,r6,r9
  371         lvx             v3,r7,r9
  372         addi    r9,r9,64        
  373         lvx             v4,0,r9
  374         lvx             v5,r5,r9
  375         lvx             v6,r6,r9
  376         lvx             v7,r7,r9
  377 
  378         b               pmap_g4_restore                                 // restore lower half of MSR and return
  379 
  380  //
  381  //             Copy on 64-bit without VMX
  382  //
  383 
  384 pmap_novmx_copy:        
  385                 li              r0,PPC_PGBYTES/128                              // we loop over 128-byte chunks
  386                 mtctr   r0
  387                 li              r0,MASK(MSR_DR)                                 // get DR bit
  388                 andc    r12,r2,r0                                               // turn off DR bit
  389                 li              r0,1                                                    // get a 1 to slam into SF
  390                 rldimi  r12,r0,63,MSR_SF_BIT                    // set SF bit (bit 0)
  391                 mtmsrd  r12                                                             // now we've saved VRs, turn DR off and SF on
  392                 isync                                                                   // wait for it to happen
  393                 dcbt128 0,r3,1                                                  // start a forward stream 
  394        
  395 pmap_novmx_copy_loop:                                                   // loop over 128-byte cache lines
  396         dcbz128 0,r4                                                    // avoid read of dest line
  397         
  398         ld              r0,0(r3)                                                // Load half a line
  399         ld              r12,8(r3)
  400         ld              r5,16(r3)
  401         ld              r6,24(r3)
  402         ld              r7,32(r3)
  403         ld              r8,40(r3)
  404         ld              r9,48(r3)
  405         ld              r10,56(r3)
  406         
  407         std             r0,0(r4)                                                // Store half a line
  408         std             r12,8(r4)
  409         std             r5,16(r4)
  410         std             r6,24(r4)
  411         std             r7,32(r4)
  412         std             r8,40(r4)
  413         std             r9,48(r4)
  414         std             r10,56(r4)
  415         
  416         ld              r0,64(r3)                                               // Load half a line
  417         ld              r12,72(r3)
  418         ld              r5,80(r3)
  419         ld              r6,88(r3)
  420         ld              r7,96(r3)
  421         ld              r8,104(r3)
  422         ld              r9,112(r3)
  423         ld              r10,120(r3)
  424         
  425         addi    r3,r3,128
  426  
  427         std             r0,64(r4)                                               // Store half a line
  428         std             r12,72(r4)
  429         std             r5,80(r4)
  430         std             r6,88(r4)
  431         std             r7,96(r4)
  432         std             r8,104(r4)
  433         std             r9,112(r4)
  434         std             r10,120(r4)
  435         
  436         dcbf    0,r4                                                    // flush the line we just wrote
  437                 addi    r4,r4,128
  438         bdnz    pmap_novmx_copy_loop
  439 
  440         sync                                                                    // wait for stores to take
  441         subi    r4,r4,PPC_PGBYTES                               // restore ptr to destination page
  442         li              r8,PPC_PGBYTES-128                              // point to last line in page
  443 
  444 pmap_novmx_icache_flush:
  445         subic.  r9,r8,128                                               // more to go?
  446         icbi    r4,r8                                                   // flush from icache
  447         subi    r8,r9,128                                               // get offset to next line
  448         icbi    r4,r9
  449         bne             pmap_novmx_icache_flush
  450         
  451         sync
  452         mtmsrd  r2                                                              // turn DR back on, SF off
  453         isync
  454 
  455         b               pmap_g4_restore                                 // restore lower half of MSR and return
  456 
  457 
  458 
  459 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>    
  460                 
  461 // Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
  462 // These routines all run on both 32- and 64-bit machines, though because they are called
  463 // by the BSD kernel they are always in 32-bit mode when entered.  The mapped ptr returned
  464 // by MapUserMemoryWindow will, however, be 64 bits on 64-bit machines.  Beware of using
  465 // compare instructions on this ptr.  This mapped ptr is kept globally in r31, so there
  466 // is no need to store or load it, which are mode-dependent operations since it could be
  467 // 32 or 64 bits.
  468 
  469 #define kkFrameSize     (FM_SIZE+32)
  470 
  471 #define kkBufSize       (FM_SIZE+0)
  472 #define kkCR3           (FM_SIZE+4)
  473 #define kkSource        (FM_SIZE+8)
  474 #define kkDest          (FM_SIZE+12)
  475 #define kkCountPtr      (FM_SIZE+16)
  476 #define kkR31Save       (FM_SIZE+20)
  477 #define kkThrErrJmp     (FM_SIZE+24)
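// Viewed as a C struct, the save area above FM_SIZE would look roughly like this
// (hypothetical; the code only ever addresses the fields by the offsets above):
//
//      struct kkFrame {                    // based at (r1 + FM_SIZE)
//          uint32_t kkBufSize;             // +0   buffer length or count
//          uint32_t kkCR3;                 // +4   caller's cr3
//          uint32_t kkSource;              // +8   source arg
//          uint32_t kkDest;                // +12  dest arg
//          uint32_t kkCountPtr;            // +16  ptr for #bytes-moved result
//          uint32_t kkR31Save;             // +20  caller's r31
//          uint32_t kkThrErrJmp;           // +24  saved thread recover ptr
//      };                                  // whole frame is FM_SIZE+32 bytes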
  478  
  479  
  480 // nonvolatile CR bits we use as flags in cr3
  481 
  482 #define kk64bit         12
  483 #define kkNull          13
  484 #define kkIn            14
  485 #define kkString        15
  486 #define kkZero          15
  487 
  488 
  489 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
  490 /*
  491  * int
  492  * copyoutstr(src, dst, maxcount, count)
  493  *      vm_offset_t     src;        // r3
  494  *      addr64_t        dst;        // r4 and r5
  495  *      vm_size_t       maxcount;   // r6
  496  *      vm_size_t*      count;      // r7
  497  *
  498  * Set *count to the number of bytes copied.
  499  */
  500 
  501 ENTRY(copyoutstr, TAG_NO_FRAME_USED)
  502         mfcr    r2,0x10                         // save caller's cr3, which we use for flags
  503         mr      r10,r4                          // move high word of 64-bit user address to r10
  504         li              r0,0
  505         crset   kkString                                                // flag as a string op
  506         mr      r11,r5                          // move low word of 64-bit user address to r11
  507         stw             r0,0(r7)                                                // initialize #bytes moved
  508         crclr   kkIn                                                    // flag as copyout
  509         b               copyJoin
  510 
  511 
  512 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
  513 /*
  514  * int
  515  * copyinstr(src, dst, maxcount, count)
  516  *      addr64_t        src;        // r3 and r4
  517  *      vm_offset_t     dst;        // r5
  518  *      vm_size_t       maxcount;   // r6
  519  *      vm_size_t*      count;      // r7
  520  *
  521  * Set *count to the number of bytes copied
  522  * If dst == NULL, don't copy, just count bytes.
  523  * Only currently called from klcopyinstr. 
  524  */
  525 
  526 ENTRY(copyinstr, TAG_NO_FRAME_USED)
  527         mfcr    r2,0x10                         // save caller's cr3, which we use for flags
  528         cmplwi  r5,0                                                    // dst==NULL?
  529         mr      r10,r3                          // move high word of 64-bit user address to r10
  530         li              r0,0
  531         crset   kkString                                                // flag as a string op
  532         mr      r11,r4                          // move low word of 64-bit user address to r11
  533         crmove  kkNull,cr0_eq                                   // remember if (dst==NULL)
  534         stw             r0,0(r7)                                                // initialize #bytes moved
  535         crset   kkIn                                                    // flag as copyin (rather than copyout)
  536         b               copyJoin1                                               // skip over the "crclr kkNull"
  537 
  538 
  539 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
  540 /*
  541  * int
  542  * copyout(src, dst, count)
  543  *      vm_offset_t     src;        // r3
  544  *      addr64_t        dst;        // r4 and r5
  545  *      size_t          count;      // r6
  546  */
  547 
  548                         .align  5
  549                         .globl  EXT(copyout)
  550                         .globl  EXT(copyoutmsg)
  551 
  552 LEXT(copyout)
  553 LEXT(copyoutmsg)
  554 
  555 #if INSTRUMENT
  556         mfspr   r12,pmc1                                                ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
  557         stw             r12,0x6100+(12*16)+0x0(0)               ; INSTRUMENT - Save it
  558         mfspr   r12,pmc2                                                ; INSTRUMENT - Get stamp
  559         stw             r12,0x6100+(12*16)+0x4(0)               ; INSTRUMENT - Save it
  560         mfspr   r12,pmc3                                                ; INSTRUMENT - Get stamp
  561         stw             r12,0x6100+(12*16)+0x8(0)               ; INSTRUMENT - Save it
  562         mfspr   r12,pmc4                                                ; INSTRUMENT - Get stamp
  563         stw             r12,0x6100+(12*16)+0xC(0)               ; INSTRUMENT - Save it
  564 #endif                  
  565         mfcr    r2,0x10                         // save caller's cr3, which we use for flags
  566         mr      r10,r4                          // move high word of 64-bit user address to r10
  567         crclr   kkString                                                // not a string version
  568         mr      r11,r5                          // move low word of 64-bit user address to r11
  569         crclr   kkIn                                                    // flag as copyout
  570         b               copyJoin
  571         
  572 
  573 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
  574 /*
  575  * int
  576  * copyin(src, dst, count)
  577  *      addr64_t        src;        // r3 and r4
  578  *      vm_offset_t     dst;        // r5
  579  *      size_t          count;      // r6
  580  */
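// Typical call from C, as a hedged sketch (uaddr, kbuf, and len are hypothetical;
// a nonzero return means the user address faulted):
//
//      char kbuf[64];
//      if (copyin(uaddr, (vm_offset_t)kbuf, len) != 0)
//          return EFAULT;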
  581 
  582 
  583                         .align  5
  584                         .globl  EXT(copyin)
  585                         .globl  EXT(copyinmsg)
  586 
  587 LEXT(copyin)
  588 LEXT(copyinmsg)
  589 
  590         mfcr    r2,0x10                         // save caller's cr3, which we use for flags
  591         mr      r10,r3                          // move high word of 64-bit user address to r10
  592         crclr   kkString                                                // not a string version
  593         mr      r11,r4                          // move low word of 64-bit user address to r11
  594         crset   kkIn                                                    // flag as copyin
  595         
  596         
  597 // Common code to handle setup for all the copy variants:
  598 //              r2 = caller's cr3
  599 //      r3 = source if copyout
  600 //      r5 = dest if copyin
  601 //      r6 = buffer length or count
  602 //      r7 = count output ptr (if kkString set)
  603 //         r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
  604 //         r11 = low word of 64-bit user-space address
  605 //     cr3 = kkIn, kkString, kkNull flags
  606 
  607 copyJoin:
  608         crclr   kkNull                                                  // (dst==NULL) convention not used with this call
  609 copyJoin1:                                                                              // enter from copyinstr with kkNull set
  610                 mflr    r0                                                              // get return address
  611         cmplwi  r6,0                                                    // buffer length 0?
  612         lis             r9,0x1000                                               // r9 <- 0x10000000 (256MB)
  613                 stw             r0,FM_LR_SAVE(r1)                               // save return
  614         cmplw   cr1,r6,r9                                               // buffer length > 256MB ?
  615         mfsprg  r8,2                                                    // get the features
  616         beq--   copyinout_0                                             // 0 length is degenerate case
  617                 stwu    r1,-kkFrameSize(r1)                             // set up stack frame
  618         stw             r2,kkCR3(r1)                    // save caller's cr3, which we use for flags
  619         mtcrf   0x02,r8                                                 // move pf64Bit to cr6
  620         stw             r3,kkSource(r1)                                 // save args across MapUserMemoryWindow
  621         stw             r5,kkDest(r1)
  622         stw             r6,kkBufSize(r1)
  623         crmove  kk64bit,pf64Bitb                                // remember if this is a 64-bit processor
  624         stw             r7,kkCountPtr(r1)
  625         stw             r31,kkR31Save(r1)                               // we use r31 globally for mapped user ptr
  626         li              r31,0                                                   // no mapped ptr yet
  627         
  628         
  629 // Handle buffer length > 256MB.  This is an error (ENAMETOOLONG) on copyin and copyout.
  630 // The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
  631 // the buffer length to 256MB.  This isn't an issue if the string is less than 256MB
  632 // (as most are!), but if they are >256MB we eventually return ENAMETOOLONG.  This restriction
  633 // is due to MapUserMemoryWindow; we don't want to consume more than two segments for
  634 // the mapping. 
  635 
  636         ble++   cr1,copyin0                                             // skip if buffer length <= 256MB
  637         bf              kkString,copyinout_too_big              // error if not string op
  638         mr              r6,r9                                                   // silently clamp buffer length to 256MB
  639         stw             r9,kkBufSize(r1)                                // update saved copy too
  640 
  641 
  642 // Set up thread_recover in case we hit an illegal address.
  643 
  644 copyin0:
  645                 mfsprg  r8,1                                                    // Get the current thread 
  646                 lis             r2,hi16(copyinout_error)
  647                 ori             r2,r2,lo16(copyinout_error)
  648                 lwz             r4,THREAD_RECOVER(r8)
  649                 lwz             r3,ACT_VMMAP(r8)                                // r3 <- vm_map virtual address
  650                 stw             r2,THREAD_RECOVER(r8)
  651                 stw             r4,kkThrErrJmp(r1)
  652 
  653 
  654 // Map user segment into kernel map, turn on 64-bit mode.  At this point:
  655 //              r3 = vm map
  656 //              r6 = buffer length
  657 // r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
  658 //
  659 // When we call MapUserMemoryWindow, we pass:
  660 //      r3 = vm map ptr
  661 //   r4/r5 = 64-bit user space address as an addr64_t
  662         
  663         mr      r4,r10                          // copy user ptr into r4/r5
  664         mr      r5,r11
  665 #if INSTRUMENT
  666         mfspr   r12,pmc1                                                ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
  667         stw             r12,0x6100+(13*16)+0x0(0)               ; INSTRUMENT - Save it
  668         mfspr   r12,pmc2                                                ; INSTRUMENT - Get stamp
  669         stw             r12,0x6100+(13*16)+0x4(0)               ; INSTRUMENT - Save it
  670         mfspr   r12,pmc3                                                ; INSTRUMENT - Get stamp
  671         stw             r12,0x6100+(13*16)+0x8(0)               ; INSTRUMENT - Save it
  672         mfspr   r12,pmc4                                                ; INSTRUMENT - Get stamp
  673         stw             r12,0x6100+(13*16)+0xC(0)               ; INSTRUMENT - Save it
  674 #endif                  
  675         bl              EXT(MapUserMemoryWindow)                // get r3/r4 <- 64-bit address in kernel map of user operand
  676 #if INSTRUMENT
  677         mfspr   r12,pmc1                                                ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
  678         stw             r12,0x6100+(14*16)+0x0(0)               ; INSTRUMENT - Save it
  679         mfspr   r12,pmc2                                                ; INSTRUMENT - Get stamp
  680         stw             r12,0x6100+(14*16)+0x4(0)               ; INSTRUMENT - Save it
  681         mfspr   r12,pmc3                                                ; INSTRUMENT - Get stamp
  682         stw             r12,0x6100+(14*16)+0x8(0)               ; INSTRUMENT - Save it
  683         mfspr   r12,pmc4                                                ; INSTRUMENT - Get stamp
  684         stw             r12,0x6100+(14*16)+0xC(0)               ; INSTRUMENT - Save it
  685 #endif                  
  686         mr              r31,r4                                                  // r31 <- mapped ptr into user space (may be 64-bit)
  687         bf--    kk64bit,copyin1                                 // skip if a 32-bit processor
  688  
  689                 rldimi  r31,r3,32,0                                             // slam high-order bits into mapped ptr
  690         mfmsr   r4                                                              // if 64-bit, turn on SF so we can use returned ptr
  691         li              r0,1
  692         rldimi  r4,r0,63,MSR_SF_BIT                             // light bit 0
  693         mtmsrd  r4                                                              // turn on 64-bit mode
  694         isync                                                                   // wait for mode to change
  695         
  696         
  697 // Load r3-r5, substituting mapped ptr as appropriate.
  698 
  699 copyin1:
  700         lwz             r5,kkBufSize(r1)                                // restore length to copy
  701         bf              kkIn,copyin2                                    // skip if copyout
  702         lwz             r4,kkDest(r1)                                   // copyin: dest is kernel ptr
  703         mr              r3,r31                                                  // source is mapped ptr
  704         b               copyin3
  705 copyin2:                                                                                // handle copyout
  706         lwz             r3,kkSource(r1)                                 // source is kernel buffer (r3 at entry)
  707         mr              r4,r31                                                  // dest is mapped ptr into user space
  708         
  709         
  710 // Finally, all set up to copy:
  711 //              r3 = source ptr (mapped if copyin)
  712 //              r4 = dest ptr (mapped if copyout)
  713 //              r5 = length
  714 //         r31 = mapped ptr returned by MapUserMemoryWindow
  715 //         cr3 = kkIn, kkString, kk64bit, and kkNull flags
  716 
  717 copyin3:
  718         bt              kkString,copyString                             // handle copyinstr and copyoutstr
  719         bl              EXT(bcopy)                                              // copyin and copyout: let bcopy do the work
  720         li              r3,0                                                    // return success
  721         
  722         
  723 // Main exit point for copyin, copyout, copyinstr, and copyoutstr.  Also reached
  724 // from error recovery if we get a DSI accessing user space.  Clear recovery ptr, 
  725 // and pop off frame.
  726 //              r3 = 0, EFAULT, or ENAMETOOLONG
  727 
  728 copyinx: 
  729         lwz             r2,kkCR3(r1)                    // get caller's cr3
  730                 mfsprg  r6,1                                                    // Get the current thread 
  731         bf--    kk64bit,copyinx1                                // skip if 32-bit processor
  732         mfmsr   r12
  733         rldicl  r12,r12,0,MSR_SF_BIT+1                  // if 64-bit processor, turn 64-bit mode off
  734         mtmsrd  r12                                                             // turn SF off
  735         isync                                                                   // wait for the mode to change
  736 copyinx1:
  737                 lwz             r0,FM_LR_SAVE+kkFrameSize(r1)   // get return address
  738         lwz             r31,kkR31Save(r1)                               // restore caller's r31
  739         lwz             r4,kkThrErrJmp(r1)                              // load saved thread recover
  740         addi    r1,r1,kkFrameSize                               // pop off our stack frame
  741                 mtlr    r0
  742                 stw             r4,THREAD_RECOVER(r6)                   // restore thread recover
  743         mtcrf   0x10,r2                                                 // restore cr3
  744                 blr
  745 
  746 
  747 /* We get here via the exception handler if an illegal
  748  * user memory reference was made.  This error handler is used by
  749  * copyin, copyout, copyinstr, and copyoutstr.  Registers are as
  750  * they were at point of fault, so for example cr3 flags are valid.
  751  */
  752 
  753 copyinout_error:
  754         li              r3,EFAULT                                               // return error
  755         b               copyinx
  756 
  757 copyinout_0:                                                                    // degenerate case: 0-length copy
  758                 mtcrf   0x10,r2                                                 // restore cr3
  759         li              r3,0                                                    // return success
  760         blr
  761         
  762 copyinout_too_big:                                                              // degenerate case
  763         mtcrf   0x10,r2                                                 // restore cr3
  764         lwz             r1,0(r1)                                                // pop off stack frame
  765         li              r3,ENAMETOOLONG
  766         blr
  767         
  768 
  769 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
  770 // Handle copyinstr and copyoutstr.  At this point the stack frame is set up,
  771 // the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
  772 // if necessary, and:
  773 //              r3 = source ptr, mapped if copyinstr
  774 //              r4 = dest ptr, mapped if copyoutstr
  775 //              r5 = buffer length
  776 //         r31 = mapped ptr returned by MapUserMemoryWindow
  777 //     cr3 = kkIn, kkString, kkNull, and kk64bit flags
  778 // We do word copies unless the buffer is very short, then use a byte copy loop
  779 // for the leftovers if necessary.  The crossover at which the word loop becomes
  780 // faster is about seven bytes, counting the zero.
  781 //
  782 // We first must word-align the source ptr, in order to avoid taking a spurious
  783 // page fault.
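// The alignment step, as a hedged C sketch (hypothetical names; ~mask fills the
// bytes that precede the string with 0xFF so they can never look like a 0):
//
//      uintptr_t off  = (uintptr_t)src & 3;                   // r2: byte offset in word
//      uint32_t *wsrc = (uint32_t *)((uintptr_t)src - off);   // word-aligned source
//      uint32_t mask  = 0xFFFFFFFFu >> (off * 8);             // r7: bytes we want
//      uint32_t first = *wsrc | ~mask;                        // r8: 0xFF-padded 1st word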
  784 
  785 copyString:
  786         cmplwi  cr1,r5,15                                               // is buffer very short?
  787         mr      r12,r3                          // remember ptr to 1st source byte
  788         mtctr   r5                                                              // assuming short, set up loop count for bytes
  789         blt--   cr1,copyinstr8                                  // too short for word loop
  790         rlwinm  r2,r3,0,0x3                     // get byte offset of 1st byte within word
  791         rlwinm  r9,r3,3,0x18                    // get bit offset of 1st byte within word
  792         li      r7,-1
  793         sub     r3,r3,r2                        // word-align source address
  794         add     r6,r5,r2                        // get length starting at byte 0 in word
  795         srw     r7,r7,r9                        // get mask for bytes in first word
  796         srwi    r0,r6,2                                                 // get #words in buffer
  797         lwz     r5,0(r3)                        // get aligned word with first source byte
  798         lis             r10,hi16(0xFEFEFEFF)                    // load magic constants into r10 and r11
  799         lis             r11,hi16(0x80808080)
  800         mtctr   r0                                                              // set up word loop count
  801         addi    r3,r3,4                         // advance past the source word
  802         ori             r10,r10,lo16(0xFEFEFEFF)
  803         ori             r11,r11,lo16(0x80808080)
  804         orc     r8,r5,r7                        // map bytes preceding first source byte to 0xFF
  805         bt--    kkNull,copyinstr5enter          // enter loop that just counts
  806         
  807 // Special case 1st word, which has been 0xFF filled on left.  Note that we use
  808 // "and.", even though we execute in both 32- and 64-bit mode.  This is OK.
  809 
  810         slw     r5,r5,r9                        // left justify payload bytes
  811         add             r9,r10,r8                                               // r9 =  data + 0xFEFEFEFF
  812         andc    r7,r11,r8                                               // r7 = ~data & 0x80808080
  813                 subfic  r0,r2,4                                                 // get r0 <- #payload bytes in 1st word
  814         and.    r7,r9,r7                                                // if r7==0, then all bytes in r8 are nonzero
  815         stw     r5,0(r4)                        // copy payload bytes to dest buffer
  816         add             r4,r4,r0                                                // then point to next byte in dest buffer
  817         bdnzt   cr0_eq,copyinstr6               // use loop that copies if 0 not found
  818         
  819         b               copyinstr7                      // 0 found (buffer can't be full)
  820         
  821         
  822 // Word loop(s).  They do a word-parallel search for 0s, using the following
  823 // inobvious but very efficient test:
  824 //              y =  data + 0xFEFEFEFF
  825 //              z = ~data & 0x80808080
  826 // If (y & z)==0, then all bytes in dataword are nonzero.  There are two copies
  827 // of this loop, one that just counts and another that copies.
  828 //              r3 = ptr to next word of source (word aligned)
  829 //              r4 = ptr to next byte in buffer
  830 //      r6 = original buffer length (adjusted to be word origin)
  831 //     r10 = 0xFEFEFEFF
  832 //     r11 = 0x80808080
  833 //     r12 = ptr to 1st source byte (used to determine string length)
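// The test in C, as a hedged sketch (this is the classic word-parallel null scan;
// note 0xFEFEFEFF == -0x01010101, so the add is really a subtract):
//
//      int word_has_zero_byte(uint32_t x)
//      {
//          uint32_t y = x + 0xFEFEFEFF;        // x - 0x01010101: 0x00 bytes underflow
//          uint32_t z = ~x & 0x80808080;       // high bit of each byte < 0x80
//          return (y & z) != 0;                // nonzero iff x contains a 0x00 byte
//      }
//
// The whole-word yes/no answer is exact; the per-byte 0x80 positions are not, which
// is why copyinstr7 masks off the 0x01 false hits before using cntlzw.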
  834 
  835         .align  5                                                               // align inner loops for speed
  836 copyinstr5:                                                                             // version that counts but does not copy
  837         lwz     r8,0(r3)                                                // get next word of source
  838         addi    r3,r3,4                         // advance past it
  839 copyinstr5enter:
  840         add             r9,r10,r8                                               // r9 =  data + 0xFEFEFEFF
  841         andc    r7,r11,r8                                               // r7 = ~data & 0x80808080
  842         and.    r7,r9,r7                        // r7 = r9 & r7 ("." ok even in 64-bit mode)
  843         bdnzt   cr0_eq,copyinstr5                               // if r7==0, then all bytes in r8 are nonzero
  844 
  845         b               copyinstr7
  846 
  847         .align  5                                                               // align inner loops for speed
  848 copyinstr6:                                                                             // version that counts and copies
  849         lwz     r8,0(r3)                                                // get next word of source
  850         addi    r3,r3,4                         // advance past it
  851         addi    r4,r4,4                                                 // increment dest ptr while we wait for data
  852         add             r9,r10,r8                                               // r9 =  data + 0xFEFEFEFF
  853         andc    r7,r11,r8                                               // r7 = ~data & 0x80808080
  854         and.    r7,r9,r7                        // r7 = r9 & r7 ("." ok even in 64-bit mode)
  855         stw             r8,-4(r4)                                               // pack all 4 bytes into buffer
  856         bdnzt   cr0_eq,copyinstr6                               // if r7==0, then all bytes are nonzero
  857 
  858 
  859 // Either 0 found or buffer filled.  The above algorithm has mapped nonzero bytes to 0
  860 // and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
  861 // mapped to 0x80.  We must mask out these false hits before searching for a 0x80 byte.
  862 //              r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
  863 //      r6 = original buffer length (adjusted to be word origin)
  864 //      r7 = computed vector of 0x00 and 0x80 bytes
  865 //      r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
  866 //     r12 = ptr to 1st source byte (used to determine string length)
  867 //     cr0 = beq set iff 0 not found
  868 
  869 copyinstr7:
  870         rlwinm  r2,r8,7,0,31                                    // move 0x01 bits to 0x80 position
  871                 rlwinm  r6,r6,0,0x3                                             // mask down to partial byte count in last word
  872         andc    r7,r7,r2                                                // turn off false hits from 0x0100 worst case
  873         crnot   kkZero,cr0_eq                                   // 0 found iff cr0_eq is off
  874         srwi    r7,r7,8                         // we want to count the 0 as a byte xferred
  875                 cmpwi   r6,0                                                    // any bytes left over in last word?
  876         cntlzw  r7,r7                                                   // now we can find the 0 byte (ie, the 0x80)
  877         subi    r3,r3,4                         // back up r3 to point to 1st byte in r8
  878         srwi    r7,r7,3                                                 // convert 8,16,24,32 to 1,2,3,4
  879         add     r3,r3,r7                        // now r3 points one past 0 byte, or at 1st byte not xferred
  880         bt++    kkZero,copyinstr10                              // 0 found, so done
  881         
  882         beq             copyinstr10                                             // r6==0, so buffer truly full
  883         mtctr   r6                                                              // 0 not found, loop over r6 bytes
  884         b               copyinstr8                                              // enter byte loop for last 1-3 leftover bytes
  885         
  886 
  887 // Byte loop.  This is used for very small buffers and for the odd bytes left over
  888 // after searching and copying words at a time.
  889 //      r3 = ptr to next byte of source
  890 //      r4 = ptr to next dest byte
  891 //     r12 = ptr to first byte of source
  892 //     ctr = count of bytes to check
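// For reference, a C sketch of this byte loop and its exit status (the
// dst == NULL arm models the kkNull case; names are illustrative):
//
//      #include <stddef.h>
//      #include <errno.h>
//
//      static int copystr_bytes(const char *src, char *dst, size_t max, size_t *done)
//      {
//          size_t i;
//          for (i = 0; i < max; i++) {
//              char c = src[i];
//              if (dst != NULL)                // kkNull: count without storing
//                  dst[i] = c;
//              if (c == '\0') {
//                  *done = i + 1;              // the 0 counts as transferred
//                  return 0;
//              }
//          }
//          *done = max;
//          return ENAMETOOLONG;                // buffer filled, no 0 found
//      }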
  893     
  894         .align  5                                                               // align inner loops for speed
  895 copyinstr8:                                                                             // loop over bytes of source
  896         lbz             r0,0(r3)                                                // get next byte of source
  897         addi    r3,r3,1
  898         addi    r4,r4,1                                                 // increment dest addr whether we store or not
  899         cmpwi   r0,0                                                    // is this the 0?
  900         bt--    kkNull,copyinstr9                               // don't store if copyinstr was called with NULL ptr
  901         stb             r0,-1(r4)
  902 copyinstr9:
  903         bdnzf   cr0_eq,copyinstr8                               // loop if byte not 0 and more room in buffer
  904         
  905         crmove  kkZero,cr0_eq                                   // remember if 0 found or buffer filled
  906 
  907         
  908 // Buffer filled or 0 found.  Unwind and return.
  909 //      r3 = ptr to 1st source byte not transferred
  910 //     r12 = ptr to 1st source byte
  911 //     r31 = mapped ptr returned by MapUserMemoryWindow
  912 //     cr3 = kkZero set iff 0 found
  913 
  914 copyinstr10:
  915         lwz             r9,kkCountPtr(r1)                               // get ptr to place to store count of bytes moved
  916         sub     r2,r3,r12                       // compute #bytes copied (including the 0)
  917         li              r3,0                                                    // assume success return status
  918         stw             r2,0(r9)                                                // store #bytes moved
  919         bt++    kkZero,copyinx                                  // we did find the 0 so return 0
  920         li              r3,ENAMETOOLONG                                 // buffer filled
  921         b               copyinx                                                 // join main exit routine
  922 
  923 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
  924 /*
  925  * int
  926  * copypv(source, sink, size, which)
  927  *      addr64_t        src;        // r3 and r4
  928  *      addr64_t        dst;        // r5 and r6
  929  *      size_t          size;           // r7
  930  *      int                     which;          // r8
  931  *
  932  * Operand size bytes are copied from operand src into operand dst. The source and
  933  * destination operand addresses are given as addr64_t, and may designate starting
  934  * locations in physical or virtual memory in any combination except where both are
  935  * virtual. Virtual memory locations may be in either the kernel or the current thread's
  936  * address space. Operand size may be up to 256MB.
  937  *
  938  * Operation is controlled by the 'which' operand, which offers these options:
  939  *              cppvPsrc : source operand is (1) physical or (0) virtual
  940  *              cppvPsnk : destination operand is (1) physical or (0) virtual
  941  *              cppvKmap : virtual operand is in (1) kernel or (0) current thread
  942  *              cppvFsnk : (1) flush destination before and after transfer
  943  *              cppvFsrc : (1) flush source before and after transfer
  944  *              cppvNoModSnk : (1) don't set destination operand's changed bit(s)
  945  *              cppvNoRefSrc : (1) don't set source operand's referenced bit(s)
  946  *
  947  * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32().
  948  * This section describes the operation of the new 64-bit path.
  949  *
  950  * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a
  951  * window in the kernel address space into all of physical RAM plus the I/O hole. Since
  952  * the window's mappings specify the proper access policies for the underlying memory,
  953  * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk
  954  * and cppvFsrc are ignored. Physical operand addresses are relocated into the physical
  955  * memory window, and are accessed with data relocation on. Virtual addresses are either
  956  * within the kernel, or are mapped into the kernel address space through the user memory
  957  * window. Because accesses to a virtual operand are performed with data relocation on,
  958  * the new path does not have to translate the address, disable/enable interrupts, lock
  959  * the mapping, or update referenced and changed bits.
  960  *
  961  * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is
  962  * a substantial performance penalty for copypv operating in real mode. Utilizing the
  963  * new 64-bit path, transfer performance increases >100% on the G5.
  964  *
  965  * The attentive reader may notice that mtmsrd ops are not followed by isync ops as 
  966  * might be expected. The 970 follows PowerPC architecture version 2.01, which defines
  967  * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer
  968  * required.
  969  *
  970  * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need
  971  * to call 32-bit functions, which would lead to the high-order 32 bits of our values
  972  * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles
  973  * in our own stack frame across calls to 32-bit functions.
  974  *              
  975  */
  976 
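// A sketch of a typical call, copying one physical page into a kernel-virtual
// buffer (the flag names are the cppv* options documented above; 'pa' and
// 'kbuf' are illustrative, and the prototype mirrors the comment block):
//
//      extern int copypv(addr64_t src, addr64_t snk, size_t size, int which);
//
//      int rc = copypv(pa,                           // physical source
//                      (addr64_t)(uintptr_t)kbuf,    // kernel-virtual destination
//                      4096,
//                      cppvPsrc | cppvKmap);         // src physical, virtual side in kernel map
//      // rc is 0 on success, EINVAL for bad arguments, EFAULT on a fault during the copy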
  977 // Map operand which bits into non-volatile CR2 and CR3 bits.
  978 #define whichAlign      ((3+1)*4)
  979 #define whichMask       0x007F0000
  980 #define pvPsnk          (cppvPsnkb - whichAlign)
  981 #define pvPsrc          (cppvPsrcb - whichAlign)
  982 #define pvFsnk          (cppvFsnkb - whichAlign)
  983 #define pvFsrc          (cppvFsrcb - whichAlign)
  984 #define pvNoModSnk      (cppvNoModSnkb - whichAlign)
  985 #define pvNoRefSrc      (cppvNoRefSrcb - whichAlign)
  986 #define pvKmap          (cppvKmapb - whichAlign)
  987 #define pvNoCache       cr2_lt
  988 
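// Since whichAlign == (3+1)*4 == 16, the rlwinm below amounts to a rotate-left
// by 16 followed by a mask, lining the cppv*b bit numbers up with CR2/CR3 bit
// positions.  In C terms (a sketch):
//
//      uint32_t rot     = (which << 16) | ((uint32_t)which >> 16);  // rotate left 16
//      uint32_t cr_bits = rot & whichMask;          // keep the seven cppv* flags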
  989                 .align  5
  990                 .globl  EXT(copypv)
  991 
  992 LEXT(copypv)
  993         mfsprg  r10,2                                                   // get feature flags
  994         mtcrf   0x02,r10                                                // we need to test pf64Bit
  995         bt++    pf64Bitb,copypv_64                              // skip if 64-bit processor (only they take hint)
  996         
  997         b               EXT(hw_copypv_32)                               // carry on with 32-bit copypv
  998 
  999 // Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.        
 1000 copypv_64:
 1001                 mfsprg  r9,1                                                    // get current thread
 1002                 stwu    r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
 1003                                                                                                 // allocate stack frame and link it
 1004                 mflr    r0                                                              // get return address
 1005                 mfcr    r10                                                             // get cr2 and cr3
 1006                 lwz             r12,THREAD_RECOVER(r9)                  // get error callback
 1007                 stw             r26,FM_ARG0+0x00(r1)                    // save non-volatile r26
 1008                 stw             r27,FM_ARG0+0x04(r1)                    // save non-volatile r27
 1009                 stw             r28,FM_ARG0+0x08(r1)                    // save non-volatile r28
 1010                 stw             r29,FM_ARG0+0x0C(r1)                    // save non-volatile r29
 1011                 stw             r30,FM_ARG0+0x10(r1)                    // save non-volatile r30
 1012                 stw             r31,FM_ARG0+0x14(r1)                    // save non-volatile r31
 1013                 stw             r12,FM_ARG0+0x20(r1)                    // save error callback
 1014                 stw             r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
 1015                                                                                                 // save return address
 1016                 stw             r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
 1017                                                                                                 // save non-volatile cr2 and cr3
 1018 
 1019 // Non-volatile register usage in this routine is:
 1020 //      r26: saved msr image
 1021 //      r27: current pmap_t / virtual source address
 1022 //      r28: destination virtual address
 1023 //      r29: source address
 1024 //      r30: destination address
 1025 //      r31: byte count to copy
 1026 //      cr2/3: parameter 'which' bits
 1027 
 1028                 rlwinm  r8,r8,whichAlign,whichMask              // align and mask which bits
 1029                 mr              r31,r7                                                  // copy size to somewhere non-volatile
 1030                 mtcrf   0x20,r8                                                 // insert which bits into cr2 and cr3
 1031                 mtcrf   0x10,r8                                                 // insert which bits into cr2 and cr3
 1032                 rlwinm  r29,r3,0,1,0                                    // form source address high-order bits
 1033                 rlwinm  r30,r5,0,1,0                                    // form destination address high-order bits
 1034                 rlwimi  r29,r4,0,0,31                                   // form source address low-order bits
 1035                 rlwimi  r30,r6,0,0,31                                   // form destination address low-order bits
 1036                 crand   cr7_lt,pvPsnk,pvPsrc                    // are both operand addresses physical?
 1037                 cntlzw  r0,r31                                                  // count leading zeroes in byte count
 1038                 cror    cr7_eq,pvPsnk,pvPsrc                    // cr7_eq <- source or destination is physical
 1039                 bf--    cr7_eq,copypv_einval                    // both operands may not be virtual
 1040                 cmplwi  r0,4                                                    // byte count greater than or equal to 256MB (2**28)?
 1041                 blt--   copypv_einval                                   // byte count too big, give EINVAL
 1042                 cmplwi  r31,0                                                   // byte count zero?
 1043                 beq--   copypv_zero                                             // early out
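// The checks above, sketched in C (flag values are assumed to match the
// cppv*b bit numbers, e.g. cppvPsnk == 1, cppvPsrc == 2):
//
//      if ((which & (cppvPsnk | cppvPsrc)) == 0)
//          return EINVAL;              // both operands virtual: not allowed
//      if ((uint32_t)size >> 28)       // cntlzw(size) < 4 <=> size >= 2**28
//          return EINVAL;              // byte count too big
//      if (size == 0)
//          return 0;                   // nothing to copy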
 1044                 bt              cr7_lt,copypv_phys                              // both operand addresses are physical
 1045                 mr              r28,r30                                                 // assume destination is virtual
 1046                 bf              pvPsnk,copypv_dv                                // is destination virtual?
 1047                 mr              r28,r29                                                 // no, so source must be virtual
 1048 copypv_dv:
 1049                 lis             r27,ha16(EXT(kernel_pmap))              // get kernel's pmap_t *, high-order
 1050                 lwz             r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t
 1051                 bt              pvKmap,copypv_kern                              // virtual address in kernel map?
 1052                 lwz             r3,ACT_VMMAP(r9)                                // get user's vm_map *
 1053                 rldicl  r4,r28,32,32                                    // r4, r5 <- addr64_t virtual address 
 1054                 rldicl  r5,r28,0,32
 1055                 std             r29,FM_ARG0+0x30(r1)                    // preserve 64-bit r29 across 32-bit call
 1056                 std             r30,FM_ARG0+0x38(r1)                    // preserve 64-bit r30 across 32-bit call
 1057                 bl              EXT(MapUserMemoryWindow)                // map slice of user space into kernel space
 1058                 ld              r29,FM_ARG0+0x30(r1)                    // restore 64-bit r29
 1059                 ld              r30,FM_ARG0+0x38(r1)                    // restore 64-bit r30
 1060                 rlwinm  r28,r3,0,1,0                                    // convert relocated addr64_t virtual address 
 1061                 rlwimi  r28,r4,0,0,31                                   //  into a single 64-bit scalar
 1062 copypv_kern:
 1063 
 1064 // Since we'll be accessing the virtual operand with data-relocation on, we won't need to 
 1065 // update the referenced and changed bits manually after the copy. So, force the appropriate
 1066 // flag bit on for the virtual operand.
 1067                 crorc   pvNoModSnk,pvNoModSnk,pvPsnk    // for virtual dest, let hardware do ref/chg bits
 1068                 crorc   pvNoRefSrc,pvNoRefSrc,pvPsrc    // for virtual source, let hardware do ref bit
 1069                 
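// In C terms, the two crorc ops above amount to (names illustrative):
//
//      noModSnk = noModSnk || !snkIsPhysical;  // virtual dest: hw keeps chg bit
//      noRefSrc = noRefSrc || !srcIsPhysical;  // virtual src:  hw keeps ref bit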
 1070 // We'll be finding a mapping and looking at it, so we need to disable 'rupts.
 1071                 lis             r0,hi16(MASK(MSR_VEC))                  // get vector mask
 1072                 ori             r0,r0,lo16(MASK(MSR_FP))                // insert fp mask
 1073                 mfmsr   r26                                                             // save current msr
 1074                 andc    r26,r26,r0                                              // turn off VEC and FP in saved copy
 1075                 ori             r0,r0,lo16(MASK(MSR_EE))                // add EE to our mask
 1076                 andc    r0,r26,r0                                               // disable EE in our new msr image
 1077                 mtmsrd  r0                                                              // introduce new msr image
 1078 
 1079 // We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now
 1080 // try to find a mapping corresponding to this address in order to determine whether the address
 1081 // is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable
 1082 // (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
 1083 // examine the mapping's caching-inhibited bit.
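// A C sketch of this probe, assuming the C-level interfaces this code calls
// into (mapping_find returns a busy mapping or NULL, mapping_drop_busy
// releases it); the mpI mask spelling is illustrative:
//
//      addr64_t   nextva;
//      boolean_t  inhibited = FALSE;
//      mapping_t *mp = mapping_find(pmap, va, &nextva, 1);  // 1: search nested maps
//      if (mp != NULL) {                          // no mapping -> assume cacheable
//          inhibited = (mp->mpVAddr & mpI) != 0;  // caching-inhibited flag bit
//          mapping_drop_busy(mp);                 // release the mapping
//      }
//      // inhibited selects bcopy_nc over bcopy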
 1084                 mr              r3,r27                                                  // r3 <- pmap_t pmap
 1085                 rldicl  r4,r28,32,32                                    // r4, r5 <- addr64_t va
 1086                 rldicl  r5,r28,0,32
 1087                 la              r6,FM_ARG0+0x18(r1)                             // r6 <- addr64_t *nextva
 1088                 li              r7,1                                                    // r7 <- int full, search nested mappings
 1089                 std             r26,FM_ARG0+0x28(r1)                    // preserve 64-bit r26 across 32-bit calls
 1090                 std             r28,FM_ARG0+0x30(r1)                    // preserve 64-bit r28 across 32-bit calls
 1091                 std             r29,FM_ARG0+0x38(r1)                    // preserve 64-bit r29 across 32-bit calls
 1092                 std             r30,FM_ARG0+0x40(r1)                    // preserve 64-bit r30 across 32-bit calls
 1093                 bl              EXT(mapping_find)                               // find mapping for virtual operand
 1094                 mr.             r3,r3                                                   // did we find it?
 1095                 beq             copypv_nomapping                                // nope, so we'll assume it's cacheable
 1096                 lwz             r4,mpVAddr+4(r3)                                // get low half of virtual addr for hw flags
 1097                 rlwinm. r4,r4,0,mpIb-32,mpIb-32                 // caching-inhibited bit set?
 1098                 crnot   pvNoCache,cr0_eq                                // if it is, use bcopy_nc
 1099                 bl              EXT(mapping_drop_busy)                  // drop busy on the mapping
 1100 copypv_nomapping:
 1101                 ld              r26,FM_ARG0+0x28(r1)                    // restore 64-bit r26
 1102                 ld              r28,FM_ARG0+0x30(r1)                    // restore 64-bit r28
 1103                 ld              r29,FM_ARG0+0x38(r1)                    // restore 64-bit r29
 1104                 ld              r30,FM_ARG0+0x40(r1)                    // restore 64-bit r30
 1105                 mtmsrd  r26                                                             // restore msr to its previous state
 1106 
 1107 // Set both the source and destination virtual addresses to the virtual operand's address --
 1108 // we'll overlay one of them with the physical operand's address.
 1109                 mr              r27,r28                                                 // make virtual operand BOTH source AND destination
 1110 
 1111 // Now we're ready to relocate the physical operand address(es) into the physical memory window.
 1112 // Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
 1113 // space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole,
 1114 // we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
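// A C sketch of the relocation and I/O-hole test below (cntlzd(pa) == 32
// picks out pa in [2**31, 2**32); window_base stands in for the lgPMWvaddr
// value, and the function name is illustrative):
//
//      static uint64_t relocate_phys(uint64_t pa, uint64_t window_base, int *nc)
//      {
//          if (pa != 0 && __builtin_clzll(pa) == 32)
//              *nc = 1;                  // I/O hole: force bcopy_nc
//          return pa + window_base;      // address within the physical window
//      }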
 1115 copypv_phys:
 1116                 ld              r6,lgPMWvaddr(0)                                // get physical memory window virtual address
 1117                 bf              pvPsnk,copypv_dstvirt                   // is destination address virtual?
 1118                 cntlzd  r4,r30                                                  // count leading zeros in destination address
 1119                 cmplwi  r4,32                                                   // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
 1120                 cror    pvNoCache,cr0_eq,pvNoCache              // use bcopy_nc for I/O hole locations          
 1121                 add             r28,r30,r6                                              // relocate physical destination into physical window
 1122 copypv_dstvirt:
 1123                 bf              pvPsrc,copypv_srcvirt                   // is source address virtual?
 1124                 cntlzd  r4,r29                                                  // count leading zeros in source address
 1125                 cmplwi  r4,32                                                   // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
 1126                 cror    pvNoCache,cr0_eq,pvNoCache              // use bcopy_nc for I/O hole locations          
 1127                 add             r27,r29,r6                                              // relocate physical source into physical window
 1128 copypv_srcvirt:
 1129 
 1130 // Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
 1131 // funny happens during the copy. So, we set a pointer to our error handler in the per-thread
 1132 // control block.
 1133                 mfsprg  r8,1                                                    // get current thread's stuff
 1134                 lis             r3,hi16(copypv_error)                   // get our error callback's address, high
 1135                 ori             r3,r3,lo16(copypv_error)                // get our error callback's address, low
 1136                 stw             r3,THREAD_RECOVER(r8)                   // set our error callback
 1137                 
 1138 // Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
 1139 // 64-bit mode.
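// In C terms (SF is the topmost MSR bit on 64-bit PowerPC; the accessor
// names are illustrative):
//
//      uint64_t msr = mfmsr64();
//      msr |= 1ULL << 63;        // SF on: 64-bit addressing
//      mtmsrd64(msr);            // context-synchronizing per arch 2.01, no isync needed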
 1140                 li              r0,1                                                    // get a handy one bit
 1141                 mfmsr   r3                                                              // get current msr
 1142                 rldimi  r3,r0,63,MSR_SF_BIT                             // set SF bit on in our msr copy
 1143                 mtmsrd  r3                                                              // enter 64-bit mode
 1144 
 1145 // If requested, flush data cache
 1146 // Note that we don't flush; the code is kept "just in case".
 1147 #if 0
 1148                 bf              pvFsrc,copypv_nfs                               // do we flush the source?
 1149                 rldicl  r3,r27,32,32                                    // r3, r4 <- addr64_t source virtual address
 1150                 rldicl  r4,r27,0,32
 1151                 mr              r5,r31                                                  // r5 <- count (in bytes)
 1152                 li              r6,0                                                    // r6 <- boolean phys (false, not physical)
 1153                 bl              EXT(flush_dcache)                               // flush the source operand
 1154 copypv_nfs:
 1155                 bf              pvFsnk,copypv_nfdx                              // do we flush the destination?
 1156                 rldicl  r3,r28,32,32                                    // r3, r4 <- addr64_t destination virtual address
 1157                 rldicl  r4,r28,0,32
 1158                 mr              r5,r31                                                  // r5 <- count (in bytes)
 1159                 li              r6,0                                                    // r6 <- boolean phys (false, not physical)
 1160                 bl              EXT(flush_dcache)                               // flush the destination operand
 1161 copypv_nfdx:
 1162 #endif
 1163 
 1164 // Call bcopy or bcopy_nc to perform the copy.
 1165                 mr              r3,r27                                                  // r3 <- source virtual address
 1166                 mr              r4,r28                                                  // r4 <- destination virtual address
 1167                 mr              r5,r31                                                  // r5 <- bytes to copy
 1168                 bt              pvNoCache,copypv_nc                             // take non-caching route
 1169                 bl              EXT(bcopy)                                              // call bcopy to do the copying
 1170                 b               copypv_copydone
 1171 copypv_nc:
 1172                 bl              EXT(bcopy_nc)                                   // call bcopy_nc to do the copying
 1173 copypv_copydone:
 1174 
 1175 // If requested, flush data cache
 1176 // Note that we don't flush; the code is kept "just in case".
 1177 #if 0
 1178                 bf              pvFsrc,copypv_nfsx                              // do we flush the source?
 1179                 rldicl  r3,r27,32,32                                    // r3, r4 <- addr64_t source virtual address
 1180                 rldicl  r4,r27,0,32
 1181                 mr              r5,r31                                                  // r5 <- count (in bytes)
 1182                 li              r6,0                                                    // r6 <- boolean phys (false, not physical)
 1183                 bl              EXT(flush_dcache)                               // flush the source operand
 1184 copypv_nfsx:
 1185                 bf              pvFsnk,copypv_nfd                               // do we flush the destination?
 1186                 rldicl  r3,r28,32,32                                    // r3, r4 <- addr64_t destination virtual address
 1187                 rldicl  r4,r28,0,32
 1188                 mr              r5,r31                                                  // r5 <- count (in bytes)
 1189                 li              r6,0                                                    // r6 <- boolean phys (false, not physical)
 1190                 bl              EXT(flush_dcache)                               // flush the destination operand
 1191 copypv_nfd:
 1192 #endif
 1193 
 1194 // Leave 64-bit mode.
 1195                 mfmsr   r3                                                              // get current msr
 1196                 rldicl  r3,r3,0,MSR_SF_BIT+1                    // clear SF bit in our copy
 1197                 mtmsrd  r3                                                              // leave 64-bit mode
 1198 
 1199 // If requested, set ref/chg on source/dest physical operand(s). It is possible that the copy is
 1200 // from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
 1201 // mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
 1202 // Note that this code is page-size sensitive, so it should probably be a part of our low-level
 1203 // code in hw_vm.s.
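// A C sketch of the page walk that follows (assumes 4K pages and the C-level
// interfaces mapping_phys_lookup / mapping_set_mod; the declarations and
// function name here are illustrative):
//
//      typedef unsigned int ppnum_t;
//      extern void *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex);
//      extern void  mapping_set_mod(ppnum_t pp);
//
//      static void set_mod_range(uint64_t pa, uint64_t bytes)
//      {
//          unsigned int pindex;
//          uint64_t pp   = pa >> 12;                 // first 4K page
//          uint64_t last = (pa + bytes - 1) >> 12;   // last 4K page
//          for (; pp <= last; pp++) {
//              if (mapping_phys_lookup((ppnum_t)pp, &pindex) == NULL)
//                  break;                     // page not managed (e.g., RAM disk)
//              mapping_set_mod((ppnum_t)pp);  // set changed bit
//          }
//      }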
 1204                 bt              pvNoModSnk,copypv_nomod                 // skip destination update if not requested
 1205                 std             r29,FM_ARG0+0x30(r1)                    // preserve 64-bit r29 across 32-bit calls
 1206                 li              r26,1                                                   // r26 <- 4K-page count                                         
 1207                 mr              r27,r31                                                 // r27 <- byte count
 1208                 rlwinm  r3,r30,0,20,31                                  // does destination cross a page boundary?
 1209                 subfic  r3,r3,4096                                              // r3 <- bytes left in destination's first page
 1210                 cmplw   r3,r27                                                  // compare with byte count
 1211                 blt             copypv_modnox                                   // skip if not crossing case
 1212                 subf    r27,r3,r27                                              // r27 <- byte count less initial fragment
 1213                 addi    r26,r26,1                                               // increment page count
 1214 copypv_modnox:
 1215                 srdi    r3,r27,12                                               // pages to update (not including crosser)
 1216                 add             r26,r26,r3                                              // add in crosser
 1217                 srdi    r27,r30,12                                              // r27 <- destination page number
 1218 copypv_modloop:
 1219                 mr              r3,r27                                                  // r3 <- destination page number                                
 1220                 la              r4,FM_ARG0+0x18(r1)                             // r4 <- unsigned int *pindex
 1221                 bl              EXT(mapping_phys_lookup)                // see if page is really there
 1222                 mr.             r3,r3                                                   // is it?
 1223                 beq--   copypv_modend                                   // nope, break out of modify loop
 1224                 mr              r3,r27                                                  // r3 <- destination page number
 1225                 bl              EXT(mapping_set_mod)                    // set page changed status
 1226                 subi    r26,r26,1                                               // decrement page count
 1227                 cmpwi   r26,0                                                   // done yet?
 1228                 bgt             copypv_modloop                                  // nope, iterate
 1229 copypv_modend:
 1230                 ld              r29,FM_ARG0+0x30(r1)                    // restore 64-bit r29
 1231 copypv_nomod:
 1232                 bt              pvNoRefSrc,copypv_done                  // skip source update if not requested
 1233 copypv_debugref:
 1234                 li              r26,1                                                   // r26 <- 4K-page count                                         
 1235                 mr              r27,r31                                                 // r27 <- byte count
 1236                 rlwinm  r3,r29,0,20,31                                  // does source cross a page boundary?
 1237                 subfic  r3,r3,4096                                              // r3 <- bytes left in source's first page
 1238                 cmplw   r3,r27                                                  // compare with byte count
 1239                 blt             copypv_refnox                                   // skip if not crossing case
 1240                 subf    r27,r3,r27                                              // r27 <- byte count less initial fragment
 1241                 addi    r26,r26,1                                               // increment page count
 1242 copypv_refnox:
 1243                 srdi    r3,r27,12                                               // pages to update (not including crosser)
 1244                 add             r26,r26,r3                                              // add in crosser
 1245                 srdi    r27,r29,12                                              // r27 <- source page number
 1246 copypv_refloop:
 1247                 mr              r3,r27                                                  // r3 <- source page number
 1248                 la              r4,FM_ARG0+0x18(r1)                             // r4 <- unsigned int *pindex
 1249                 bl              EXT(mapping_phys_lookup)                // see if page is really there
 1250                 mr.             r3,r3                                                   // is it?
 1251                 beq--   copypv_done                                             // nope, break out of reference loop
 1252                 mr              r3,r27                                                  // r3 <- source page number
 1253                 bl              EXT(mapping_set_ref)                    // set page referenced status
 1254                 subi    r26,r26,1                                               // decrement page count
 1255                 cmpwi   r26,0                                                   // done yet?
 1256                 bgt             copypv_refloop                                  // nope, iterate
 1257                 
 1258 // Return, indicating success.
 1259 copypv_done:
 1260 copypv_zero:
 1261                 li              r3,0                                                    // our efforts were crowned with success
 1262 
 1263 // Pop frame, restore caller's non-volatiles, clear recovery routine pointer.
 1264 copypv_return:
 1265                 mfsprg  r9,1                                                    // get current thread's stuff
 1266                 lwz             r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
 1267                                                                                                 // get return address
 1268                 lwz             r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
 1269                                                                                                 // get non-volatile cr2 and cr3
 1270                 lwz             r26,FM_ARG0+0x00(r1)                    // restore non-volatile r26
 1271                 lwz             r27,FM_ARG0+0x04(r1)                    // restore non-volatile r27
 1272                 mtlr    r0                                                              // restore return address
 1273                 lwz             r28,FM_ARG0+0x08(r1)                    // restore non-volatile r28
 1274                 mtcrf   0x20,r4                                                 // restore non-volatile cr2
 1275                 mtcrf   0x10,r4                                                 // restore non-volatile cr3
 1276                 lwz             r11,FM_ARG0+0x20(r1)                    // get saved error callback
 1277                 lwz             r29,FM_ARG0+0x0C(r1)                    // restore non-volatile r29
 1278                 lwz             r30,FM_ARG0+0x10(r1)                    // restore non-volatile r30
 1279                 lwz             r31,FM_ARG0+0x14(r1)                    // restore non-volatile r31
 1280                 stw             r11,THREAD_RECOVER(r9)                  // restore our error callback
 1281                 lwz             r1,0(r1)                                                // release stack frame
 1282                                                                                                 
 1283                 blr                                                                             // y'all come back now
 1284 
 1285 // Invalid argument handler.
 1286 copypv_einval:
 1287                 li              r3,EINVAL                                               // invalid argument
 1288                 b               copypv_return                                   // return
 1289 
 1290 // Error encountered during bcopy or bcopy_nc.          
 1291 copypv_error:
 1292                 mfmsr   r3                                                              // get current msr
 1293                 rldicl  r3,r3,0,MSR_SF_BIT+1                    // clear SF bit in our copy
 1294                 mtmsrd  r3                                                              // leave 64-bit mode
 1295                 li              r3,EFAULT                                               // it was all his fault
 1296                 b               copypv_return                                   // return
