FreeBSD/Linux Kernel Cross Reference
sys/i86pc/ml/locore.s


    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or http://www.opensolaris.org/os/licensing.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 /*
   23  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
   24  */
   25 
   26 /*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
   27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
   28 /*        All Rights Reserved                                   */
   29 
   30 /*      Copyright (c) 1987, 1988 Microsoft Corporation          */
   31 /*        All Rights Reserved                                   */
   32 
   33 
   34 #include <sys/asm_linkage.h>
   35 #include <sys/asm_misc.h>
   36 #include <sys/regset.h>
   37 #include <sys/privregs.h>
   38 #include <sys/psw.h>
   39 #include <sys/reboot.h>
   40 #include <sys/x86_archext.h>
   41 #include <sys/machparam.h>
   42 
   43 #if defined(__lint)
   44 
   45 #include <sys/types.h>
   46 #include <sys/thread.h>
   47 #include <sys/systm.h>
   48 #include <sys/lgrp.h>
   49 #include <sys/regset.h>
   50 #include <sys/link.h>
   51 #include <sys/bootconf.h>
   52 #include <sys/bootsvcs.h>
   53 
   54 #else   /* __lint */
   55 
   56 #include <sys/segments.h>
   57 #include <sys/pcb.h>
   58 #include <sys/trap.h>
   59 #include <sys/ftrace.h>
   60 #include <sys/traptrace.h>
   61 #include <sys/clock.h>
   62 #include <sys/cmn_err.h>
   63 #include <sys/pit.h>
   64 #include <sys/panic.h>
   65 
   66 #if defined(__xpv)
   67 #include <sys/hypervisor.h>
   68 #endif
   69 
   70 #include "assym.h"
   71 
   72 /*
   73  * Our assumptions:
   74  *      - We are running in protected-paged mode.
   75  *      - Interrupts are disabled.
    76  *      - The GDT and IDT are the caller's; we need our own copies.
   77  *      - The kernel's text, initialized data and bss are mapped.
   78  *
   79  * Our actions:
   80  *      - Save arguments
   81  *      - Initialize our stack pointer to the thread 0 stack (t0stack)
   82  *        and leave room for a phony "struct regs".
   83  *      - Our GDT and IDT need to get munged.
   84  *      - Since we are using the boot's GDT descriptors, we need
   85  *        to copy them into our GDT before we switch to ours.
   86  *      - We start using our GDT by loading correct values in the
   87  *        selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
   88  *        gs=KGS_SEL).
   89  *      - The default LDT entry for syscall is set.
   90  *      - We load the default LDT into the hardware LDT register.
   91  *      - We load the default TSS into the hardware task register.
   92  *      - Check for cpu type, i.e. 486 vs. P5 vs. P6 etc.
   93  *      - mlsetup(%esp) gets called.
   94  *      - We change our appearance to look like the real thread 0.
    95  *        (NOTE: making ourselves a real thread may be a no-op)
   96  *      - main() gets called.  (NOTE: main() never returns).
   97  *
   98  * NOW, the real code!
   99  */
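The action list above, condensed into a C-style sketch for orientation (all
helper names here are invented; the real work below must be assembly, since
no C environment exists yet):

        void
        _locore_start(/* boot services, bootops */)
        {
                save_boot_args();                       /* sysp, bootops */
                /* top of t0stack, minus room for a phony struct regs */
                sp = t0stack + DEFAULTSTKSZ - sizeof (struct regs);
                switch_to_our_gdt_idt_ldt_tss();        /* copy boot's GDT */
                identify_cpu();                         /* 486 vs. P5 vs. P6 */
                mlsetup((struct regs *)sp);
                become_thread0();                       /* may be a no-op */
                main();                                 /* never returns */
                panic("main() returned");
        }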
  100         /*
  101          * The very first thing in the kernel's text segment must be a jump
  102          * to the os/fakebop.c startup code.
  103          */
  104         .text
  105         jmp     _start
  106 
  107         /*
  108          * Globals:
  109          */
  110         .globl  _locore_start
  111         .globl  mlsetup
  112         .globl  main
  113         .globl  panic
  114         .globl  t0stack
  115         .globl  t0
  116         .globl  sysp
  117         .globl  edata
  118 
  119         /*
  120          * call back into boot - sysp (bootsvcs.h) and bootops (bootconf.h)
  121          */
  122         .globl  bootops
  123         .globl  bootopsp
  124 
  125         /*
  126          * NOTE: t0stack should be the first thing in the data section so that
  127          * if it ever overflows, it will fault on the last kernel text page.
  128          */
  129         .data
  130         .comm   t0stack, DEFAULTSTKSZ, 32
  131         .comm   t0, 4094, 32
  132 
  133 #endif  /* __lint */
  134 
  135 
  136 #if defined(__amd64)
  137 
  138 #if defined(__lint)
  139 
  140 /* ARGSUSED */
  141 void
  142 _locore_start(struct boot_syscalls *sysp, ulong_t rsi, struct bootops *bop)
  143 {}
  144 
  145 #else   /* __lint */
  146 
  147         /*
  148          * kobj_init() vectors us back to here with (note) a slightly different
  149          * set of arguments than _start is given (see lint prototypes above).
  150          *
  151          * XXX  Make this less vile, please.
  152          */
  153         ENTRY_NP(_locore_start)
  154 
  155         /*
  156          * %rdi = boot services (should die someday)
  157          * %rdx = bootops
  158          * end
  159          */     
  160 
  161         leaq    edata(%rip), %rbp       /* reference edata for ksyms */
  162         movq    $0, (%rbp)              /* limit stack back trace */
  163 
  164         /*
  165          * Initialize our stack pointer to the thread 0 stack (t0stack)
  166          * and leave room for a "struct regs" for lwp0.  Note that the
  167          * stack doesn't actually align to a 16-byte boundary until just
  168          * before we call mlsetup because we want to use %rsp to point at
  169          * our regs structure.
  170          */
  171         leaq    t0stack(%rip), %rsp
  172         addq    $_CONST(DEFAULTSTKSZ - REGSIZE), %rsp
  173 #if (REGSIZE & 15) == 0
  174         subq    $8, %rsp
  175 #endif
  176         /*
  177          * Save call back for special x86 boot services vector
  178          */     
  179         movq    %rdi, sysp(%rip)
  180 
  181         movq    %rdx, bootops(%rip)             /* save bootops */
  182         movq    $bootops, bootopsp(%rip)
  183 
  184         /*
  185          * Save arguments and flags, if only for debugging ..
  186          */
  187         movq    %rdi, REGOFF_RDI(%rsp)
  188         movq    %rsi, REGOFF_RSI(%rsp)
  189         movq    %rdx, REGOFF_RDX(%rsp)
  190         movq    %rcx, REGOFF_RCX(%rsp)
  191         movq    %r8, REGOFF_R8(%rsp)
  192         movq    %r9, REGOFF_R9(%rsp)
  193         pushf
  194         popq    %r11
  195         movq    %r11, REGOFF_RFL(%rsp)
  196 
  197 #if !defined(__xpv)
  198         /*
  199          * Enable write protect and alignment check faults.
  200          */
  201         movq    %cr0, %rax
  202         orq     $_CONST(CR0_WP|CR0_AM), %rax
  203         andq    $_BITNOT(CR0_WT|CR0_CE), %rax
  204         movq    %rax, %cr0
  205 #endif  /* __xpv */
  206 
  207         /*
  208          * (We just assert this works by virtue of being here) 
  209          */
  210         bts     $X86FSET_CPUID, x86_featureset(%rip)
  211 
  212         /*
  213          * mlsetup() gets called with a struct regs as argument, while
  214          * main takes no args and should never return.
  215          */
  216         xorl    %ebp, %ebp
  217         movq    %rsp, %rdi
  218         pushq   %rbp
  219         /* (stack pointer now aligned on 16-byte boundary right here) */
  220         movq    %rsp, %rbp
  221         call    mlsetup
  222         call    main
  223         /* NOTREACHED */
  224         leaq    __return_from_main(%rip), %rdi
  225         xorl    %eax, %eax
  226         call    panic
  227         SET_SIZE(_locore_start)
  228 
   229 #endif  /* __lint */
   230 #endif  /* __amd64 */
  231 
  232 #if !defined(__lint)
  233 
  234 __return_from_main:
  235         .string "main() returned"
  236 __unsupported_cpu:
  237         .string "486 style cpu detected - no longer supported!"
  238 
  239 #endif  /* !__lint */
  240 
  241 #if !defined(__amd64)
  242 
  243 #if defined(__lint)
  244 
  245 /* ARGSUSED */
  246 void
  247 _locore_start(struct boot_syscalls *sysp, struct bootops *bop)
  248 {}
  249 
  250 #else   /* __lint */
  251 
  252         /*
  253          * kobj_init() vectors us back to here with (note) a slightly different
  254          * set of arguments than _start is given (see lint prototypes above).
  255          *
  256          * XXX  Make this less vile, please.
  257          */
  258         ENTRY_NP(_locore_start)
  259 
  260         /*
  261          *      %ecx = boot services (should die someday)
  262          *      %ebx = bootops
  263          */     
  264         mov     $edata, %ebp            / edata needs to be defined for ksyms
  265         movl    $0, (%ebp)              / limit stack back trace
  266 
  267         /*
  268          * Initialize our stack pointer to the thread 0 stack (t0stack)
  269          * and leave room for a phony "struct regs".
  270          */
  271         movl    $t0stack + DEFAULTSTKSZ - REGSIZE, %esp
  272 
  273         /*
  274          * Save call back for special x86 boot services vector
  275          */
  276         mov     %ecx, sysp              / save call back for boot services
  277 
  278         mov     %ebx, bootops           / save bootops
  279         movl    $bootops, bootopsp
  280 
  281 
  282         /*
  283          * Save all registers and flags
  284          */
  285         pushal  
  286         pushfl
  287 
  288 #if !defined(__xpv)
  289         /*
  290          * Override bios settings and enable write protect and
  291          * alignment check faults.
  292          */
  293         movl    %cr0, %eax
  294 
  295         /*
  296          * enable WP for detecting faults, and enable alignment checking.
  297          */
  298         orl     $_CONST(CR0_WP|CR0_AM), %eax
  299         andl    $_BITNOT(CR0_WT|CR0_CE), %eax
  300         movl    %eax, %cr0              / set the cr0 register correctly and
  301                                         / override the BIOS setup
  302 
  303         /*
  304          * If bit 21 of eflags can be flipped, then cpuid is present
  305          * and enabled.
  306          */
  307         pushfl
  308         popl    %ecx
  309         movl    %ecx, %eax
  310         xorl    $PS_ID, %eax            / try complemented bit
  311         pushl   %eax
  312         popfl
  313         pushfl
  314         popl    %eax
  315         cmpl    %eax, %ecx
  316         jne     have_cpuid
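For reference, the ID-bit probe above can be written as a self-contained C
function (a sketch using GCC/Clang inline assembly; not part of this file):

        /* Nonzero if bit 21 of EFLAGS (PS_ID) can be toggled, i.e. the
         * cpuid instruction is present and enabled. */
        static int
        have_cpuid_p(void)
        {
                unsigned long f0, f1;

                __asm__ __volatile__(
                    "pushf\n\t"
                    "pop %0\n\t"        /* f0 = current flags */
                    "mov %0, %1\n\t"
                    "xor %2, %1\n\t"    /* flip the ID bit */
                    "push %1\n\t"
                    "popf\n\t"          /* try to install it */
                    "pushf\n\t"
                    "pop %1\n\t"        /* f1 = what the cpu kept */
                    "push %0\n\t"
                    "popf"              /* restore the original flags */
                    : "=&r" (f0), "=&r" (f1)
                    : "ri" ((unsigned long)1 << 21)
                    : "cc");
                return (((f0 ^ f1) & ((unsigned long)1 << 21)) != 0);
        }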
  317 
  318         /*
  319          * cpuid may be disabled on Cyrix, try to detect Cyrix by the 5/2 test
  320          * div does not modify the cc flags on Cyrix, even though this may
  321          * also be true for other vendors, this is generally true only for
  322          * newer models from those vendors that support and do not disable
  323          * cpuid (usually because cpuid cannot be disabled)
  324          */
  325 
  326         /*
  327          * clear cc flags
  328          */
  329         xorb    %ah, %ah
  330         sahf
  331 
  332         /*
  333          * perform 5/2 test
  334          */
  335         movw    $5, %ax
  336         movb    $2, %bl
  337         divb    %bl
  338 
  339         lahf
  340         cmpb    $2, %ah
  341         jne     cpu_486
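The 5/2 test as the same kind of self-contained C sketch (again GCC/Clang
inline assembly; hypothetical, not part of this file). Bit 1 of the FLAGS
image is hardwired to 1, so after sahf clears the arithmetic flags, an
untouched flags image reads back through lahf as exactly 2:

        /* Nonzero if divb left the condition codes untouched, the
         * behaviour that suggests a Cyrix part. */
        static int
        div_preserves_flags(void)
        {
                unsigned char fl;

                __asm__ __volatile__(
                    "xorb %%ah, %%ah\n\t"
                    "sahf\n\t"                  /* clear cc flags */
                    "movw $5, %%ax\n\t"
                    "movb $2, %%bl\n\t"
                    "divb %%bl\n\t"             /* the 5/2 test */
                    "lahf\n\t"                  /* flags image -> %ah */
                    "movb %%ah, %0"
                    : "=q" (fl) : : "eax", "ebx", "cc");
                return (fl == 2);
        }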
  342 
  343         /*
  344          * div did not modify the cc flags, chances are the vendor is Cyrix
  345          * assume the vendor is Cyrix and use the CCR's to enable cpuid
  346          */
  347         .set    CYRIX_CRI, 0x22         / CR Index Register
  348         .set    CYRIX_CRD, 0x23         / CR Data Register
  349 
  350         .set    CYRIX_CCR3, 0xc3        / Config Control Reg 3
  351         .set    CYRIX_CCR4, 0xe8        / Config Control Reg 4
  352         .set    CYRIX_DIR0, 0xfe        / Device Identification Reg 0
  353         .set    CYRIX_DIR1, 0xff        / Device Identification Reg 1
  354 
  355         /*
  356          * even if the cpu vendor is Cyrix and the motherboard/chipset
  357          * vendor decided to ignore lines A1-A4 for I/O addresses, I/O port
  358          * 0x21 corresponds with 0x23 and since 0x22 is still untouched,
  359          * the reads and writes of 0x21 are guaranteed to be off-chip of
  360          * the cpu
  361          */
  362 
  363         /*
  364          * enable read of ISR at I/O port 0x20
  365          */
  366         movb    $0xb, %al
  367         outb    $MCMD_PORT
  368 
  369         /*
  370          * read IMR and store in %bl
  371          */
  372         inb     $MIMR_PORT
  373         movb    %al, %bl
  374 
  375         /*
  376          * mask out all interrupts so that ISR will not change
  377          */
  378         movb    $0xff, %al
  379         outb    $MIMR_PORT
  380 
  381         /*
  382          * reads of I/O port 0x22 on Cyrix are always directed off-chip
  383          * make use of I/O pull-up to test for an unknown device on 0x22
  384          */
  385         inb     $CYRIX_CRI
  386         cmpb    $0xff, %al
  387         je      port_22_free
  388 
  389         /*
  390          * motherboard/chipset vendor may be ignoring line A1 of I/O address
  391          */
  392         movb    %al, %cl
  393 
  394         /*
  395          * if the ISR and the value read from 0x22 do not match then we have
  396          * detected some unknown device, probably a chipset, at 0x22
  397          */
  398         inb     $MCMD_PORT
  399         cmpb    %al, %cl
  400         jne     restore_IMR
  401 
  402 port_22_free:
  403         /*
  404          * now test to see if some unknown device is using I/O port 0x23
  405          *
  406          * read the external I/O port at 0x23
  407          */
  408         inb     $CYRIX_CRD
  409 
  410         /*
  411          * Test for pull-up at 0x23 or if I/O address line A1 is being ignored.
  412          * IMR is 0xff so both tests are performed simultaneously.
  413          */
  414         cmpb    $0xff, %al
  415         jne     restore_IMR
  416 
  417         /*
  418          * We are a Cyrix part. In case we are some model of Cx486 or a Cx586,
  419          * record the type and fix it later if not.
  420          */
  421         movl    $X86_VENDOR_Cyrix, x86_vendor
  422         movl    $X86_TYPE_CYRIX_486, x86_type
  423 
  424         /*
  425          * Try to read CCR3. All Cyrix cpu's which support cpuid have CCR3.
  426          *
  427          * load CCR3 index into CCR index register
  428          */
  429 
  430         movb    $CYRIX_CCR3, %al
  431         outb    $CYRIX_CRI
  432 
  433         /*
  434          * If we are not a Cyrix cpu, then we have performed an external I/O
  435          * cycle. If the CCR index was not valid for this Cyrix model, we may
  436          * have performed an external I/O cycle as well. In these cases and
  437          * if the motherboard/chipset vendor ignores I/O address line A1,
  438          * then the PIC will have IRQ3 set at the lowest priority as a side     
   439          * effect of the above outb. We are reasonably confident that there
  440          * is not an unknown device on I/O port 0x22, so there should have been
  441          * no unpredictable side-effect of the above outb.
  442          */
  443 
  444         /*
  445          * read CCR3
  446          */
  447         inb     $CYRIX_CRD
  448 
  449         /*
  450          * If we are not a Cyrix cpu the inb above produced an external I/O
   451          * cycle. If we are a Cyrix model that does not support CCR3, we
  452          * produced an external I/O cycle. In all known Cyrix models 6x86 and
  453          * above, bit 3 of CCR3 is reserved and cannot be set to 1. In all
  454          * Cyrix models prior to the 6x86 that supported CCR3, bits 4-7 are
  455          * reserved as well. It is highly unlikely that CCR3 contains the value
   456          * 0xff. If we read back 0xff (pull-up at 0x23, or the IMR), we deduce
   457          * that we are not a Cyrix with support for cpuid.
  458          */
  459         cmpb    $0xff, %al
  460         je      restore_PIC
  461 
  462         /*
  463          * There exist 486 ISA Cyrix chips that support CCR3 but do not support
  464          * DIR0 and DIR1. If we try to read DIR0, we may generate external I/O
  465          * cycles, the exact behavior is model specific and undocumented.
  466          * Unfortunately these external I/O cycles may confuse some PIC's beyond
   467          * recovery. Fortunately we can use the following undocumented trick:
  468          * if bit 4 of CCR3 can be toggled, then DIR0 and DIR1 are supported.
  469          * Pleasantly MAPEN contains bit 4 of CCR3, so this trick is guaranteed
  470          * to work on all Cyrix cpu's which support cpuid.
  471          */
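The read-toggle-verify dance implemented below, condensed into a C sketch
(assuming illumos-style outb(port, value) and inb(port) helpers; the
constants mirror the .set definitions above):

        #include <stdint.h>

        #define CYRIX_CRI       0x22            /* CR Index Register */
        #define CYRIX_CRD       0x23            /* CR Data Register */
        #define CYRIX_CCR3      0xc3

        /* Nonzero if CCR3 bit 4 (MAPEN) can be toggled, which implies
         * DIR0 and DIR1 are supported. */
        static int
        cyrix_has_dir_regs(void)
        {
                uint8_t ccr3, readback;

                outb(CYRIX_CRI, CYRIX_CCR3);
                ccr3 = inb(CYRIX_CRD);          /* save CCR3 */
                outb(CYRIX_CRI, CYRIX_CCR3);
                outb(CYRIX_CRD, ccr3 ^ 0x10);   /* write bit 4 flipped */
                outb(CYRIX_CRI, CYRIX_CCR3);
                readback = inb(CYRIX_CRD);      /* read it back */
                outb(CYRIX_CRI, CYRIX_CCR3);
                outb(CYRIX_CRD, ccr3);          /* restore CCR3 */
                return (((readback ^ ccr3) & 0x10) != 0);
        }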
  472         movb    %al, %dl
  473         xorb    $0x10, %dl
  474         movb    %al, %cl
  475 
  476         /*
   477          * write CCR3 back with bit 4 toggled
  478          */
  479         movb    $CYRIX_CCR3, %al
  480         outb    $CYRIX_CRI
  481 
  482         movb    %dl, %al
  483         outb    $CYRIX_CRD
  484 
  485         /*
  486          * read CCR3
  487          */
  488         movb    $CYRIX_CCR3, %al
  489         outb    $CYRIX_CRI
  490         inb     $CYRIX_CRD
  491         movb    %al, %dl
  492 
  493         /*
  494          * restore CCR3
  495          */
  496         movb    $CYRIX_CCR3, %al
  497         outb    $CYRIX_CRI
  498 
  499         movb    %cl, %al
  500         outb    $CYRIX_CRD
  501 
  502         /*
  503          * if bit 4 was not toggled DIR0 and DIR1 are not supported in which
  504          * case we do not have cpuid anyway
  505          */
  506         andb    $0x10, %al
  507         andb    $0x10, %dl
  508         cmpb    %al, %dl
  509         je      restore_PIC
  510 
  511         /*
  512          * read DIR0
  513          */
  514         movb    $CYRIX_DIR0, %al
  515         outb    $CYRIX_CRI
  516         inb     $CYRIX_CRD
  517 
  518         /*
  519          * test for pull-up
  520          */
  521         cmpb    $0xff, %al
  522         je      restore_PIC
  523 
  524         /*
  525          * Values of 0x20-0x27 in DIR0 are currently reserved by Cyrix for
  526          * future use. If Cyrix ever produces a cpu that supports cpuid with
  527          * these ids, the following test will have to change. For now we remain
   528          * pessimistic since the formats of the CCR's may be different then.
  529          *
  530          * test for at least a 6x86, to see if we support both MAPEN and CPUID
  531          */
  532         cmpb    $0x30, %al
  533         jb      restore_IMR
  534 
  535         /*
  536          * enable MAPEN
  537          */
  538         movb    $CYRIX_CCR3, %al
  539         outb    $CYRIX_CRI
  540 
  541         andb    $0xf, %cl
  542         movb    %cl, %al
  543         orb     $0x10, %al
  544         outb    $CYRIX_CRD
  545 
  546         /*
  547          * select CCR4
  548          */
  549         movb    $CYRIX_CCR4, %al
  550         outb    $CYRIX_CRI
  551 
  552         /*
  553          * read CCR4
  554          */
  555         inb     $CYRIX_CRD
  556 
  557         /*
  558          * enable cpuid
  559          */
  560         orb     $0x80, %al
  561         movb    %al, %dl
  562 
  563         /*
  564          * select CCR4
  565          */
  566         movb    $CYRIX_CCR4, %al
  567         outb    $CYRIX_CRI
  568 
  569         /*
  570          * write CCR4
  571          */
  572         movb    %dl, %al
  573         outb    $CYRIX_CRD
  574 
  575         /*
  576          * select CCR3
  577          */
  578         movb    $CYRIX_CCR3, %al
  579         outb    $CYRIX_CRI
  580 
  581         /*
  582          * disable MAPEN and write CCR3
  583          */
  584         movb    %cl, %al
  585         outb    $CYRIX_CRD
  586 
  587         /*
  588          * restore IMR
  589          */
  590         movb    %bl, %al
  591         outb    $MIMR_PORT
  592 
  593         /*
  594          * test to see if cpuid available
  595          */
  596         pushfl
  597         popl    %ecx
  598         movl    %ecx, %eax
  599         xorl    $PS_ID, %eax            / try complemented bit
  600         pushl   %eax
  601         popfl
  602         pushfl
  603         popl    %eax
  604         cmpl    %eax, %ecx
  605         jne     have_cpuid
  606         jmp     cpu_486
  607 
  608 restore_PIC:
  609         /*
  610          * In case the motherboard/chipset vendor is ignoring line A1 of the
  611          * I/O address, we set the PIC priorities to sane values.
  612          */
  613         movb    $0xc7, %al      / irq 7 lowest priority
  614         outb    $MCMD_PORT
  615 
  616 restore_IMR:
  617         movb    %bl, %al
  618         outb    $MIMR_PORT
  619         jmp     cpu_486
  620 
  621 have_cpuid:
  622         /*
  623          * cpuid instruction present
  624          */
  625         bts     $X86FSET_CPUID, x86_featureset  / Just to set; Ignore the CF
  626         movl    $0, %eax
  627         cpuid
  628 
  629         movl    %ebx, cpu_vendor
  630         movl    %edx, cpu_vendor+4
  631         movl    %ecx, cpu_vendor+8
  632 
  633         /*
  634          * early cyrix cpus are somewhat strange and need to be
  635          * probed in curious ways to determine their identity
  636          */
  637 
  638         leal    cpu_vendor, %esi
  639         leal    CyrixInstead, %edi
  640         movl    $12, %ecx
  641         repz
  642           cmpsb
  643         je      vendor_is_cyrix
  644 
  645         / let mlsetup()/cpuid_pass1() handle everything else in C
  646 
  647         jmp     cpu_done
  648 
  649 is486:
  650         /*
  651          * test to see if a useful cpuid
  652          */
  653         testl   %eax, %eax
  654         jz      isa486
  655 
  656         movl    $1, %eax
  657         cpuid
  658 
  659         movl    %eax, %ebx
  660         andl    $0xF00, %ebx
  661         cmpl    $0x400, %ebx
  662         je      isa486
  663 
  664         rep;    ret     /* use 2 byte return instruction */
  665                         /* AMD Software Optimization Guide - Section 6.2 */
  666 isa486:
  667         /*
  668          * lose the return address
  669          */
  670         popl    %eax
  671         jmp     cpu_486
  672 
  673 vendor_is_cyrix:
  674         call    is486
  675 
  676         /*
  677          * Processor signature and feature flags for Cyrix are insane.
  678          * BIOS can play with semi-documented registers, so cpuid must be used
  679          * cautiously. Since we are Cyrix that has cpuid, we have DIR0 and DIR1
  680          * Keep the family in %ebx and feature flags in %edx until not needed
  681          */
  682 
  683         /*
  684          * read DIR0
  685          */
  686         movb    $CYRIX_DIR0, %al
  687         outb    $CYRIX_CRI
  688         inb     $CYRIX_CRD
  689 
  690         /*
  691          * First we handle the cases where we are a 6x86 or 6x86L.
  692          * The 6x86 is basically a 486, the only reliable bit in the
  693          * feature flags is for FPU. The 6x86L is better, unfortunately
  694          * there is no really good way to distinguish between these two
  695          * cpu's. We are pessimistic and when in doubt assume 6x86.
  696          */
  697 
  698         cmpb    $0x40, %al
  699         jae     maybeGX
  700 
  701         /*
  702          * We are an M1, either a 6x86 or 6x86L.
  703          */
  704         cmpb    $0x30, %al
  705         je      maybe6x86L
  706         cmpb    $0x31, %al
  707         je      maybe6x86L
  708         cmpb    $0x34, %al
  709         je      maybe6x86L
  710         cmpb    $0x35, %al
  711         je      maybe6x86L
  712 
  713         /*
  714          * although it is possible that we are a 6x86L, the cpu and
  715          * documentation are so buggy, we just do not care.
  716          */
  717         jmp     likely6x86
  718 
  719 maybe6x86L:
  720         /*
  721          *  read DIR1
  722          */
  723         movb    $CYRIX_DIR1, %al
  724         outb    $CYRIX_CRI
  725         inb     $CYRIX_CRD
  726         cmpb    $0x22, %al
  727         jb      likely6x86
  728 
  729         /*
  730          * We are a 6x86L, or at least a 6x86 with honest cpuid feature flags
  731          */
  732         movl    $X86_TYPE_CYRIX_6x86L, x86_type
  733         jmp     coma_bug
  734 
  735 likely6x86:
  736         /*
  737          * We are likely a 6x86, or a 6x86L without a way of knowing
  738          *
  739          * The 6x86 has NO Pentium or Pentium Pro compatible features even
  740          * though it claims to be a Pentium Pro compatible!
  741          *
  742          * The 6x86 core used in the 6x86 may have most of the Pentium system
  743          * registers and largely conform to the Pentium System Programming
  744          * Reference. Documentation on these parts is long gone. Treat it as
  745          * a crippled Pentium and hope for the best.
  746          */
  747 
  748         movl    $X86_TYPE_CYRIX_6x86, x86_type
  749         jmp     coma_bug
  750 
  751 maybeGX:
  752         /*
  753          * Now we check whether we are a MediaGX or GXm. We have particular
  754          * reason for concern here. Even though most of the GXm's
  755          * report having TSC in the cpuid feature flags, the TSC may be
  756          * horribly broken. What is worse, is that MediaGX's are basically
  757          * 486's while the good GXm's are more like Pentium Pro's!
  758          */
  759 
  760         cmpb    $0x50, %al
  761         jae     maybeM2
  762 
  763         /*
  764          * We are either a MediaGX (sometimes called a Gx86) or GXm
  765          */
  766 
  767         cmpb    $41, %al
  768         je      maybeMediaGX
  769 
  770         cmpb    $44, %al
  771         jb      maybeGXm
  772 
  773         cmpb    $47, %al
  774         jbe     maybeMediaGX
  775 
  776         /*
  777          * We do not honestly know what we are, so assume a MediaGX
  778          */
  779         jmp     media_gx
  780 
  781 maybeGXm:
  782         /*
  783          * It is still possible we are either a MediaGX or GXm, trust cpuid
  784          * family should be 5 on a GXm
  785          */
  786         cmpl    $0x500, %ebx
  787         je      GXm
  788 
  789         /*
  790          * BIOS/Cyrix might set family to 6 on a GXm
  791          */
  792         cmpl    $0x600, %ebx
  793         jne     media_gx
  794 
  795 GXm:
  796         movl    $X86_TYPE_CYRIX_GXm, x86_type
  797         jmp     cpu_done
  798 
  799 maybeMediaGX:
  800         /*
  801          * read DIR1
  802          */
  803         movb    $CYRIX_DIR1, %al
  804         outb    $CYRIX_CRI
  805         inb     $CYRIX_CRD
  806 
  807         cmpb    $0x30, %al
  808         jae     maybeGXm
  809 
  810         /*
  811          * we are a MediaGX for which we do not trust cpuid
  812          */
  813 media_gx:
  814         movl    $X86_TYPE_CYRIX_MediaGX, x86_type
  815         jmp     cpu_486
  816 
  817 maybeM2:
  818         /*
  819          * Now we check whether we are a 6x86MX or MII. These cpu's are
  820          * virtually identical, but we care because for the 6x86MX, we
  821          * must work around the coma bug. Also for 6x86MX prior to revision
  822          * 1.4, the TSC may have serious bugs.
  823          */
  824 
  825         cmpb    $0x60, %al
  826         jae     maybeM3
  827 
  828         /*
  829          * family should be 6, but BIOS/Cyrix might set it to 5
  830          */
  831         cmpl    $0x600, %ebx
  832         ja      cpu_486
  833 
  834         /*
  835          *  read DIR1
  836          */
  837         movb    $CYRIX_DIR1, %al
  838         outb    $CYRIX_CRI
  839         inb     $CYRIX_CRD
  840 
  841         cmpb    $0x8, %al
  842         jb      cyrix6x86MX
  843         cmpb    $0x80, %al
  844         jb      MII
  845 
  846 cyrix6x86MX:
  847         /*
  848          * It is altogether unclear how the revision stamped on the cpu
  849          * maps to the values in DIR0 and DIR1. Just assume TSC is broken.
  850          */
  851         movl    $X86_TYPE_CYRIX_6x86MX, x86_type
  852         jmp     coma_bug
  853 
  854 MII:
  855         movl    $X86_TYPE_CYRIX_MII, x86_type
  856 likeMII:
  857         jmp     cpu_done
  858 
  859 maybeM3:
  860         /*
  861          * We are some chip that we cannot identify yet, an MIII perhaps.
  862          * We will be optimistic and hope that the chip is much like an MII,
  863          * and that cpuid is sane. Cyrix seemed to have gotten it right in
  864          * time for the MII, we can only hope it stayed that way.
  865          * Maybe the BIOS or Cyrix is trying to hint at something
  866          */
  867         cmpl    $0x500, %ebx
  868         je      GXm
  869 
  870         cmpb    $0x80, %al
  871         jae     likelyM3
  872 
  873         /*
  874          * Just test for the features Cyrix is known for
  875          */
  876 
  877         jmp     MII
  878 
  879 likelyM3:
  880         /*
  881          * DIR0 with values from 0x80 to 0x8f indicates a VIA Cyrix III, aka
  882          * the Cyrix MIII. There may be parts later that use the same ranges
  883          * for DIR0 with special values in DIR1, maybe the VIA CIII, but for
  884          * now we will call anything with a DIR0 of 0x80 or higher an MIII.
  885          * The MIII is supposed to support large pages, but we will believe
  886          * it when we see it. For now we just enable and test for MII features.
  887          */     
  888         movl    $X86_TYPE_VIA_CYRIX_III, x86_type
  889         jmp     likeMII
  890 
  891 coma_bug:
  892 
  893 /*
  894  * With NO_LOCK set to 0 in CCR1, the usual state that BIOS enforces, some
  895  * bus cycles are issued with LOCK# asserted. With NO_LOCK set to 1, all bus
  896  * cycles except page table accesses and interrupt ACK cycles do not assert
  897  * LOCK#. xchgl is an instruction that asserts LOCK# if NO_LOCK is set to 0.
  898  * Due to a bug in the cpu core involving over-optimization of branch
  899  * prediction, register renaming, and execution of instructions down both the
  900  * X and Y pipes for the xchgl instruction, short loops can be written that
  901  * never de-assert LOCK# from one invocation of the loop to the next, ad
  902  * infinitum. The undesirable effect of this situation is that interrupts are
  903  * not serviced. The ideal workaround to this bug would be to set NO_LOCK to
  904  * 1. Unfortunately bus cycles that would otherwise have asserted LOCK# no
  905  * longer do, unless they are page table accesses or interrupt ACK cycles.
  906  * With LOCK# not asserted, these bus cycles are now cached. This can cause
  907  * undesirable behaviour if the ARR's are not configured correctly. Solaris
  908  * does not configure the ARR's, nor does it provide any useful mechanism for
  909  * doing so, thus the ideal workaround is not viable. Fortunately, the only
  910  * known exploits for this bug involve the xchgl instruction specifically.
  911  * There is a group of undocumented registers on Cyrix 6x86, 6x86L, and
  912  * 6x86MX cpu's which can be used to specify one instruction as a serializing
  913  * instruction. With the xchgl instruction serialized, LOCK# is still
  914  * asserted, but it is the sole instruction for which LOCK# is asserted.
  915  * There is now some added penalty for the xchgl instruction, but the usual
  916  * bus locking is preserved. This ingenious workaround was discovered by
  917  * disassembling a binary provided by Cyrix as a workaround for this bug on
   918          * Windows, but it's not documented anywhere by Cyrix, nor is the bug actually
  919  * mentioned in any public errata! The only concern for this workaround is
  920  * that there may be similar undiscovered bugs with other instructions that
  921  * assert LOCK# that may be leveraged to similar ends. The fact that Cyrix
   922          * fixed this bug sometime late in 1997 and that no exploits other than
   923          * xchgl have been discovered is a good indication that this workaround is
  924  * reasonable.
  925  */     
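The shape of the known exploit is an ordinary xchgl-based spin loop; a
user-mode sketch, for illustration only:

        static volatile int slock;

        /* On an affected part, back-to-back xchgl's in a loop like this
         * can keep LOCK# asserted indefinitely, so interrupts are never
         * serviced; this is the "coma". */
        static void
        coma(void)
        {
                int v;

                for (;;) {
                        v = 1;
                        __asm__ __volatile__("xchgl %0, %1"
                            : "+r" (v), "+m" (slock));
                }
        }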
  926 
  927         .set    CYRIX_DBR0, 0x30        / Debug Register 0
  928         .set    CYRIX_DBR1, 0x31        / Debug Register 1
  929         .set    CYRIX_DBR2, 0x32        / Debug Register 2
  930         .set    CYRIX_DBR3, 0x33        / Debug Register 3
  931         .set    CYRIX_DOR, 0x3c         / Debug Opcode Register
  932 
  933         /*
  934          * What is known about DBR1, DBR2, DBR3, and DOR is that for normal
  935          * cpu execution DBR1, DBR2, and DBR3 are set to 0. To obtain opcode
  936          * serialization, DBR1, DBR2, and DBR3 are loaded with 0xb8, 0x7f,
  937          * and 0xff. Then, DOR is loaded with the one byte opcode.
  938          */
  939 
  940         /*
  941          * select CCR3
  942          */
  943         movb    $CYRIX_CCR3, %al
  944         outb    $CYRIX_CRI
  945 
  946         /*
  947          * read CCR3 and mask out MAPEN
  948          */
  949         inb     $CYRIX_CRD
  950         andb    $0xf, %al
  951 
  952         /*
  953          * save masked CCR3 in %ah
  954          */
  955         movb    %al, %ah
  956 
  957         /*
  958          * select CCR3
  959          */
  960         movb    $CYRIX_CCR3, %al
  961         outb    $CYRIX_CRI
  962 
  963         /*
  964          * enable MAPEN
  965          */
  966         movb    %ah, %al
  967         orb     $0x10, %al
  968         outb    $CYRIX_CRD
  969 
  970         /*
  971          * read DBR0
  972          */
  973         movb    $CYRIX_DBR0, %al
  974         outb    $CYRIX_CRI
  975         inb     $CYRIX_CRD
  976 
  977         /*
  978          * disable MATCH and save in %bh
  979          */
  980         orb     $0x80, %al
  981         movb    %al, %bh
  982 
  983         /*
  984          * write DBR0
  985          */
  986         movb    $CYRIX_DBR0, %al
  987         outb    $CYRIX_CRI
  988         movb    %bh, %al
  989         outb    $CYRIX_CRD
  990 
  991         /*
  992          * write DBR1
  993          */
  994         movb    $CYRIX_DBR1, %al 
  995         outb    $CYRIX_CRI
  996         movb    $0xf8, %al
  997         outb    $CYRIX_CRD
  998 
  999         /*
 1000          * write DBR2
 1001          */
 1002         movb    $CYRIX_DBR2, %al
 1003         outb    $CYRIX_CRI
 1004         movb    $0x7f, %al
 1005         outb    $CYRIX_CRD
 1006 
 1007         /*
 1008          * write DBR3
 1009          */
 1010         movb    $CYRIX_DBR3, %al
 1011         outb    $CYRIX_CRI
 1012         xorb    %al, %al
 1013         outb    $CYRIX_CRD
 1014 
 1015         /*
 1016          * write DOR
 1017          */
 1018         movb    $CYRIX_DOR, %al
 1019         outb    $CYRIX_CRI
 1020         movb    $0x87, %al
 1021         outb    $CYRIX_CRD
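(The 0x87 written to DOR is the one-byte opcode of xchgl (XCHG r/m32, r32),
i.e. the instruction being singled out for serialization.)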
 1022 
 1023         /*
 1024          * enable MATCH
 1025          */
 1026         movb    $CYRIX_DBR0, %al
 1027         outb    $CYRIX_CRI
 1028         movb    %bh, %al
 1029         andb    $0x7f, %al
 1030         outb    $CYRIX_CRD
 1031 
 1032         /*
 1033          * disable MAPEN
 1034          */
 1035         movb    $0xc3, %al
 1036         outb    $CYRIX_CRI
 1037         movb    %ah, %al
 1038         outb    $CYRIX_CRD
 1039 
 1040         jmp     cpu_done
 1041 
 1042 cpu_done:
 1043 
 1044         popfl                                   /* Restore original FLAGS */
 1045         popal                                   /* Restore all registers */
 1046 
 1047 #endif  /* !__xpv */
 1048 
 1049         /*
 1050          *  mlsetup(%esp) gets called.
 1051          */
 1052         pushl   %esp
 1053         call    mlsetup
 1054         addl    $4, %esp
 1055 
 1056         /*
 1057          * We change our appearance to look like the real thread 0.
  1058          * (NOTE: making ourselves a real thread may be a no-op)
 1059          * main() gets called.  (NOTE: main() never returns).
 1060          */
 1061         call    main
 1062         /* NOTREACHED */
 1063         pushl   $__return_from_main
 1064         call    panic
 1065 
 1066         /* NOTREACHED */
 1067 cpu_486:
 1068         pushl   $__unsupported_cpu
 1069         call    panic
 1070         SET_SIZE(_locore_start)
 1071 
 1072 #endif  /* __lint */
 1073 #endif  /* !__amd64 */
 1074 
 1075 
 1076 /*
 1077  *  For stack layout, see privregs.h
 1078  *  When cmntrap gets called, the error code and trap number have been pushed.
 1079  *  When cmntrap_pushed gets called, the entire struct regs has been pushed.
 1080  */
 1081 
 1082 #if defined(__lint)
 1083 
 1084 /* ARGSUSED */
 1085 void
 1086 cmntrap()
 1087 {}
 1088 
 1089 #else   /* __lint */
 1090 
 1091         .globl  trap            /* C handler called below */
 1092 
 1093 #if defined(__amd64)
 1094 
 1095         ENTRY_NP2(cmntrap, _cmntrap)
 1096 
 1097         INTR_PUSH
 1098 
 1099         ALTENTRY(cmntrap_pushed)
 1100 
 1101         movq    %rsp, %rbp
 1102 
 1103         /*
 1104          * - if this is a #pf i.e. T_PGFLT, %r15 is live
 1105          *   and contains the faulting address i.e. a copy of %cr2
 1106          *
 1107          * - if this is a #db i.e. T_SGLSTP, %r15 is live
 1108          *   and contains the value of %db6
 1109          */
 1110 
 1111         TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
 1112         TRACE_REGS(%rdi, %rsp, %rbx, %rcx)      /* Uses label 9 */
 1113         TRACE_STAMP(%rdi)               /* Clobbers %eax, %edx, uses 9 */
 1114 
 1115         /*
 1116          * We must first check if DTrace has set its NOFAULT bit.  This
 1117          * regrettably must happen before the trap stack is recorded, because
 1118          * this requires a call to getpcstack() and may induce recursion if an
 1119          * fbt::getpcstack: enabling is inducing the bad load.
 1120          */
 1121         movl    %gs:CPU_ID, %eax
 1122         shlq    $CPU_CORE_SHIFT, %rax
 1123         leaq    cpu_core(%rip), %r8
 1124         addq    %r8, %rax
 1125         movw    CPUC_DTRACE_FLAGS(%rax), %cx
 1126         testw   $CPU_DTRACE_NOFAULT, %cx
 1127         jnz     .dtrace_induced
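In C terms the address arithmetic above is just an index into the cpu_core
array, roughly as the DTrace sources express it (sketch):

        cpu_core_t *cpu = &cpu_core[CPU->cpu_id];

        if (cpu->cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) {
                /* take the .dtrace_induced path below */
        }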
 1128 
 1129         TRACE_STACK(%rdi)
 1130 
 1131         movq    %rbp, %rdi
 1132         movq    %r15, %rsi
 1133         movl    %gs:CPU_ID, %edx
 1134 
 1135         /*
 1136          * We know that this isn't a DTrace non-faulting load; we can now safely
 1137          * reenable interrupts.  (In the case of pagefaults, we enter through an
 1138          * interrupt gate.)
 1139          */
 1140         ENABLE_INTR_FLAGS
 1141 
 1142         call    trap            /* trap(rp, addr, cpuid) handles all traps */
 1143         jmp     _sys_rtt
 1144 
 1145 .dtrace_induced:
 1146         cmpw    $KCS_SEL, REGOFF_CS(%rbp)       /* test CS for user-mode trap */
 1147         jne     2f                              /* if from user, panic */
 1148 
 1149         cmpl    $T_PGFLT, REGOFF_TRAPNO(%rbp)
 1150         je      0f
 1151 
 1152         cmpl    $T_GPFLT, REGOFF_TRAPNO(%rbp)
 1153         jne     3f                              /* if not PF or GP, panic */
 1154 
 1155         /*
 1156          * If we've taken a GPF, we don't (unfortunately) have the address that
 1157          * induced the fault.  So instead of setting the fault to BADADDR,
 1158          * we'll set the fault to ILLOP.
 1159          */
 1160         orw     $CPU_DTRACE_ILLOP, %cx
 1161         movw    %cx, CPUC_DTRACE_FLAGS(%rax)
 1162         jmp     1f
 1163 0:
 1164         orw     $CPU_DTRACE_BADADDR, %cx
 1165         movw    %cx, CPUC_DTRACE_FLAGS(%rax)    /* set fault to bad addr */
 1166         movq    %r15, CPUC_DTRACE_ILLVAL(%rax)
 1167                                             /* fault addr is illegal value */
 1168 1:
 1169         movq    REGOFF_RIP(%rbp), %rdi
 1170         movq    %rdi, %r12
 1171         call    dtrace_instr_size
 1172         addq    %rax, %r12
 1173         movq    %r12, REGOFF_RIP(%rbp)
 1174         INTR_POP
 1175         IRET
 1176         /*NOTREACHED*/
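The fixup just above, expressed as a C sketch (field name per the amd64
struct regs): the saved instruction pointer is advanced over the faulting
instruction, so on return the non-faulting load is simply skipped:

        rp->r_rip += dtrace_instr_size((uchar_t *)rp->r_rip);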
 1177 2:
 1178         leaq    dtrace_badflags(%rip), %rdi
 1179         xorl    %eax, %eax
 1180         call    panic
 1181 3:
 1182         leaq    dtrace_badtrap(%rip), %rdi
 1183         xorl    %eax, %eax
 1184         call    panic
 1185         SET_SIZE(cmntrap)
 1186         SET_SIZE(_cmntrap)
 1187 
 1188 #elif defined(__i386)
 1189 
 1190 
 1191         ENTRY_NP2(cmntrap, _cmntrap)
 1192 
 1193         INTR_PUSH
 1194 
 1195         ALTENTRY(cmntrap_pushed)
 1196 
 1197         movl    %esp, %ebp
 1198 
 1199         /*
 1200          * - if this is a #pf i.e. T_PGFLT, %esi is live
 1201          *   and contains the faulting address i.e. a copy of %cr2
 1202          *
 1203          * - if this is a #db i.e. T_SGLSTP, %esi is live
 1204          *   and contains the value of %db6
 1205          */
 1206 
 1207         TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
 1208         TRACE_REGS(%edi, %esp, %ebx, %ecx)      /* Uses label 9 */
 1209         TRACE_STAMP(%edi)               /* Clobbers %eax, %edx, uses 9 */
 1210 
 1211         /*
 1212          * We must first check if DTrace has set its NOFAULT bit.  This
 1213          * regrettably must happen before the trap stack is recorded, because
 1214          * this requires a call to getpcstack() and may induce recursion if an
 1215          * fbt::getpcstack: enabling is inducing the bad load.
 1216          */
 1217         movl    %gs:CPU_ID, %eax
 1218         shll    $CPU_CORE_SHIFT, %eax
 1219         addl    $cpu_core, %eax
 1220         movw    CPUC_DTRACE_FLAGS(%eax), %cx
 1221         testw   $CPU_DTRACE_NOFAULT, %cx
 1222         jnz     .dtrace_induced
 1223 
 1224         TRACE_STACK(%edi)
 1225 
 1226         pushl   %gs:CPU_ID
 1227         pushl   %esi            /* fault address for PGFLTs */
 1228         pushl   %ebp            /* &regs */
 1229 
 1230         /*
 1231          * We know that this isn't a DTrace non-faulting load; we can now safely
 1232          * reenable interrupts.  (In the case of pagefaults, we enter through an
 1233          * interrupt gate.)
 1234          */
 1235         ENABLE_INTR_FLAGS
 1236 
 1237         call    trap            /* trap(rp, addr, cpuid) handles all traps */
 1238         addl    $12, %esp       /* get argument off stack */
 1239         jmp     _sys_rtt
 1240 
 1241 .dtrace_induced:
 1242         cmpw    $KCS_SEL, REGOFF_CS(%ebp)       /* test CS for user-mode trap */
 1243         jne     2f                              /* if from user, panic */
 1244 
 1245         cmpl    $T_PGFLT, REGOFF_TRAPNO(%ebp)
 1246         je      0f
 1247 
 1248         cmpl    $T_GPFLT, REGOFF_TRAPNO(%ebp)
 1249         jne     3f                              /* if not PF or GP, panic */
 1250 
 1251         /*
 1252          * If we've taken a GPF, we don't (unfortunately) have the address that
 1253          * induced the fault.  So instead of setting the fault to BADADDR,
 1254          * we'll set the fault to ILLOP.
 1255          */
 1256         orw     $CPU_DTRACE_ILLOP, %cx
 1257         movw    %cx, CPUC_DTRACE_FLAGS(%eax)
 1258         jmp     1f
 1259 0:
 1260         orw     $CPU_DTRACE_BADADDR, %cx
 1261         movw    %cx, CPUC_DTRACE_FLAGS(%eax)    /* set fault to bad addr */
 1262         movl    %esi, CPUC_DTRACE_ILLVAL(%eax)
 1263                                             /* fault addr is illegal value */
 1264 1:
 1265         pushl   REGOFF_EIP(%ebp)
 1266         call    dtrace_instr_size
 1267         addl    $4, %esp
 1268         movl    REGOFF_EIP(%ebp), %ecx
 1269         addl    %eax, %ecx
 1270         movl    %ecx, REGOFF_EIP(%ebp)
 1271         INTR_POP_KERNEL
 1272         IRET
 1273         /*NOTREACHED*/
 1274 2:
 1275         pushl   $dtrace_badflags
 1276         call    panic
 1277 3:
 1278         pushl   $dtrace_badtrap
 1279         call    panic
 1280         SET_SIZE(cmntrap)
 1281         SET_SIZE(_cmntrap)
 1282 
 1283 #endif  /* __i386 */
 1284 
 1285 /*
 1286  * Declare a uintptr_t which has the size of _cmntrap to enable stack
 1287  * traceback code to know when a regs structure is on the stack.
 1288  */
 1289         .globl  _cmntrap_size
 1290         .align  CLONGSIZE
 1291 _cmntrap_size:
 1292         .NWORD  . - _cmntrap
 1293         .type   _cmntrap_size, @object
 1294 
 1295 dtrace_badflags:
 1296         .string "bad DTrace flags"
 1297 
 1298 dtrace_badtrap:
 1299         .string "bad DTrace trap"
 1300 
 1301 #endif  /* __lint */
 1302 
 1303 #if defined(__lint)
 1304 
 1305 /* ARGSUSED */
 1306 void
 1307 cmninttrap()
 1308 {}
 1309 
 1310 #if !defined(__xpv)
 1311 void
 1312 bop_trap_handler(void)
 1313 {}
 1314 #endif
 1315 
 1316 #else   /* __lint */
 1317 
 1318         .globl  trap            /* C handler called below */
 1319 
 1320 #if defined(__amd64)
 1321 
 1322         ENTRY_NP(cmninttrap)
 1323 
 1324         INTR_PUSH
 1325         INTGATE_INIT_KERNEL_FLAGS
 1326 
 1327         TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
 1328         TRACE_REGS(%rdi, %rsp, %rbx, %rcx)      /* Uses label 9 */
 1329         TRACE_STAMP(%rdi)               /* Clobbers %eax, %edx, uses 9 */
 1330 
 1331         movq    %rsp, %rbp
 1332 
 1333         movl    %gs:CPU_ID, %edx
 1334         xorl    %esi, %esi
 1335         movq    %rsp, %rdi
 1336         call    trap            /* trap(rp, addr, cpuid) handles all traps */
 1337         jmp     _sys_rtt
 1338         SET_SIZE(cmninttrap)
 1339 
 1340 #if !defined(__xpv)
 1341         /*
 1342          * Handle traps early in boot. Just revectors into C quickly as
 1343          * these are always fatal errors.
 1344          *
 1345          * Adjust %rsp to get same stack layout as in 32bit mode for bop_trap().
 1346          */
 1347         ENTRY(bop_trap_handler)
 1348         movq    %rsp, %rdi
 1349         sub     $8, %rsp
 1350         call    bop_trap
 1351         SET_SIZE(bop_trap_handler)
 1352 #endif
 1353 
 1354 #elif defined(__i386)
 1355 
 1356         ENTRY_NP(cmninttrap)
 1357 
 1358         INTR_PUSH
 1359         INTGATE_INIT_KERNEL_FLAGS
 1360 
 1361         TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
 1362         TRACE_REGS(%edi, %esp, %ebx, %ecx)      /* Uses label 9 */
 1363         TRACE_STAMP(%edi)               /* Clobbers %eax, %edx, uses 9 */
 1364 
 1365         movl    %esp, %ebp
 1366 
 1367         TRACE_STACK(%edi)
 1368 
 1369         pushl   %gs:CPU_ID
 1370         pushl   $0
 1371         pushl   %ebp
 1372         call    trap            /* trap(rp, addr, cpuid) handles all traps */
 1373         addl    $12, %esp
 1374         jmp     _sys_rtt
 1375         SET_SIZE(cmninttrap)
 1376 
 1377 #if !defined(__xpv)
 1378         /*
 1379          * Handle traps early in boot. Just revectors into C quickly as
 1380          * these are always fatal errors.
 1381          */
 1382         ENTRY(bop_trap_handler)
 1383         movl    %esp, %eax
 1384         pushl   %eax
 1385         call    bop_trap
 1386         SET_SIZE(bop_trap_handler)
 1387 #endif
 1388 
 1389 #endif  /* __i386 */
 1390 
 1391 #endif  /* __lint */
 1392 
 1393 #if defined(__lint)
 1394 
 1395 /* ARGSUSED */
 1396 void
 1397 dtrace_trap()
 1398 {}
 1399 
 1400 #else   /* __lint */
 1401 
 1402         .globl  dtrace_user_probe
 1403 
 1404 #if defined(__amd64)
 1405 
 1406         ENTRY_NP(dtrace_trap)
 1407 
 1408         INTR_PUSH
 1409 
 1410         TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
 1411         TRACE_REGS(%rdi, %rsp, %rbx, %rcx)      /* Uses label 9 */
 1412         TRACE_STAMP(%rdi)               /* Clobbers %eax, %edx, uses 9 */
 1413 
 1414         movq    %rsp, %rbp
 1415 
 1416         movl    %gs:CPU_ID, %edx
 1417 #if defined(__xpv)
 1418         movq    %gs:CPU_VCPU_INFO, %rsi
 1419         movq    VCPU_INFO_ARCH_CR2(%rsi), %rsi
 1420 #else
 1421         movq    %cr2, %rsi
 1422 #endif
 1423         movq    %rsp, %rdi
 1424 
 1425         ENABLE_INTR_FLAGS
 1426 
 1427         call    dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
 1428         jmp     _sys_rtt
 1429 
 1430         SET_SIZE(dtrace_trap)
 1431 
 1432 #elif defined(__i386)
 1433 
 1434         ENTRY_NP(dtrace_trap)
 1435 
 1436         INTR_PUSH
 1437 
 1438         TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
 1439         TRACE_REGS(%edi, %esp, %ebx, %ecx)      /* Uses label 9 */
 1440         TRACE_STAMP(%edi)               /* Clobbers %eax, %edx, uses 9 */
 1441 
 1442         movl    %esp, %ebp
 1443 
 1444         pushl   %gs:CPU_ID
 1445 #if defined(__xpv)
 1446         movl    %gs:CPU_VCPU_INFO, %eax
 1447         movl    VCPU_INFO_ARCH_CR2(%eax), %eax
 1448 #else
 1449         movl    %cr2, %eax
 1450 #endif
 1451         pushl   %eax
 1452         pushl   %ebp
 1453 
 1454         ENABLE_INTR_FLAGS
 1455 
 1456         call    dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
 1457         addl    $12, %esp               /* get argument off stack */
 1458 
 1459         jmp     _sys_rtt
 1460         SET_SIZE(dtrace_trap)
 1461 
 1462 #endif  /* __i386 */
 1463 
 1464 #endif  /* __lint */
 1465 
 1466 /*
 1467  * Return from _sys_trap routine.
 1468  */
 1469 
 1470 #if defined(__lint)
 1471 
 1472 void
 1473 lwp_rtt_initial(void)
 1474 {}
 1475 
 1476 void
 1477 lwp_rtt(void)
 1478 {}
 1479 
 1480 void
 1481 _sys_rtt(void)
 1482 {}
 1483 
 1484 #else   /* __lint */
 1485 
 1486 #if defined(__amd64)
 1487 
 1488         ENTRY_NP(lwp_rtt_initial)
 1489         movq    %gs:CPU_THREAD, %r15
 1490         movq    T_STACK(%r15), %rsp     /* switch to the thread stack */
 1491         movq    %rsp, %rbp
 1492         call    __dtrace_probe___proc_start
 1493         jmp     _lwp_rtt
 1494 
 1495         ENTRY_NP(lwp_rtt)
 1496 
 1497         /*
 1498          * r14  lwp
 1499          * rdx  lwp->lwp_procp
 1500          * r15  curthread
 1501          */
 1502 
 1503         movq    %gs:CPU_THREAD, %r15
 1504         movq    T_STACK(%r15), %rsp     /* switch to the thread stack */
 1505         movq    %rsp, %rbp
 1506 _lwp_rtt:
 1507         call    __dtrace_probe___proc_lwp__start
 1508         movq    %gs:CPU_LWP, %r14
 1509         movq    LWP_PROCP(%r14), %rdx
 1510 
 1511         /*
 1512          * XX64 Is the stack misaligned correctly at this point?
 1513          *      If not, we need to do a push before calling anything ..
 1514          */
 1515 
 1516 #if defined(DEBUG)
 1517         /*
 1518          * If we were to run lwp_savectx at this point -without-
 1519          * pcb_rupdate being set to 1, we'd end up sampling the hardware
 1520          * state left by the previous running lwp, rather than setting
 1521          * the values requested by the lwp creator.  Bad.
 1522          */
 1523         testb   $0x1, PCB_RUPDATE(%r14)
 1524         jne     1f
 1525         leaq    _no_pending_updates(%rip), %rdi
 1526         movl    $__LINE__, %esi
 1527         movq    %r14, %rdx
 1528         xorl    %eax, %eax
 1529         call    panic
 1530 _no_pending_updates:
 1531         .string "locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1"
 1532 1:
 1533 #endif
 1534 
 1535         /*
 1536          * If agent lwp, clear %fs and %gs
 1537          */
 1538         cmpq    %r15, P_AGENTTP(%rdx)
 1539         jne     1f
 1540         xorl    %ecx, %ecx
 1541         movq    %rcx, REGOFF_FS(%rsp)
 1542         movq    %rcx, REGOFF_GS(%rsp)
 1543         movw    %cx, LWP_PCB_FS(%r14)
 1544         movw    %cx, LWP_PCB_GS(%r14)
 1545 1:
 1546         call    dtrace_systrace_rtt
 1547         movq    REGOFF_RDX(%rsp), %rsi
 1548         movq    REGOFF_RAX(%rsp), %rdi
 1549         call    post_syscall            /* post_syscall(rval1, rval2) */
 1550 
 1551         /*
 1552          * set up to take fault on first use of fp
 1553          */
 1554         STTS(%rdi)
 1555 
 1556         /*
 1557          * XXX - may want a fast path that avoids sys_rtt_common in the
 1558          * most common case.
 1559          */
 1560         ALTENTRY(_sys_rtt)
 1561         CLI(%rax)                       /* disable interrupts */
 1562         ALTENTRY(_sys_rtt_ints_disabled)
 1563         movq    %rsp, %rdi              /* pass rp to sys_rtt_common */
 1564         call    sys_rtt_common          /* do common sys_rtt tasks */
 1565         testq   %rax, %rax              /* returning to userland? */
 1566         jz      sr_sup
 1567 
 1568         /*
 1569          * Return to user
 1570          */
 1571         ASSERT_UPCALL_MASK_IS_SET
 1572         cmpw    $UCS_SEL, REGOFF_CS(%rsp) /* test for native (64-bit) lwp? */
 1573         je      sys_rtt_syscall
 1574 
 1575         /*
 1576          * Return to 32-bit userland
 1577          */
 1578         ALTENTRY(sys_rtt_syscall32)
 1579         USER32_POP
 1580         IRET
 1581         /*NOTREACHED*/
 1582 
 1583         ALTENTRY(sys_rtt_syscall)
 1584         /*
 1585          * Return to 64-bit userland
 1586          */
 1587         USER_POP
 1588         ALTENTRY(nopop_sys_rtt_syscall)
 1589         IRET
 1590         /*NOTREACHED*/
 1591         SET_SIZE(nopop_sys_rtt_syscall)
 1592 
 1593         /*
 1594          * Return to supervisor
 1595          * NOTE: to make the check in trap() that tests if we are executing
 1596          * segment register fixup/restore code work properly, sr_sup MUST be
 1597          * after _sys_rtt .
 1598          */
 1599         ALTENTRY(sr_sup)
 1600         /*
 1601          * Restore regs before doing iretq to kernel mode
 1602          */
 1603         INTR_POP
 1604         IRET
 1605         .globl  _sys_rtt_end
 1606 _sys_rtt_end:
 1607         /*NOTREACHED*/
 1608         SET_SIZE(sr_sup)
 1609         SET_SIZE(_sys_rtt_end)
 1610         SET_SIZE(lwp_rtt)
 1611         SET_SIZE(lwp_rtt_initial)
 1612         SET_SIZE(_sys_rtt_ints_disabled)
 1613         SET_SIZE(_sys_rtt)
 1614         SET_SIZE(sys_rtt_syscall)
 1615         SET_SIZE(sys_rtt_syscall32)
 1616 
 1617 #elif defined(__i386)
 1618 
 1619         ENTRY_NP(lwp_rtt_initial)
 1620         movl    %gs:CPU_THREAD, %eax
 1621         movl    T_STACK(%eax), %esp     /* switch to the thread stack */
 1622         movl    %esp, %ebp
 1623         call    __dtrace_probe___proc_start
 1624         jmp     _lwp_rtt
 1625 
 1626         ENTRY_NP(lwp_rtt)
 1627         movl    %gs:CPU_THREAD, %eax
 1628         movl    T_STACK(%eax), %esp     /* switch to the thread stack */
 1629         movl    %esp, %ebp
 1630 _lwp_rtt:
 1631         call    __dtrace_probe___proc_lwp__start
 1632 
 1633         /*
 1634          * If agent lwp, clear %fs and %gs.
 1635          */
 1636         movl    %gs:CPU_LWP, %eax
 1637         movl    LWP_PROCP(%eax), %edx
 1638 
 1639         cmpl    %eax, P_AGENTTP(%edx)
 1640         jne     1f
 1641         movl    $0, REGOFF_FS(%esp)
 1642         movl    $0, REGOFF_GS(%esp)
 1643 1:
 1644         call    dtrace_systrace_rtt
 1645         movl    REGOFF_EDX(%esp), %edx
 1646         movl    REGOFF_EAX(%esp), %eax
 1647         pushl   %edx
 1648         pushl   %eax
 1649         call    post_syscall            /* post_syscall(rval1, rval2) */
 1650         addl    $8, %esp
 1651 
	/*
	 * Set up to take a #NM fault on the first use of floating point,
	 * so that the FPU state can be restored lazily.
	 */
 1655         STTS(%eax)
 1656 
 1657         /*
 1658          * XXX - may want a fast path that avoids sys_rtt_common in the
 1659          * most common case.
 1660          */
 1661         ALTENTRY(_sys_rtt)
 1662         CLI(%eax)                       /* disable interrupts */
 1663         ALTENTRY(_sys_rtt_ints_disabled)
 1664         pushl   %esp                    /* pass rp to sys_rtt_common */
 1665         call    sys_rtt_common
 1666         addl    $4, %esp                /* pop arg */
 1667         testl   %eax, %eax              /* test for return to user mode */
 1668         jz      sr_sup
 1669 
 1670         /*
 1671          * Return to User.
 1672          */
 1673         ALTENTRY(sys_rtt_syscall)
 1674         INTR_POP_USER
 1675 
	/*
	 * There must be no instructions between this label and IRET, or
	 * we could end up breaking Linux brand support. See the label usage
	 * in lx_brand_int80_callback for an example.
	 */
 1681         ALTENTRY(nopop_sys_rtt_syscall)
 1682         IRET
 1683         /*NOTREACHED*/
 1684         SET_SIZE(nopop_sys_rtt_syscall)
 1685 
 1686         ALTENTRY(_sys_rtt_end)
 1687 
 1688         /*
 1689          * Return to supervisor
 1690          */
 1691         ALTENTRY(sr_sup)
 1692 
 1693         /*
 1694          * Restore regs before doing iret to kernel mode
 1695          */
 1696         INTR_POP_KERNEL
 1697         IRET
 1698         /*NOTREACHED*/
 1699 
 1700         SET_SIZE(sr_sup)
 1701         SET_SIZE(_sys_rtt_end)
 1702         SET_SIZE(lwp_rtt)
 1703         SET_SIZE(lwp_rtt_initial)
 1704         SET_SIZE(_sys_rtt_ints_disabled)
 1705         SET_SIZE(_sys_rtt)
 1706         SET_SIZE(sys_rtt_syscall)
 1707 
 1708 #endif  /* __i386 */
 1709 
 1710 #endif  /* __lint */
 1711 
 1712 #if defined(__lint)
 1713 
/*
 * So why do we have to deal with all this crud in the world of ia32?
 *
 * Basically there are four classes of ia32 implementations: those that have
 * no TSC at all, those whose TSC is broken to the extent that it is useless,
 * those whose TSC is marginal but can be used with some care, and those
 * whose TSC is reliable. This crud has to be here in order to sift through
 * all the variants.
 */
 1724 
 1725 /*ARGSUSED*/
 1726 uint64_t
 1727 freq_tsc(uint32_t *pit_counter)
 1728 {
 1729         return (0);
 1730 }
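
/*
 * Illustrative sketch only (not part of the original source): a caller can
 * convert freq_tsc()'s outputs into a cpu frequency by scaling the elapsed
 * TSC ticks by the PIT input clock, which is 1193182 Hz on PC hardware.
 * The names PIT_INPUT_HZ and example_cpu_freq_hz are hypothetical.
 */
#define	PIT_INPUT_HZ	1193182

static uint64_t
example_cpu_freq_hz(void)
{
	uint32_t pit_ticks;
	uint64_t tsc_ticks = freq_tsc(&pit_ticks);

	if (tsc_ticks == 0)		/* cpu was too fast to measure */
		return (0);

	/* cpu_hz / PIT_INPUT_HZ == tsc_ticks / pit_ticks */
	return (tsc_ticks * PIT_INPUT_HZ / pit_ticks);
}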
 1731 
 1732 #else   /* __lint */
 1733 
 1734 #if defined(__amd64)
 1735 
 1736         /*
 1737          * XX64 quick and dirty port from the i386 version. Since we
 1738          * believe the amd64 tsc is more reliable, could this code be
 1739          * simpler?
 1740          */
 1741         ENTRY_NP(freq_tsc)
 1742         pushq   %rbp
 1743         movq    %rsp, %rbp
 1744         movq    %rdi, %r9       /* save pit_counter */
 1745         pushq   %rbx
 1746 
/ We have a TSC, but in general we have no way to know how reliable it is.
/ Usually a marginal TSC behaves appropriately unless too little time
/ elapses between reads; a reliable TSC can be read as often and as rapidly
/ as desired. The simplistic approach of reading the TSC and correlating it
/ directly to the PIT counter therefore cannot be followed naively. Instead,
/ successive estimates are taken to refine a guess at the speed of the cpu,
/ and only then are the TSC and the PIT counter correlated. In practice,
/ more than one quick loop is rarely required for an estimate. Measures have
/ to be taken to keep the PIT counter from wrapping beyond its resolution
/ and to measure the clock rate of very fast processors.
 1757 /
/ The following constant can be tuned. It should be chosen so that the loop
/ takes neither too many nor too few PIT counts to execute. If the value is
/ too large, then on slow machines the loop will take a long time, or the
/ PIT counter may even wrap. If the value is too small, then on fast
/ machines the PIT counter may count so few ticks that the resolution of
/ the PIT itself causes a bad guess. Because this code is used on machines
/ with marginal TSCs and/or I/O, too small a value may also cause the
/ calculated cpu frequency to vary slightly from boot to boot.
/
/ In all cases, even if this constant is set inappropriately the algorithm
/ will still work, and the caller should be able to handle variances in the
/ calculation of cpu frequency; but the calculation will be inefficient and
/ take a disproportionate amount of time relative to a well-selected value.
/ As the slowest supported cpu becomes faster, this constant should be
/ carefully increased.
 1773 
 1774         movl    $0x8000, %ecx
 1775 
 1776         / to make sure the instruction cache has been warmed
 1777         clc
 1778 
 1779         jmp     freq_tsc_loop
 1780 
/ The following block of code, up to and including the latching of the PIT
/ counter after freq_tsc_perf_loop, is timing-critical and very carefully
/ written; it should only be modified with great care. freq_tsc_loop through
/ freq_tsc_perf_loop fits in exactly 16 bytes, as do the instructions in
/ freq_tsc_perf_loop up to the unlatching of the PIT counter.
 1786 
 1787         .align  32
 1788 freq_tsc_loop:
 1789         / save the loop count in %ebx
 1790         movl    %ecx, %ebx
 1791 
 1792         / initialize the PIT counter and start a count down
 1793         movb    $PIT_LOADMODE, %al
 1794         outb    $PITCTL_PORT
 1795         movb    $0xff, %al
 1796         outb    $PITCTR0_PORT
 1797         outb    $PITCTR0_PORT
 1798 
 1799         / read the TSC and store the TS in %edi:%esi
 1800         rdtsc
 1801         movl    %eax, %esi
 1802 
 1803 freq_tsc_perf_loop:
 1804         movl    %edx, %edi
 1805         movl    %eax, %esi
 1806         movl    %edx, %edi
 1807         loop    freq_tsc_perf_loop
 1808 
 1809         / read the TSC and store the LSW in %ecx
 1810         rdtsc
 1811         movl    %eax, %ecx
 1812 
 1813         / latch the PIT counter and status
 1814         movb    $_CONST(PIT_READBACK|PIT_READBACKC0), %al
 1815         outb    $PITCTL_PORT
 1816 
 1817         / remember if the icache has been warmed
 1818         setc    %ah
 1819 
 1820         / read the PIT status
 1821         inb     $PITCTR0_PORT
 1822         shll    $8, %eax
 1823 
 1824         / read PIT count
 1825         inb     $PITCTR0_PORT
 1826         shll    $8, %eax
 1827         inb     $PITCTR0_PORT
 1828         bswap   %eax
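	/ %eax now holds, from most to least significant byte: PIT count MSB,
	/ PIT count LSB, PIT status, and the icache-warmed flag from the
	/ setc above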
 1829 
 1830         / check to see if the PIT count was loaded into the CE
 1831         btw     $_CONST(PITSTAT_NULLCNT+8), %ax
 1832         jc      freq_tsc_increase_count
 1833 
 1834         / check to see if PIT counter wrapped
 1835         btw     $_CONST(PITSTAT_OUTPUT+8), %ax
 1836         jnc     freq_tsc_pit_did_not_wrap
 1837 
 1838         / halve count
 1839         shrl    $1, %ebx
 1840         movl    %ebx, %ecx
 1841 
 1842         / the instruction cache has been warmed
 1843         stc
 1844 
 1845         jmp     freq_tsc_loop
 1846 
 1847 freq_tsc_increase_count:
 1848         shll    $1, %ebx
 1849         jc      freq_tsc_too_fast
 1850 
 1851         movl    %ebx, %ecx
 1852 
 1853         / the instruction cache has been warmed
 1854         stc
 1855 
 1856         jmp     freq_tsc_loop
 1857 
 1858 freq_tsc_pit_did_not_wrap:
 1859         roll    $16, %eax
 1860 
 1861         cmpw    $0x2000, %ax
 1862         notw    %ax
 1863         jb      freq_tsc_sufficient_duration
 1864 
 1865 freq_tsc_calculate:
	/ In mode 0 the PIT loads the count into the CE on the first CLK pulse
	/ and only starts decrementing the CE on the second CLK pulse, so mode
	/ 0 is really a (count + 1) counter, ugh.
 1869         xorl    %esi, %esi
 1870         movw    %ax, %si
 1871         incl    %esi
 1872 
 1873         movl    $0xf000, %eax
 1874         mull    %ebx
 1875 
 1876         / tuck away (target_pit_count * loop_count)
 1877         movl    %edx, %ecx
 1878         movl    %eax, %ebx
 1879 
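	/ The next five instructions compute %esi * 2^32 (as %esi * 0xffffffff
	/ + %esi) so that it can be compared against the product in %ecx:%ebx;
	/ if the product is not smaller, the divl below would overflow.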
 1880         movl    %esi, %eax
 1881         movl    $0xffffffff, %edx
 1882         mull    %edx
 1883 
 1884         addl    %esi, %eax
 1885         adcl    $0, %edx
 1886 
 1887         cmpl    %ecx, %edx
 1888         ja      freq_tsc_div_safe
 1889         jb      freq_tsc_too_fast
 1890 
 1891         cmpl    %ebx, %eax
 1892         jbe     freq_tsc_too_fast
 1893 
 1894 freq_tsc_div_safe:
 1895         movl    %ecx, %edx
 1896         movl    %ebx, %eax
 1897 
 1898         movl    %esi, %ecx
 1899         divl    %ecx
 1900 
 1901         movl    %eax, %ecx
 1902 
 1903         / the instruction cache has been warmed
 1904         stc
 1905 
 1906         jmp     freq_tsc_loop
 1907 
 1908 freq_tsc_sufficient_duration:
 1909         / test to see if the icache has been warmed
 1910         btl     $16, %eax
 1911         jnc     freq_tsc_calculate
 1912 
 1913         / recall mode 0 is a (count + 1) counter
 1914         andl    $0xffff, %eax
 1915         incl    %eax
 1916 
 1917         / save the number of PIT counts
 1918         movl    %eax, (%r9)
 1919 
 1920         / calculate the number of TS's that elapsed
 1921         movl    %ecx, %eax
 1922         subl    %esi, %eax
 1923         sbbl    %edi, %edx
 1924 
 1925         jmp     freq_tsc_end
 1926 
 1927 freq_tsc_too_fast:
 1928         / return 0 as a 64 bit quantity
 1929         xorl    %eax, %eax
 1930         xorl    %edx, %edx
 1931 
 1932 freq_tsc_end:
 1933         shlq    $32, %rdx
 1934         orq     %rdx, %rax
 1935 
 1936         popq    %rbx
 1937         leaveq
 1938         ret
 1939         SET_SIZE(freq_tsc)
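
/*
 * In C terms, the rescaling step at freq_tsc_calculate above is roughly the
 * following model, operating on the (count + 1)-adjusted PIT tick count
 * (illustrative only; next_loop_count is not a symbol in this file). The
 * overflow guard mirrors the cmpl/ja/jb sequence that protects divl, and
 * returning 0 corresponds to the freq_tsc_too_fast path:
 *
 *	static uint32_t
 *	next_loop_count(uint32_t loop_count, uint32_t pit_ticks)
 *	{
 *		uint64_t product = (uint64_t)0xf000 * loop_count;
 *
 *		if (product >= ((uint64_t)pit_ticks << 32))
 *			return (0);
 *
 *		return ((uint32_t)(product / pit_ticks));
 *	}
 */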
 1940 
 1941 #elif defined(__i386)
 1942 
 1943         ENTRY_NP(freq_tsc)
 1944         pushl   %ebp
 1945         movl    %esp, %ebp
 1946         pushl   %edi
 1947         pushl   %esi
 1948         pushl   %ebx
 1949 
/ We have a TSC, but in general we have no way to know how reliable it is.
/ Usually a marginal TSC behaves appropriately unless too little time
/ elapses between reads; a reliable TSC can be read as often and as rapidly
/ as desired. The simplistic approach of reading the TSC and correlating it
/ directly to the PIT counter therefore cannot be followed naively. Instead,
/ successive estimates are taken to refine a guess at the speed of the cpu,
/ and only then are the TSC and the PIT counter correlated. In practice,
/ more than one quick loop is rarely required for an estimate. Measures have
/ to be taken to keep the PIT counter from wrapping beyond its resolution
/ and to measure the clock rate of very fast processors.
 1960 /
/ The following constant can be tuned. It should be chosen so that the loop
/ takes neither too many nor too few PIT counts to execute. If the value is
/ too large, then on slow machines the loop will take a long time, or the
/ PIT counter may even wrap. If the value is too small, then on fast
/ machines the PIT counter may count so few ticks that the resolution of
/ the PIT itself causes a bad guess. Because this code is used on machines
/ with marginal TSCs and/or I/O, too small a value may also cause the
/ calculated cpu frequency to vary slightly from boot to boot.
/
/ In all cases, even if this constant is set inappropriately the algorithm
/ will still work, and the caller should be able to handle variances in the
/ calculation of cpu frequency; but the calculation will be inefficient and
/ take a disproportionate amount of time relative to a well-selected value.
/ As the slowest supported cpu becomes faster, this constant should be
/ carefully increased.
 1976 
 1977         movl    $0x8000, %ecx
 1978 
 1979         / to make sure the instruction cache has been warmed
 1980         clc
 1981 
 1982         jmp     freq_tsc_loop
 1983 
/ The following block of code, up to and including the latching of the PIT
/ counter after freq_tsc_perf_loop, is timing-critical and very carefully
/ written; it should only be modified with great care. freq_tsc_loop through
/ freq_tsc_perf_loop fits in exactly 16 bytes, as do the instructions in
/ freq_tsc_perf_loop up to the unlatching of the PIT counter.
 1989 
 1990         .align  32
 1991 freq_tsc_loop:
 1992         / save the loop count in %ebx
 1993         movl    %ecx, %ebx
 1994 
 1995         / initialize the PIT counter and start a count down
 1996         movb    $PIT_LOADMODE, %al
 1997         outb    $PITCTL_PORT
 1998         movb    $0xff, %al
 1999         outb    $PITCTR0_PORT
 2000         outb    $PITCTR0_PORT
 2001 
 2002         / read the TSC and store the TS in %edi:%esi
 2003         rdtsc
 2004         movl    %eax, %esi
 2005 
 2006 freq_tsc_perf_loop:
 2007         movl    %edx, %edi
 2008         movl    %eax, %esi
 2009         movl    %edx, %edi
 2010         loop    freq_tsc_perf_loop
 2011 
 2012         / read the TSC and store the LSW in %ecx
 2013         rdtsc
 2014         movl    %eax, %ecx
 2015 
 2016         / latch the PIT counter and status
 2017         movb    $_CONST(PIT_READBACK|PIT_READBACKC0), %al
 2018         outb    $PITCTL_PORT
 2019 
 2020         / remember if the icache has been warmed
 2021         setc    %ah
 2022 
 2023         / read the PIT status
 2024         inb     $PITCTR0_PORT
 2025         shll    $8, %eax
 2026 
 2027         / read PIT count
 2028         inb     $PITCTR0_PORT
 2029         shll    $8, %eax
 2030         inb     $PITCTR0_PORT
 2031         bswap   %eax
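	/ %eax now holds, from most to least significant byte: PIT count MSB,
	/ PIT count LSB, PIT status, and the icache-warmed flag from the
	/ setc above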
 2032 
 2033         / check to see if the PIT count was loaded into the CE
 2034         btw     $_CONST(PITSTAT_NULLCNT+8), %ax
 2035         jc      freq_tsc_increase_count
 2036 
 2037         / check to see if PIT counter wrapped
 2038         btw     $_CONST(PITSTAT_OUTPUT+8), %ax
 2039         jnc     freq_tsc_pit_did_not_wrap
 2040 
 2041         / halve count
 2042         shrl    $1, %ebx
 2043         movl    %ebx, %ecx
 2044 
 2045         / the instruction cache has been warmed
 2046         stc
 2047 
 2048         jmp     freq_tsc_loop
 2049 
 2050 freq_tsc_increase_count:
 2051         shll    $1, %ebx
 2052         jc      freq_tsc_too_fast
 2053 
 2054         movl    %ebx, %ecx
 2055 
 2056         / the instruction cache has been warmed
 2057         stc
 2058 
 2059         jmp     freq_tsc_loop
 2060 
 2061 freq_tsc_pit_did_not_wrap:
 2062         roll    $16, %eax
 2063 
 2064         cmpw    $0x2000, %ax
 2065         notw    %ax
 2066         jb      freq_tsc_sufficient_duration
 2067 
 2068 freq_tsc_calculate:
	/ In mode 0 the PIT loads the count into the CE on the first CLK pulse
	/ and only starts decrementing the CE on the second CLK pulse, so mode
	/ 0 is really a (count + 1) counter, ugh.
 2072         xorl    %esi, %esi
 2073         movw    %ax, %si
 2074         incl    %esi
 2075 
 2076         movl    $0xf000, %eax
 2077         mull    %ebx
 2078 
 2079         / tuck away (target_pit_count * loop_count)
 2080         movl    %edx, %ecx
 2081         movl    %eax, %ebx
 2082 
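	/ The next five instructions compute %esi * 2^32 (as %esi * 0xffffffff
	/ + %esi) so that it can be compared against the product in %ecx:%ebx;
	/ if the product is not smaller, the divl below would overflow.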
 2083         movl    %esi, %eax
 2084         movl    $0xffffffff, %edx
 2085         mull    %edx
 2086 
 2087         addl    %esi, %eax
 2088         adcl    $0, %edx
 2089 
 2090         cmpl    %ecx, %edx
 2091         ja      freq_tsc_div_safe
 2092         jb      freq_tsc_too_fast
 2093 
 2094         cmpl    %ebx, %eax
 2095         jbe     freq_tsc_too_fast
 2096 
 2097 freq_tsc_div_safe:
 2098         movl    %ecx, %edx
 2099         movl    %ebx, %eax
 2100 
 2101         movl    %esi, %ecx
 2102         divl    %ecx
 2103 
 2104         movl    %eax, %ecx
 2105 
 2106         / the instruction cache has been warmed
 2107         stc
 2108 
 2109         jmp     freq_tsc_loop
 2110 
 2111 freq_tsc_sufficient_duration:
 2112         / test to see if the icache has been warmed
 2113         btl     $16, %eax
 2114         jnc     freq_tsc_calculate
 2115 
 2116         / recall mode 0 is a (count + 1) counter
 2117         andl    $0xffff, %eax
 2118         incl    %eax
 2119 
 2120         / save the number of PIT counts
 2121         movl    8(%ebp), %ebx
 2122         movl    %eax, (%ebx)
 2123 
 2124         / calculate the number of TS's that elapsed
 2125         movl    %ecx, %eax
 2126         subl    %esi, %eax
 2127         sbbl    %edi, %edx
 2128 
 2129         jmp     freq_tsc_end
 2130 
 2131 freq_tsc_too_fast:
 2132         / return 0 as a 64 bit quantity
 2133         xorl    %eax, %eax
 2134         xorl    %edx, %edx
 2135 
 2136 freq_tsc_end:
 2137         popl    %ebx
 2138         popl    %esi
 2139         popl    %edi
 2140         popl    %ebp
 2141         ret
 2142         SET_SIZE(freq_tsc)
 2143 
 2144 #endif  /* __i386 */
 2145 #endif  /* __lint */
 2146 
 2147 #if !defined(__amd64)
 2148 #if defined(__lint)
 2149 
/*
 * We do not have a TSC, so we use a block of instructions with well-known
 * timings instead.
 */
 2154 
 2155 /*ARGSUSED*/
 2156 uint64_t
 2157 freq_notsc(uint32_t *pit_counter)
 2158 {
 2159         return (0);
 2160 }
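
/*
 * Illustrative note (not part of the original source): freq_notsc() can be
 * used like freq_tsc() above, except that the returned tick count is
 * synthesized from a known cycles-per-iteration constant rather than read
 * from a TSC. A caller could compute, e.g.,
 * cpu_hz = freq_notsc(&pit_ticks) * PIT_INPUT_HZ / pit_ticks,
 * with PIT_INPUT_HZ as in the earlier sketch.
 */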
 2161 
 2162 #else   /* __lint */
 2163         ENTRY_NP(freq_notsc)
 2164         pushl   %ebp
 2165         movl    %esp, %ebp
 2166         pushl   %edi
 2167         pushl   %esi
 2168         pushl   %ebx
 2169 
 2170         / initial count for the idivl loop
 2171         movl    $0x1000, %ecx
 2172 
 2173         / load the divisor
 2174         movl    $1, %ebx
 2175 
 2176         jmp     freq_notsc_loop
 2177 
 2178 .align  16
 2179 freq_notsc_loop:
 2180         / set high 32 bits of dividend to zero
 2181         xorl    %edx, %edx
 2182 
 2183         / save the loop count in %edi
 2184         movl    %ecx, %edi
 2185 
 2186         / initialize the PIT counter and start a count down
 2187         movb    $PIT_LOADMODE, %al
 2188         outb    $PITCTL_PORT
 2189         movb    $0xff, %al
 2190         outb    $PITCTR0_PORT
 2191         outb    $PITCTR0_PORT
 2192 
 2193         / set low 32 bits of dividend to zero
 2194         xorl    %eax, %eax
 2195 
/ It is vital that the arguments to idivl be set appropriately, because on
/ some cpus this instruction takes more or fewer clock ticks depending on
/ its arguments.
 2199 freq_notsc_perf_loop:
 2200         idivl   %ebx
 2201         idivl   %ebx
 2202         idivl   %ebx
 2203         idivl   %ebx
 2204         idivl   %ebx
 2205         loop    freq_notsc_perf_loop
 2206 
 2207         / latch the PIT counter and status
 2208         movb    $_CONST(PIT_READBACK|PIT_READBACKC0), %al
 2209         outb    $PITCTL_PORT
 2210 
 2211         / read the PIT status
 2212         inb     $PITCTR0_PORT
 2213         shll    $8, %eax
 2214 
 2215         / read PIT count
 2216         inb     $PITCTR0_PORT
 2217         shll    $8, %eax
 2218         inb     $PITCTR0_PORT
 2219         bswap   %eax
 2220 
 2221         / check to see if the PIT count was loaded into the CE
 2222         btw     $_CONST(PITSTAT_NULLCNT+8), %ax
 2223         jc      freq_notsc_increase_count
 2224 
 2225         / check to see if PIT counter wrapped
 2226         btw     $_CONST(PITSTAT_OUTPUT+8), %ax
 2227         jnc     freq_notsc_pit_did_not_wrap
 2228 
 2229         / halve count
 2230         shrl    $1, %edi
 2231         movl    %edi, %ecx
 2232 
 2233         jmp     freq_notsc_loop
 2234 
 2235 freq_notsc_increase_count:
 2236         shll    $1, %edi
 2237         jc      freq_notsc_too_fast
 2238 
 2239         movl    %edi, %ecx
 2240 
 2241         jmp     freq_notsc_loop
 2242 
 2243 freq_notsc_pit_did_not_wrap:
 2244         shrl    $16, %eax
 2245 
 2246         cmpw    $0x2000, %ax
 2247         notw    %ax
 2248         jb      freq_notsc_sufficient_duration
 2249 
 2250 freq_notsc_calculate:
	/ In mode 0 the PIT loads the count into the CE on the first CLK pulse
	/ and only starts decrementing the CE on the second CLK pulse, so mode
	/ 0 is really a (count + 1) counter, ugh.
 2254         xorl    %esi, %esi
 2255         movw    %ax, %si
 2256         incl    %esi
 2257 
 2258         movl    %edi, %eax
 2259         movl    $0xf000, %ecx
 2260         mull    %ecx
 2261 
 2262         / tuck away (target_pit_count * loop_count)
 2263         movl    %edx, %edi
 2264         movl    %eax, %ecx
 2265 
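	/ The next five instructions compute %esi * 2^32 (as %esi * 0xffffffff
	/ + %esi) so that it can be compared against the product in %edi:%ecx;
	/ if the product is not smaller, the divl below would overflow.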
 2266         movl    %esi, %eax
 2267         movl    $0xffffffff, %edx
 2268         mull    %edx
 2269 
 2270         addl    %esi, %eax
 2271         adcl    $0, %edx
 2272 
 2273         cmpl    %edi, %edx
 2274         ja      freq_notsc_div_safe
 2275         jb      freq_notsc_too_fast
 2276 
 2277         cmpl    %ecx, %eax
 2278         jbe     freq_notsc_too_fast
 2279 
 2280 freq_notsc_div_safe:
 2281         movl    %edi, %edx
 2282         movl    %ecx, %eax
 2283 
 2284         movl    %esi, %ecx
 2285         divl    %ecx
 2286 
 2287         movl    %eax, %ecx
 2288 
 2289         jmp     freq_notsc_loop
 2290 
 2291 freq_notsc_sufficient_duration:
 2292         / recall mode 0 is a (count + 1) counter
 2293         incl    %eax
 2294 
 2295         / save the number of PIT counts
 2296         movl    8(%ebp), %ebx
 2297         movl    %eax, (%ebx)
 2298 
 2299         / calculate the number of cpu clock ticks that elapsed
 2300         cmpl    $X86_VENDOR_Cyrix, x86_vendor
	jnz	freq_notsc_notcyrix
 2302 
 2303         / freq_notsc_perf_loop takes 86 clock cycles on Cyrix 6x86 cores
 2304         movl    $86, %eax
 2305         jmp     freq_notsc_calculate_tsc
 2306 
 2307 freq_notsc_notcyrix:
 2308         / freq_notsc_perf_loop takes 237 clock cycles on Intel Pentiums
 2309         movl    $237, %eax
 2310 
 2311 freq_notsc_calculate_tsc:
 2312         mull    %edi
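	/ %edx:%eax = cycles-per-pass (86 or 237, loaded above) * the loop
	/ count in %edi, i.e. the cpu clock ticks that elapsed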
 2313 
 2314         jmp     freq_notsc_end
 2315 
 2316 freq_notsc_too_fast:
 2317         / return 0 as a 64 bit quantity
 2318         xorl    %eax, %eax
 2319         xorl    %edx, %edx
 2320 
 2321 freq_notsc_end:
 2322         popl    %ebx
 2323         popl    %esi
 2324         popl    %edi
 2325         popl    %ebp
 2326 
 2327         ret
 2328         SET_SIZE(freq_notsc)
 2329 
 2330 #endif  /* __lint */
 2331 #endif  /* !__amd64 */
 2332 
 2333 #if !defined(__lint)
 2334         .data
 2335 #if !defined(__amd64)
 2336         .align  4
 2337 cpu_vendor:
 2338         .long   0, 0, 0         /* Vendor ID string returned */
 2339 
 2340         .globl  CyrixInstead
 2341 
 2342         .globl  x86_featureset
 2343         .globl  x86_type
 2344         .globl  x86_vendor
 2345 #endif
 2346 
 2347 #endif  /* __lint */
