#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/reboot.h>
#include <sys/machparam.h>
#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/cmn_err.h>
#include <sys/pit.h>
#include <sys/panic.h>
#if defined(__xpv)
#include <sys/hypervisor.h>
#endif
#include "assym.h"
/*
 * Start of the text section.  The first instruction emitted here is an
 * unconditional jump to _start; _start itself is not defined in this part
 * of the file.
 */
.text
jmp _start
/* Symbols exported by, or referenced from, the startup code below. */
.globl _locore_start
.globl mlsetup
.globl main
.globl panic
.globl t0stack
.globl t0
.globl sysp
.globl edata
.globl bootops
.globl bootopsp
/*
 * Common-block storage, both requested with 32-byte alignment:
 *   t0stack - DEFAULTSTKSZ bytes; _locore_start points %rsp into it.
 *   t0      - 4094 bytes.
 *             NOTE(review): presumably backs the initial thread structure;
 *             the size is a bare literal rather than a named constant -
 *             confirm it is intentional.
 */
.data
.comm t0stack, DEFAULTSTKSZ, 32
.comm t0, 4094, 32
/*
 * _locore_start
 *
 * Early kernel entry: switches onto t0stack, records the incoming argument
 * registers in a register save area at the top of that stack, adjusts %cr0
 * (non-__xpv builds only), then calls mlsetup() followed by main().
 *
 * In (as consumed below):
 *   %rdi - stored to sysp
 *   %rdx - stored to bootops
 *   %rsi, %rcx, %r8, %r9 - only recorded in the save area
 *
 * Does not return: if main() comes back, panic("main() returned") is called.
 */
ENTRY_NP(_locore_start)
/* Store a zero quadword at the address of edata; %rbp is reloaded below. */
leaq edata(%rip), %rbp
movq $0, (%rbp)
/*
 * %rsp = t0stack + DEFAULTSTKSZ - REGSIZE, so the REGOFF_* offsets used
 * below address the topmost REGSIZE bytes of t0stack.
 */
leaq t0stack(%rip), %rsp
addq $_CONST(DEFAULTSTKSZ - REGSIZE), %rsp
/*
 * When REGSIZE is a multiple of 16, drop %rsp by a further 8 bytes so that
 * the single pushq before "call mlsetup" leaves %rsp 16-byte aligned
 * (assumes t0stack + DEFAULTSTKSZ is itself 16-byte aligned - TODO confirm).
 */
#if (REGSIZE & 15) == 0
subq $8, %rsp
#endif
/* Publish the boot arguments: sysp = %rdi, bootops = %rdx. */
movq %rdi, sysp(%rip)
movq %rdx, bootops(%rip)
/* bootopsp = &bootops (absolute address encoded as an immediate). */
movq $bootops, bootopsp(%rip)
/* Record the six integer argument registers in the save area. */
movq %rdi, REGOFF_RDI(%rsp)
movq %rsi, REGOFF_RSI(%rsp)
movq %rdx, REGOFF_RDX(%rsp)
movq %rcx, REGOFF_RCX(%rsp)
movq %r8, REGOFF_R8(%rsp)
movq %r9, REGOFF_R9(%rsp)
/* Record the current flags register as well, read via the stack into %r11. */
pushf
popq %r11
movq %r11, REGOFF_RFL(%rsp)
#if !defined(__xpv)
/*
 * Set CR0_WP and CR0_AM and clear CR0_WT and CR0_CE in %cr0.
 * NOTE(review): the meaning of each bit comes from headers not visible
 * here - confirm against the CR0_* definitions.
 */
movq %cr0, %rax
orq $_CONST(CR0_WP|CR0_AM), %rax
andq $_BITNOT(CR0_WT|CR0_CE), %rax
movq %rax, %cr0
#endif
/*
 * Build the outermost frame: the saved frame pointer pushed here is zero,
 * and %rdi (first argument to mlsetup) is the address of the save area,
 * captured before the push moves %rsp.
 */
xorl %ebp, %ebp
movq %rsp, %rdi
pushq %rbp
movq %rsp, %rbp
call mlsetup
call main
/* Only reached if main() returns: panic with a fixed message. */
leaq __return_from_main(%rip), %rdi
/* %eax = 0: no vector-register arguments for the varargs-style call. */
xorl %eax, %eax
call panic
SET_SIZE(_locore_start)
/*
 * NUL-terminated message strings used as panic() format arguments.
 */
/* Passed to panic() by _locore_start when main() returns. */
__return_from_main:
.string "main() returned"
/* Not referenced in the code visible in this part of the file. */
__unsupported_cpu:
.string "486 style cpu detected - no longer supported!"
#if defined(DEBUG)
/*
 * DEBUG-only format used by the lwp_rtt sanity check; it takes a line
 * number (%d) and an lwp pointer (%p).
 */
_no_pending_updates:
.string "locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1"
#endif
/*
 * cmntrap / _cmntrap / cmntrap_pushed
 *
 * Common trap entry.  cmntrap runs INTR_PUSH first; cmntrap_pushed is an
 * alternate entry that skips it.  From cmntrap_pushed onward %rsp is treated
 * as the base of the saved-register frame (accessed through REGOFF_*).
 *
 * Normal path:  trap(frame, %r15, cpu id), then jump to _sys_rtt.
 * DTrace path:  if CPU_DTRACE_NOFAULT is set in this CPU's cpu_core flags,
 *               record the fault kind in those flags, advance the saved
 *               %rip past the faulting instruction and return directly.
 *
 * %r15 is consumed but never written here.
 * NOTE(review): presumably the entry stub loads it with the fault address
 * or similar per-trap value - confirm against the callers.
 */
.globl trap
ENTRY_NP2(cmntrap, _cmntrap)
INTR_PUSH
ALTENTRY(cmntrap_pushed)
/* %rbp = saved-register frame; used for all REGOFF_* accesses below. */
movq %rsp, %rbp
TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP)
TRACE_REGS(%rdi, %rsp, %rbx, %rcx)
TRACE_STAMP(%rdi)
/*
 * %rax = cpu_core + (CPU_ID << CPU_CORE_SHIFT), i.e. this CPU's cpu_core
 * entry; %cx = its DTrace flags word.  Both stay live into .dtrace_induced.
 * The check is made before TRACE_STACK is run.
 */
movl %gs:CPU_ID, %eax
shlq $CPU_CORE_SHIFT, %rax
leaq cpu_core(%rip), %r8
addq %r8, %rax
movw CPUC_DTRACE_FLAGS(%rax), %cx
testw $CPU_DTRACE_NOFAULT, %cx
jnz .dtrace_induced
TRACE_STACK(%rdi)
/* trap(%rdi = frame, %rsi = %r15, %edx = cpu id) */
movq %rbp, %rdi
movq %r15, %rsi
movl %gs:CPU_ID, %edx
/* Applied only on this path, after the NOFAULT check has been passed. */
ENABLE_INTR_FLAGS
call trap
jmp _sys_rtt
.dtrace_induced:
/* Saved %cs must be the kernel selector; otherwise panic at 3:. */
cmpw $KCS_SEL, REGOFF_CS(%rbp)
jne 3f
/*
 * Dispatch on the saved trap number:
 *   T_PGFLT              -> 1: (BADADDR, and %r15 recorded as illegal value)
 *   T_GPFLT, T_ILLINST   -> 0: (ILLOP)
 *   T_ZERODIV            -> falls through (DIVZERO)
 *   anything else        -> 4: (panic)
 */
cmpl $T_PGFLT, REGOFF_TRAPNO(%rbp)
je 1f
cmpl $T_GPFLT, REGOFF_TRAPNO(%rbp)
je 0f
cmpl $T_ILLINST, REGOFF_TRAPNO(%rbp)
je 0f
cmpl $T_ZERODIV, REGOFF_TRAPNO(%rbp)
jne 4f
orw $CPU_DTRACE_DIVZERO, %cx
movw %cx, CPUC_DTRACE_FLAGS(%rax)
jmp 2f
0:
orw $CPU_DTRACE_ILLOP, %cx
movw %cx, CPUC_DTRACE_FLAGS(%rax)
jmp 2f
1:
orw $CPU_DTRACE_BADADDR, %cx
movw %cx, CPUC_DTRACE_FLAGS(%rax)
movq %r15, CPUC_DTRACE_ILLVAL(%rax)
2:
/*
 * Step over the faulting instruction: saved %rip += dtrace_instr_size(%rip).
 * %r12 carries the original %rip across the call.
 */
movq REGOFF_RIP(%rbp), %rdi
movq %rdi, %r12
call dtrace_instr_size
addq %rax, %r12
movq %r12, REGOFF_RIP(%rbp)
/* Unwind the frame and return without going through trap()/_sys_rtt. */
INTR_POP
call x86_md_clear
jmp tr_iret_auto
3:
/* NOFAULT was set but the trap did not come from the kernel selector. */
leaq dtrace_badflags(%rip), %rdi
xorl %eax, %eax
call panic
4:
/* NOFAULT was set but the trap type is not one handled above. */
leaq dtrace_badtrap(%rip), %rdi
xorl %eax, %eax
call panic
SET_SIZE(cmntrap_pushed)
SET_SIZE(cmntrap)
SET_SIZE(_cmntrap)
/*
 * _cmntrap_size: exported data object holding the distance from _cmntrap to
 * this location.  The value is computed after the .align, so any alignment
 * padding is included in it.
 * NOTE(review): the width of .NWORD is set by a macro defined elsewhere -
 * presumably one native word; confirm.
 */
.globl _cmntrap_size
.align CLONGSIZE
_cmntrap_size:
.NWORD . - _cmntrap
.type _cmntrap_size, @object
/* panic() messages for the two failure exits of cmntrap's DTrace path. */
dtrace_badflags:
.string "bad DTrace flags"
dtrace_badtrap:
.string "bad DTrace trap"
/*
 * cmninttrap
 *
 * Trap entry that builds the register frame with INTR_PUSH, applies
 * INTGATE_INIT_KERNEL_FLAGS, and calls trap(frame, 0, cpu id) before
 * leaving through _sys_rtt.  Unlike cmntrap, this path passes a zero
 * address argument, performs no DTrace NOFAULT check, and does not invoke
 * ENABLE_INTR_FLAGS before the call.
 */
.globl trap
ENTRY_NP(cmninttrap)
INTR_PUSH
INTGATE_INIT_KERNEL_FLAGS
TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP)
TRACE_REGS(%rdi, %rsp, %rbx, %rcx)
TRACE_STAMP(%rdi)
/* %rbp = %rdi = saved-register frame; %esi = 0; %edx = cpu id. */
movq %rsp, %rbp
movl %gs:CPU_ID, %edx
xorl %esi, %esi
movq %rsp, %rdi
call trap
jmp _sys_rtt
SET_SIZE(cmninttrap)
#if !defined(__xpv)
/*
 * bop_trap_handler (not built for __xpv)
 *
 * Calls bop_trap() with the value %rsp had on entry as its only argument.
 * NOTE(review): no instruction follows the call, so bop_trap is presumably
 * expected never to return; the 8-byte adjustment of %rsp looks like stack
 * alignment for the call - confirm both.
 */
ENTRY(bop_trap_handler)
movq %rsp, %rdi
sub $8, %rsp
call bop_trap
SET_SIZE(bop_trap_handler)
#endif
/*
 * dtrace_trap
 *
 * Trap entry that builds the register frame with INTR_PUSH and calls
 * dtrace_user_probe(frame, cr2 value, cpu id), then leaves through _sys_rtt.
 */
.globl dtrace_user_probe
ENTRY_NP(dtrace_trap)
INTR_PUSH
TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP)
TRACE_REGS(%rdi, %rsp, %rbx, %rcx)
TRACE_STAMP(%rdi)
movq %rsp, %rbp
/* Third argument: current CPU id. */
movl %gs:CPU_ID, %edx
/*
 * Second argument: on __xpv builds it is read from the VCPU_INFO_ARCH_CR2
 * field of the per-CPU vcpu info, otherwise directly from %cr2.
 */
#if defined(__xpv)
movq %gs:CPU_VCPU_INFO, %rsi
movq VCPU_INFO_ARCH_CR2(%rsi), %rsi
#else
movq %cr2, %rsi
#endif
/* First argument: the saved-register frame. */
movq %rsp, %rdi
ENABLE_INTR_FLAGS
call dtrace_user_probe
jmp _sys_rtt
SET_SIZE(dtrace_trap)
/*
 * lwp_rtt_initial / lwp_rtt / _sys_rtt and related labels
 *
 * Return-path family.  All entry points fall into the same tail:
 *
 *   lwp_rtt_initial  - switch to the thread's T_STACK, call
 *                      __dtrace_probe___proc_start, join lwp_rtt at _lwp_rtt.
 *   lwp_rtt          - switch to the thread's T_STACK, call
 *                      __dtrace_probe___proc_lwp__start, clear the saved
 *                      %fs/%gs if this thread is the process's agent thread,
 *                      then call dtrace_systrace_rtt and post_syscall.
 *   _sys_rtt         - CLI, then fall into _sys_rtt_ints_disabled.
 *   _sys_rtt_ints_disabled
 *                    - call sys_rtt_common(frame); a zero result selects
 *                      sr_sup, non-zero selects one of the user returns.
 *
 * Register roles on the lwp_rtt path:
 *   %r15 - current thread (from %gs:CPU_THREAD)
 *   %r14 - current lwp (from %gs:CPU_LWP)
 *   %rdx - LWP_PROCP(%r14)
 */
ENTRY_NP(lwp_rtt_initial)
movq %gs:CPU_THREAD, %r15
/* Switch onto the stack recorded in the thread. */
movq T_STACK(%r15), %rsp
movq %rsp, %rbp
call __dtrace_probe___proc_start
jmp _lwp_rtt
ENTRY_NP(lwp_rtt)
movq %gs:CPU_THREAD, %r15
/* Switch onto the stack recorded in the thread. */
movq T_STACK(%r15), %rsp
movq %rsp, %rbp
_lwp_rtt:
call __dtrace_probe___proc_lwp__start
movq %gs:CPU_LWP, %r14
movq LWP_PROCP(%r14), %rdx
#if defined(DEBUG)
/*
 * DEBUG check: bit 0 at PCB_RUPDATE(%r14) must be set here; if it is clear,
 * panic(_no_pending_updates, __LINE__, lwp).
 */
testb $0x1, PCB_RUPDATE(%r14)
jne 1f
leaq _no_pending_updates(%rip), %rdi
movl $__LINE__, %esi
movq %r14, %rdx
xorl %eax, %eax
call panic
1:
#endif
/*
 * If the process's P_AGENTTP is the current thread, zero the %fs/%gs slots
 * in the frame at %rsp and the LWP_PCB_FS/LWP_PCB_GS words in the lwp.
 */
cmpq %r15, P_AGENTTP(%rdx)
jne 1f
xorl %ecx, %ecx
movq %rcx, REGOFF_FS(%rsp)
movq %rcx, REGOFF_GS(%rsp)
movw %cx, LWP_PCB_FS(%r14)
movw %cx, LWP_PCB_GS(%r14)
1:
call dtrace_systrace_rtt
/* post_syscall(saved %rax, saved %rdx) */
movq REGOFF_RDX(%rsp), %rsi
movq REGOFF_RAX(%rsp), %rdi
call post_syscall
ALTENTRY(_sys_rtt)
CLI(%rax)
ALTENTRY(_sys_rtt_ints_disabled)
/* sys_rtt_common(frame): zero result means take the sr_sup exit. */
movq %rsp, %rdi
call sys_rtt_common
testq %rax, %rax
jz sr_sup
ASSERT_UPCALL_MASK_IS_SET
/* Saved %cs == UCS_SEL selects sys_rtt_syscall; otherwise the 32-bit exit. */
cmpw $UCS_SEL, REGOFF_CS(%rsp)
je sys_rtt_syscall
ALTENTRY(sys_rtt_syscall32)
USER32_POP
call x86_md_clear
jmp tr_iret_user
ALTENTRY(sys_rtt_syscall)
USER_POP
/* Alternate entry that skips USER_POP. */
ALTENTRY(nopop_sys_rtt_syscall)
call x86_md_clear
jmp tr_iret_user
SET_SIZE(nopop_sys_rtt_syscall)
/* Exit taken when sys_rtt_common returned zero. */
ALTENTRY(sr_sup)
INTR_POP
jmp tr_iret_kernel
/* _sys_rtt_end marks the address just past the end of this code. */
.globl _sys_rtt_end
_sys_rtt_end:
SET_SIZE(sr_sup)
SET_SIZE(_sys_rtt_end)
SET_SIZE(lwp_rtt)
SET_SIZE(lwp_rtt_initial)
SET_SIZE(_sys_rtt_ints_disabled)
SET_SIZE(_sys_rtt)
SET_SIZE(sys_rtt_syscall)
SET_SIZE(sys_rtt_syscall32)
/*
 * freq_tsc_pit
 *
 * Times a calibrated busy loop against PIT counter 0 and reports how many
 * TSC ticks and PIT counts elapsed over it.
 *
 * In:   %rdi - address of a 32-bit location that receives the PIT count
 * Out:  %rax - 64-bit count of TSC ticks that elapsed, or 0 on the
 *              freq_tsc_too_fast exit (in which case nothing is stored
 *              through %rdi)
 *
 * Register roles inside the measurement loop:
 *   %r9        - copy of the %rdi argument
 *   %ebx       - loop count used for the current attempt
 *   %ecx       - loop counter consumed by the "loop" instruction
 *   %edi:%esi  - TSC value sampled at the start of the timed loop
 *   carry flag - clear on the first pass, set on every retry
 *
 * %rbx is saved and restored; a standard %rbp frame is set up.
 */
ENTRY_NP(freq_tsc_pit)
pushq %rbp
movq %rsp, %rbp
movq %rdi, %r9
pushq %rbx
/ We have a TSC, but we have no way in general to know how reliable it is.
/ Usually a marginal TSC behaves appropriately unless not enough time
/ elapses between reads. A reliable TSC can be read as often and as rapidly
/ as desired. The simplistic approach of reading the TSC counter and
/ correlating to the PIT counter cannot be naively followed. Instead estimates
/ have to be taken to successively refine a guess at the speed of the cpu
/ and then the TSC and PIT counter are correlated. In practice very rarely
/ is more than one quick loop required for an estimate. Measures have to be
/ taken to prevent the PIT counter from wrapping beyond its resolution and for
/ measuring the clock rate of very fast processors.
/
/ The following constant can be tuned. It should be such that the loop does
/ not take too many nor too few PIT counts to execute. If this value is too
/ large, then on slow machines the loop will take a long time, or the PIT
/ counter may even wrap. If this value is too small, then on fast machines
/ the PIT counter may count so few ticks that the resolution of the PIT
/ itself causes a bad guess. Because this code is used in machines with
/ marginal TSC's and/or IO, if this value is too small on those, it may
/ cause the calculated cpu frequency to vary slightly from boot to boot.
/
/ In all cases even if this constant is set inappropriately, the algorithm
/ will still work and the caller should be able to handle variances in the
/ calculation of cpu frequency, but the calculation will be inefficient and
/ take a disproportionate amount of time relative to a well selected value.
/ As the slowest supported cpu becomes faster, this constant should be
/ carefully increased.
movl $0x8000, %ecx
/ to make sure the instruction cache has been warmed
clc
jmp freq_tsc_loop
/ The following block of code up to and including the latching of the PIT
/ counter after freq_tsc_perf_loop is very critical and very carefully
/ written, it should only be modified with great care. freq_tsc_loop to
/ freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
/ freq_tsc_perf_loop up to the unlatching of the PIT counter.
.align 32
freq_tsc_loop:
/ save the loop count in %ebx
movl %ecx, %ebx
/ initialize the PIT counter and start a count down
movb $PIT_LOADMODE, %al
outb $PITCTL_PORT
movb $0xff, %al
outb $PITCTR0_PORT
outb $PITCTR0_PORT
/ read the TSC and store the TS in %edi:%esi
rdtsc
movl %eax, %esi
freq_tsc_perf_loop:
movl %edx, %edi
movl %eax, %esi
movl %edx, %edi
loop freq_tsc_perf_loop
/ read the TSC and store the LSW in %ecx
rdtsc
movl %eax, %ecx
/ latch the PIT counter and status
movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
outb $PITCTL_PORT
/ remember if the icache has been warmed
setc %ah
/*
 * The three port reads below are shifted together and byte-swapped so that
 * %eax ends up as, from most to least significant byte:
 *   second count byte, first count byte, PIT status, warmed flag
 * %edx is not written again on this path, so it still holds the high word
 * of the second rdtsc.
 */
/ read the PIT status
inb $PITCTR0_PORT
shll $8, %eax
/ read PIT count
inb $PITCTR0_PORT
shll $8, %eax
inb $PITCTR0_PORT
bswap %eax
/ check to see if the PIT count was loaded into the CE
btw $_CONST(PITSTAT_NULLCNT+8), %ax
jc freq_tsc_increase_count
/ check to see if PIT counter wrapped
btw $_CONST(PITSTAT_OUTPUT+8), %ax
jnc freq_tsc_pit_did_not_wrap
/ halve count
shrl $1, %ebx
movl %ebx, %ecx
/ the instruction cache has been warmed
stc
jmp freq_tsc_loop
freq_tsc_increase_count:
/* Double the loop count; a carry out of bit 31 means give up. */
shll $1, %ebx
jc freq_tsc_too_fast
movl %ebx, %ecx
/ the instruction cache has been warmed
stc
jmp freq_tsc_loop
freq_tsc_pit_did_not_wrap:
/*
 * Rotate so that %ax holds the 16-bit PIT count and bit 16 of %eax holds
 * the warmed flag.  The branch uses the flags from cmpw (notw leaves them
 * untouched): it is taken when the remaining count is below 0x2000.
 * After notw, %ax is 0xffff minus the remaining count.
 */
roll $16, %eax
cmpw $0x2000, %ax
notw %ax
jb freq_tsc_sufficient_duration
freq_tsc_calculate:
/ in mode 0, the PIT loads the count into the CE on the first CLK pulse,
/ then on the second CLK pulse the CE is decremented, therefore mode 0
/ is really a (count + 1) counter, ugh
xorl %esi, %esi
movw %ax, %si
incl %esi
/* %edx:%eax = 0xf000 * loop count just used */
movl $0xf000, %eax
mull %ebx
/ tuck away (target_pit_count * loop_count)
movl %edx, %ecx
movl %eax, %ebx
/*
 * %edx:%eax = %esi * 0xffffffff + %esi, i.e. %esi << 32.  The division
 * below is only attempted when this is strictly greater than the tucked-away
 * product in %ecx:%ebx, which guarantees the 32-bit quotient cannot overflow.
 */
movl %esi, %eax
movl $0xffffffff, %edx
mull %edx
addl %esi, %eax
adcl $0, %edx
cmpl %ecx, %edx
ja freq_tsc_div_safe
jb freq_tsc_too_fast
cmpl %ebx, %eax
jbe freq_tsc_too_fast
freq_tsc_div_safe:
/* New loop count = (0xf000 * old loop count) / (elapsed PIT counts + 1). */
movl %ecx, %edx
movl %ebx, %eax
movl %esi, %ecx
divl %ecx
movl %eax, %ecx
/ the instruction cache has been warmed
stc
jmp freq_tsc_loop
freq_tsc_sufficient_duration:
/ test to see if the icache has been warmed
btl $16, %eax
jnc freq_tsc_calculate
/ recall mode 0 is a (count + 1) counter
andl $0xffff, %eax
incl %eax
/ save the number of PIT counts
movl %eax, (%r9)
/ calculate the number of TS's that elapsed
/* %edx:%eax = (second TSC sample in %edx:%ecx) - (first sample in %edi:%esi) */
movl %ecx, %eax
subl %esi, %eax
sbbl %edi, %edx
jmp freq_tsc_end
freq_tsc_too_fast:
/ return 0 as a 64 bit quantity
xorl %eax, %eax
xorl %edx, %edx
freq_tsc_end:
/* Combine %edx:%eax into the 64-bit return value in %rax. */
shlq $32, %rdx
orq %rdx, %rax
popq %rbx
leaveq
ret
SET_SIZE(freq_tsc_pit)