
; (file-viewer residue) 353 lines
; (file-viewer residue) 8.8 KiB

; Copyright (c) 2022, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
; Abstract:
; Provide macro for register save/restore using SSE registers
; Define SSE and AVX instruction set
; Define SSE macros using SSE 4.1 instructions
; SXMMN - store a 64-bit general purpose register into one qword lane of an
;         XMM register (PINSRQ, SSE 4.1). The other lane is left unchanged.
; args 1:XMM, 2:IDX, 3:REG
;   %1 XMM  destination XMM register
;   %2 IDX  qword lane index (0 - bits [0:63], 1 - bits [64:127]);
;           masked with 3 at assembly time
;   %3 REG  64-bit source register
; Modified: %1
%macro SXMMN 3
    pinsrq  %1, %3, (%2 & 3)
%endmacro
; LXMMN - load a 64-bit general purpose register from one qword lane of an
;         XMM register (PEXTRQ, SSE 4.1).
; args 1:XMM, 2:REG, 3:IDX
;   %1 XMM  source XMM register
;   %2 REG  64-bit destination register
;   %3 IDX  qword lane index (0 - bits [0:63], 1 - bits [64:127]);
;           masked with 3 at assembly time
; Modified: %2
%macro LXMMN 3
    pextrq  %2, %1, (%3 & 3)
%endmacro
; Define AVX macros using AVX instructions
; Save XMM to YMM
; SYMMN - copy an XMM register into one 128-bit half of a YMM register.
;         %1 is both first source and destination of VINSERTF128, so the
;         half that is not selected keeps its previous contents.
; args 1:YMM, 2:IDX (0 - lower 128bits, 1 - upper 128bits), 3:XMM
; Modified: %1
%macro SYMMN 3
    vinsertf128 %1, %1, %3, %2
%endmacro
; Restore XMM from YMM
; LYMMN - copy one 128-bit half of a YMM register into an XMM register.
; args 1:YMM, 2:XMM, 3:IDX (0 - lower 128bits, 1 - upper 128bits)
; Modified: %2
%macro LYMMN 3
    vextractf128 %2, %1, %3
%endmacro
; Upper half of YMM7 to save RBP and RBX. Upper half of YMM8 to save RSI and RDI.
; RSP is saved through SAVE_RSP (upper half of YMM6).
;   YMM7[128:191] = RBP, YMM7[192:255] = RBX
;   YMM8[128:191] = RSI, YMM8[192:255] = RDI
; XMM5 is used as the staging register: both of its qword lanes are written
; before each copy into the YMM upper half.
; Modified: XMM5, YMM6, YMM7 and YMM8
%macro SAVE_REGS 0
    SXMMN   xmm5, 0, rbp
    SXMMN   xmm5, 1, rbx
    SYMMN   ymm7, 1, xmm5
    SXMMN   xmm5, 0, rsi
    SXMMN   xmm5, 1, rdi
    SYMMN   ymm8, 1, xmm5
    SAVE_RSP                            ; accounts for YMM6 in the Modified list
%endmacro

; Upper half of YMM7 to restore RBP and RBX. Upper half of YMM8 to restore RSI and RDI.
; RSP is restored through LOAD_RSP (upper half of YMM6).
; This is the counterpart of SAVE_REGS and reads the same slots it wrote.
; Modified: XMM5, RBP, RBX, RSI, RDI and RSP
%macro LOAD_REGS 0
    LYMMN   ymm7, xmm5, 1
    LXMMN   xmm5, rbp, 0
    LXMMN   xmm5, rbx, 1
    LYMMN   ymm8, xmm5, 1
    LXMMN   xmm5, rsi, 0
    LXMMN   xmm5, rdi, 1
    LOAD_RSP                            ; accounts for RSP in the Modified list
%endmacro
; Restore RBP from YMM7[128:191]
; The upper half of YMM7 is copied to XMM5; MOVQ then takes its low qword.
; Modified: XMM5 and RBP
%macro LOAD_RBP 0
    LYMMN   ymm7, xmm5, 1
    movq    rbp, xmm5
%endmacro
; Restore RBX from YMM7[192:255]
; The upper half of YMM7 is copied to XMM5; qword lane 1 of XMM5 is RBX.
; Modified: XMM5 and RBX
%macro LOAD_RBX 0
    LYMMN   ymm7, xmm5, 1
    LXMMN   xmm5, rbx, 1
%endmacro
; Upper half of YMM6 to save/restore Time Stamp, RSP
; Save Time Stamp to YMM6[192:255]
; arg 1:general purpose register which holds time stamp
; Read-modify-write of the upper half of YMM6: only qword lane 1 is replaced,
; so the value held in YMM6[128:191] is preserved.
; Modified: XMM5 and YMM6
%macro SAVE_TS 1
    LYMMN   ymm6, xmm5, 1
    SXMMN   xmm5, 1, %1
    SYMMN   ymm6, 1, xmm5
%endmacro
; Restore Time Stamp from YMM6[192:255]
; arg 1:general purpose register where to save time stamp
; Modified: XMM5 and %1
%macro LOAD_TS 1
    LYMMN   ymm6, xmm5, 1
    LXMMN   xmm5, %1, 1
%endmacro
; Save RSP to YMM6[128:191]
; Read-modify-write of the upper half of YMM6: only qword lane 0 is replaced,
; so the value held in YMM6[192:255] is preserved.
; Modified: XMM5 and YMM6
%macro SAVE_RSP 0
    LYMMN   ymm6, xmm5, 1
    SXMMN   xmm5, 0, rsp
    SYMMN   ymm6, 1, xmm5
%endmacro
; Restore RSP from YMM6[128:191]
; The upper half of YMM6 is copied to XMM5; MOVQ then takes its low qword.
; Modified: XMM5 and RSP
%macro LOAD_RSP 0
    LYMMN   ymm6, xmm5, 1
    movq    rsp, xmm5
%endmacro
; Upper half of YMM9 to save/restore UCODE status, BFV address
; Save uCode status to YMM9[128:191]
; (qword lane 0 of the upper half of YMM9, as selected by the SXMMN index)
; arg 1:general purpose register which holds uCode status
; Read-modify-write of the upper half of YMM9: YMM9[192:255] is preserved.
; Modified: XMM5 and YMM9
; NOTE(review): the `%macro` header for this body was missing; the name
; SAVE_UCODE_STATUS follows the SAVE_*/LOAD_* naming of the sibling macros -
; confirm it matches the name used by callers.
%macro SAVE_UCODE_STATUS 1
    LYMMN   ymm9, xmm5, 1
    SXMMN   xmm5, 0, %1
    SYMMN   ymm9, 1, xmm5
%endmacro
; Restore uCode status from YMM9[128:191]
; (MOVQ reads the low qword of the upper half of YMM9)
; arg 1:general purpose register where to save uCode status
; Modified: XMM5 and %1
; NOTE(review): the `%macro` header for this body was missing; the name
; LOAD_UCODE_STATUS follows the SAVE_*/LOAD_* naming of the sibling macros -
; confirm it matches the name used by callers.
%macro LOAD_UCODE_STATUS 1
    LYMMN   ymm9, xmm5, 1
    movq    %1, xmm5
%endmacro
; Save BFV address to YMM9[192:255]
; (qword lane 1 of the upper half of YMM9, as selected by the SXMMN index)
; arg 1:general purpose register which holds BFV address
; Read-modify-write of the upper half of YMM9: YMM9[128:191] is preserved.
; Modified: XMM5 and YMM9
%macro SAVE_BFV 1
    LYMMN   ymm9, xmm5, 1
    SXMMN   xmm5, 1, %1
    SYMMN   ymm9, 1, xmm5
%endmacro
; Restore BFV address from YMM9[192:255]
; (qword lane 1 of the upper half of YMM9, as selected by the LXMMN index)
; arg 1:general purpose register where to save BFV address
; Modified: XMM5 and %1
%macro LOAD_BFV 1
    LYMMN   ymm9, xmm5, 1
    LXMMN   xmm5, %1, 1
%endmacro
; Upper half of YMM10 to save/restore RCX
; Save RCX to YMM10[128:191]
; Read-modify-write of the upper half of YMM10: only qword lane 0 is replaced,
; so the value held in YMM10[192:255] is preserved.
; Modified: XMM5 and YMM10
%macro SAVE_RCX 0
    LYMMN   ymm10, xmm5, 1
    SXMMN   xmm5, 0, rcx
    SYMMN   ymm10, 1, xmm5
%endmacro
; Restore RCX from YMM10[128:191]
; The upper half of YMM10 is copied to XMM5; MOVQ then takes its low qword.
; Modified: XMM5 and RCX
%macro LOAD_RCX 0
    LYMMN   ymm10, xmm5, 1
    movq    rcx, xmm5
%endmacro
; Save TemporaryRamSize to YMM10[192:255]
; arg 1:general purpose register which holds TemporaryRamSize
; Read-modify-write of the upper half of YMM10: YMM10[128:191] is preserved.
; Modified: XMM5 and YMM10[192:255]
; NOTE(review): the `%macro` header for this body was missing; the name
; SAVE_TEMPORARY_RAM_SIZE follows the SAVE_*/LOAD_* naming of the sibling
; macros - confirm it matches the name used by callers.
%macro SAVE_TEMPORARY_RAM_SIZE 1
    LYMMN   ymm10, xmm5, 1
    SXMMN   xmm5, 1, %1
    SYMMN   ymm10, 1, xmm5
%endmacro
; Restore TemporaryRamSize from YMM10[192:255]
; arg 1:general purpose register where to save TemporaryRamSize
; Modified: XMM5 and %1
; NOTE(review): the `%macro` header for this body was missing; the name
; LOAD_TEMPORARY_RAM_SIZE follows the SAVE_*/LOAD_* naming of the sibling
; macros - confirm it matches the name used by callers.
%macro LOAD_TEMPORARY_RAM_SIZE 1
    LYMMN   ymm10, xmm5, 1
    LXMMN   xmm5, %1, 1
%endmacro
; YMM7[128:191] for calling stack
; Stack-less call: the address of the instruction following this macro is
; stored in YMM7[128:191], then control jumps to Entry. RET_YMM is the
; matching return.
; arg 1:Entry
; Modified: RSI, XMM5, YMM7
; NOTE(review): YMM7[128:191] is the same slot SAVE_REGS uses for RBP, so a
; CALL_YMM issued after SAVE_REGS overwrites the saved RBP - confirm callers
; account for this.
%macro CALL_YMM 1
    mov     rsi, %%ReturnAddress
    LYMMN   ymm7, xmm5, 1
    SXMMN   xmm5, 0, rsi                ; replace lane 0 only; YMM7[192:255] kept
    SYMMN   ymm7, 1, xmm5
    mov     rsi, %1
    jmp     rsi
%%ReturnAddress:                        ; macro-local label: unique per expansion
%endmacro
; Restore RIP from YMM7[128:191]
; Stack-less return: jumps to the address held in the low qword of the upper
; half of YMM7 (the slot written by CALL_YMM).
; Modified: RSI, XMM5
%macro RET_YMM 0
    LYMMN   ymm7, xmm5, 1
    movq    rsi, xmm5
    jmp     rsi
%endmacro
;
; ENABLE_SSE - initialize the x87 FPU, verify SSE and SSE 4.1 support through
;              CPUID leaf 1, set CR4.OSFXSR/CR4.OSXMMEXCPT and load MXCSR.
; If either feature bit is missing, execution spins forever at SseError.
; RBX and RCX are preserved across CPUID by parking them in R11 and R10.
; Modified: RAX, RDX, R10, R11, CR4, x87 state, MXCSR and flags
; The labels below are not macro-local (no %% prefix), so this macro can be
; expanded only once per assembly unit.
;
%macro ENABLE_SSE 0
    ;
    ; Initialize floating point units
    ;
    jmp     NextAddress                 ; jump over the inline constants
align 4
    ;
    ; Float control word initial value:
    ; all exceptions masked, double-precision, round-to-nearest
    ;
FpuControlWord  DW  027Fh
    ;
    ; Multimedia-extensions control word:
    ; all exceptions masked, round-to-nearest
    ; (01F80h leaves the flush-to-zero bit, bit 15, clear)
    ;
MmxControlWord  DQ  01F80h
SseError:
    ;
    ; Processor has to support SSE
    ;
    jmp     SseError                    ; unsupported CPU: hang here
NextAddress:
    finit
    mov     rax, FpuControlWord
    fldcw   [rax]
    ;
    ; Use CpuId instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
    ; whether the processor supports SSE instruction.
    ;
    ;
    ; Save RBX to R11
    ; Save RCX to R10
    ;
    mov     r11, rbx
    mov     r10, rcx
    mov     rax, 1
    cpuid
    bt      rdx, 25
    jnc     SseError
    ;
    ; SSE 4.1 support (CPUID.01H:ECX bit 19)
    ;
    bt      ecx, 19
    jnc     SseError
    ;
    ; Restore RBX from R11
    ; Restore RCX from R10
    ;
    mov     rbx, r11
    mov     rcx, r10
    ;
    ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
    ;
    mov     rax, cr4
    or      rax, 00000600h
    mov     cr4, rax
    ;
    ; The processor should support SSE instruction and we can use
    ; ldmxcsr instruction
    ;
    mov     rax, MmxControlWord
    ldmxcsr [rax]
%endmacro
;
; ENABLE_AVX - verify AVX support through CPUID leaf 1 (ECX bit 28), set
;              CR4.OSXSAVE and enable SSE and AVX state in XCR0.
; If AVX is not reported, execution spins forever at AvxError.
; RBX and RCX are preserved across CPUID/XGETBV by parking them in R11 and R10.
; Modified: RAX, RDX, R10, R11, CR4, XCR0 and flags
; The labels below are not macro-local (no %% prefix), so this macro can be
; expanded only once per assembly unit.
;
%macro ENABLE_AVX 0
    ;
    ; Save RBX to R11
    ; Save RCX to R10
    ;
    mov     r11, rbx
    mov     r10, rcx
    mov     eax, 1
    cpuid
    and     ecx, 10000000h
    cmp     ecx, 10000000h              ; check AVX feature flag
    je      EnableAvx
AvxError:
    ;
    ; Processor has to support AVX
    ;
    jmp     AvxError                    ; unsupported CPU: hang here
EnableAvx:
    ;
    ; Set OSXSAVE bit (bit #18) to enable xgetbv/xsetbv instruction
    ;
    mov     rax, cr4
    or      rax, 00040000h
    mov     cr4, rax
    mov     rcx, 0                      ; index 0
    xgetbv                              ; result in edx:eax
    or      eax, 00000006h              ; Set XCR0 bit #1 and bit #2 to enable SSE state and AVX state
    xsetbv                              ; write edx:eax back to XCR0 (ecx is still 0)
    ;
    ; Restore RBX from R11
    ; Restore RCX from R10
    ;
    mov     rbx, r11
    mov     rcx, r10
%endmacro