2022-04-15 10:37:39 +02:00
|
|
|
;------------------------------------------------------------------------------
|
|
|
|
;
|
|
|
|
; Copyright (c) 2022, Intel Corporation. All rights reserved.<BR>
|
|
|
|
; SPDX-License-Identifier: BSD-2-Clause-Patent
|
|
|
|
;
|
|
|
|
; Abstract:
|
|
|
|
;
|
|
|
|
; Provide macros for register save/restore using SSE and AVX registers
|
|
|
|
;
|
|
|
|
;------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
;
|
|
|
|
; Define SSE and AVX helper macros
|
|
|
|
;
|
|
|
|
;
; Define SSE macros using SSE 4.1 instructions
;
; SXMMN: insert a 64-bit general purpose register into one qword lane of an
;        XMM register (PINSRQ). The other lane of the XMM register is kept.
; args 1:XMM (destination), 2:IDX (qword lane, 0 or 1), 3:REG (64-bit source)
; Modified: %1
;
; The lane index is masked to a single bit: an XMM register holds only two
; qwords and PINSRQ decodes only bit 0 of its immediate.
;
%macro SXMMN        3
            pinsrq  %1, %3, (%2 & 1)
%endmacro
|
|
|
|
|
|
|
|
;
; LXMMN: extract one qword lane of an XMM register into a 64-bit general
;        purpose register (PEXTRQ). The XMM register is not changed.
; args 1:XMM (source), 2:REG (64-bit destination), 3:IDX (qword lane, 0 or 1)
; Modified: %2
;
; The lane index is masked to a single bit: an XMM register holds only two
; qwords and PEXTRQ decodes only bit 0 of its immediate.
;
%macro LXMMN        3
            pextrq  %2, %1, (%3 & 1)
%endmacro
|
|
|
|
|
|
|
|
;
; Define AVX macros using AVX instructions
;
; SYMMN: save XMM to YMM. Copies a 128-bit XMM register into the selected
;        half of a YMM register; the YMM register is also the first source,
;        so its other half is carried over unchanged.
; args 1:YMM, 2:IDX (0 - lower 128bits, 1 - upper 128bits), 3:XMM
; Modified: %1
;
%macro SYMMN        3
            vinsertf128     %1, %1, %3, %2
%endmacro
|
|
|
|
|
|
|
|
;
; LYMMN: restore XMM from YMM. Copies the selected 128-bit half of a YMM
;        register into an XMM register; the YMM source is not changed.
; args 1:YMM, 2:XMM, 3:IDX (0 - lower 128bits, 1 - upper 128bits)
; Modified: %2
;
%macro LYMMN        3
            vextractf128    %2, %1, %3
%endmacro
|
|
|
|
|
|
|
|
;
; SAVE_REGS: save RBP, RBX, RSI, RDI and RSP without using memory.
;   YMM7[128:191] = RBP      YMM7[192:255] = RBX
;   YMM8[128:191] = RSI      YMM8[192:255] = RDI
;   RSP is stored by SAVE_RSP (upper half of YMM6)
; XMM5 is used as the staging register for each pair.
; Modified: XMM5, YMM6, YMM7 and YMM8
;
%macro SAVE_REGS    0
            ; RBP:RBX -> upper half of YMM7
            SXMMN   xmm5, 0, rbp
            SXMMN   xmm5, 1, rbx
            SYMMN   ymm7, 1, xmm5

            ; RSI:RDI -> upper half of YMM8
            SXMMN   xmm5, 0, rsi
            SXMMN   xmm5, 1, rdi
            SYMMN   ymm8, 1, xmm5

            ; RSP -> upper half of YMM6
            SAVE_RSP
%endmacro
|
|
|
|
|
|
|
|
;
; LOAD_REGS: restore the registers stored by SAVE_REGS.
;   RBP <- YMM7[128:191]     RBX <- YMM7[192:255]
;   RSI <- YMM8[128:191]     RDI <- YMM8[192:255]
;   RSP is reloaded by LOAD_RSP (upper half of YMM6)
; XMM5 is used as the staging register for each pair.
; Modified: XMM5, RBP, RBX, RSI, RDI and RSP
;
%macro LOAD_REGS    0
            ; upper half of YMM7 -> RBP:RBX
            LYMMN   ymm7, xmm5, 1
            LXMMN   xmm5, rbp, 0
            LXMMN   xmm5, rbx, 1

            ; upper half of YMM8 -> RSI:RDI
            LYMMN   ymm8, xmm5, 1
            LXMMN   xmm5, rsi, 0
            LXMMN   xmm5, rdi, 1

            ; upper half of YMM6 -> RSP
            LOAD_RSP
%endmacro
|
|
|
|
;
; LOAD_RBP: restore RBP from YMM7[128:191]
; Modified: XMM5 and RBP
;
%macro LOAD_RBP     0
            LYMMN   ymm7, xmm5, 1               ; xmm5 = YMM7[128:255]
            movq    rbp, xmm5                   ; low qword of xmm5 = YMM7[128:191]
%endmacro
|
|
|
|
|
|
|
|
;
; LOAD_RBX: restore RBX from YMM7[192:255]
; Modified: XMM5 and RBX
;
%macro LOAD_RBX     0
            LYMMN   ymm7, xmm5, 1               ; xmm5 = YMM7[128:255]
            LXMMN   xmm5, rbx, 1                ; high qword of xmm5 = YMM7[192:255]
%endmacro
|
|
|
|
|
|
|
|
;
|
|
|
|
; Upper half of YMM6 to save/restore Time Stamp, RSP
|
|
|
|
;
|
|
|
|
;
; SAVE_TS: save Time Stamp to YMM6[192:255]
; arg 1:general purpose register which holds time stamp
; Modified: XMM5 and YMM6
;
; Read-modify-write of the upper half of YMM6, so the neighbouring qword
; YMM6[128:191] keeps its value.
;
%macro SAVE_TS      1
            LYMMN   ymm6, xmm5, 1               ; xmm5 = YMM6[128:255]
            SXMMN   xmm5, 1, %1                 ; replace the high qword only
            SYMMN   ymm6, 1, xmm5               ; write the pair back
%endmacro
|
|
|
|
|
|
|
|
;
; LOAD_TS: restore Time Stamp from YMM6[192:255]
; arg 1:general purpose register where to save time stamp
; Modified: XMM5 and %1
;
%macro LOAD_TS      1
            LYMMN   ymm6, xmm5, 1               ; xmm5 = YMM6[128:255]
            LXMMN   xmm5, %1, 1                 ; high qword of xmm5 = YMM6[192:255]
%endmacro
|
|
|
|
|
|
|
|
;
; SAVE_RSP: save RSP to YMM6[128:191]
; Modified: XMM5 and YMM6
;
; Read-modify-write of the upper half of YMM6, so the neighbouring qword
; YMM6[192:255] keeps its value.
;
%macro SAVE_RSP     0
            LYMMN   ymm6, xmm5, 1               ; xmm5 = YMM6[128:255]
            SXMMN   xmm5, 0, rsp                ; replace the low qword only
            SYMMN   ymm6, 1, xmm5               ; write the pair back
%endmacro
|
|
|
|
|
|
|
|
;
; LOAD_RSP: restore RSP from YMM6[128:191]
; Modified: XMM5 and RSP
;
%macro LOAD_RSP     0
            LYMMN   ymm6, xmm5, 1               ; xmm5 = YMM6[128:255]
            movq    rsp, xmm5                   ; low qword of xmm5 = YMM6[128:191]
%endmacro
|
|
|
|
|
|
|
|
;
|
|
|
|
; Upper half of YMM9 to save/restore UCODE status, BFV address
|
|
|
|
;
|
|
|
|
;
; SAVE_UCODE_STATUS: save uCode status to YMM9[128:191]
; (qword 0 of the upper half of YMM9, the slot LOAD_UCODE_STATUS reads)
; arg 1:general purpose register which holds uCode status
; Modified: XMM5 and YMM9
;
; Read-modify-write of the upper half of YMM9, so the neighbouring qword
; YMM9[192:255] keeps its value.
;
%macro SAVE_UCODE_STATUS 1
            LYMMN   ymm9, xmm5, 1               ; xmm5 = YMM9[128:255]
            SXMMN   xmm5, 0, %1                 ; replace the low qword only
            SYMMN   ymm9, 1, xmm5               ; write the pair back
%endmacro
|
|
|
|
|
|
|
|
;
; LOAD_UCODE_STATUS: restore uCode status from YMM9[128:191]
; (qword 0 of the upper half of YMM9, the slot SAVE_UCODE_STATUS writes)
; arg 1:general purpose register where to save uCode status
; Modified: XMM5 and %1
;
%macro LOAD_UCODE_STATUS 1
            LYMMN   ymm9, xmm5, 1               ; xmm5 = YMM9[128:255]
            movq    %1, xmm5                    ; low qword of xmm5 = YMM9[128:191]
%endmacro
|
|
|
|
|
|
|
|
;
; SAVE_BFV: save BFV address to YMM9[192:255]
; (qword 1 of the upper half of YMM9, the slot LOAD_BFV reads)
; arg 1:general purpose register which holds BFV address
; Modified: XMM5 and YMM9
;
; Read-modify-write of the upper half of YMM9, so the neighbouring qword
; YMM9[128:191] keeps its value.
;
%macro SAVE_BFV     1
            LYMMN   ymm9, xmm5, 1               ; xmm5 = YMM9[128:255]
            SXMMN   xmm5, 1, %1                 ; replace the high qword only
            SYMMN   ymm9, 1, xmm5               ; write the pair back
%endmacro
|
|
|
|
|
|
|
|
;
; LOAD_BFV: restore BFV address from YMM9[192:255]
; (qword 1 of the upper half of YMM9, the slot SAVE_BFV writes)
; arg 1:general purpose register where to save BFV address
; Modified: XMM5 and %1
;
%macro LOAD_BFV     1
            LYMMN   ymm9, xmm5, 1               ; xmm5 = YMM9[128:255]
            LXMMN   xmm5, %1, 1                 ; high qword of xmm5 = YMM9[192:255]
%endmacro
|
|
|
|
|
2022-05-17 10:44:00 +02:00
|
|
|
;
|
|
|
|
; Upper half of YMM10 to save/restore RCX
|
|
|
|
;
|
|
|
|
;
; SAVE_RCX: save RCX to YMM10[128:191]
; Modified: XMM5 and YMM10
;
; Read-modify-write of the upper half of YMM10, so the neighbouring qword
; YMM10[192:255] keeps its value.
;
%macro SAVE_RCX     0
            LYMMN   ymm10, xmm5, 1              ; xmm5 = YMM10[128:255]
            SXMMN   xmm5, 0, rcx                ; replace the low qword only
            SYMMN   ymm10, 1, xmm5              ; write the pair back
%endmacro
|
|
|
|
|
|
|
|
;
; LOAD_RCX: restore RCX from YMM10[128:191]
; Modified: XMM5 and RCX
;
%macro LOAD_RCX     0
            LYMMN   ymm10, xmm5, 1              ; xmm5 = YMM10[128:255]
            movq    rcx, xmm5                   ; low qword of xmm5 = YMM10[128:191]
%endmacro
|
|
|
|
|
2022-04-15 10:37:39 +02:00
|
|
|
;
; CALL_YMM: stack-less call. Stores the address of the instruction following
;           this macro in YMM7[128:191], then jumps to the entry point.
;           RET_YMM reads the same slot to come back.
; arg 1:Entry
; Modified: RSI, XMM5, YMM7
;
; NOTE(review): SAVE_REGS stores RBP in this same YMM7[128:191] slot, so a
; CALL_YMM issued after SAVE_REGS overwrites the saved RBP. Confirm callers
; do not depend on LOAD_REGS/LOAD_RBP returning the original RBP afterwards.
;
%macro CALL_YMM     1
            mov     rsi, %%ReturnAddress        ; rsi = resume point after the jmp
            LYMMN   ymm7, xmm5, 1               ; xmm5 = YMM7[128:255]
            SXMMN   xmm5, 0, rsi                ; low qword = return address
            SYMMN   ymm7, 1, xmm5               ; write the pair back
            mov     rsi, %1                     ; rsi = entry point
            jmp     rsi
%%ReturnAddress:
%endmacro
|
|
|
|
;
; RET_YMM: stack-less return. Restores RIP from YMM7[128:191], the slot
;          written by CALL_YMM, by jumping to the stored address.
; Modified: RSI, XMM5
;
%macro RET_YMM      0
            LYMMN   ymm7, xmm5, 1               ; xmm5 = YMM7[128:255]
            movq    rsi, xmm5                   ; rsi = saved return address
            jmp     rsi
%endmacro
|
|
|
|
|
|
|
|
;
; ENABLE_SSE: initialize the x87 FPU and enable SSE.
;   - resets the FPU and loads the control word embedded below
;   - requires CPUID.01H:EDX.SSE[bit 25] and CPUID.01H:ECX.SSE4_1[bit 19];
;     execution spins forever at the error label if either is missing
;   - sets CR4.OSFXSR (bit 9) and CR4.OSXMMEXCPT (bit 10)
;   - loads MXCSR with the value embedded below
; All labels are macro-local (%%) so the macro does not export symbols and
; can be expanded more than once in a file. The embedded constants are
; addressed RIP-relative, so the code does not depend on its link address.
; Modified: RAX, RBX, RDX, R10, CR4, MXCSR, x87 state and flags
;           (CPUID overwrites EAX/EBX/ECX/EDX; RCX is preserved through R10)
;
%macro ENABLE_SSE   0
            ;
            ; Initialize floating point units
            ;
            jmp     %%NextAddress               ; skip the inline constants
align 4
            ;
            ; Float control word initial value:
            ; all exceptions masked, double-precision, round-to-nearest
            ;
%%FpuControlWord:   DW      027Fh
            ;
            ; MXCSR initial value (ldmxcsr reads 32 bits):
            ; all exceptions masked, round-to-nearest,
            ; flush-to-zero (bit 15) and denormals-are-zero (bit 6) cleared
            ;
%%MxcsrControlWord: DD      01F80h
%%SseError:
            ;
            ; Processor has to support SSE
            ;
            jmp     %%SseError
%%NextAddress:
            finit
            lea     rax, [rel %%FpuControlWord]
            fldcw   [rax]

            ;
            ; Use CpuId instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
            ; whether the processor supports SSE instruction.
            ;
            mov     r10, rcx                    ; CPUID overwrites RCX
            mov     eax, 1
            cpuid
            bt      edx, 25
            jnc     %%SseError

            ;
            ; SSE 4.1 support (CPUID.01H:ECX[bit 19])
            ;
            bt      ecx, 19
            jnc     %%SseError
            mov     rcx, r10                    ; give the caller's RCX back

            ;
            ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
            ;
            mov     rax, cr4
            or      rax, 00000600h
            mov     cr4, rax

            ;
            ; The processor should support SSE instruction and we can use
            ; ldmxcsr instruction
            ;
            lea     rax, [rel %%MxcsrControlWord]
            ldmxcsr [rax]
%endmacro
|
|
|
|
|
|
|
|
;
; ENABLE_AVX: enable AVX state.
;   - requires CPUID.01H:ECX.AVX[bit 28]; execution spins forever at the
;     error label if it is missing
;   - sets CR4.OSXSAVE (bit 18) so that xgetbv/xsetbv can be executed
;   - sets XCR0 bit 1 (SSE state) and bit 2 (AVX state)
; All labels are macro-local (%%) so the macro does not export symbols and
; can be expanded more than once in a file.
; Modified: RAX, RBX, RDX, R10, CR4, XCR0 and flags
;           (CPUID overwrites EAX/EBX/ECX/EDX; RCX is preserved through R10)
;
%macro ENABLE_AVX   0
            mov     r10, rcx                    ; CPUID and XGETBV need RCX
            mov     eax, 1
            cpuid
            bt      ecx, 28                     ; check AVX feature flag
            jc      %%EnableAvx
%%AvxError:
            ;
            ; Processor has to support AVX
            ;
            jmp     %%AvxError
%%EnableAvx:
            ;
            ; Set OSXSAVE bit (bit #18) to enable xgetbv/xsetbv instruction
            ;
            mov     rax, cr4
            or      rax, 00040000h
            mov     cr4, rax

            xor     ecx, ecx                    ; index 0 (XCR0)
            xgetbv                              ; result in edx:eax
            or      eax, 00000006h              ; Set XCR0 bit #1 and bit #2 to enable SSE state and AVX state
            xsetbv
            mov     rcx, r10                    ; give the caller's RCX back
%endmacro
|
|
|
|
|