default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD

section .text code align=64

EXTERN OPENSSL_ia32cap_P
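
; What follows is the run-time dispatcher. It reads OPENSSL_ia32cap_P
; (OpenSSL's cached CPUID feature words, defined elsewhere) and
; tail-jumps to the best stitched AES-CBC+SHA-256 implementation.
; A sketch of the selection logic, assuming the usual OPENSSL_ia32cap_P
; bit layout (not restated in this file):
;
;   if (word2 & (1 << 29))           /* SHA extensions */  goto shaext;
;   if (word1 & (1 << 11))           /* AMD XOP        */  goto xop;
;   if ((word2 & 0x128) == 0x128)    /* AVX2+BMI1+BMI2 */  goto avx2;
;   if (word1 & (1 << 28))           /* AVX            */  goto avx;
;   trap;                            /* no suitable ISA */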

global aesni_cbc_sha256_enc

ALIGN 16
aesni_cbc_sha256_enc:
lea r11,[OPENSSL_ia32cap_P]
mov eax,1
cmp rcx,0 ; a NULL first argument appears to be an availability probe
je NEAR $L$probe
mov eax,DWORD[r11]
mov r10,QWORD[4+r11]
bt r10,61 ; SHA extensions?
jc NEAR aesni_cbc_sha256_enc_shaext
mov r11,r10
shr r11,32

test r10d,2048 ; XOP?
jnz NEAR aesni_cbc_sha256_enc_xop
and r11d,296 ; AVX2+BMI1+BMI2?
cmp r11d,296
je NEAR aesni_cbc_sha256_enc_avx2
and r10d,268435456 ; AVX?
jnz NEAR aesni_cbc_sha256_enc_avx
ud2 ; no suitable ISA: trap rather than fall back
xor eax,eax ; (apparently unreachable return-0 tail kept by the generator)
cmp rcx,0
je NEAR $L$probe
ud2
$L$probe:
DB 0F3h,0C3h ;repret

section .rdata rdata align=64
ALIGN 64

K256:
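; SHA-256 round constants. Each 16-byte row is stored twice so the AVX2
; path can load a 32-byte constant with the same four dwords in both
; 128-bit lanes; the XOP/AVX paths simply read the rows at stride 32.
; The table is followed (at K256+512) by a big-endian byte-swap mask
; for vpshufb and (at K256+544) by zero/all-ones dword masks that the
; code appears to index by AES key length (10/12/14 rounds) when
; splicing the final AES round output.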
DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

; K256+512: byte-swap mask for vpshufb, likewise duplicated
DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
; K256+544: key-length selection masks (see comment at K256 above)
DD 0,0,0,0,0,0,0,0,-1,-1,-1,-1
DD 0,0,0,0,0,0,0,0
; "AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54
DB 32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95
DB 54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98
DB 121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108
DB 46,111,114,103,62,0
ALIGN 64

section .text
|
|
|
|
ALIGN 64
|
|
aesni_cbc_sha256_enc_xop:
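; XOP (AMD Bulldozer-class) flavour of the stitched AES-CBC encrypt +
; SHA-256 routine. The Win64 prologue below re-marshals the Microsoft
; ABI arguments into the System V registers the body was written for.
; The parameter names used in comments follow the upstream OpenSSL
; prototype
;   aesni_cbc_sha256_enc(inp, out, blocks, key, ivp, ctx, in0)
; which this file does not restate, so treat them as informed labels.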
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_aesni_cbc_sha256_enc_xop:
mov rdi,rcx ; inp
mov rsi,rdx ; out
mov rdx,r8 ; blocks
mov rcx,r9 ; key
mov r8,QWORD[40+rsp] ; ivp
mov r9,QWORD[48+rsp] ; ctx

$L$xop_shortcut:
mov r10,QWORD[56+rsp] ; in0
|
|
mov rax,rsp
|
|
|
|
push rbx
|
|
|
|
push rbp
|
|
|
|
push r12
|
|
|
|
push r13
|
|
|
|
push r14
|
|
|
|
push r15
|
|
|
|
sub rsp,288
|
|
and rsp,-64
|
|
|
|
shl rdx,6
|
|
sub rsi,rdi
|
|
sub r10,rdi
|
|
add rdx,rdi
|
|
|
|
|
|
mov QWORD[((64+8))+rsp],rsi
|
|
mov QWORD[((64+16))+rsp],rdx
|
|
|
|
mov QWORD[((64+32))+rsp],r8
|
|
mov QWORD[((64+40))+rsp],r9
|
|
mov QWORD[((64+48))+rsp],r10
|
|
mov QWORD[120+rsp],rax
|
|
|
|
movaps XMMWORD[128+rsp],xmm6
|
|
movaps XMMWORD[144+rsp],xmm7
|
|
movaps XMMWORD[160+rsp],xmm8
|
|
movaps XMMWORD[176+rsp],xmm9
|
|
movaps XMMWORD[192+rsp],xmm10
|
|
movaps XMMWORD[208+rsp],xmm11
|
|
movaps XMMWORD[224+rsp],xmm12
|
|
movaps XMMWORD[240+rsp],xmm13
|
|
movaps XMMWORD[256+rsp],xmm14
|
|
movaps XMMWORD[272+rsp],xmm15
|
|
$L$prologue_xop:
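; Register/stack roles from here on (as set up above): r12 = running
; input pointer, rdi = AES key schedule biased by +128 (round keys are
; read at offsets 0-128, 16-128, ...), r15 = SHA-256 state, rsi =
; in0-minus-inp delta (so [r12+rsi] walks the stream being hashed while
; [r12] walks the buffer being CBC-encrypted), [64+8+rsp] = out-minus-
; inp delta, xmm8 = CBC IV / running ciphertext, and r14 = rounds-9,
; used to pick the key-length masks from K256+544. rsp+0..63 holds the
; 16 message-schedule words with the K256 constants pre-added.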
|
|
vzeroall
|
|
|
|
mov r12,rdi
|
|
lea rdi,[128+rcx]
|
|
lea r13,[((K256+544))]
|
|
mov r14d,DWORD[((240-128))+rdi]
|
|
mov r15,r9
|
|
mov rsi,r10
|
|
vmovdqu xmm8,XMMWORD[r8]
|
|
sub r14,9
|
|
|
|
mov eax,DWORD[r15]
|
|
mov ebx,DWORD[4+r15]
|
|
mov ecx,DWORD[8+r15]
|
|
mov edx,DWORD[12+r15]
|
|
mov r8d,DWORD[16+r15]
|
|
mov r9d,DWORD[20+r15]
|
|
mov r10d,DWORD[24+r15]
|
|
mov r11d,DWORD[28+r15]
|
|
|
|
vmovdqa xmm14,XMMWORD[r14*8+r13]
|
|
vmovdqa xmm13,XMMWORD[16+r14*8+r13]
|
|
vmovdqa xmm12,XMMWORD[32+r14*8+r13]
|
|
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
|
|
jmp NEAR $L$loop_xop
|
|
ALIGN 16
|
|
$L$loop_xop:
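; Top of the per-64-byte-block loop: load four 16-byte message words
; from [r12+rsi], byte-swap them with the vpshufb mask cached in xmm7
; (K256+512), pre-add the first four K256 constant rows, and park
; W[0..15]+K on the stack for the scalar rounds to consume.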
|
|
vmovdqa xmm7,XMMWORD[((K256+512))]
|
|
vmovdqu xmm0,XMMWORD[r12*1+rsi]
|
|
vmovdqu xmm1,XMMWORD[16+r12*1+rsi]
|
|
vmovdqu xmm2,XMMWORD[32+r12*1+rsi]
|
|
vmovdqu xmm3,XMMWORD[48+r12*1+rsi]
|
|
vpshufb xmm0,xmm0,xmm7
|
|
lea rbp,[K256]
|
|
vpshufb xmm1,xmm1,xmm7
|
|
vpshufb xmm2,xmm2,xmm7
|
|
vpaddd xmm4,xmm0,XMMWORD[rbp]
|
|
vpshufb xmm3,xmm3,xmm7
|
|
vpaddd xmm5,xmm1,XMMWORD[32+rbp]
|
|
vpaddd xmm6,xmm2,XMMWORD[64+rbp]
|
|
vpaddd xmm7,xmm3,XMMWORD[96+rbp]
|
|
vmovdqa XMMWORD[rsp],xmm4
|
|
mov r14d,eax
|
|
vmovdqa XMMWORD[16+rsp],xmm5
|
|
mov esi,ebx
|
|
vmovdqa XMMWORD[32+rsp],xmm6
|
|
xor esi,ecx
|
|
vmovdqa XMMWORD[48+rsp],xmm7
|
|
mov r13d,r8d
|
|
jmp NEAR $L$xop_00_47
|
|
|
|
ALIGN 16
|
|
$L$xop_00_47:
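; Rounds 0..47: three passes of 16 SHA-256 rounds each, with the
; message-schedule update and one 16-byte AES-CBC step interleaved into
; the scalar round function. The raw "DB 143,232,120,194,..." sequences
; are XOP instructions emitted as bytes (0x8F is the XOP escape); they
; appear to be vprotd rotates used for the sigma0/sigma1 schedule
; rotations, presumably hand-encoded for assemblers without XOP
; mnemonics.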
|
|
sub rbp,-16*2*4
|
|
vmovdqu xmm9,XMMWORD[r12]
|
|
mov QWORD[((64+0))+rsp],r12
|
|
vpalignr xmm4,xmm1,xmm0,4
|
|
ror r13d,14
|
|
mov eax,r14d
|
|
vpalignr xmm7,xmm3,xmm2,4
|
|
mov r12d,r9d
|
|
xor r13d,r8d
|
|
DB 143,232,120,194,236,14
|
|
ror r14d,9
|
|
xor r12d,r10d
|
|
vpsrld xmm4,xmm4,3
|
|
ror r13d,5
|
|
xor r14d,eax
|
|
vpaddd xmm0,xmm0,xmm7
|
|
and r12d,r8d
|
|
vpxor xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
|
|
xor r13d,r8d
|
|
add r11d,DWORD[rsp]
|
|
mov r15d,eax
|
|
DB 143,232,120,194,245,11
|
|
ror r14d,11
|
|
xor r12d,r10d
|
|
vpxor xmm4,xmm4,xmm5
|
|
xor r15d,ebx
|
|
ror r13d,6
|
|
add r11d,r12d
|
|
and esi,r15d
|
|
DB 143,232,120,194,251,13
|
|
xor r14d,eax
|
|
add r11d,r13d
|
|
vpxor xmm4,xmm4,xmm6
|
|
xor esi,ebx
|
|
add edx,r11d
|
|
vpsrld xmm6,xmm3,10
|
|
ror r14d,2
|
|
add r11d,esi
|
|
vpaddd xmm0,xmm0,xmm4
|
|
mov r13d,edx
|
|
add r14d,r11d
|
|
DB 143,232,120,194,239,2
|
|
ror r13d,14
|
|
mov r11d,r14d
|
|
vpxor xmm7,xmm7,xmm6
|
|
mov r12d,r8d
|
|
xor r13d,edx
|
|
ror r14d,9
|
|
xor r12d,r9d
|
|
vpxor xmm7,xmm7,xmm5
|
|
ror r13d,5
|
|
xor r14d,r11d
|
|
and r12d,edx
|
|
vpxor xmm9,xmm9,xmm8
|
|
xor r13d,edx
|
|
vpsrldq xmm7,xmm7,8
|
|
add r10d,DWORD[4+rsp]
|
|
mov esi,r11d
|
|
ror r14d,11
|
|
xor r12d,r9d
|
|
vpaddd xmm0,xmm0,xmm7
|
|
xor esi,eax
|
|
ror r13d,6
|
|
add r10d,r12d
|
|
and r15d,esi
|
|
DB 143,232,120,194,248,13
|
|
xor r14d,r11d
|
|
add r10d,r13d
|
|
vpsrld xmm6,xmm0,10
|
|
xor r15d,eax
|
|
add ecx,r10d
|
|
DB 143,232,120,194,239,2
|
|
ror r14d,2
|
|
add r10d,r15d
|
|
vpxor xmm7,xmm7,xmm6
|
|
mov r13d,ecx
|
|
add r14d,r10d
|
|
ror r13d,14
|
|
mov r10d,r14d
|
|
vpxor xmm7,xmm7,xmm5
|
|
mov r12d,edx
|
|
xor r13d,ecx
|
|
ror r14d,9
|
|
xor r12d,r8d
|
|
vpslldq xmm7,xmm7,8
|
|
ror r13d,5
|
|
xor r14d,r10d
|
|
and r12d,ecx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
|
|
xor r13d,ecx
|
|
vpaddd xmm0,xmm0,xmm7
|
|
add r9d,DWORD[8+rsp]
|
|
mov r15d,r10d
|
|
ror r14d,11
|
|
xor r12d,r8d
|
|
vpaddd xmm6,xmm0,XMMWORD[rbp]
|
|
xor r15d,r11d
|
|
ror r13d,6
|
|
add r9d,r12d
|
|
and esi,r15d
|
|
xor r14d,r10d
|
|
add r9d,r13d
|
|
xor esi,r11d
|
|
add ebx,r9d
|
|
ror r14d,2
|
|
add r9d,esi
|
|
mov r13d,ebx
|
|
add r14d,r9d
|
|
ror r13d,14
|
|
mov r9d,r14d
|
|
mov r12d,ecx
|
|
xor r13d,ebx
|
|
ror r14d,9
|
|
xor r12d,edx
|
|
ror r13d,5
|
|
xor r14d,r9d
|
|
and r12d,ebx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
|
|
xor r13d,ebx
|
|
add r8d,DWORD[12+rsp]
|
|
mov esi,r9d
|
|
ror r14d,11
|
|
xor r12d,edx
|
|
xor esi,r10d
|
|
ror r13d,6
|
|
add r8d,r12d
|
|
and r15d,esi
|
|
xor r14d,r9d
|
|
add r8d,r13d
|
|
xor r15d,r10d
|
|
add eax,r8d
|
|
ror r14d,2
|
|
add r8d,r15d
|
|
mov r13d,eax
|
|
add r14d,r8d
|
|
vmovdqa XMMWORD[rsp],xmm6
|
|
vpalignr xmm4,xmm2,xmm1,4
|
|
ror r13d,14
|
|
mov r8d,r14d
|
|
vpalignr xmm7,xmm0,xmm3,4
|
|
mov r12d,ebx
|
|
xor r13d,eax
|
|
DB 143,232,120,194,236,14
|
|
ror r14d,9
|
|
xor r12d,ecx
|
|
vpsrld xmm4,xmm4,3
|
|
ror r13d,5
|
|
xor r14d,r8d
|
|
vpaddd xmm1,xmm1,xmm7
|
|
and r12d,eax
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
|
|
xor r13d,eax
|
|
add edx,DWORD[16+rsp]
|
|
mov r15d,r8d
|
|
DB 143,232,120,194,245,11
|
|
ror r14d,11
|
|
xor r12d,ecx
|
|
vpxor xmm4,xmm4,xmm5
|
|
xor r15d,r9d
|
|
ror r13d,6
|
|
add edx,r12d
|
|
and esi,r15d
|
|
DB 143,232,120,194,248,13
|
|
xor r14d,r8d
|
|
add edx,r13d
|
|
vpxor xmm4,xmm4,xmm6
|
|
xor esi,r9d
|
|
add r11d,edx
|
|
vpsrld xmm6,xmm0,10
|
|
ror r14d,2
|
|
add edx,esi
|
|
vpaddd xmm1,xmm1,xmm4
|
|
mov r13d,r11d
|
|
add r14d,edx
|
|
DB 143,232,120,194,239,2
|
|
ror r13d,14
|
|
mov edx,r14d
|
|
vpxor xmm7,xmm7,xmm6
|
|
mov r12d,eax
|
|
xor r13d,r11d
|
|
ror r14d,9
|
|
xor r12d,ebx
|
|
vpxor xmm7,xmm7,xmm5
|
|
ror r13d,5
|
|
xor r14d,edx
|
|
and r12d,r11d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
|
|
xor r13d,r11d
|
|
vpsrldq xmm7,xmm7,8
|
|
add ecx,DWORD[20+rsp]
|
|
mov esi,edx
|
|
ror r14d,11
|
|
xor r12d,ebx
|
|
vpaddd xmm1,xmm1,xmm7
|
|
xor esi,r8d
|
|
ror r13d,6
|
|
add ecx,r12d
|
|
and r15d,esi
|
|
DB 143,232,120,194,249,13
|
|
xor r14d,edx
|
|
add ecx,r13d
|
|
vpsrld xmm6,xmm1,10
|
|
xor r15d,r8d
|
|
add r10d,ecx
|
|
DB 143,232,120,194,239,2
|
|
ror r14d,2
|
|
add ecx,r15d
|
|
vpxor xmm7,xmm7,xmm6
|
|
mov r13d,r10d
|
|
add r14d,ecx
|
|
ror r13d,14
|
|
mov ecx,r14d
|
|
vpxor xmm7,xmm7,xmm5
|
|
mov r12d,r11d
|
|
xor r13d,r10d
|
|
ror r14d,9
|
|
xor r12d,eax
|
|
vpslldq xmm7,xmm7,8
|
|
ror r13d,5
|
|
xor r14d,ecx
|
|
and r12d,r10d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
|
|
xor r13d,r10d
|
|
vpaddd xmm1,xmm1,xmm7
|
|
add ebx,DWORD[24+rsp]
|
|
mov r15d,ecx
|
|
ror r14d,11
|
|
xor r12d,eax
|
|
vpaddd xmm6,xmm1,XMMWORD[32+rbp]
|
|
xor r15d,edx
|
|
ror r13d,6
|
|
add ebx,r12d
|
|
and esi,r15d
|
|
xor r14d,ecx
|
|
add ebx,r13d
|
|
xor esi,edx
|
|
add r9d,ebx
|
|
ror r14d,2
|
|
add ebx,esi
|
|
mov r13d,r9d
|
|
add r14d,ebx
|
|
ror r13d,14
|
|
mov ebx,r14d
|
|
mov r12d,r10d
|
|
xor r13d,r9d
|
|
ror r14d,9
|
|
xor r12d,r11d
|
|
ror r13d,5
|
|
xor r14d,ebx
|
|
and r12d,r9d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
|
|
xor r13d,r9d
|
|
add eax,DWORD[28+rsp]
|
|
mov esi,ebx
|
|
ror r14d,11
|
|
xor r12d,r11d
|
|
xor esi,ecx
|
|
ror r13d,6
|
|
add eax,r12d
|
|
and r15d,esi
|
|
xor r14d,ebx
|
|
add eax,r13d
|
|
xor r15d,ecx
|
|
add r8d,eax
|
|
ror r14d,2
|
|
add eax,r15d
|
|
mov r13d,r8d
|
|
add r14d,eax
|
|
vmovdqa XMMWORD[16+rsp],xmm6
|
|
vpalignr xmm4,xmm3,xmm2,4
|
|
ror r13d,14
|
|
mov eax,r14d
|
|
vpalignr xmm7,xmm1,xmm0,4
|
|
mov r12d,r9d
|
|
xor r13d,r8d
|
|
DB 143,232,120,194,236,14
|
|
ror r14d,9
|
|
xor r12d,r10d
|
|
vpsrld xmm4,xmm4,3
|
|
ror r13d,5
|
|
xor r14d,eax
|
|
vpaddd xmm2,xmm2,xmm7
|
|
and r12d,r8d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
|
|
xor r13d,r8d
|
|
add r11d,DWORD[32+rsp]
|
|
mov r15d,eax
|
|
DB 143,232,120,194,245,11
|
|
ror r14d,11
|
|
xor r12d,r10d
|
|
vpxor xmm4,xmm4,xmm5
|
|
xor r15d,ebx
|
|
ror r13d,6
|
|
add r11d,r12d
|
|
and esi,r15d
|
|
DB 143,232,120,194,249,13
|
|
xor r14d,eax
|
|
add r11d,r13d
|
|
vpxor xmm4,xmm4,xmm6
|
|
xor esi,ebx
|
|
add edx,r11d
|
|
vpsrld xmm6,xmm1,10
|
|
ror r14d,2
|
|
add r11d,esi
|
|
vpaddd xmm2,xmm2,xmm4
|
|
mov r13d,edx
|
|
add r14d,r11d
|
|
DB 143,232,120,194,239,2
|
|
ror r13d,14
|
|
mov r11d,r14d
|
|
vpxor xmm7,xmm7,xmm6
|
|
mov r12d,r8d
|
|
xor r13d,edx
|
|
ror r14d,9
|
|
xor r12d,r9d
|
|
vpxor xmm7,xmm7,xmm5
|
|
ror r13d,5
|
|
xor r14d,r11d
|
|
and r12d,edx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
|
|
xor r13d,edx
|
|
vpsrldq xmm7,xmm7,8
|
|
add r10d,DWORD[36+rsp]
|
|
mov esi,r11d
|
|
ror r14d,11
|
|
xor r12d,r9d
|
|
vpaddd xmm2,xmm2,xmm7
|
|
xor esi,eax
|
|
ror r13d,6
|
|
add r10d,r12d
|
|
and r15d,esi
|
|
DB 143,232,120,194,250,13
|
|
xor r14d,r11d
|
|
add r10d,r13d
|
|
vpsrld xmm6,xmm2,10
|
|
xor r15d,eax
|
|
add ecx,r10d
|
|
DB 143,232,120,194,239,2
|
|
ror r14d,2
|
|
add r10d,r15d
|
|
vpxor xmm7,xmm7,xmm6
|
|
mov r13d,ecx
|
|
add r14d,r10d
|
|
ror r13d,14
|
|
mov r10d,r14d
|
|
vpxor xmm7,xmm7,xmm5
|
|
mov r12d,edx
|
|
xor r13d,ecx
|
|
ror r14d,9
|
|
xor r12d,r8d
|
|
vpslldq xmm7,xmm7,8
|
|
ror r13d,5
|
|
xor r14d,r10d
|
|
and r12d,ecx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
|
|
xor r13d,ecx
|
|
vpaddd xmm2,xmm2,xmm7
|
|
add r9d,DWORD[40+rsp]
|
|
mov r15d,r10d
|
|
ror r14d,11
|
|
xor r12d,r8d
|
|
vpaddd xmm6,xmm2,XMMWORD[64+rbp]
|
|
xor r15d,r11d
|
|
ror r13d,6
|
|
add r9d,r12d
|
|
and esi,r15d
|
|
xor r14d,r10d
|
|
add r9d,r13d
|
|
xor esi,r11d
|
|
add ebx,r9d
|
|
ror r14d,2
|
|
add r9d,esi
|
|
mov r13d,ebx
|
|
add r14d,r9d
|
|
ror r13d,14
|
|
mov r9d,r14d
|
|
mov r12d,ecx
|
|
xor r13d,ebx
|
|
ror r14d,9
|
|
xor r12d,edx
|
|
ror r13d,5
|
|
xor r14d,r9d
|
|
and r12d,ebx
|
|
vaesenclast xmm11,xmm9,xmm10
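; The AES key length is only known at run time, so a vaesenclast is
; issued speculatively alongside the regular vaesenc at each possible
; final round (rounds 10, 12 and 14); the K256+544 masks loaded into
; xmm12/xmm13/xmm14 later keep exactly one of the speculative results
; when the ciphertext is spliced into xmm8.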
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
|
|
xor r13d,ebx
|
|
add r8d,DWORD[44+rsp]
|
|
mov esi,r9d
|
|
ror r14d,11
|
|
xor r12d,edx
|
|
xor esi,r10d
|
|
ror r13d,6
|
|
add r8d,r12d
|
|
and r15d,esi
|
|
xor r14d,r9d
|
|
add r8d,r13d
|
|
xor r15d,r10d
|
|
add eax,r8d
|
|
ror r14d,2
|
|
add r8d,r15d
|
|
mov r13d,eax
|
|
add r14d,r8d
|
|
vmovdqa XMMWORD[32+rsp],xmm6
|
|
vpalignr xmm4,xmm0,xmm3,4
|
|
ror r13d,14
|
|
mov r8d,r14d
|
|
vpalignr xmm7,xmm2,xmm1,4
|
|
mov r12d,ebx
|
|
xor r13d,eax
|
|
DB 143,232,120,194,236,14
|
|
ror r14d,9
|
|
xor r12d,ecx
|
|
vpsrld xmm4,xmm4,3
|
|
ror r13d,5
|
|
xor r14d,r8d
|
|
vpaddd xmm3,xmm3,xmm7
|
|
and r12d,eax
|
|
vpand xmm8,xmm11,xmm12
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
|
|
xor r13d,eax
|
|
add edx,DWORD[48+rsp]
|
|
mov r15d,r8d
|
|
DB 143,232,120,194,245,11
|
|
ror r14d,11
|
|
xor r12d,ecx
|
|
vpxor xmm4,xmm4,xmm5
|
|
xor r15d,r9d
|
|
ror r13d,6
|
|
add edx,r12d
|
|
and esi,r15d
|
|
DB 143,232,120,194,250,13
|
|
xor r14d,r8d
|
|
add edx,r13d
|
|
vpxor xmm4,xmm4,xmm6
|
|
xor esi,r9d
|
|
add r11d,edx
|
|
vpsrld xmm6,xmm2,10
|
|
ror r14d,2
|
|
add edx,esi
|
|
vpaddd xmm3,xmm3,xmm4
|
|
mov r13d,r11d
|
|
add r14d,edx
|
|
DB 143,232,120,194,239,2
|
|
ror r13d,14
|
|
mov edx,r14d
|
|
vpxor xmm7,xmm7,xmm6
|
|
mov r12d,eax
|
|
xor r13d,r11d
|
|
ror r14d,9
|
|
xor r12d,ebx
|
|
vpxor xmm7,xmm7,xmm5
|
|
ror r13d,5
|
|
xor r14d,edx
|
|
and r12d,r11d
|
|
vaesenclast xmm11,xmm9,xmm10
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
|
|
xor r13d,r11d
|
|
vpsrldq xmm7,xmm7,8
|
|
add ecx,DWORD[52+rsp]
|
|
mov esi,edx
|
|
ror r14d,11
|
|
xor r12d,ebx
|
|
vpaddd xmm3,xmm3,xmm7
|
|
xor esi,r8d
|
|
ror r13d,6
|
|
add ecx,r12d
|
|
and r15d,esi
|
|
DB 143,232,120,194,251,13
|
|
xor r14d,edx
|
|
add ecx,r13d
|
|
vpsrld xmm6,xmm3,10
|
|
xor r15d,r8d
|
|
add r10d,ecx
|
|
DB 143,232,120,194,239,2
|
|
ror r14d,2
|
|
add ecx,r15d
|
|
vpxor xmm7,xmm7,xmm6
|
|
mov r13d,r10d
|
|
add r14d,ecx
|
|
ror r13d,14
|
|
mov ecx,r14d
|
|
vpxor xmm7,xmm7,xmm5
|
|
mov r12d,r11d
|
|
xor r13d,r10d
|
|
ror r14d,9
|
|
xor r12d,eax
|
|
vpslldq xmm7,xmm7,8
|
|
ror r13d,5
|
|
xor r14d,ecx
|
|
and r12d,r10d
|
|
vpand xmm11,xmm11,xmm13
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
|
|
xor r13d,r10d
|
|
vpaddd xmm3,xmm3,xmm7
|
|
add ebx,DWORD[56+rsp]
|
|
mov r15d,ecx
|
|
ror r14d,11
|
|
xor r12d,eax
|
|
vpaddd xmm6,xmm3,XMMWORD[96+rbp]
|
|
xor r15d,edx
|
|
ror r13d,6
|
|
add ebx,r12d
|
|
and esi,r15d
|
|
xor r14d,ecx
|
|
add ebx,r13d
|
|
xor esi,edx
|
|
add r9d,ebx
|
|
ror r14d,2
|
|
add ebx,esi
|
|
mov r13d,r9d
|
|
add r14d,ebx
|
|
ror r13d,14
|
|
mov ebx,r14d
|
|
mov r12d,r10d
|
|
xor r13d,r9d
|
|
ror r14d,9
|
|
xor r12d,r11d
|
|
ror r13d,5
|
|
xor r14d,ebx
|
|
and r12d,r9d
|
|
vpor xmm8,xmm8,xmm11
|
|
vaesenclast xmm11,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
|
|
xor r13d,r9d
|
|
add eax,DWORD[60+rsp]
|
|
mov esi,ebx
|
|
ror r14d,11
|
|
xor r12d,r11d
|
|
xor esi,ecx
|
|
ror r13d,6
|
|
add eax,r12d
|
|
and r15d,esi
|
|
xor r14d,ebx
|
|
add eax,r13d
|
|
xor r15d,ecx
|
|
add r8d,eax
|
|
ror r14d,2
|
|
add eax,r15d
|
|
mov r13d,r8d
|
|
add r14d,eax
|
|
vmovdqa XMMWORD[48+rsp],xmm6
|
|
mov r12,QWORD[((64+0))+rsp]
|
|
vpand xmm11,xmm11,xmm14
|
|
mov r15,QWORD[((64+8))+rsp]
|
|
vpor xmm8,xmm8,xmm11
|
|
vmovdqu XMMWORD[r12*1+r15],xmm8
|
|
lea r12,[16+r12]
|
|
cmp BYTE[131+rbp],0
|
|
jne NEAR $L$xop_00_47
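; Rounds 48..63: one final set of 16 rounds without message-schedule
; updates, while the AES-CBC lane finishes the current 16-byte block
; and the spliced ciphertext is written out.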
|
|
vmovdqu xmm9,XMMWORD[r12]
|
|
mov QWORD[((64+0))+rsp],r12
|
|
ror r13d,14
|
|
mov eax,r14d
|
|
mov r12d,r9d
|
|
xor r13d,r8d
|
|
ror r14d,9
|
|
xor r12d,r10d
|
|
ror r13d,5
|
|
xor r14d,eax
|
|
and r12d,r8d
|
|
vpxor xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
|
|
xor r13d,r8d
|
|
add r11d,DWORD[rsp]
|
|
mov r15d,eax
|
|
ror r14d,11
|
|
xor r12d,r10d
|
|
xor r15d,ebx
|
|
ror r13d,6
|
|
add r11d,r12d
|
|
and esi,r15d
|
|
xor r14d,eax
|
|
add r11d,r13d
|
|
xor esi,ebx
|
|
add edx,r11d
|
|
ror r14d,2
|
|
add r11d,esi
|
|
mov r13d,edx
|
|
add r14d,r11d
|
|
ror r13d,14
|
|
mov r11d,r14d
|
|
mov r12d,r8d
|
|
xor r13d,edx
|
|
ror r14d,9
|
|
xor r12d,r9d
|
|
ror r13d,5
|
|
xor r14d,r11d
|
|
and r12d,edx
|
|
vpxor xmm9,xmm9,xmm8
|
|
xor r13d,edx
|
|
add r10d,DWORD[4+rsp]
|
|
mov esi,r11d
|
|
ror r14d,11
|
|
xor r12d,r9d
|
|
xor esi,eax
|
|
ror r13d,6
|
|
add r10d,r12d
|
|
and r15d,esi
|
|
xor r14d,r11d
|
|
add r10d,r13d
|
|
xor r15d,eax
|
|
add ecx,r10d
|
|
ror r14d,2
|
|
add r10d,r15d
|
|
mov r13d,ecx
|
|
add r14d,r10d
|
|
ror r13d,14
|
|
mov r10d,r14d
|
|
mov r12d,edx
|
|
xor r13d,ecx
|
|
ror r14d,9
|
|
xor r12d,r8d
|
|
ror r13d,5
|
|
xor r14d,r10d
|
|
and r12d,ecx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
|
|
xor r13d,ecx
|
|
add r9d,DWORD[8+rsp]
|
|
mov r15d,r10d
|
|
ror r14d,11
|
|
xor r12d,r8d
|
|
xor r15d,r11d
|
|
ror r13d,6
|
|
add r9d,r12d
|
|
and esi,r15d
|
|
xor r14d,r10d
|
|
add r9d,r13d
|
|
xor esi,r11d
|
|
add ebx,r9d
|
|
ror r14d,2
|
|
add r9d,esi
|
|
mov r13d,ebx
|
|
add r14d,r9d
|
|
ror r13d,14
|
|
mov r9d,r14d
|
|
mov r12d,ecx
|
|
xor r13d,ebx
|
|
ror r14d,9
|
|
xor r12d,edx
|
|
ror r13d,5
|
|
xor r14d,r9d
|
|
and r12d,ebx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
|
|
xor r13d,ebx
|
|
add r8d,DWORD[12+rsp]
|
|
mov esi,r9d
|
|
ror r14d,11
|
|
xor r12d,edx
|
|
xor esi,r10d
|
|
ror r13d,6
|
|
add r8d,r12d
|
|
and r15d,esi
|
|
xor r14d,r9d
|
|
add r8d,r13d
|
|
xor r15d,r10d
|
|
add eax,r8d
|
|
ror r14d,2
|
|
add r8d,r15d
|
|
mov r13d,eax
|
|
add r14d,r8d
|
|
ror r13d,14
|
|
mov r8d,r14d
|
|
mov r12d,ebx
|
|
xor r13d,eax
|
|
ror r14d,9
|
|
xor r12d,ecx
|
|
ror r13d,5
|
|
xor r14d,r8d
|
|
and r12d,eax
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
|
|
xor r13d,eax
|
|
add edx,DWORD[16+rsp]
|
|
mov r15d,r8d
|
|
ror r14d,11
|
|
xor r12d,ecx
|
|
xor r15d,r9d
|
|
ror r13d,6
|
|
add edx,r12d
|
|
and esi,r15d
|
|
xor r14d,r8d
|
|
add edx,r13d
|
|
xor esi,r9d
|
|
add r11d,edx
|
|
ror r14d,2
|
|
add edx,esi
|
|
mov r13d,r11d
|
|
add r14d,edx
|
|
ror r13d,14
|
|
mov edx,r14d
|
|
mov r12d,eax
|
|
xor r13d,r11d
|
|
ror r14d,9
|
|
xor r12d,ebx
|
|
ror r13d,5
|
|
xor r14d,edx
|
|
and r12d,r11d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
|
|
xor r13d,r11d
|
|
add ecx,DWORD[20+rsp]
|
|
mov esi,edx
|
|
ror r14d,11
|
|
xor r12d,ebx
|
|
xor esi,r8d
|
|
ror r13d,6
|
|
add ecx,r12d
|
|
and r15d,esi
|
|
xor r14d,edx
|
|
add ecx,r13d
|
|
xor r15d,r8d
|
|
add r10d,ecx
|
|
ror r14d,2
|
|
add ecx,r15d
|
|
mov r13d,r10d
|
|
add r14d,ecx
|
|
ror r13d,14
|
|
mov ecx,r14d
|
|
mov r12d,r11d
|
|
xor r13d,r10d
|
|
ror r14d,9
|
|
xor r12d,eax
|
|
ror r13d,5
|
|
xor r14d,ecx
|
|
and r12d,r10d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
|
|
xor r13d,r10d
|
|
add ebx,DWORD[24+rsp]
|
|
mov r15d,ecx
|
|
ror r14d,11
|
|
xor r12d,eax
|
|
xor r15d,edx
|
|
ror r13d,6
|
|
add ebx,r12d
|
|
and esi,r15d
|
|
xor r14d,ecx
|
|
add ebx,r13d
|
|
xor esi,edx
|
|
add r9d,ebx
|
|
ror r14d,2
|
|
add ebx,esi
|
|
mov r13d,r9d
|
|
add r14d,ebx
|
|
ror r13d,14
|
|
mov ebx,r14d
|
|
mov r12d,r10d
|
|
xor r13d,r9d
|
|
ror r14d,9
|
|
xor r12d,r11d
|
|
ror r13d,5
|
|
xor r14d,ebx
|
|
and r12d,r9d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
|
|
xor r13d,r9d
|
|
add eax,DWORD[28+rsp]
|
|
mov esi,ebx
|
|
ror r14d,11
|
|
xor r12d,r11d
|
|
xor esi,ecx
|
|
ror r13d,6
|
|
add eax,r12d
|
|
and r15d,esi
|
|
xor r14d,ebx
|
|
add eax,r13d
|
|
xor r15d,ecx
|
|
add r8d,eax
|
|
ror r14d,2
|
|
add eax,r15d
|
|
mov r13d,r8d
|
|
add r14d,eax
|
|
ror r13d,14
|
|
mov eax,r14d
|
|
mov r12d,r9d
|
|
xor r13d,r8d
|
|
ror r14d,9
|
|
xor r12d,r10d
|
|
ror r13d,5
|
|
xor r14d,eax
|
|
and r12d,r8d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
|
|
xor r13d,r8d
|
|
add r11d,DWORD[32+rsp]
|
|
mov r15d,eax
|
|
ror r14d,11
|
|
xor r12d,r10d
|
|
xor r15d,ebx
|
|
ror r13d,6
|
|
add r11d,r12d
|
|
and esi,r15d
|
|
xor r14d,eax
|
|
add r11d,r13d
|
|
xor esi,ebx
|
|
add edx,r11d
|
|
ror r14d,2
|
|
add r11d,esi
|
|
mov r13d,edx
|
|
add r14d,r11d
|
|
ror r13d,14
|
|
mov r11d,r14d
|
|
mov r12d,r8d
|
|
xor r13d,edx
|
|
ror r14d,9
|
|
xor r12d,r9d
|
|
ror r13d,5
|
|
xor r14d,r11d
|
|
and r12d,edx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
|
|
xor r13d,edx
|
|
add r10d,DWORD[36+rsp]
|
|
mov esi,r11d
|
|
ror r14d,11
|
|
xor r12d,r9d
|
|
xor esi,eax
|
|
ror r13d,6
|
|
add r10d,r12d
|
|
and r15d,esi
|
|
xor r14d,r11d
|
|
add r10d,r13d
|
|
xor r15d,eax
|
|
add ecx,r10d
|
|
ror r14d,2
|
|
add r10d,r15d
|
|
mov r13d,ecx
|
|
add r14d,r10d
|
|
ror r13d,14
|
|
mov r10d,r14d
|
|
mov r12d,edx
|
|
xor r13d,ecx
|
|
ror r14d,9
|
|
xor r12d,r8d
|
|
ror r13d,5
|
|
xor r14d,r10d
|
|
and r12d,ecx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
|
|
xor r13d,ecx
|
|
add r9d,DWORD[40+rsp]
|
|
mov r15d,r10d
|
|
ror r14d,11
|
|
xor r12d,r8d
|
|
xor r15d,r11d
|
|
ror r13d,6
|
|
add r9d,r12d
|
|
and esi,r15d
|
|
xor r14d,r10d
|
|
add r9d,r13d
|
|
xor esi,r11d
|
|
add ebx,r9d
|
|
ror r14d,2
|
|
add r9d,esi
|
|
mov r13d,ebx
|
|
add r14d,r9d
|
|
ror r13d,14
|
|
mov r9d,r14d
|
|
mov r12d,ecx
|
|
xor r13d,ebx
|
|
ror r14d,9
|
|
xor r12d,edx
|
|
ror r13d,5
|
|
xor r14d,r9d
|
|
and r12d,ebx
|
|
vaesenclast xmm11,xmm9,xmm10
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
|
|
xor r13d,ebx
|
|
add r8d,DWORD[44+rsp]
|
|
mov esi,r9d
|
|
ror r14d,11
|
|
xor r12d,edx
|
|
xor esi,r10d
|
|
ror r13d,6
|
|
add r8d,r12d
|
|
and r15d,esi
|
|
xor r14d,r9d
|
|
add r8d,r13d
|
|
xor r15d,r10d
|
|
add eax,r8d
|
|
ror r14d,2
|
|
add r8d,r15d
|
|
mov r13d,eax
|
|
add r14d,r8d
|
|
ror r13d,14
|
|
mov r8d,r14d
|
|
mov r12d,ebx
|
|
xor r13d,eax
|
|
ror r14d,9
|
|
xor r12d,ecx
|
|
ror r13d,5
|
|
xor r14d,r8d
|
|
and r12d,eax
|
|
vpand xmm8,xmm11,xmm12
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
|
|
xor r13d,eax
|
|
add edx,DWORD[48+rsp]
|
|
mov r15d,r8d
|
|
ror r14d,11
|
|
xor r12d,ecx
|
|
xor r15d,r9d
|
|
ror r13d,6
|
|
add edx,r12d
|
|
and esi,r15d
|
|
xor r14d,r8d
|
|
add edx,r13d
|
|
xor esi,r9d
|
|
add r11d,edx
|
|
ror r14d,2
|
|
add edx,esi
|
|
mov r13d,r11d
|
|
add r14d,edx
|
|
ror r13d,14
|
|
mov edx,r14d
|
|
mov r12d,eax
|
|
xor r13d,r11d
|
|
ror r14d,9
|
|
xor r12d,ebx
|
|
ror r13d,5
|
|
xor r14d,edx
|
|
and r12d,r11d
|
|
vaesenclast xmm11,xmm9,xmm10
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
|
|
xor r13d,r11d
|
|
add ecx,DWORD[52+rsp]
|
|
mov esi,edx
|
|
ror r14d,11
|
|
xor r12d,ebx
|
|
xor esi,r8d
|
|
ror r13d,6
|
|
add ecx,r12d
|
|
and r15d,esi
|
|
xor r14d,edx
|
|
add ecx,r13d
|
|
xor r15d,r8d
|
|
add r10d,ecx
|
|
ror r14d,2
|
|
add ecx,r15d
|
|
mov r13d,r10d
|
|
add r14d,ecx
|
|
ror r13d,14
|
|
mov ecx,r14d
|
|
mov r12d,r11d
|
|
xor r13d,r10d
|
|
ror r14d,9
|
|
xor r12d,eax
|
|
ror r13d,5
|
|
xor r14d,ecx
|
|
and r12d,r10d
|
|
vpand xmm11,xmm11,xmm13
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
|
|
xor r13d,r10d
|
|
add ebx,DWORD[56+rsp]
|
|
mov r15d,ecx
|
|
ror r14d,11
|
|
xor r12d,eax
|
|
xor r15d,edx
|
|
ror r13d,6
|
|
add ebx,r12d
|
|
and esi,r15d
|
|
xor r14d,ecx
|
|
add ebx,r13d
|
|
xor esi,edx
|
|
add r9d,ebx
|
|
ror r14d,2
|
|
add ebx,esi
|
|
mov r13d,r9d
|
|
add r14d,ebx
|
|
ror r13d,14
|
|
mov ebx,r14d
|
|
mov r12d,r10d
|
|
xor r13d,r9d
|
|
ror r14d,9
|
|
xor r12d,r11d
|
|
ror r13d,5
|
|
xor r14d,ebx
|
|
and r12d,r9d
|
|
vpor xmm8,xmm8,xmm11
|
|
vaesenclast xmm11,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
|
|
xor r13d,r9d
|
|
add eax,DWORD[60+rsp]
|
|
mov esi,ebx
|
|
ror r14d,11
|
|
xor r12d,r11d
|
|
xor esi,ecx
|
|
ror r13d,6
|
|
add eax,r12d
|
|
and r15d,esi
|
|
xor r14d,ebx
|
|
add eax,r13d
|
|
xor r15d,ecx
|
|
add r8d,eax
|
|
ror r14d,2
|
|
add eax,r15d
|
|
mov r13d,r8d
|
|
add r14d,eax
|
|
mov r12,QWORD[((64+0))+rsp]
|
|
mov r13,QWORD[((64+8))+rsp]
|
|
mov r15,QWORD[((64+40))+rsp]
|
|
mov rsi,QWORD[((64+48))+rsp]
|
|
|
|
vpand xmm11,xmm11,xmm14
|
|
mov eax,r14d
|
|
vpor xmm8,xmm8,xmm11
|
|
vmovdqu XMMWORD[r13*1+r12],xmm8
|
|
lea r12,[16+r12]
|
|
|
|
add eax,DWORD[r15]
|
|
add ebx,DWORD[4+r15]
|
|
add ecx,DWORD[8+r15]
|
|
add edx,DWORD[12+r15]
|
|
add r8d,DWORD[16+r15]
|
|
add r9d,DWORD[20+r15]
|
|
add r10d,DWORD[24+r15]
|
|
add r11d,DWORD[28+r15]
|
|
|
|
cmp r12,QWORD[((64+16))+rsp]
|
|
|
|
mov DWORD[r15],eax
|
|
mov DWORD[4+r15],ebx
|
|
mov DWORD[8+r15],ecx
|
|
mov DWORD[12+r15],edx
|
|
mov DWORD[16+r15],r8d
|
|
mov DWORD[20+r15],r9d
|
|
mov DWORD[24+r15],r10d
|
|
mov DWORD[28+r15],r11d
|
|
|
|
jb NEAR $L$loop_xop
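; All input consumed: store the final CBC ciphertext block back through
; the saved ivp so the caller sees the updated IV, then restore the
; non-volatile xmm6-xmm15 slots and general-purpose registers required
; by the Win64 ABI before returning.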
|
|
|
|
mov r8,QWORD[((64+32))+rsp]
|
|
mov rsi,QWORD[120+rsp]
|
|
|
|
vmovdqu XMMWORD[r8],xmm8
|
|
vzeroall
|
|
movaps xmm6,XMMWORD[128+rsp]
|
|
movaps xmm7,XMMWORD[144+rsp]
|
|
movaps xmm8,XMMWORD[160+rsp]
|
|
movaps xmm9,XMMWORD[176+rsp]
|
|
movaps xmm10,XMMWORD[192+rsp]
|
|
movaps xmm11,XMMWORD[208+rsp]
|
|
movaps xmm12,XMMWORD[224+rsp]
|
|
movaps xmm13,XMMWORD[240+rsp]
|
|
movaps xmm14,XMMWORD[256+rsp]
|
|
movaps xmm15,XMMWORD[272+rsp]
|
|
mov r15,QWORD[((-48))+rsi]
|
|
|
|
mov r14,QWORD[((-40))+rsi]
|
|
|
|
mov r13,QWORD[((-32))+rsi]
|
|
|
|
mov r12,QWORD[((-24))+rsi]
|
|
|
|
mov rbp,QWORD[((-16))+rsi]
|
|
|
|
mov rbx,QWORD[((-8))+rsi]
|
|
|
|
lea rsp,[rsi]
|
|
|
|
$L$epilogue_xop:
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
DB 0F3h,0C3h ;repret
|
|
|
|
$L$SEH_end_aesni_cbc_sha256_enc_xop:
|
|
|
|
ALIGN 64
|
|
aesni_cbc_sha256_enc_avx:
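; AVX1 flavour: structurally identical to the XOP path above (same
; prologue, stack frame and AES interleaving), but the sigma rotations
; are built from vpslld/vpsrld/vpxor shift pairs and the scalar rotates
; use the shrd idiom, since plain AVX has no rotate instructions.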
|
|
mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_aesni_cbc_sha256_enc_avx:
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
mov rcx,r9
|
|
mov r8,QWORD[40+rsp]
|
|
mov r9,QWORD[48+rsp]
|
|
|
|
|
|
|
|
$L$avx_shortcut:
|
|
mov r10,QWORD[56+rsp]
|
|
mov rax,rsp
|
|
|
|
push rbx
|
|
|
|
push rbp
|
|
|
|
push r12
|
|
|
|
push r13
|
|
|
|
push r14
|
|
|
|
push r15
|
|
|
|
sub rsp,288
|
|
and rsp,-64
|
|
|
|
shl rdx,6
|
|
sub rsi,rdi
|
|
sub r10,rdi
|
|
add rdx,rdi
|
|
|
|
|
|
mov QWORD[((64+8))+rsp],rsi
|
|
mov QWORD[((64+16))+rsp],rdx
|
|
|
|
mov QWORD[((64+32))+rsp],r8
|
|
mov QWORD[((64+40))+rsp],r9
|
|
mov QWORD[((64+48))+rsp],r10
|
|
mov QWORD[120+rsp],rax
|
|
|
|
movaps XMMWORD[128+rsp],xmm6
|
|
movaps XMMWORD[144+rsp],xmm7
|
|
movaps XMMWORD[160+rsp],xmm8
|
|
movaps XMMWORD[176+rsp],xmm9
|
|
movaps XMMWORD[192+rsp],xmm10
|
|
movaps XMMWORD[208+rsp],xmm11
|
|
movaps XMMWORD[224+rsp],xmm12
|
|
movaps XMMWORD[240+rsp],xmm13
|
|
movaps XMMWORD[256+rsp],xmm14
|
|
movaps XMMWORD[272+rsp],xmm15
|
|
$L$prologue_avx:
|
|
vzeroall
|
|
|
|
mov r12,rdi
|
|
lea rdi,[128+rcx]
|
|
lea r13,[((K256+544))]
|
|
mov r14d,DWORD[((240-128))+rdi]
|
|
mov r15,r9
|
|
mov rsi,r10
|
|
vmovdqu xmm8,XMMWORD[r8]
|
|
sub r14,9
|
|
|
|
mov eax,DWORD[r15]
|
|
mov ebx,DWORD[4+r15]
|
|
mov ecx,DWORD[8+r15]
|
|
mov edx,DWORD[12+r15]
|
|
mov r8d,DWORD[16+r15]
|
|
mov r9d,DWORD[20+r15]
|
|
mov r10d,DWORD[24+r15]
|
|
mov r11d,DWORD[28+r15]
|
|
|
|
vmovdqa xmm14,XMMWORD[r14*8+r13]
|
|
vmovdqa xmm13,XMMWORD[16+r14*8+r13]
|
|
vmovdqa xmm12,XMMWORD[32+r14*8+r13]
|
|
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
|
|
jmp NEAR $L$loop_avx
|
|
ALIGN 16
|
|
$L$loop_avx:
|
|
vmovdqa xmm7,XMMWORD[((K256+512))]
|
|
vmovdqu xmm0,XMMWORD[r12*1+rsi]
|
|
vmovdqu xmm1,XMMWORD[16+r12*1+rsi]
|
|
vmovdqu xmm2,XMMWORD[32+r12*1+rsi]
|
|
vmovdqu xmm3,XMMWORD[48+r12*1+rsi]
|
|
vpshufb xmm0,xmm0,xmm7
|
|
lea rbp,[K256]
|
|
vpshufb xmm1,xmm1,xmm7
|
|
vpshufb xmm2,xmm2,xmm7
|
|
vpaddd xmm4,xmm0,XMMWORD[rbp]
|
|
vpshufb xmm3,xmm3,xmm7
|
|
vpaddd xmm5,xmm1,XMMWORD[32+rbp]
|
|
vpaddd xmm6,xmm2,XMMWORD[64+rbp]
|
|
vpaddd xmm7,xmm3,XMMWORD[96+rbp]
|
|
vmovdqa XMMWORD[rsp],xmm4
|
|
mov r14d,eax
|
|
vmovdqa XMMWORD[16+rsp],xmm5
|
|
mov esi,ebx
|
|
vmovdqa XMMWORD[32+rsp],xmm6
|
|
xor esi,ecx
|
|
vmovdqa XMMWORD[48+rsp],xmm7
|
|
mov r13d,r8d
|
|
jmp NEAR $L$avx_00_47
|
|
|
|
ALIGN 16
|
|
$L$avx_00_47:
|
|
sub rbp,-16*2*4
|
|
vmovdqu xmm9,XMMWORD[r12]
|
|
mov QWORD[((64+0))+rsp],r12
|
|
vpalignr xmm4,xmm1,xmm0,4
|
|
shrd r13d,r13d,14
|
|
mov eax,r14d
|
|
mov r12d,r9d
|
|
vpalignr xmm7,xmm3,xmm2,4
|
|
xor r13d,r8d
|
|
shrd r14d,r14d,9
|
|
xor r12d,r10d
|
|
vpsrld xmm6,xmm4,7
|
|
shrd r13d,r13d,5
|
|
xor r14d,eax
|
|
and r12d,r8d
|
|
vpaddd xmm0,xmm0,xmm7
|
|
vpxor xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
|
|
xor r13d,r8d
|
|
add r11d,DWORD[rsp]
|
|
mov r15d,eax
|
|
vpsrld xmm7,xmm4,3
|
|
shrd r14d,r14d,11
|
|
xor r12d,r10d
|
|
xor r15d,ebx
|
|
vpslld xmm5,xmm4,14
|
|
shrd r13d,r13d,6
|
|
add r11d,r12d
|
|
and esi,r15d
|
|
vpxor xmm4,xmm7,xmm6
|
|
xor r14d,eax
|
|
add r11d,r13d
|
|
xor esi,ebx
|
|
vpshufd xmm7,xmm3,250
|
|
add edx,r11d
|
|
shrd r14d,r14d,2
|
|
add r11d,esi
|
|
vpsrld xmm6,xmm6,11
|
|
mov r13d,edx
|
|
add r14d,r11d
|
|
shrd r13d,r13d,14
|
|
vpxor xmm4,xmm4,xmm5
|
|
mov r11d,r14d
|
|
mov r12d,r8d
|
|
xor r13d,edx
|
|
vpslld xmm5,xmm5,11
|
|
shrd r14d,r14d,9
|
|
xor r12d,r9d
|
|
shrd r13d,r13d,5
|
|
vpxor xmm4,xmm4,xmm6
|
|
xor r14d,r11d
|
|
and r12d,edx
|
|
vpxor xmm9,xmm9,xmm8
|
|
xor r13d,edx
|
|
vpsrld xmm6,xmm7,10
|
|
add r10d,DWORD[4+rsp]
|
|
mov esi,r11d
|
|
shrd r14d,r14d,11
|
|
vpxor xmm4,xmm4,xmm5
|
|
xor r12d,r9d
|
|
xor esi,eax
|
|
shrd r13d,r13d,6
|
|
vpsrlq xmm7,xmm7,17
|
|
add r10d,r12d
|
|
and r15d,esi
|
|
xor r14d,r11d
|
|
vpaddd xmm0,xmm0,xmm4
|
|
add r10d,r13d
|
|
xor r15d,eax
|
|
add ecx,r10d
|
|
vpxor xmm6,xmm6,xmm7
|
|
shrd r14d,r14d,2
|
|
add r10d,r15d
|
|
mov r13d,ecx
|
|
vpsrlq xmm7,xmm7,2
|
|
add r14d,r10d
|
|
shrd r13d,r13d,14
|
|
mov r10d,r14d
|
|
vpxor xmm6,xmm6,xmm7
|
|
mov r12d,edx
|
|
xor r13d,ecx
|
|
shrd r14d,r14d,9
|
|
vpshufd xmm6,xmm6,132
|
|
xor r12d,r8d
|
|
shrd r13d,r13d,5
|
|
xor r14d,r10d
|
|
vpsrldq xmm6,xmm6,8
|
|
and r12d,ecx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
|
|
xor r13d,ecx
|
|
add r9d,DWORD[8+rsp]
|
|
vpaddd xmm0,xmm0,xmm6
|
|
mov r15d,r10d
|
|
shrd r14d,r14d,11
|
|
xor r12d,r8d
|
|
vpshufd xmm7,xmm0,80
|
|
xor r15d,r11d
|
|
shrd r13d,r13d,6
|
|
add r9d,r12d
|
|
vpsrld xmm6,xmm7,10
|
|
and esi,r15d
|
|
xor r14d,r10d
|
|
add r9d,r13d
|
|
vpsrlq xmm7,xmm7,17
|
|
xor esi,r11d
|
|
add ebx,r9d
|
|
shrd r14d,r14d,2
|
|
vpxor xmm6,xmm6,xmm7
|
|
add r9d,esi
|
|
mov r13d,ebx
|
|
add r14d,r9d
|
|
vpsrlq xmm7,xmm7,2
|
|
shrd r13d,r13d,14
|
|
mov r9d,r14d
|
|
mov r12d,ecx
|
|
vpxor xmm6,xmm6,xmm7
|
|
xor r13d,ebx
|
|
shrd r14d,r14d,9
|
|
xor r12d,edx
|
|
vpshufd xmm6,xmm6,232
|
|
shrd r13d,r13d,5
|
|
xor r14d,r9d
|
|
and r12d,ebx
|
|
vpslldq xmm6,xmm6,8
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
|
|
xor r13d,ebx
|
|
add r8d,DWORD[12+rsp]
|
|
mov esi,r9d
|
|
vpaddd xmm0,xmm0,xmm6
|
|
shrd r14d,r14d,11
|
|
xor r12d,edx
|
|
xor esi,r10d
|
|
vpaddd xmm6,xmm0,XMMWORD[rbp]
|
|
shrd r13d,r13d,6
|
|
add r8d,r12d
|
|
and r15d,esi
|
|
xor r14d,r9d
|
|
add r8d,r13d
|
|
xor r15d,r10d
|
|
add eax,r8d
|
|
shrd r14d,r14d,2
|
|
add r8d,r15d
|
|
mov r13d,eax
|
|
add r14d,r8d
|
|
vmovdqa XMMWORD[rsp],xmm6
|
|
vpalignr xmm4,xmm2,xmm1,4
|
|
shrd r13d,r13d,14
|
|
mov r8d,r14d
|
|
mov r12d,ebx
|
|
vpalignr xmm7,xmm0,xmm3,4
|
|
xor r13d,eax
|
|
shrd r14d,r14d,9
|
|
xor r12d,ecx
|
|
vpsrld xmm6,xmm4,7
|
|
shrd r13d,r13d,5
|
|
xor r14d,r8d
|
|
and r12d,eax
|
|
vpaddd xmm1,xmm1,xmm7
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
|
|
xor r13d,eax
|
|
add edx,DWORD[16+rsp]
|
|
mov r15d,r8d
|
|
vpsrld xmm7,xmm4,3
|
|
shrd r14d,r14d,11
|
|
xor r12d,ecx
|
|
xor r15d,r9d
|
|
vpslld xmm5,xmm4,14
|
|
shrd r13d,r13d,6
|
|
add edx,r12d
|
|
and esi,r15d
|
|
vpxor xmm4,xmm7,xmm6
|
|
xor r14d,r8d
|
|
add edx,r13d
|
|
xor esi,r9d
|
|
vpshufd xmm7,xmm0,250
|
|
add r11d,edx
|
|
shrd r14d,r14d,2
|
|
add edx,esi
|
|
vpsrld xmm6,xmm6,11
|
|
mov r13d,r11d
|
|
add r14d,edx
|
|
shrd r13d,r13d,14
|
|
vpxor xmm4,xmm4,xmm5
|
|
mov edx,r14d
|
|
mov r12d,eax
|
|
xor r13d,r11d
|
|
vpslld xmm5,xmm5,11
|
|
shrd r14d,r14d,9
|
|
xor r12d,ebx
|
|
shrd r13d,r13d,5
|
|
vpxor xmm4,xmm4,xmm6
|
|
xor r14d,edx
|
|
and r12d,r11d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
|
|
xor r13d,r11d
|
|
vpsrld xmm6,xmm7,10
|
|
add ecx,DWORD[20+rsp]
|
|
mov esi,edx
|
|
shrd r14d,r14d,11
|
|
vpxor xmm4,xmm4,xmm5
|
|
xor r12d,ebx
|
|
xor esi,r8d
|
|
shrd r13d,r13d,6
|
|
vpsrlq xmm7,xmm7,17
|
|
add ecx,r12d
|
|
and r15d,esi
|
|
xor r14d,edx
|
|
vpaddd xmm1,xmm1,xmm4
|
|
add ecx,r13d
|
|
xor r15d,r8d
|
|
add r10d,ecx
|
|
vpxor xmm6,xmm6,xmm7
|
|
shrd r14d,r14d,2
|
|
add ecx,r15d
|
|
mov r13d,r10d
|
|
vpsrlq xmm7,xmm7,2
|
|
add r14d,ecx
|
|
shrd r13d,r13d,14
|
|
mov ecx,r14d
|
|
vpxor xmm6,xmm6,xmm7
|
|
mov r12d,r11d
|
|
xor r13d,r10d
|
|
shrd r14d,r14d,9
|
|
vpshufd xmm6,xmm6,132
|
|
xor r12d,eax
|
|
shrd r13d,r13d,5
|
|
xor r14d,ecx
|
|
vpsrldq xmm6,xmm6,8
|
|
and r12d,r10d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
|
|
xor r13d,r10d
|
|
add ebx,DWORD[24+rsp]
|
|
vpaddd xmm1,xmm1,xmm6
|
|
mov r15d,ecx
|
|
shrd r14d,r14d,11
|
|
xor r12d,eax
|
|
vpshufd xmm7,xmm1,80
|
|
xor r15d,edx
|
|
shrd r13d,r13d,6
|
|
add ebx,r12d
|
|
vpsrld xmm6,xmm7,10
|
|
and esi,r15d
|
|
xor r14d,ecx
|
|
add ebx,r13d
|
|
vpsrlq xmm7,xmm7,17
|
|
xor esi,edx
|
|
add r9d,ebx
|
|
shrd r14d,r14d,2
|
|
vpxor xmm6,xmm6,xmm7
|
|
add ebx,esi
|
|
mov r13d,r9d
|
|
add r14d,ebx
|
|
vpsrlq xmm7,xmm7,2
|
|
shrd r13d,r13d,14
|
|
mov ebx,r14d
|
|
mov r12d,r10d
|
|
vpxor xmm6,xmm6,xmm7
|
|
xor r13d,r9d
|
|
shrd r14d,r14d,9
|
|
xor r12d,r11d
|
|
vpshufd xmm6,xmm6,232
|
|
shrd r13d,r13d,5
|
|
xor r14d,ebx
|
|
and r12d,r9d
|
|
vpslldq xmm6,xmm6,8
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
|
|
xor r13d,r9d
|
|
add eax,DWORD[28+rsp]
|
|
mov esi,ebx
|
|
vpaddd xmm1,xmm1,xmm6
|
|
shrd r14d,r14d,11
|
|
xor r12d,r11d
|
|
xor esi,ecx
|
|
vpaddd xmm6,xmm1,XMMWORD[32+rbp]
|
|
shrd r13d,r13d,6
|
|
add eax,r12d
|
|
and r15d,esi
|
|
xor r14d,ebx
|
|
add eax,r13d
|
|
xor r15d,ecx
|
|
add r8d,eax
|
|
shrd r14d,r14d,2
|
|
add eax,r15d
|
|
mov r13d,r8d
|
|
add r14d,eax
|
|
vmovdqa XMMWORD[16+rsp],xmm6
|
|
vpalignr xmm4,xmm3,xmm2,4
|
|
shrd r13d,r13d,14
|
|
mov eax,r14d
|
|
mov r12d,r9d
|
|
vpalignr xmm7,xmm1,xmm0,4
|
|
xor r13d,r8d
|
|
shrd r14d,r14d,9
|
|
xor r12d,r10d
|
|
vpsrld xmm6,xmm4,7
|
|
shrd r13d,r13d,5
|
|
xor r14d,eax
|
|
and r12d,r8d
|
|
vpaddd xmm2,xmm2,xmm7
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
|
|
xor r13d,r8d
|
|
add r11d,DWORD[32+rsp]
|
|
mov r15d,eax
|
|
vpsrld xmm7,xmm4,3
|
|
shrd r14d,r14d,11
|
|
xor r12d,r10d
|
|
xor r15d,ebx
|
|
vpslld xmm5,xmm4,14
|
|
shrd r13d,r13d,6
|
|
add r11d,r12d
|
|
and esi,r15d
|
|
vpxor xmm4,xmm7,xmm6
|
|
xor r14d,eax
|
|
add r11d,r13d
|
|
xor esi,ebx
|
|
vpshufd xmm7,xmm1,250
|
|
add edx,r11d
|
|
shrd r14d,r14d,2
|
|
add r11d,esi
|
|
vpsrld xmm6,xmm6,11
|
|
mov r13d,edx
|
|
add r14d,r11d
|
|
shrd r13d,r13d,14
|
|
vpxor xmm4,xmm4,xmm5
|
|
mov r11d,r14d
|
|
mov r12d,r8d
|
|
xor r13d,edx
|
|
vpslld xmm5,xmm5,11
|
|
shrd r14d,r14d,9
|
|
xor r12d,r9d
|
|
shrd r13d,r13d,5
|
|
vpxor xmm4,xmm4,xmm6
|
|
xor r14d,r11d
|
|
and r12d,edx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
|
|
xor r13d,edx
|
|
vpsrld xmm6,xmm7,10
|
|
add r10d,DWORD[36+rsp]
|
|
mov esi,r11d
|
|
shrd r14d,r14d,11
|
|
vpxor xmm4,xmm4,xmm5
|
|
xor r12d,r9d
|
|
xor esi,eax
|
|
shrd r13d,r13d,6
|
|
vpsrlq xmm7,xmm7,17
|
|
add r10d,r12d
|
|
and r15d,esi
|
|
xor r14d,r11d
|
|
vpaddd xmm2,xmm2,xmm4
|
|
add r10d,r13d
|
|
xor r15d,eax
|
|
add ecx,r10d
|
|
vpxor xmm6,xmm6,xmm7
|
|
shrd r14d,r14d,2
|
|
add r10d,r15d
|
|
mov r13d,ecx
|
|
vpsrlq xmm7,xmm7,2
|
|
add r14d,r10d
|
|
shrd r13d,r13d,14
|
|
mov r10d,r14d
|
|
vpxor xmm6,xmm6,xmm7
|
|
mov r12d,edx
|
|
xor r13d,ecx
|
|
shrd r14d,r14d,9
|
|
vpshufd xmm6,xmm6,132
|
|
xor r12d,r8d
|
|
shrd r13d,r13d,5
|
|
xor r14d,r10d
|
|
vpsrldq xmm6,xmm6,8
|
|
and r12d,ecx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
|
|
xor r13d,ecx
|
|
add r9d,DWORD[40+rsp]
|
|
vpaddd xmm2,xmm2,xmm6
|
|
mov r15d,r10d
|
|
shrd r14d,r14d,11
|
|
xor r12d,r8d
|
|
vpshufd xmm7,xmm2,80
|
|
xor r15d,r11d
|
|
shrd r13d,r13d,6
|
|
add r9d,r12d
|
|
vpsrld xmm6,xmm7,10
|
|
and esi,r15d
|
|
xor r14d,r10d
|
|
add r9d,r13d
|
|
vpsrlq xmm7,xmm7,17
|
|
xor esi,r11d
|
|
add ebx,r9d
|
|
shrd r14d,r14d,2
|
|
vpxor xmm6,xmm6,xmm7
|
|
add r9d,esi
|
|
mov r13d,ebx
|
|
add r14d,r9d
|
|
vpsrlq xmm7,xmm7,2
|
|
shrd r13d,r13d,14
|
|
mov r9d,r14d
|
|
mov r12d,ecx
|
|
vpxor xmm6,xmm6,xmm7
|
|
xor r13d,ebx
|
|
shrd r14d,r14d,9
|
|
xor r12d,edx
|
|
vpshufd xmm6,xmm6,232
|
|
shrd r13d,r13d,5
|
|
xor r14d,r9d
|
|
and r12d,ebx
|
|
vpslldq xmm6,xmm6,8
|
|
vaesenclast xmm11,xmm9,xmm10
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
|
|
xor r13d,ebx
|
|
add r8d,DWORD[44+rsp]
|
|
mov esi,r9d
|
|
vpaddd xmm2,xmm2,xmm6
|
|
shrd r14d,r14d,11
|
|
xor r12d,edx
|
|
xor esi,r10d
|
|
vpaddd xmm6,xmm2,XMMWORD[64+rbp]
|
|
shrd r13d,r13d,6
|
|
add r8d,r12d
|
|
and r15d,esi
|
|
xor r14d,r9d
|
|
add r8d,r13d
|
|
xor r15d,r10d
|
|
add eax,r8d
|
|
shrd r14d,r14d,2
|
|
add r8d,r15d
|
|
mov r13d,eax
|
|
add r14d,r8d
|
|
vmovdqa XMMWORD[32+rsp],xmm6
|
|
vpalignr xmm4,xmm0,xmm3,4
|
|
shrd r13d,r13d,14
|
|
mov r8d,r14d
|
|
mov r12d,ebx
|
|
vpalignr xmm7,xmm2,xmm1,4
|
|
xor r13d,eax
|
|
shrd r14d,r14d,9
|
|
xor r12d,ecx
|
|
vpsrld xmm6,xmm4,7
|
|
shrd r13d,r13d,5
|
|
xor r14d,r8d
|
|
and r12d,eax
|
|
vpaddd xmm3,xmm3,xmm7
|
|
vpand xmm8,xmm11,xmm12
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
|
|
xor r13d,eax
|
|
add edx,DWORD[48+rsp]
|
|
mov r15d,r8d
|
|
vpsrld xmm7,xmm4,3
|
|
shrd r14d,r14d,11
|
|
xor r12d,ecx
|
|
xor r15d,r9d
|
|
vpslld xmm5,xmm4,14
|
|
shrd r13d,r13d,6
|
|
add edx,r12d
|
|
and esi,r15d
|
|
vpxor xmm4,xmm7,xmm6
|
|
xor r14d,r8d
|
|
add edx,r13d
|
|
xor esi,r9d
|
|
vpshufd xmm7,xmm2,250
|
|
add r11d,edx
|
|
shrd r14d,r14d,2
|
|
add edx,esi
|
|
vpsrld xmm6,xmm6,11
|
|
mov r13d,r11d
|
|
add r14d,edx
|
|
shrd r13d,r13d,14
|
|
vpxor xmm4,xmm4,xmm5
|
|
mov edx,r14d
|
|
mov r12d,eax
|
|
xor r13d,r11d
|
|
vpslld xmm5,xmm5,11
|
|
shrd r14d,r14d,9
|
|
xor r12d,ebx
|
|
shrd r13d,r13d,5
|
|
vpxor xmm4,xmm4,xmm6
|
|
xor r14d,edx
|
|
and r12d,r11d
|
|
vaesenclast xmm11,xmm9,xmm10
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
|
|
xor r13d,r11d
|
|
vpsrld xmm6,xmm7,10
|
|
add ecx,DWORD[52+rsp]
|
|
mov esi,edx
|
|
shrd r14d,r14d,11
|
|
vpxor xmm4,xmm4,xmm5
|
|
xor r12d,ebx
|
|
xor esi,r8d
|
|
shrd r13d,r13d,6
|
|
vpsrlq xmm7,xmm7,17
|
|
add ecx,r12d
|
|
and r15d,esi
|
|
xor r14d,edx
|
|
vpaddd xmm3,xmm3,xmm4
|
|
add ecx,r13d
|
|
xor r15d,r8d
|
|
add r10d,ecx
|
|
vpxor xmm6,xmm6,xmm7
|
|
shrd r14d,r14d,2
|
|
add ecx,r15d
|
|
mov r13d,r10d
|
|
vpsrlq xmm7,xmm7,2
|
|
add r14d,ecx
|
|
shrd r13d,r13d,14
|
|
mov ecx,r14d
|
|
vpxor xmm6,xmm6,xmm7
|
|
mov r12d,r11d
|
|
xor r13d,r10d
|
|
shrd r14d,r14d,9
|
|
vpshufd xmm6,xmm6,132
|
|
xor r12d,eax
|
|
shrd r13d,r13d,5
|
|
xor r14d,ecx
|
|
vpsrldq xmm6,xmm6,8
|
|
and r12d,r10d
|
|
vpand xmm11,xmm11,xmm13
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
|
|
xor r13d,r10d
|
|
add ebx,DWORD[56+rsp]
|
|
vpaddd xmm3,xmm3,xmm6
|
|
mov r15d,ecx
|
|
shrd r14d,r14d,11
|
|
xor r12d,eax
|
|
vpshufd xmm7,xmm3,80
|
|
xor r15d,edx
|
|
shrd r13d,r13d,6
|
|
add ebx,r12d
|
|
vpsrld xmm6,xmm7,10
|
|
and esi,r15d
|
|
xor r14d,ecx
|
|
add ebx,r13d
|
|
vpsrlq xmm7,xmm7,17
|
|
xor esi,edx
|
|
add r9d,ebx
|
|
shrd r14d,r14d,2
|
|
vpxor xmm6,xmm6,xmm7
|
|
add ebx,esi
|
|
mov r13d,r9d
|
|
add r14d,ebx
|
|
vpsrlq xmm7,xmm7,2
|
|
shrd r13d,r13d,14
|
|
mov ebx,r14d
|
|
mov r12d,r10d
|
|
vpxor xmm6,xmm6,xmm7
|
|
xor r13d,r9d
|
|
shrd r14d,r14d,9
|
|
xor r12d,r11d
|
|
vpshufd xmm6,xmm6,232
|
|
shrd r13d,r13d,5
|
|
xor r14d,ebx
|
|
and r12d,r9d
|
|
vpslldq xmm6,xmm6,8
|
|
vpor xmm8,xmm8,xmm11
|
|
vaesenclast xmm11,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
|
|
xor r13d,r9d
|
|
add eax,DWORD[60+rsp]
|
|
mov esi,ebx
|
|
vpaddd xmm3,xmm3,xmm6
|
|
shrd r14d,r14d,11
|
|
xor r12d,r11d
|
|
xor esi,ecx
|
|
vpaddd xmm6,xmm3,XMMWORD[96+rbp]
|
|
shrd r13d,r13d,6
|
|
add eax,r12d
|
|
and r15d,esi
|
|
xor r14d,ebx
|
|
add eax,r13d
|
|
xor r15d,ecx
|
|
add r8d,eax
|
|
shrd r14d,r14d,2
|
|
add eax,r15d
|
|
mov r13d,r8d
|
|
add r14d,eax
|
|
vmovdqa XMMWORD[48+rsp],xmm6
|
|
mov r12,QWORD[((64+0))+rsp]
|
|
vpand xmm11,xmm11,xmm14
|
|
mov r15,QWORD[((64+8))+rsp]
|
|
vpor xmm8,xmm8,xmm11
|
|
vmovdqu XMMWORD[r12*1+r15],xmm8
|
|
lea r12,[16+r12]
|
|
cmp BYTE[131+rbp],0
|
|
jne NEAR $L$avx_00_47
|
|
vmovdqu xmm9,XMMWORD[r12]
|
|
mov QWORD[((64+0))+rsp],r12
|
|
shrd r13d,r13d,14
|
|
mov eax,r14d
|
|
mov r12d,r9d
|
|
xor r13d,r8d
|
|
shrd r14d,r14d,9
|
|
xor r12d,r10d
|
|
shrd r13d,r13d,5
|
|
xor r14d,eax
|
|
and r12d,r8d
|
|
vpxor xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
|
|
xor r13d,r8d
|
|
add r11d,DWORD[rsp]
|
|
mov r15d,eax
|
|
shrd r14d,r14d,11
|
|
xor r12d,r10d
|
|
xor r15d,ebx
|
|
shrd r13d,r13d,6
|
|
add r11d,r12d
|
|
and esi,r15d
|
|
xor r14d,eax
|
|
add r11d,r13d
|
|
xor esi,ebx
|
|
add edx,r11d
|
|
shrd r14d,r14d,2
|
|
add r11d,esi
|
|
mov r13d,edx
|
|
add r14d,r11d
|
|
shrd r13d,r13d,14
|
|
mov r11d,r14d
|
|
mov r12d,r8d
|
|
xor r13d,edx
|
|
shrd r14d,r14d,9
|
|
xor r12d,r9d
|
|
shrd r13d,r13d,5
|
|
xor r14d,r11d
|
|
and r12d,edx
|
|
vpxor xmm9,xmm9,xmm8
|
|
xor r13d,edx
|
|
add r10d,DWORD[4+rsp]
|
|
mov esi,r11d
|
|
shrd r14d,r14d,11
|
|
xor r12d,r9d
|
|
xor esi,eax
|
|
shrd r13d,r13d,6
|
|
add r10d,r12d
|
|
and r15d,esi
|
|
xor r14d,r11d
|
|
add r10d,r13d
|
|
xor r15d,eax
|
|
add ecx,r10d
|
|
shrd r14d,r14d,2
|
|
add r10d,r15d
|
|
mov r13d,ecx
|
|
add r14d,r10d
|
|
shrd r13d,r13d,14
|
|
mov r10d,r14d
|
|
mov r12d,edx
|
|
xor r13d,ecx
|
|
shrd r14d,r14d,9
|
|
xor r12d,r8d
|
|
shrd r13d,r13d,5
|
|
xor r14d,r10d
|
|
and r12d,ecx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
|
|
xor r13d,ecx
|
|
add r9d,DWORD[8+rsp]
|
|
mov r15d,r10d
|
|
shrd r14d,r14d,11
|
|
xor r12d,r8d
|
|
xor r15d,r11d
|
|
shrd r13d,r13d,6
|
|
add r9d,r12d
|
|
and esi,r15d
|
|
xor r14d,r10d
|
|
add r9d,r13d
|
|
xor esi,r11d
|
|
add ebx,r9d
|
|
shrd r14d,r14d,2
|
|
add r9d,esi
|
|
mov r13d,ebx
|
|
add r14d,r9d
|
|
shrd r13d,r13d,14
|
|
mov r9d,r14d
|
|
mov r12d,ecx
|
|
xor r13d,ebx
|
|
shrd r14d,r14d,9
|
|
xor r12d,edx
|
|
shrd r13d,r13d,5
|
|
xor r14d,r9d
|
|
and r12d,ebx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
|
|
xor r13d,ebx
|
|
add r8d,DWORD[12+rsp]
|
|
mov esi,r9d
|
|
shrd r14d,r14d,11
|
|
xor r12d,edx
|
|
xor esi,r10d
|
|
shrd r13d,r13d,6
|
|
add r8d,r12d
|
|
and r15d,esi
|
|
xor r14d,r9d
|
|
add r8d,r13d
|
|
xor r15d,r10d
|
|
add eax,r8d
|
|
shrd r14d,r14d,2
|
|
add r8d,r15d
|
|
mov r13d,eax
|
|
add r14d,r8d
|
|
shrd r13d,r13d,14
|
|
mov r8d,r14d
|
|
mov r12d,ebx
|
|
xor r13d,eax
|
|
shrd r14d,r14d,9
|
|
xor r12d,ecx
|
|
shrd r13d,r13d,5
|
|
xor r14d,r8d
|
|
and r12d,eax
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
|
|
xor r13d,eax
|
|
add edx,DWORD[16+rsp]
|
|
mov r15d,r8d
|
|
shrd r14d,r14d,11
|
|
xor r12d,ecx
|
|
xor r15d,r9d
|
|
shrd r13d,r13d,6
|
|
add edx,r12d
|
|
and esi,r15d
|
|
xor r14d,r8d
|
|
add edx,r13d
|
|
xor esi,r9d
|
|
add r11d,edx
|
|
shrd r14d,r14d,2
|
|
add edx,esi
|
|
mov r13d,r11d
|
|
add r14d,edx
|
|
shrd r13d,r13d,14
|
|
mov edx,r14d
|
|
mov r12d,eax
|
|
xor r13d,r11d
|
|
shrd r14d,r14d,9
|
|
xor r12d,ebx
|
|
shrd r13d,r13d,5
|
|
xor r14d,edx
|
|
and r12d,r11d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
|
|
xor r13d,r11d
|
|
add ecx,DWORD[20+rsp]
|
|
mov esi,edx
|
|
shrd r14d,r14d,11
|
|
xor r12d,ebx
|
|
xor esi,r8d
|
|
shrd r13d,r13d,6
|
|
add ecx,r12d
|
|
and r15d,esi
|
|
xor r14d,edx
|
|
add ecx,r13d
|
|
xor r15d,r8d
|
|
add r10d,ecx
|
|
shrd r14d,r14d,2
|
|
add ecx,r15d
|
|
mov r13d,r10d
|
|
add r14d,ecx
|
|
shrd r13d,r13d,14
|
|
mov ecx,r14d
|
|
mov r12d,r11d
|
|
xor r13d,r10d
|
|
shrd r14d,r14d,9
|
|
xor r12d,eax
|
|
shrd r13d,r13d,5
|
|
xor r14d,ecx
|
|
and r12d,r10d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
|
|
xor r13d,r10d
|
|
add ebx,DWORD[24+rsp]
|
|
mov r15d,ecx
|
|
shrd r14d,r14d,11
|
|
xor r12d,eax
|
|
xor r15d,edx
|
|
shrd r13d,r13d,6
|
|
add ebx,r12d
|
|
and esi,r15d
|
|
xor r14d,ecx
|
|
add ebx,r13d
|
|
xor esi,edx
|
|
add r9d,ebx
|
|
shrd r14d,r14d,2
|
|
add ebx,esi
|
|
mov r13d,r9d
|
|
add r14d,ebx
|
|
shrd r13d,r13d,14
|
|
mov ebx,r14d
|
|
mov r12d,r10d
|
|
xor r13d,r9d
|
|
shrd r14d,r14d,9
|
|
xor r12d,r11d
|
|
shrd r13d,r13d,5
|
|
xor r14d,ebx
|
|
and r12d,r9d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
|
|
xor r13d,r9d
|
|
add eax,DWORD[28+rsp]
|
|
mov esi,ebx
|
|
shrd r14d,r14d,11
|
|
xor r12d,r11d
|
|
xor esi,ecx
|
|
shrd r13d,r13d,6
|
|
add eax,r12d
|
|
and r15d,esi
|
|
xor r14d,ebx
|
|
add eax,r13d
|
|
xor r15d,ecx
|
|
add r8d,eax
|
|
shrd r14d,r14d,2
|
|
add eax,r15d
|
|
mov r13d,r8d
|
|
add r14d,eax
|
|
shrd r13d,r13d,14
|
|
mov eax,r14d
|
|
mov r12d,r9d
|
|
xor r13d,r8d
|
|
shrd r14d,r14d,9
|
|
xor r12d,r10d
|
|
shrd r13d,r13d,5
|
|
xor r14d,eax
|
|
and r12d,r8d
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
|
|
xor r13d,r8d
|
|
add r11d,DWORD[32+rsp]
|
|
mov r15d,eax
|
|
shrd r14d,r14d,11
|
|
xor r12d,r10d
|
|
xor r15d,ebx
|
|
shrd r13d,r13d,6
|
|
add r11d,r12d
|
|
and esi,r15d
|
|
xor r14d,eax
|
|
add r11d,r13d
|
|
xor esi,ebx
|
|
add edx,r11d
|
|
shrd r14d,r14d,2
|
|
add r11d,esi
|
|
mov r13d,edx
|
|
add r14d,r11d
|
|
shrd r13d,r13d,14
|
|
mov r11d,r14d
|
|
mov r12d,r8d
|
|
xor r13d,edx
|
|
shrd r14d,r14d,9
|
|
xor r12d,r9d
|
|
shrd r13d,r13d,5
|
|
xor r14d,r11d
|
|
and r12d,edx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
|
|
xor r13d,edx
|
|
add r10d,DWORD[36+rsp]
|
|
mov esi,r11d
|
|
shrd r14d,r14d,11
|
|
xor r12d,r9d
|
|
xor esi,eax
|
|
shrd r13d,r13d,6
|
|
add r10d,r12d
|
|
and r15d,esi
|
|
xor r14d,r11d
|
|
add r10d,r13d
|
|
xor r15d,eax
|
|
add ecx,r10d
|
|
shrd r14d,r14d,2
|
|
add r10d,r15d
|
|
mov r13d,ecx
|
|
add r14d,r10d
|
|
shrd r13d,r13d,14
|
|
mov r10d,r14d
|
|
mov r12d,edx
|
|
xor r13d,ecx
|
|
shrd r14d,r14d,9
|
|
xor r12d,r8d
|
|
shrd r13d,r13d,5
|
|
xor r14d,r10d
|
|
and r12d,ecx
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
|
|
xor r13d,ecx
|
|
add r9d,DWORD[40+rsp]
|
|
mov r15d,r10d
|
|
shrd r14d,r14d,11
|
|
xor r12d,r8d
|
|
xor r15d,r11d
|
|
shrd r13d,r13d,6
|
|
add r9d,r12d
|
|
and esi,r15d
|
|
xor r14d,r10d
|
|
add r9d,r13d
|
|
xor esi,r11d
|
|
add ebx,r9d
|
|
shrd r14d,r14d,2
|
|
add r9d,esi
|
|
mov r13d,ebx
|
|
add r14d,r9d
|
|
shrd r13d,r13d,14
|
|
mov r9d,r14d
|
|
mov r12d,ecx
|
|
xor r13d,ebx
|
|
shrd r14d,r14d,9
|
|
xor r12d,edx
|
|
shrd r13d,r13d,5
|
|
xor r14d,r9d
|
|
and r12d,ebx
|
|
vaesenclast xmm11,xmm9,xmm10
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
|
|
xor r13d,ebx
|
|
add r8d,DWORD[44+rsp]
|
|
mov esi,r9d
|
|
shrd r14d,r14d,11
|
|
xor r12d,edx
|
|
xor esi,r10d
|
|
shrd r13d,r13d,6
|
|
add r8d,r12d
|
|
and r15d,esi
|
|
xor r14d,r9d
|
|
add r8d,r13d
|
|
xor r15d,r10d
|
|
add eax,r8d
|
|
shrd r14d,r14d,2
|
|
add r8d,r15d
|
|
mov r13d,eax
|
|
add r14d,r8d
|
|
shrd r13d,r13d,14
|
|
mov r8d,r14d
|
|
mov r12d,ebx
|
|
xor r13d,eax
|
|
shrd r14d,r14d,9
|
|
xor r12d,ecx
|
|
shrd r13d,r13d,5
|
|
xor r14d,r8d
|
|
and r12d,eax
|
|
vpand xmm8,xmm11,xmm12
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
|
|
xor r13d,eax
|
|
add edx,DWORD[48+rsp]
|
|
mov r15d,r8d
|
|
shrd r14d,r14d,11
|
|
xor r12d,ecx
|
|
xor r15d,r9d
|
|
shrd r13d,r13d,6
|
|
add edx,r12d
|
|
and esi,r15d
|
|
xor r14d,r8d
|
|
add edx,r13d
|
|
xor esi,r9d
|
|
add r11d,edx
|
|
shrd r14d,r14d,2
|
|
add edx,esi
|
|
mov r13d,r11d
|
|
add r14d,edx
|
|
shrd r13d,r13d,14
|
|
mov edx,r14d
|
|
mov r12d,eax
|
|
xor r13d,r11d
|
|
shrd r14d,r14d,9
|
|
xor r12d,ebx
|
|
shrd r13d,r13d,5
|
|
xor r14d,edx
|
|
and r12d,r11d
|
|
vaesenclast xmm11,xmm9,xmm10
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
|
|
xor r13d,r11d
|
|
add ecx,DWORD[52+rsp]
|
|
mov esi,edx
|
|
shrd r14d,r14d,11
|
|
xor r12d,ebx
|
|
xor esi,r8d
|
|
shrd r13d,r13d,6
|
|
add ecx,r12d
|
|
and r15d,esi
|
|
xor r14d,edx
|
|
add ecx,r13d
|
|
xor r15d,r8d
|
|
add r10d,ecx
|
|
shrd r14d,r14d,2
|
|
add ecx,r15d
|
|
mov r13d,r10d
|
|
add r14d,ecx
|
|
shrd r13d,r13d,14
|
|
mov ecx,r14d
|
|
mov r12d,r11d
|
|
xor r13d,r10d
|
|
shrd r14d,r14d,9
|
|
xor r12d,eax
|
|
shrd r13d,r13d,5
|
|
xor r14d,ecx
|
|
and r12d,r10d
|
|
vpand xmm11,xmm11,xmm13
|
|
vaesenc xmm9,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
|
|
xor r13d,r10d
|
|
add ebx,DWORD[56+rsp]
|
|
mov r15d,ecx
|
|
shrd r14d,r14d,11
|
|
xor r12d,eax
|
|
xor r15d,edx
|
|
shrd r13d,r13d,6
|
|
add ebx,r12d
|
|
and esi,r15d
|
|
xor r14d,ecx
|
|
add ebx,r13d
|
|
xor esi,edx
|
|
add r9d,ebx
|
|
shrd r14d,r14d,2
|
|
add ebx,esi
|
|
mov r13d,r9d
|
|
add r14d,ebx
|
|
shrd r13d,r13d,14
|
|
mov ebx,r14d
|
|
mov r12d,r10d
|
|
xor r13d,r9d
|
|
shrd r14d,r14d,9
|
|
xor r12d,r11d
|
|
shrd r13d,r13d,5
|
|
xor r14d,ebx
|
|
and r12d,r9d
|
|
vpor xmm8,xmm8,xmm11
|
|
vaesenclast xmm11,xmm9,xmm10
|
|
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
|
|
xor r13d,r9d
|
|
add eax,DWORD[60+rsp]
|
|
mov esi,ebx
|
|
shrd r14d,r14d,11
|
|
xor r12d,r11d
|
|
xor esi,ecx
|
|
shrd r13d,r13d,6
|
|
add eax,r12d
|
|
and r15d,esi
|
|
xor r14d,ebx
|
|
add eax,r13d
|
|
xor r15d,ecx
|
|
add r8d,eax
|
|
shrd r14d,r14d,2
|
|
add eax,r15d
|
|
mov r13d,r8d
|
|
add r14d,eax
|
|
mov r12,QWORD[((64+0))+rsp]
|
|
mov r13,QWORD[((64+8))+rsp]
|
|
mov r15,QWORD[((64+40))+rsp]
|
|
mov rsi,QWORD[((64+48))+rsp]
|
|
|
|
vpand xmm11,xmm11,xmm14
|
|
mov eax,r14d
|
|
vpor xmm8,xmm8,xmm11
|
|
vmovdqu XMMWORD[r13*1+r12],xmm8
|
|
lea r12,[16+r12]
|
|
|
|
add eax,DWORD[r15]
|
|
add ebx,DWORD[4+r15]
|
|
add ecx,DWORD[8+r15]
|
|
add edx,DWORD[12+r15]
|
|
add r8d,DWORD[16+r15]
|
|
add r9d,DWORD[20+r15]
|
|
add r10d,DWORD[24+r15]
|
|
add r11d,DWORD[28+r15]
|
|
|
|
cmp r12,QWORD[((64+16))+rsp]
|
|
|
|
mov DWORD[r15],eax
|
|
mov DWORD[4+r15],ebx
|
|
mov DWORD[8+r15],ecx
|
|
mov DWORD[12+r15],edx
|
|
mov DWORD[16+r15],r8d
|
|
mov DWORD[20+r15],r9d
|
|
mov DWORD[24+r15],r10d
|
|
mov DWORD[28+r15],r11d
|
|
jb NEAR $L$loop_avx
|
|
|
|
mov r8,QWORD[((64+32))+rsp]
|
|
mov rsi,QWORD[120+rsp]
|
|
|
|
vmovdqu XMMWORD[r8],xmm8
|
|
vzeroall
|
|
movaps xmm6,XMMWORD[128+rsp]
|
|
movaps xmm7,XMMWORD[144+rsp]
|
|
movaps xmm8,XMMWORD[160+rsp]
|
|
movaps xmm9,XMMWORD[176+rsp]
|
|
movaps xmm10,XMMWORD[192+rsp]
|
|
movaps xmm11,XMMWORD[208+rsp]
|
|
movaps xmm12,XMMWORD[224+rsp]
|
|
movaps xmm13,XMMWORD[240+rsp]
|
|
movaps xmm14,XMMWORD[256+rsp]
|
|
movaps xmm15,XMMWORD[272+rsp]
|
|
mov r15,QWORD[((-48))+rsi]
|
|
|
|
mov r14,QWORD[((-40))+rsi]
|
|
|
|
mov r13,QWORD[((-32))+rsi]
|
|
|
|
mov r12,QWORD[((-24))+rsi]
|
|
|
|
mov rbp,QWORD[((-16))+rsi]
|
|
|
|
mov rbx,QWORD[((-8))+rsi]
|
|
|
|
lea rsp,[rsi]
|
|
|
|
$L$epilogue_avx:
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
DB 0F3h,0C3h ;repret
|
|
|
|
$L$SEH_end_aesni_cbc_sha256_enc_avx:
|
|
|
|
ALIGN 64
|
|
aesni_cbc_sha256_enc_avx2:
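; AVX2 flavour: the message schedule lives in 256-bit ymm registers,
; with each 128-bit lane carrying one 64-byte block (vinserti128 pairs
; two blocks), and the round function uses the BMI2 rorx/andn forms to
; avoid flag dependencies. The stack frame is larger and aligned to
; 1024 bytes because the schedule window slides with
; "lea rsp,[-64+rsp]" as rounds are consumed.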
|
|
mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_aesni_cbc_sha256_enc_avx2:
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
mov rcx,r9
|
|
mov r8,QWORD[40+rsp]
|
|
mov r9,QWORD[48+rsp]
|
|
|
|
|
|
|
|
$L$avx2_shortcut:
|
|
mov r10,QWORD[56+rsp]
|
|
mov rax,rsp
|
|
|
|
push rbx
|
|
|
|
push rbp
|
|
|
|
push r12
|
|
|
|
push r13
|
|
|
|
push r14
|
|
|
|
push r15
|
|
|
|
sub rsp,736
|
|
and rsp,-256*4
|
|
add rsp,448
|
|
|
|
shl rdx,6
|
|
sub rsi,rdi
|
|
sub r10,rdi
|
|
add rdx,rdi
|
|
|
|
|
|
|
|
mov QWORD[((64+16))+rsp],rdx
|
|
|
|
mov QWORD[((64+32))+rsp],r8
|
|
mov QWORD[((64+40))+rsp],r9
|
|
mov QWORD[((64+48))+rsp],r10
|
|
mov QWORD[120+rsp],rax
|
|
|
|
movaps XMMWORD[128+rsp],xmm6
|
|
movaps XMMWORD[144+rsp],xmm7
|
|
movaps XMMWORD[160+rsp],xmm8
|
|
movaps XMMWORD[176+rsp],xmm9
|
|
movaps XMMWORD[192+rsp],xmm10
|
|
movaps XMMWORD[208+rsp],xmm11
|
|
movaps XMMWORD[224+rsp],xmm12
|
|
movaps XMMWORD[240+rsp],xmm13
|
|
movaps XMMWORD[256+rsp],xmm14
|
|
movaps XMMWORD[272+rsp],xmm15
|
|
$L$prologue_avx2:
|
|
vzeroall
|
|
|
|
mov r13,rdi
|
|
vpinsrq xmm15,xmm15,rsi,1
|
|
lea rdi,[128+rcx]
|
|
lea r12,[((K256+544))]
|
|
mov r14d,DWORD[((240-128))+rdi]
|
|
mov r15,r9
|
|
mov rsi,r10
|
|
vmovdqu xmm8,XMMWORD[r8]
|
|
lea r14,[((-9))+r14]
|
|
|
|
vmovdqa xmm14,XMMWORD[r14*8+r12]
|
|
vmovdqa xmm13,XMMWORD[16+r14*8+r12]
|
|
vmovdqa xmm12,XMMWORD[32+r14*8+r12]
|
|
|
|
sub r13,-16*4
|
|
mov eax,DWORD[r15]
|
|
lea r12,[r13*1+rsi]
|
|
mov ebx,DWORD[4+r15]
|
|
cmp r13,rdx
|
|
mov ecx,DWORD[8+r15]
|
|
cmove r12,rsp
|
|
mov edx,DWORD[12+r15]
|
|
mov r8d,DWORD[16+r15]
|
|
mov r9d,DWORD[20+r15]
|
|
mov r10d,DWORD[24+r15]
|
|
mov r11d,DWORD[28+r15]
|
|
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
|
|
jmp NEAR $L$oop_avx2
|
|
ALIGN 16
|
|
$L$oop_avx2:
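; Loop head: load 64 bytes of the current block into the low ymm lanes
; and vinserti128 the block at [r12] into the high lanes; r12 points at
; the following block, or (via the cmove above) back at the stack copy
; when this is apparently the last block, then byte-swap, pre-add K256
; and fall into the rounds.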
|
|
vmovdqa ymm7,YMMWORD[((K256+512))]
|
|
vmovdqu xmm0,XMMWORD[((-64+0))+r13*1+rsi]
|
|
vmovdqu xmm1,XMMWORD[((-64+16))+r13*1+rsi]
|
|
vmovdqu xmm2,XMMWORD[((-64+32))+r13*1+rsi]
|
|
vmovdqu xmm3,XMMWORD[((-64+48))+r13*1+rsi]
|
|
|
|
vinserti128 ymm0,ymm0,XMMWORD[r12],1
|
|
vinserti128 ymm1,ymm1,XMMWORD[16+r12],1
|
|
vpshufb ymm0,ymm0,ymm7
|
|
vinserti128 ymm2,ymm2,XMMWORD[32+r12],1
|
|
vpshufb ymm1,ymm1,ymm7
|
|
vinserti128 ymm3,ymm3,XMMWORD[48+r12],1
|
|
|
|
lea rbp,[K256]
|
|
vpshufb ymm2,ymm2,ymm7
|
|
lea r13,[((-64))+r13]
|
|
vpaddd ymm4,ymm0,YMMWORD[rbp]
|
|
vpshufb ymm3,ymm3,ymm7
|
|
vpaddd ymm5,ymm1,YMMWORD[32+rbp]
|
|
vpaddd ymm6,ymm2,YMMWORD[64+rbp]
|
|
vpaddd ymm7,ymm3,YMMWORD[96+rbp]
|
|
vmovdqa YMMWORD[rsp],ymm4
|
|
xor r14d,r14d
|
|
vmovdqa YMMWORD[32+rsp],ymm5
|
|
lea rsp,[((-64))+rsp]
|
|
mov esi,ebx
|
|
vmovdqa YMMWORD[rsp],ymm6
|
|
xor esi,ecx
|
|
vmovdqa YMMWORD[32+rsp],ymm7
|
|
mov r12d,r9d
|
|
sub rbp,-16*2*4
|
|
jmp NEAR $L$avx2_00_47
|
|
|
|
ALIGN 16
$L$avx2_00_47:
vmovdqu xmm9,XMMWORD[r13]
vpinsrq xmm15,xmm15,r13,0
lea rsp,[((-64))+rsp]
vpalignr ymm4,ymm1,ymm0,4
add r11d,DWORD[((0+128))+rsp]
and r12d,r8d
rorx r13d,r8d,25
vpalignr ymm7,ymm3,ymm2,4
rorx r15d,r8d,11
lea eax,[r14*1+rax]
lea r11d,[r12*1+r11]
vpsrld ymm6,ymm4,7
andn r12d,r8d,r10d
xor r13d,r15d
rorx r14d,r8d,6
vpaddd ymm0,ymm0,ymm7
lea r11d,[r12*1+r11]
xor r13d,r14d
mov r15d,eax
vpsrld ymm7,ymm4,3
rorx r12d,eax,22
lea r11d,[r13*1+r11]
xor r15d,ebx
vpslld ymm5,ymm4,14
rorx r14d,eax,13
rorx r13d,eax,2
lea edx,[r11*1+rdx]
vpxor ymm4,ymm7,ymm6
and esi,r15d
vpxor xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
xor r14d,r12d
xor esi,ebx
vpshufd ymm7,ymm3,250
xor r14d,r13d
lea r11d,[rsi*1+r11]
mov r12d,r8d
vpsrld ymm6,ymm6,11
add r10d,DWORD[((4+128))+rsp]
and r12d,edx
rorx r13d,edx,25
vpxor ymm4,ymm4,ymm5
rorx esi,edx,11
lea r11d,[r14*1+r11]
lea r10d,[r12*1+r10]
vpslld ymm5,ymm5,11
andn r12d,edx,r9d
xor r13d,esi
rorx r14d,edx,6
vpxor ymm4,ymm4,ymm6
lea r10d,[r12*1+r10]
xor r13d,r14d
mov esi,r11d
vpsrld ymm6,ymm7,10
rorx r12d,r11d,22
lea r10d,[r13*1+r10]
xor esi,eax
vpxor ymm4,ymm4,ymm5
rorx r14d,r11d,13
rorx r13d,r11d,2
lea ecx,[r10*1+rcx]
vpsrlq ymm7,ymm7,17
and r15d,esi
vpxor xmm9,xmm9,xmm8
xor r14d,r12d
xor r15d,eax
vpaddd ymm0,ymm0,ymm4
xor r14d,r13d
lea r10d,[r15*1+r10]
mov r12d,edx
vpxor ymm6,ymm6,ymm7
add r9d,DWORD[((8+128))+rsp]
and r12d,ecx
rorx r13d,ecx,25
vpsrlq ymm7,ymm7,2
rorx r15d,ecx,11
lea r10d,[r14*1+r10]
lea r9d,[r12*1+r9]
vpxor ymm6,ymm6,ymm7
andn r12d,ecx,r8d
xor r13d,r15d
rorx r14d,ecx,6
vpshufd ymm6,ymm6,132
lea r9d,[r12*1+r9]
xor r13d,r14d
mov r15d,r10d
vpsrldq ymm6,ymm6,8
rorx r12d,r10d,22
lea r9d,[r13*1+r9]
xor r15d,r11d
vpaddd ymm0,ymm0,ymm6
rorx r14d,r10d,13
rorx r13d,r10d,2
lea ebx,[r9*1+rbx]
vpshufd ymm7,ymm0,80
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
xor r14d,r12d
xor esi,r11d
vpsrld ymm6,ymm7,10
xor r14d,r13d
lea r9d,[rsi*1+r9]
mov r12d,ecx
vpsrlq ymm7,ymm7,17
add r8d,DWORD[((12+128))+rsp]
and r12d,ebx
rorx r13d,ebx,25
vpxor ymm6,ymm6,ymm7
rorx esi,ebx,11
lea r9d,[r14*1+r9]
lea r8d,[r12*1+r8]
vpsrlq ymm7,ymm7,2
andn r12d,ebx,edx
xor r13d,esi
rorx r14d,ebx,6
vpxor ymm6,ymm6,ymm7
lea r8d,[r12*1+r8]
xor r13d,r14d
mov esi,r9d
vpshufd ymm6,ymm6,232
rorx r12d,r9d,22
lea r8d,[r13*1+r8]
xor esi,r10d
vpslldq ymm6,ymm6,8
rorx r14d,r9d,13
rorx r13d,r9d,2
lea eax,[r8*1+rax]
vpaddd ymm0,ymm0,ymm6
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
xor r14d,r12d
xor r15d,r10d
vpaddd ymm6,ymm0,YMMWORD[rbp]
xor r14d,r13d
lea r8d,[r15*1+r8]
mov r12d,ebx
vmovdqa YMMWORD[rsp],ymm6
vpalignr ymm4,ymm2,ymm1,4
add edx,DWORD[((32+128))+rsp]
and r12d,eax
rorx r13d,eax,25
vpalignr ymm7,ymm0,ymm3,4
rorx r15d,eax,11
lea r8d,[r14*1+r8]
lea edx,[r12*1+rdx]
vpsrld ymm6,ymm4,7
andn r12d,eax,ecx
xor r13d,r15d
rorx r14d,eax,6
vpaddd ymm1,ymm1,ymm7
lea edx,[r12*1+rdx]
xor r13d,r14d
mov r15d,r8d
vpsrld ymm7,ymm4,3
rorx r12d,r8d,22
lea edx,[r13*1+rdx]
xor r15d,r9d
vpslld ymm5,ymm4,14
rorx r14d,r8d,13
rorx r13d,r8d,2
lea r11d,[rdx*1+r11]
vpxor ymm4,ymm7,ymm6
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
xor r14d,r12d
xor esi,r9d
vpshufd ymm7,ymm0,250
xor r14d,r13d
lea edx,[rsi*1+rdx]
mov r12d,eax
vpsrld ymm6,ymm6,11
add ecx,DWORD[((36+128))+rsp]
and r12d,r11d
rorx r13d,r11d,25
vpxor ymm4,ymm4,ymm5
rorx esi,r11d,11
lea edx,[r14*1+rdx]
lea ecx,[r12*1+rcx]
vpslld ymm5,ymm5,11
andn r12d,r11d,ebx
xor r13d,esi
rorx r14d,r11d,6
vpxor ymm4,ymm4,ymm6
lea ecx,[r12*1+rcx]
xor r13d,r14d
mov esi,edx
vpsrld ymm6,ymm7,10
rorx r12d,edx,22
lea ecx,[r13*1+rcx]
xor esi,r8d
vpxor ymm4,ymm4,ymm5
rorx r14d,edx,13
rorx r13d,edx,2
lea r10d,[rcx*1+r10]
vpsrlq ymm7,ymm7,17
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
xor r14d,r12d
xor r15d,r8d
vpaddd ymm1,ymm1,ymm4
xor r14d,r13d
lea ecx,[r15*1+rcx]
mov r12d,r11d
vpxor ymm6,ymm6,ymm7
add ebx,DWORD[((40+128))+rsp]
and r12d,r10d
rorx r13d,r10d,25
vpsrlq ymm7,ymm7,2
rorx r15d,r10d,11
lea ecx,[r14*1+rcx]
lea ebx,[r12*1+rbx]
vpxor ymm6,ymm6,ymm7
andn r12d,r10d,eax
xor r13d,r15d
rorx r14d,r10d,6
vpshufd ymm6,ymm6,132
lea ebx,[r12*1+rbx]
xor r13d,r14d
mov r15d,ecx
vpsrldq ymm6,ymm6,8
rorx r12d,ecx,22
lea ebx,[r13*1+rbx]
xor r15d,edx
vpaddd ymm1,ymm1,ymm6
rorx r14d,ecx,13
rorx r13d,ecx,2
lea r9d,[rbx*1+r9]
vpshufd ymm7,ymm1,80
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
xor r14d,r12d
xor esi,edx
vpsrld ymm6,ymm7,10
xor r14d,r13d
lea ebx,[rsi*1+rbx]
mov r12d,r10d
vpsrlq ymm7,ymm7,17
add eax,DWORD[((44+128))+rsp]
and r12d,r9d
rorx r13d,r9d,25
vpxor ymm6,ymm6,ymm7
rorx esi,r9d,11
lea ebx,[r14*1+rbx]
lea eax,[r12*1+rax]
vpsrlq ymm7,ymm7,2
andn r12d,r9d,r11d
xor r13d,esi
rorx r14d,r9d,6
vpxor ymm6,ymm6,ymm7
lea eax,[r12*1+rax]
xor r13d,r14d
mov esi,ebx
vpshufd ymm6,ymm6,232
rorx r12d,ebx,22
lea eax,[r13*1+rax]
xor esi,ecx
vpslldq ymm6,ymm6,8
rorx r14d,ebx,13
rorx r13d,ebx,2
lea r8d,[rax*1+r8]
vpaddd ymm1,ymm1,ymm6
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
xor r14d,r12d
xor r15d,ecx
vpaddd ymm6,ymm1,YMMWORD[32+rbp]
xor r14d,r13d
lea eax,[r15*1+rax]
mov r12d,r9d
vmovdqa YMMWORD[32+rsp],ymm6
lea rsp,[((-64))+rsp]
vpalignr ymm4,ymm3,ymm2,4
add r11d,DWORD[((0+128))+rsp]
and r12d,r8d
rorx r13d,r8d,25
vpalignr ymm7,ymm1,ymm0,4
rorx r15d,r8d,11
lea eax,[r14*1+rax]
lea r11d,[r12*1+r11]
vpsrld ymm6,ymm4,7
andn r12d,r8d,r10d
xor r13d,r15d
rorx r14d,r8d,6
vpaddd ymm2,ymm2,ymm7
lea r11d,[r12*1+r11]
xor r13d,r14d
mov r15d,eax
vpsrld ymm7,ymm4,3
rorx r12d,eax,22
lea r11d,[r13*1+r11]
xor r15d,ebx
vpslld ymm5,ymm4,14
rorx r14d,eax,13
rorx r13d,eax,2
lea edx,[r11*1+rdx]
vpxor ymm4,ymm7,ymm6
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
xor r14d,r12d
xor esi,ebx
vpshufd ymm7,ymm1,250
xor r14d,r13d
lea r11d,[rsi*1+r11]
mov r12d,r8d
vpsrld ymm6,ymm6,11
add r10d,DWORD[((4+128))+rsp]
and r12d,edx
rorx r13d,edx,25
vpxor ymm4,ymm4,ymm5
rorx esi,edx,11
lea r11d,[r14*1+r11]
lea r10d,[r12*1+r10]
vpslld ymm5,ymm5,11
andn r12d,edx,r9d
xor r13d,esi
rorx r14d,edx,6
vpxor ymm4,ymm4,ymm6
lea r10d,[r12*1+r10]
xor r13d,r14d
mov esi,r11d
vpsrld ymm6,ymm7,10
rorx r12d,r11d,22
lea r10d,[r13*1+r10]
xor esi,eax
vpxor ymm4,ymm4,ymm5
rorx r14d,r11d,13
rorx r13d,r11d,2
lea ecx,[r10*1+rcx]
vpsrlq ymm7,ymm7,17
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
xor r14d,r12d
xor r15d,eax
vpaddd ymm2,ymm2,ymm4
xor r14d,r13d
lea r10d,[r15*1+r10]
mov r12d,edx
vpxor ymm6,ymm6,ymm7
add r9d,DWORD[((8+128))+rsp]
and r12d,ecx
rorx r13d,ecx,25
vpsrlq ymm7,ymm7,2
rorx r15d,ecx,11
lea r10d,[r14*1+r10]
lea r9d,[r12*1+r9]
vpxor ymm6,ymm6,ymm7
andn r12d,ecx,r8d
xor r13d,r15d
rorx r14d,ecx,6
vpshufd ymm6,ymm6,132
lea r9d,[r12*1+r9]
xor r13d,r14d
mov r15d,r10d
vpsrldq ymm6,ymm6,8
rorx r12d,r10d,22
lea r9d,[r13*1+r9]
xor r15d,r11d
vpaddd ymm2,ymm2,ymm6
rorx r14d,r10d,13
rorx r13d,r10d,2
lea ebx,[r9*1+rbx]
vpshufd ymm7,ymm2,80
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
xor r14d,r12d
xor esi,r11d
vpsrld ymm6,ymm7,10
xor r14d,r13d
lea r9d,[rsi*1+r9]
mov r12d,ecx
vpsrlq ymm7,ymm7,17
add r8d,DWORD[((12+128))+rsp]
and r12d,ebx
rorx r13d,ebx,25
vpxor ymm6,ymm6,ymm7
rorx esi,ebx,11
lea r9d,[r14*1+r9]
lea r8d,[r12*1+r8]
vpsrlq ymm7,ymm7,2
andn r12d,ebx,edx
xor r13d,esi
rorx r14d,ebx,6
vpxor ymm6,ymm6,ymm7
lea r8d,[r12*1+r8]
xor r13d,r14d
mov esi,r9d
vpshufd ymm6,ymm6,232
rorx r12d,r9d,22
lea r8d,[r13*1+r8]
xor esi,r10d
vpslldq ymm6,ymm6,8
rorx r14d,r9d,13
rorx r13d,r9d,2
lea eax,[r8*1+rax]
vpaddd ymm2,ymm2,ymm6
and r15d,esi
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
xor r14d,r12d
xor r15d,r10d
vpaddd ymm6,ymm2,YMMWORD[64+rbp]
xor r14d,r13d
lea r8d,[r15*1+r8]
mov r12d,ebx
vmovdqa YMMWORD[rsp],ymm6
vpalignr ymm4,ymm0,ymm3,4
add edx,DWORD[((32+128))+rsp]
and r12d,eax
rorx r13d,eax,25
vpalignr ymm7,ymm2,ymm1,4
rorx r15d,eax,11
lea r8d,[r14*1+r8]
lea edx,[r12*1+rdx]
vpsrld ymm6,ymm4,7
andn r12d,eax,ecx
xor r13d,r15d
rorx r14d,eax,6
vpaddd ymm3,ymm3,ymm7
lea edx,[r12*1+rdx]
xor r13d,r14d
mov r15d,r8d
vpsrld ymm7,ymm4,3
rorx r12d,r8d,22
lea edx,[r13*1+rdx]
xor r15d,r9d
vpslld ymm5,ymm4,14
rorx r14d,r8d,13
rorx r13d,r8d,2
lea r11d,[rdx*1+r11]
vpxor ymm4,ymm7,ymm6
and esi,r15d
vpand xmm8,xmm11,xmm12
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
xor r14d,r12d
xor esi,r9d
vpshufd ymm7,ymm2,250
xor r14d,r13d
lea edx,[rsi*1+rdx]
mov r12d,eax
vpsrld ymm6,ymm6,11
add ecx,DWORD[((36+128))+rsp]
and r12d,r11d
rorx r13d,r11d,25
vpxor ymm4,ymm4,ymm5
rorx esi,r11d,11
lea edx,[r14*1+rdx]
lea ecx,[r12*1+rcx]
vpslld ymm5,ymm5,11
andn r12d,r11d,ebx
xor r13d,esi
rorx r14d,r11d,6
vpxor ymm4,ymm4,ymm6
lea ecx,[r12*1+rcx]
xor r13d,r14d
mov esi,edx
vpsrld ymm6,ymm7,10
rorx r12d,edx,22
lea ecx,[r13*1+rcx]
xor esi,r8d
vpxor ymm4,ymm4,ymm5
rorx r14d,edx,13
rorx r13d,edx,2
lea r10d,[rcx*1+r10]
vpsrlq ymm7,ymm7,17
and r15d,esi
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
xor r14d,r12d
xor r15d,r8d
vpaddd ymm3,ymm3,ymm4
xor r14d,r13d
lea ecx,[r15*1+rcx]
mov r12d,r11d
vpxor ymm6,ymm6,ymm7
add ebx,DWORD[((40+128))+rsp]
and r12d,r10d
rorx r13d,r10d,25
vpsrlq ymm7,ymm7,2
rorx r15d,r10d,11
lea ecx,[r14*1+rcx]
lea ebx,[r12*1+rbx]
vpxor ymm6,ymm6,ymm7
andn r12d,r10d,eax
xor r13d,r15d
rorx r14d,r10d,6
vpshufd ymm6,ymm6,132
lea ebx,[r12*1+rbx]
xor r13d,r14d
mov r15d,ecx
vpsrldq ymm6,ymm6,8
rorx r12d,ecx,22
lea ebx,[r13*1+rbx]
xor r15d,edx
vpaddd ymm3,ymm3,ymm6
rorx r14d,ecx,13
rorx r13d,ecx,2
lea r9d,[rbx*1+r9]
vpshufd ymm7,ymm3,80
and esi,r15d
vpand xmm11,xmm11,xmm13
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
xor r14d,r12d
xor esi,edx
vpsrld ymm6,ymm7,10
xor r14d,r13d
lea ebx,[rsi*1+rbx]
mov r12d,r10d
vpsrlq ymm7,ymm7,17
add eax,DWORD[((44+128))+rsp]
and r12d,r9d
rorx r13d,r9d,25
vpxor ymm6,ymm6,ymm7
rorx esi,r9d,11
lea ebx,[r14*1+rbx]
lea eax,[r12*1+rax]
vpsrlq ymm7,ymm7,2
andn r12d,r9d,r11d
xor r13d,esi
rorx r14d,r9d,6
vpxor ymm6,ymm6,ymm7
lea eax,[r12*1+rax]
xor r13d,r14d
mov esi,ebx
vpshufd ymm6,ymm6,232
rorx r12d,ebx,22
lea eax,[r13*1+rax]
xor esi,ecx
vpslldq ymm6,ymm6,8
rorx r14d,ebx,13
rorx r13d,ebx,2
lea r8d,[rax*1+r8]
vpaddd ymm3,ymm3,ymm6
and r15d,esi
vpor xmm8,xmm8,xmm11
vaesenclast xmm11,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
xor r14d,r12d
xor r15d,ecx
vpaddd ymm6,ymm3,YMMWORD[96+rbp]
xor r14d,r13d
lea eax,[r15*1+rax]
mov r12d,r9d
vmovdqa YMMWORD[32+rsp],ymm6
vmovq r13,xmm15
vpextrq r15,xmm15,1
vpand xmm11,xmm11,xmm14
vpor xmm8,xmm8,xmm11
vmovdqu XMMWORD[r13*1+r15],xmm8
lea r13,[16+r13]
lea rbp,[128+rbp]
cmp BYTE[3+rbp],0
jne NEAR $L$avx2_00_47
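; Rounds 48-63 of the even block: the pre-added schedule already sits on the
; stack, so only the scalar round function and the AES-CBC lane remain.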
vmovdqu xmm9,XMMWORD[r13]
vpinsrq xmm15,xmm15,r13,0
add r11d,DWORD[((0+64))+rsp]
and r12d,r8d
rorx r13d,r8d,25
rorx r15d,r8d,11
lea eax,[r14*1+rax]
lea r11d,[r12*1+r11]
andn r12d,r8d,r10d
xor r13d,r15d
rorx r14d,r8d,6
lea r11d,[r12*1+r11]
xor r13d,r14d
mov r15d,eax
rorx r12d,eax,22
lea r11d,[r13*1+r11]
xor r15d,ebx
rorx r14d,eax,13
rorx r13d,eax,2
lea edx,[r11*1+rdx]
and esi,r15d
vpxor xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
xor r14d,r12d
xor esi,ebx
xor r14d,r13d
lea r11d,[rsi*1+r11]
mov r12d,r8d
add r10d,DWORD[((4+64))+rsp]
and r12d,edx
rorx r13d,edx,25
rorx esi,edx,11
lea r11d,[r14*1+r11]
lea r10d,[r12*1+r10]
andn r12d,edx,r9d
xor r13d,esi
rorx r14d,edx,6
lea r10d,[r12*1+r10]
xor r13d,r14d
mov esi,r11d
rorx r12d,r11d,22
lea r10d,[r13*1+r10]
xor esi,eax
rorx r14d,r11d,13
rorx r13d,r11d,2
lea ecx,[r10*1+rcx]
and r15d,esi
vpxor xmm9,xmm9,xmm8
xor r14d,r12d
xor r15d,eax
xor r14d,r13d
lea r10d,[r15*1+r10]
mov r12d,edx
add r9d,DWORD[((8+64))+rsp]
and r12d,ecx
rorx r13d,ecx,25
rorx r15d,ecx,11
lea r10d,[r14*1+r10]
lea r9d,[r12*1+r9]
andn r12d,ecx,r8d
xor r13d,r15d
rorx r14d,ecx,6
lea r9d,[r12*1+r9]
xor r13d,r14d
mov r15d,r10d
rorx r12d,r10d,22
lea r9d,[r13*1+r9]
xor r15d,r11d
rorx r14d,r10d,13
rorx r13d,r10d,2
lea ebx,[r9*1+rbx]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
xor r14d,r12d
xor esi,r11d
xor r14d,r13d
lea r9d,[rsi*1+r9]
mov r12d,ecx
add r8d,DWORD[((12+64))+rsp]
and r12d,ebx
rorx r13d,ebx,25
rorx esi,ebx,11
lea r9d,[r14*1+r9]
lea r8d,[r12*1+r8]
andn r12d,ebx,edx
xor r13d,esi
rorx r14d,ebx,6
lea r8d,[r12*1+r8]
xor r13d,r14d
mov esi,r9d
rorx r12d,r9d,22
lea r8d,[r13*1+r8]
xor esi,r10d
rorx r14d,r9d,13
rorx r13d,r9d,2
lea eax,[r8*1+rax]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
xor r14d,r12d
xor r15d,r10d
xor r14d,r13d
lea r8d,[r15*1+r8]
mov r12d,ebx
add edx,DWORD[((32+64))+rsp]
and r12d,eax
rorx r13d,eax,25
rorx r15d,eax,11
lea r8d,[r14*1+r8]
lea edx,[r12*1+rdx]
andn r12d,eax,ecx
xor r13d,r15d
rorx r14d,eax,6
lea edx,[r12*1+rdx]
xor r13d,r14d
mov r15d,r8d
rorx r12d,r8d,22
lea edx,[r13*1+rdx]
xor r15d,r9d
rorx r14d,r8d,13
rorx r13d,r8d,2
lea r11d,[rdx*1+r11]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
xor r14d,r12d
xor esi,r9d
xor r14d,r13d
lea edx,[rsi*1+rdx]
mov r12d,eax
add ecx,DWORD[((36+64))+rsp]
and r12d,r11d
rorx r13d,r11d,25
rorx esi,r11d,11
lea edx,[r14*1+rdx]
lea ecx,[r12*1+rcx]
andn r12d,r11d,ebx
xor r13d,esi
rorx r14d,r11d,6
lea ecx,[r12*1+rcx]
xor r13d,r14d
mov esi,edx
rorx r12d,edx,22
lea ecx,[r13*1+rcx]
xor esi,r8d
rorx r14d,edx,13
rorx r13d,edx,2
lea r10d,[rcx*1+r10]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
xor r14d,r12d
xor r15d,r8d
xor r14d,r13d
lea ecx,[r15*1+rcx]
mov r12d,r11d
add ebx,DWORD[((40+64))+rsp]
and r12d,r10d
rorx r13d,r10d,25
rorx r15d,r10d,11
lea ecx,[r14*1+rcx]
lea ebx,[r12*1+rbx]
andn r12d,r10d,eax
xor r13d,r15d
rorx r14d,r10d,6
lea ebx,[r12*1+rbx]
xor r13d,r14d
mov r15d,ecx
rorx r12d,ecx,22
lea ebx,[r13*1+rbx]
xor r15d,edx
rorx r14d,ecx,13
rorx r13d,ecx,2
lea r9d,[rbx*1+r9]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
xor r14d,r12d
xor esi,edx
xor r14d,r13d
lea ebx,[rsi*1+rbx]
mov r12d,r10d
add eax,DWORD[((44+64))+rsp]
and r12d,r9d
rorx r13d,r9d,25
rorx esi,r9d,11
lea ebx,[r14*1+rbx]
lea eax,[r12*1+rax]
andn r12d,r9d,r11d
xor r13d,esi
rorx r14d,r9d,6
lea eax,[r12*1+rax]
xor r13d,r14d
mov esi,ebx
rorx r12d,ebx,22
lea eax,[r13*1+rax]
xor esi,ecx
rorx r14d,ebx,13
rorx r13d,ebx,2
lea r8d,[rax*1+r8]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
xor r14d,r12d
xor r15d,ecx
xor r14d,r13d
lea eax,[r15*1+rax]
mov r12d,r9d
add r11d,DWORD[rsp]
and r12d,r8d
rorx r13d,r8d,25
rorx r15d,r8d,11
lea eax,[r14*1+rax]
lea r11d,[r12*1+r11]
andn r12d,r8d,r10d
xor r13d,r15d
rorx r14d,r8d,6
lea r11d,[r12*1+r11]
xor r13d,r14d
mov r15d,eax
rorx r12d,eax,22
lea r11d,[r13*1+r11]
xor r15d,ebx
rorx r14d,eax,13
rorx r13d,eax,2
lea edx,[r11*1+rdx]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
xor r14d,r12d
xor esi,ebx
xor r14d,r13d
lea r11d,[rsi*1+r11]
mov r12d,r8d
add r10d,DWORD[4+rsp]
and r12d,edx
rorx r13d,edx,25
rorx esi,edx,11
lea r11d,[r14*1+r11]
lea r10d,[r12*1+r10]
andn r12d,edx,r9d
xor r13d,esi
rorx r14d,edx,6
lea r10d,[r12*1+r10]
xor r13d,r14d
mov esi,r11d
rorx r12d,r11d,22
lea r10d,[r13*1+r10]
xor esi,eax
rorx r14d,r11d,13
rorx r13d,r11d,2
lea ecx,[r10*1+rcx]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
xor r14d,r12d
xor r15d,eax
xor r14d,r13d
lea r10d,[r15*1+r10]
mov r12d,edx
add r9d,DWORD[8+rsp]
and r12d,ecx
rorx r13d,ecx,25
rorx r15d,ecx,11
lea r10d,[r14*1+r10]
lea r9d,[r12*1+r9]
andn r12d,ecx,r8d
xor r13d,r15d
rorx r14d,ecx,6
lea r9d,[r12*1+r9]
xor r13d,r14d
mov r15d,r10d
rorx r12d,r10d,22
lea r9d,[r13*1+r9]
xor r15d,r11d
rorx r14d,r10d,13
rorx r13d,r10d,2
lea ebx,[r9*1+rbx]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
xor r14d,r12d
xor esi,r11d
xor r14d,r13d
lea r9d,[rsi*1+r9]
mov r12d,ecx
add r8d,DWORD[12+rsp]
and r12d,ebx
rorx r13d,ebx,25
rorx esi,ebx,11
lea r9d,[r14*1+r9]
lea r8d,[r12*1+r8]
andn r12d,ebx,edx
xor r13d,esi
rorx r14d,ebx,6
lea r8d,[r12*1+r8]
xor r13d,r14d
mov esi,r9d
rorx r12d,r9d,22
lea r8d,[r13*1+r8]
xor esi,r10d
rorx r14d,r9d,13
rorx r13d,r9d,2
lea eax,[r8*1+rax]
and r15d,esi
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
xor r14d,r12d
xor r15d,r10d
xor r14d,r13d
lea r8d,[r15*1+r8]
mov r12d,ebx
add edx,DWORD[32+rsp]
and r12d,eax
rorx r13d,eax,25
rorx r15d,eax,11
lea r8d,[r14*1+r8]
lea edx,[r12*1+rdx]
andn r12d,eax,ecx
xor r13d,r15d
rorx r14d,eax,6
lea edx,[r12*1+rdx]
xor r13d,r14d
mov r15d,r8d
rorx r12d,r8d,22
lea edx,[r13*1+rdx]
xor r15d,r9d
rorx r14d,r8d,13
rorx r13d,r8d,2
lea r11d,[rdx*1+r11]
and esi,r15d
vpand xmm8,xmm11,xmm12
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
xor r14d,r12d
xor esi,r9d
xor r14d,r13d
lea edx,[rsi*1+rdx]
mov r12d,eax
add ecx,DWORD[36+rsp]
and r12d,r11d
rorx r13d,r11d,25
rorx esi,r11d,11
lea edx,[r14*1+rdx]
lea ecx,[r12*1+rcx]
andn r12d,r11d,ebx
xor r13d,esi
rorx r14d,r11d,6
lea ecx,[r12*1+rcx]
xor r13d,r14d
mov esi,edx
rorx r12d,edx,22
lea ecx,[r13*1+rcx]
xor esi,r8d
rorx r14d,edx,13
rorx r13d,edx,2
lea r10d,[rcx*1+r10]
and r15d,esi
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
xor r14d,r12d
xor r15d,r8d
xor r14d,r13d
lea ecx,[r15*1+rcx]
mov r12d,r11d
add ebx,DWORD[40+rsp]
and r12d,r10d
rorx r13d,r10d,25
rorx r15d,r10d,11
lea ecx,[r14*1+rcx]
lea ebx,[r12*1+rbx]
andn r12d,r10d,eax
xor r13d,r15d
rorx r14d,r10d,6
lea ebx,[r12*1+rbx]
xor r13d,r14d
mov r15d,ecx
rorx r12d,ecx,22
lea ebx,[r13*1+rbx]
xor r15d,edx
rorx r14d,ecx,13
rorx r13d,ecx,2
lea r9d,[rbx*1+r9]
and esi,r15d
vpand xmm11,xmm11,xmm13
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
xor r14d,r12d
xor esi,edx
xor r14d,r13d
lea ebx,[rsi*1+rbx]
mov r12d,r10d
add eax,DWORD[44+rsp]
and r12d,r9d
rorx r13d,r9d,25
rorx esi,r9d,11
lea ebx,[r14*1+rbx]
lea eax,[r12*1+rax]
andn r12d,r9d,r11d
xor r13d,esi
rorx r14d,r9d,6
lea eax,[r12*1+rax]
xor r13d,r14d
mov esi,ebx
rorx r12d,ebx,22
lea eax,[r13*1+rax]
xor esi,ecx
rorx r14d,ebx,13
rorx r13d,ebx,2
lea r8d,[rax*1+r8]
and r15d,esi
vpor xmm8,xmm8,xmm11
vaesenclast xmm11,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
xor r14d,r12d
xor r15d,ecx
xor r14d,r13d
lea eax,[r15*1+rax]
mov r12d,r9d
vpextrq r12,xmm15,1
vmovq r13,xmm15
mov r15,QWORD[552+rsp]
add eax,r14d
lea rbp,[448+rsp]

vpand xmm11,xmm11,xmm14
vpor xmm8,xmm8,xmm11
vmovdqu XMMWORD[r13*1+r12],xmm8
lea r13,[16+r13]

add eax,DWORD[r15]
add ebx,DWORD[4+r15]
add ecx,DWORD[8+r15]
add edx,DWORD[12+r15]
add r8d,DWORD[16+r15]
add r9d,DWORD[20+r15]
add r10d,DWORD[24+r15]
add r11d,DWORD[28+r15]

mov DWORD[r15],eax
mov DWORD[4+r15],ebx
mov DWORD[8+r15],ecx
mov DWORD[12+r15],edx
mov DWORD[16+r15],r8d
mov DWORD[20+r15],r9d
mov DWORD[24+r15],r10d
mov DWORD[28+r15],r11d

cmp r13,QWORD[80+rbp]
je NEAR $L$done_avx2

xor r14d,r14d
mov esi,ebx
mov r12d,r9d
xor esi,ecx
jmp NEAR $L$ower_avx2
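; $L$ower_avx2 replays all 64 rounds for the odd block of the pair, reading
; the pre-added schedule back through rbp, which steps down by 64 bytes per
; group until it meets rsp.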
ALIGN 16
$L$ower_avx2:
vmovdqu xmm9,XMMWORD[r13]
vpinsrq xmm15,xmm15,r13,0
add r11d,DWORD[((0+16))+rbp]
and r12d,r8d
rorx r13d,r8d,25
rorx r15d,r8d,11
lea eax,[r14*1+rax]
lea r11d,[r12*1+r11]
andn r12d,r8d,r10d
xor r13d,r15d
rorx r14d,r8d,6
lea r11d,[r12*1+r11]
xor r13d,r14d
mov r15d,eax
rorx r12d,eax,22
lea r11d,[r13*1+r11]
xor r15d,ebx
rorx r14d,eax,13
rorx r13d,eax,2
lea edx,[r11*1+rdx]
and esi,r15d
vpxor xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
xor r14d,r12d
xor esi,ebx
xor r14d,r13d
lea r11d,[rsi*1+r11]
mov r12d,r8d
add r10d,DWORD[((4+16))+rbp]
and r12d,edx
rorx r13d,edx,25
rorx esi,edx,11
lea r11d,[r14*1+r11]
lea r10d,[r12*1+r10]
andn r12d,edx,r9d
xor r13d,esi
rorx r14d,edx,6
lea r10d,[r12*1+r10]
xor r13d,r14d
mov esi,r11d
rorx r12d,r11d,22
lea r10d,[r13*1+r10]
xor esi,eax
rorx r14d,r11d,13
rorx r13d,r11d,2
lea ecx,[r10*1+rcx]
and r15d,esi
vpxor xmm9,xmm9,xmm8
xor r14d,r12d
xor r15d,eax
xor r14d,r13d
lea r10d,[r15*1+r10]
mov r12d,edx
add r9d,DWORD[((8+16))+rbp]
and r12d,ecx
rorx r13d,ecx,25
rorx r15d,ecx,11
lea r10d,[r14*1+r10]
lea r9d,[r12*1+r9]
andn r12d,ecx,r8d
xor r13d,r15d
rorx r14d,ecx,6
lea r9d,[r12*1+r9]
xor r13d,r14d
mov r15d,r10d
rorx r12d,r10d,22
lea r9d,[r13*1+r9]
xor r15d,r11d
rorx r14d,r10d,13
rorx r13d,r10d,2
lea ebx,[r9*1+rbx]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
xor r14d,r12d
xor esi,r11d
xor r14d,r13d
lea r9d,[rsi*1+r9]
mov r12d,ecx
add r8d,DWORD[((12+16))+rbp]
and r12d,ebx
rorx r13d,ebx,25
rorx esi,ebx,11
lea r9d,[r14*1+r9]
lea r8d,[r12*1+r8]
andn r12d,ebx,edx
xor r13d,esi
rorx r14d,ebx,6
lea r8d,[r12*1+r8]
xor r13d,r14d
mov esi,r9d
rorx r12d,r9d,22
lea r8d,[r13*1+r8]
xor esi,r10d
rorx r14d,r9d,13
rorx r13d,r9d,2
lea eax,[r8*1+rax]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
xor r14d,r12d
xor r15d,r10d
xor r14d,r13d
lea r8d,[r15*1+r8]
mov r12d,ebx
add edx,DWORD[((32+16))+rbp]
and r12d,eax
rorx r13d,eax,25
rorx r15d,eax,11
lea r8d,[r14*1+r8]
lea edx,[r12*1+rdx]
andn r12d,eax,ecx
xor r13d,r15d
rorx r14d,eax,6
lea edx,[r12*1+rdx]
xor r13d,r14d
mov r15d,r8d
rorx r12d,r8d,22
lea edx,[r13*1+rdx]
xor r15d,r9d
rorx r14d,r8d,13
rorx r13d,r8d,2
lea r11d,[rdx*1+r11]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
xor r14d,r12d
xor esi,r9d
xor r14d,r13d
lea edx,[rsi*1+rdx]
mov r12d,eax
add ecx,DWORD[((36+16))+rbp]
and r12d,r11d
rorx r13d,r11d,25
rorx esi,r11d,11
lea edx,[r14*1+rdx]
lea ecx,[r12*1+rcx]
andn r12d,r11d,ebx
xor r13d,esi
rorx r14d,r11d,6
lea ecx,[r12*1+rcx]
xor r13d,r14d
mov esi,edx
rorx r12d,edx,22
lea ecx,[r13*1+rcx]
xor esi,r8d
rorx r14d,edx,13
rorx r13d,edx,2
lea r10d,[rcx*1+r10]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
xor r14d,r12d
xor r15d,r8d
xor r14d,r13d
lea ecx,[r15*1+rcx]
mov r12d,r11d
add ebx,DWORD[((40+16))+rbp]
and r12d,r10d
rorx r13d,r10d,25
rorx r15d,r10d,11
lea ecx,[r14*1+rcx]
lea ebx,[r12*1+rbx]
andn r12d,r10d,eax
xor r13d,r15d
rorx r14d,r10d,6
lea ebx,[r12*1+rbx]
xor r13d,r14d
mov r15d,ecx
rorx r12d,ecx,22
lea ebx,[r13*1+rbx]
xor r15d,edx
rorx r14d,ecx,13
rorx r13d,ecx,2
lea r9d,[rbx*1+r9]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
xor r14d,r12d
xor esi,edx
xor r14d,r13d
lea ebx,[rsi*1+rbx]
mov r12d,r10d
add eax,DWORD[((44+16))+rbp]
and r12d,r9d
rorx r13d,r9d,25
rorx esi,r9d,11
lea ebx,[r14*1+rbx]
lea eax,[r12*1+rax]
andn r12d,r9d,r11d
xor r13d,esi
rorx r14d,r9d,6
lea eax,[r12*1+rax]
xor r13d,r14d
mov esi,ebx
rorx r12d,ebx,22
lea eax,[r13*1+rax]
xor esi,ecx
rorx r14d,ebx,13
rorx r13d,ebx,2
lea r8d,[rax*1+r8]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
xor r14d,r12d
xor r15d,ecx
xor r14d,r13d
lea eax,[r15*1+rax]
mov r12d,r9d
lea rbp,[((-64))+rbp]
add r11d,DWORD[((0+16))+rbp]
and r12d,r8d
rorx r13d,r8d,25
rorx r15d,r8d,11
lea eax,[r14*1+rax]
lea r11d,[r12*1+r11]
andn r12d,r8d,r10d
xor r13d,r15d
rorx r14d,r8d,6
lea r11d,[r12*1+r11]
xor r13d,r14d
mov r15d,eax
rorx r12d,eax,22
lea r11d,[r13*1+r11]
xor r15d,ebx
rorx r14d,eax,13
rorx r13d,eax,2
lea edx,[r11*1+rdx]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
xor r14d,r12d
xor esi,ebx
xor r14d,r13d
lea r11d,[rsi*1+r11]
mov r12d,r8d
add r10d,DWORD[((4+16))+rbp]
and r12d,edx
rorx r13d,edx,25
rorx esi,edx,11
lea r11d,[r14*1+r11]
lea r10d,[r12*1+r10]
andn r12d,edx,r9d
xor r13d,esi
rorx r14d,edx,6
lea r10d,[r12*1+r10]
xor r13d,r14d
mov esi,r11d
rorx r12d,r11d,22
lea r10d,[r13*1+r10]
xor esi,eax
rorx r14d,r11d,13
rorx r13d,r11d,2
lea ecx,[r10*1+rcx]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
xor r14d,r12d
xor r15d,eax
xor r14d,r13d
lea r10d,[r15*1+r10]
mov r12d,edx
add r9d,DWORD[((8+16))+rbp]
and r12d,ecx
rorx r13d,ecx,25
rorx r15d,ecx,11
lea r10d,[r14*1+r10]
lea r9d,[r12*1+r9]
andn r12d,ecx,r8d
xor r13d,r15d
rorx r14d,ecx,6
lea r9d,[r12*1+r9]
xor r13d,r14d
mov r15d,r10d
rorx r12d,r10d,22
lea r9d,[r13*1+r9]
xor r15d,r11d
rorx r14d,r10d,13
rorx r13d,r10d,2
lea ebx,[r9*1+rbx]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
xor r14d,r12d
xor esi,r11d
xor r14d,r13d
lea r9d,[rsi*1+r9]
mov r12d,ecx
add r8d,DWORD[((12+16))+rbp]
and r12d,ebx
rorx r13d,ebx,25
rorx esi,ebx,11
lea r9d,[r14*1+r9]
lea r8d,[r12*1+r8]
andn r12d,ebx,edx
xor r13d,esi
rorx r14d,ebx,6
lea r8d,[r12*1+r8]
xor r13d,r14d
mov esi,r9d
rorx r12d,r9d,22
lea r8d,[r13*1+r8]
xor esi,r10d
rorx r14d,r9d,13
rorx r13d,r9d,2
lea eax,[r8*1+rax]
and r15d,esi
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
xor r14d,r12d
xor r15d,r10d
xor r14d,r13d
lea r8d,[r15*1+r8]
mov r12d,ebx
add edx,DWORD[((32+16))+rbp]
and r12d,eax
rorx r13d,eax,25
rorx r15d,eax,11
lea r8d,[r14*1+r8]
lea edx,[r12*1+rdx]
andn r12d,eax,ecx
xor r13d,r15d
rorx r14d,eax,6
lea edx,[r12*1+rdx]
xor r13d,r14d
mov r15d,r8d
rorx r12d,r8d,22
lea edx,[r13*1+rdx]
xor r15d,r9d
rorx r14d,r8d,13
rorx r13d,r8d,2
lea r11d,[rdx*1+r11]
and esi,r15d
vpand xmm8,xmm11,xmm12
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
xor r14d,r12d
xor esi,r9d
xor r14d,r13d
lea edx,[rsi*1+rdx]
mov r12d,eax
add ecx,DWORD[((36+16))+rbp]
and r12d,r11d
rorx r13d,r11d,25
rorx esi,r11d,11
lea edx,[r14*1+rdx]
lea ecx,[r12*1+rcx]
andn r12d,r11d,ebx
xor r13d,esi
rorx r14d,r11d,6
lea ecx,[r12*1+rcx]
xor r13d,r14d
mov esi,edx
rorx r12d,edx,22
lea ecx,[r13*1+rcx]
xor esi,r8d
rorx r14d,edx,13
rorx r13d,edx,2
lea r10d,[rcx*1+r10]
and r15d,esi
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
xor r14d,r12d
xor r15d,r8d
xor r14d,r13d
lea ecx,[r15*1+rcx]
mov r12d,r11d
add ebx,DWORD[((40+16))+rbp]
and r12d,r10d
rorx r13d,r10d,25
rorx r15d,r10d,11
lea ecx,[r14*1+rcx]
lea ebx,[r12*1+rbx]
andn r12d,r10d,eax
xor r13d,r15d
rorx r14d,r10d,6
lea ebx,[r12*1+rbx]
xor r13d,r14d
mov r15d,ecx
rorx r12d,ecx,22
lea ebx,[r13*1+rbx]
xor r15d,edx
rorx r14d,ecx,13
rorx r13d,ecx,2
lea r9d,[rbx*1+r9]
and esi,r15d
vpand xmm11,xmm11,xmm13
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
xor r14d,r12d
xor esi,edx
xor r14d,r13d
lea ebx,[rsi*1+rbx]
mov r12d,r10d
add eax,DWORD[((44+16))+rbp]
and r12d,r9d
rorx r13d,r9d,25
rorx esi,r9d,11
lea ebx,[r14*1+rbx]
lea eax,[r12*1+rax]
andn r12d,r9d,r11d
xor r13d,esi
rorx r14d,r9d,6
lea eax,[r12*1+rax]
xor r13d,r14d
mov esi,ebx
rorx r12d,ebx,22
lea eax,[r13*1+rax]
xor esi,ecx
rorx r14d,ebx,13
rorx r13d,ebx,2
lea r8d,[rax*1+r8]
and r15d,esi
vpor xmm8,xmm8,xmm11
vaesenclast xmm11,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
xor r14d,r12d
xor r15d,ecx
xor r14d,r13d
lea eax,[r15*1+rax]
mov r12d,r9d
vmovq r13,xmm15
vpextrq r15,xmm15,1
vpand xmm11,xmm11,xmm14
vpor xmm8,xmm8,xmm11
lea rbp,[((-64))+rbp]
vmovdqu XMMWORD[r13*1+r15],xmm8
lea r13,[16+r13]
cmp rbp,rsp
jae NEAR $L$ower_avx2
mov r15,QWORD[552+rsp]
lea r13,[64+r13]
mov rsi,QWORD[560+rsp]
add eax,r14d
lea rsp,[448+rsp]

add eax,DWORD[r15]
add ebx,DWORD[4+r15]
add ecx,DWORD[8+r15]
add edx,DWORD[12+r15]
add r8d,DWORD[16+r15]
add r9d,DWORD[20+r15]
add r10d,DWORD[24+r15]
lea r12,[r13*1+rsi]
add r11d,DWORD[28+r15]

cmp r13,QWORD[((64+16))+rsp]

mov DWORD[r15],eax
cmove r12,rsp
mov DWORD[4+r15],ebx
mov DWORD[8+r15],ecx
mov DWORD[12+r15],edx
mov DWORD[16+r15],r8d
mov DWORD[20+r15],r9d
mov DWORD[24+r15],r10d
mov DWORD[28+r15],r11d

jbe NEAR $L$oop_avx2
lea rbp,[rsp]
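; Common exit: write the final CBC chaining value back through the saved IV
; pointer, scrub the vector state with vzeroall, then restore the Win64
; nonvolatile xmm6-xmm15 and the callee-saved GPRs from the frame.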
$L$done_avx2:
mov r8,QWORD[((64+32))+rbp]
mov rsi,QWORD[((64+56))+rbp]

vmovdqu XMMWORD[r8],xmm8
vzeroall
movaps xmm6,XMMWORD[128+rbp]
movaps xmm7,XMMWORD[144+rbp]
movaps xmm8,XMMWORD[160+rbp]
movaps xmm9,XMMWORD[176+rbp]
movaps xmm10,XMMWORD[192+rbp]
movaps xmm11,XMMWORD[208+rbp]
movaps xmm12,XMMWORD[224+rbp]
movaps xmm13,XMMWORD[240+rbp]
movaps xmm14,XMMWORD[256+rbp]
movaps xmm15,XMMWORD[272+rbp]
mov r15,QWORD[((-48))+rsi]

mov r14,QWORD[((-40))+rsi]

mov r13,QWORD[((-32))+rsi]

mov r12,QWORD[((-24))+rsi]

mov rbp,QWORD[((-16))+rsi]

mov rbx,QWORD[((-8))+rsi]

lea rsp,[rsi]

$L$epilogue_avx2:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret

$L$SEH_end_aesni_cbc_sha256_enc_avx2:
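; SHA-NI + AES-NI variant. The SHA extension instructions are emitted as raw
; byte sequences so older assemblers without the mnemonics still build this
; file:
;   DB 15,56,203,...          -> sha256rnds2
;   DB 69,15,56,204,...       -> sha256msg1
;   DB 69,15,56,205,...       -> sha256msg2
;   DB 102,...,15,58,15,...,n -> palignr (by n bytes)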
ALIGN 32
aesni_cbc_sha256_enc_shaext:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_aesni_cbc_sha256_enc_shaext:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]

mov r10,QWORD[56+rsp]
lea rsp,[((-168))+rsp]
movaps XMMWORD[(-8-160)+rax],xmm6
movaps XMMWORD[(-8-144)+rax],xmm7
movaps XMMWORD[(-8-128)+rax],xmm8
movaps XMMWORD[(-8-112)+rax],xmm9
movaps XMMWORD[(-8-96)+rax],xmm10
movaps XMMWORD[(-8-80)+rax],xmm11
movaps XMMWORD[(-8-64)+rax],xmm12
movaps XMMWORD[(-8-48)+rax],xmm13
movaps XMMWORD[(-8-32)+rax],xmm14
movaps XMMWORD[(-8-16)+rax],xmm15
$L$prologue_shaext:
lea rax,[((K256+128))]
movdqu xmm1,XMMWORD[r9]
movdqu xmm2,XMMWORD[16+r9]
movdqa xmm3,XMMWORD[((512-128))+rax]

mov r11d,DWORD[240+rcx]
sub rsi,rdi
movups xmm15,XMMWORD[rcx]
movups xmm6,XMMWORD[r8]
movups xmm4,XMMWORD[16+rcx]
lea rcx,[112+rcx]

pshufd xmm0,xmm1,0x1b
pshufd xmm1,xmm1,0xb1
pshufd xmm2,xmm2,0x1b
movdqa xmm7,xmm3
DB 102,15,58,15,202,8
punpcklqdq xmm2,xmm0

jmp NEAR $L$oop_shaext
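; $L$oop_shaext consumes one 64-byte block per iteration. xmm1/xmm2 keep the
; SHA-256 state in the ABEF/CDGH order sha256rnds2 expects (set up by the
; pshufd/palignr sequence above); each K256 constant pair drives two rounds
; (pshufd 0x0e swaps in the upper pair), while aesenc steps the CBC lane in
; the gaps.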
ALIGN 16
$L$oop_shaext:
movdqu xmm10,XMMWORD[r10]
movdqu xmm11,XMMWORD[16+r10]
movdqu xmm12,XMMWORD[32+r10]
DB 102,68,15,56,0,211
movdqu xmm13,XMMWORD[48+r10]

movdqa xmm0,XMMWORD[((0-128))+rax]
paddd xmm0,xmm10
DB 102,68,15,56,0,219
movdqa xmm9,xmm2
movdqa xmm8,xmm1
movups xmm14,XMMWORD[rdi]
xorps xmm14,xmm15
xorps xmm6,xmm14
movups xmm5,XMMWORD[((-80))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movups xmm4,XMMWORD[((-64))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,202

movdqa xmm0,XMMWORD[((32-128))+rax]
paddd xmm0,xmm11
DB 102,68,15,56,0,227
lea r10,[64+r10]
movups xmm5,XMMWORD[((-48))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movups xmm4,XMMWORD[((-32))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,202

movdqa xmm0,XMMWORD[((64-128))+rax]
paddd xmm0,xmm12
DB 102,68,15,56,0,235
DB 69,15,56,204,211
movups xmm5,XMMWORD[((-16))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm13
DB 102,65,15,58,15,220,4
paddd xmm10,xmm3
movups xmm4,XMMWORD[rcx]
aesenc xmm6,xmm5
DB 15,56,203,202

movdqa xmm0,XMMWORD[((96-128))+rax]
paddd xmm0,xmm13
DB 69,15,56,205,213
DB 69,15,56,204,220
movups xmm5,XMMWORD[16+rcx]
aesenc xmm6,xmm4
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movups xmm4,XMMWORD[32+rcx]
aesenc xmm6,xmm5
movdqa xmm3,xmm10
DB 102,65,15,58,15,221,4
paddd xmm11,xmm3
DB 15,56,203,202
movdqa xmm0,XMMWORD[((128-128))+rax]
paddd xmm0,xmm10
DB 69,15,56,205,218
DB 69,15,56,204,229
movups xmm5,XMMWORD[48+rcx]
aesenc xmm6,xmm4
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm11
DB 102,65,15,58,15,218,4
paddd xmm12,xmm3
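; Key-length dispatch, repeated at each block boundary: r11d holds the AES
; round count from offset 240 of the key schedule; jb takes the short
; AES-128 exit straight to aesenclast, otherwise the extra aesenc rounds
; for the longer key schedules run first.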
cmp r11d,11
jb NEAR $L$aesenclast1
movups xmm4,XMMWORD[64+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[80+rcx]
aesenc xmm6,xmm4
je NEAR $L$aesenclast1
movups xmm4,XMMWORD[96+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[112+rcx]
aesenc xmm6,xmm4
$L$aesenclast1:
aesenclast xmm6,xmm5
movups xmm4,XMMWORD[((16-112))+rcx]
nop
DB 15,56,203,202
movups xmm14,XMMWORD[16+rdi]
xorps xmm14,xmm15
movups XMMWORD[rdi*1+rsi],xmm6
xorps xmm6,xmm14
movups xmm5,XMMWORD[((-80))+rcx]
aesenc xmm6,xmm4
movdqa xmm0,XMMWORD[((160-128))+rax]
paddd xmm0,xmm11
DB 69,15,56,205,227
DB 69,15,56,204,234
movups xmm4,XMMWORD[((-64))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm12
DB 102,65,15,58,15,219,4
paddd xmm13,xmm3
movups xmm5,XMMWORD[((-48))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202
movdqa xmm0,XMMWORD[((192-128))+rax]
paddd xmm0,xmm12
DB 69,15,56,205,236
DB 69,15,56,204,211
movups xmm4,XMMWORD[((-32))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm13
DB 102,65,15,58,15,220,4
paddd xmm10,xmm3
movups xmm5,XMMWORD[((-16))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202
movdqa xmm0,XMMWORD[((224-128))+rax]
paddd xmm0,xmm13
DB 69,15,56,205,213
DB 69,15,56,204,220
movups xmm4,XMMWORD[rcx]
aesenc xmm6,xmm5
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm10
DB 102,65,15,58,15,221,4
paddd xmm11,xmm3
movups xmm5,XMMWORD[16+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202
movdqa xmm0,XMMWORD[((256-128))+rax]
paddd xmm0,xmm10
DB 69,15,56,205,218
DB 69,15,56,204,229
movups xmm4,XMMWORD[32+rcx]
aesenc xmm6,xmm5
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm11
DB 102,65,15,58,15,218,4
paddd xmm12,xmm3
movups xmm5,XMMWORD[48+rcx]
aesenc xmm6,xmm4
cmp r11d,11
jb NEAR $L$aesenclast2
movups xmm4,XMMWORD[64+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[80+rcx]
aesenc xmm6,xmm4
je NEAR $L$aesenclast2
movups xmm4,XMMWORD[96+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[112+rcx]
aesenc xmm6,xmm4
$L$aesenclast2:
aesenclast xmm6,xmm5
movups xmm4,XMMWORD[((16-112))+rcx]
nop
DB 15,56,203,202
movups xmm14,XMMWORD[32+rdi]
xorps xmm14,xmm15
movups XMMWORD[16+rdi*1+rsi],xmm6
xorps xmm6,xmm14
movups xmm5,XMMWORD[((-80))+rcx]
aesenc xmm6,xmm4
movdqa xmm0,XMMWORD[((288-128))+rax]
paddd xmm0,xmm11
DB 69,15,56,205,227
DB 69,15,56,204,234
movups xmm4,XMMWORD[((-64))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm12
DB 102,65,15,58,15,219,4
paddd xmm13,xmm3
movups xmm5,XMMWORD[((-48))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202
movdqa xmm0,XMMWORD[((320-128))+rax]
paddd xmm0,xmm12
DB 69,15,56,205,236
DB 69,15,56,204,211
movups xmm4,XMMWORD[((-32))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm13
DB 102,65,15,58,15,220,4
paddd xmm10,xmm3
movups xmm5,XMMWORD[((-16))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202
movdqa xmm0,XMMWORD[((352-128))+rax]
paddd xmm0,xmm13
DB 69,15,56,205,213
DB 69,15,56,204,220
movups xmm4,XMMWORD[rcx]
aesenc xmm6,xmm5
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm10
DB 102,65,15,58,15,221,4
paddd xmm11,xmm3
movups xmm5,XMMWORD[16+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202
movdqa xmm0,XMMWORD[((384-128))+rax]
paddd xmm0,xmm10
DB 69,15,56,205,218
DB 69,15,56,204,229
movups xmm4,XMMWORD[32+rcx]
aesenc xmm6,xmm5
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm11
DB 102,65,15,58,15,218,4
paddd xmm12,xmm3
movups xmm5,XMMWORD[48+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202
movdqa xmm0,XMMWORD[((416-128))+rax]
paddd xmm0,xmm11
DB 69,15,56,205,227
DB 69,15,56,204,234
cmp r11d,11
jb NEAR $L$aesenclast3
movups xmm4,XMMWORD[64+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[80+rcx]
aesenc xmm6,xmm4
je NEAR $L$aesenclast3
movups xmm4,XMMWORD[96+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[112+rcx]
aesenc xmm6,xmm4
$L$aesenclast3:
aesenclast xmm6,xmm5
movups xmm4,XMMWORD[((16-112))+rcx]
nop
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm12
DB 102,65,15,58,15,219,4
paddd xmm13,xmm3
movups xmm14,XMMWORD[48+rdi]
xorps xmm14,xmm15
movups XMMWORD[32+rdi*1+rsi],xmm6
xorps xmm6,xmm14
movups xmm5,XMMWORD[((-80))+rcx]
aesenc xmm6,xmm4
movups xmm4,XMMWORD[((-64))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,202

movdqa xmm0,XMMWORD[((448-128))+rax]
paddd xmm0,xmm12
DB 69,15,56,205,236
movdqa xmm3,xmm7
movups xmm5,XMMWORD[((-48))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movups xmm4,XMMWORD[((-32))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,202

movdqa xmm0,XMMWORD[((480-128))+rax]
paddd xmm0,xmm13
movups xmm5,XMMWORD[((-16))+rcx]
aesenc xmm6,xmm4
movups xmm4,XMMWORD[rcx]
aesenc xmm6,xmm5
DB 15,56,203,209
pshufd xmm0,xmm0,0x0e
movups xmm5,XMMWORD[16+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202

movups xmm4,XMMWORD[32+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[48+rcx]
aesenc xmm6,xmm4
cmp r11d,11
jb NEAR $L$aesenclast4
movups xmm4,XMMWORD[64+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[80+rcx]
aesenc xmm6,xmm4
je NEAR $L$aesenclast4
movups xmm4,XMMWORD[96+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[112+rcx]
aesenc xmm6,xmm4
$L$aesenclast4:
aesenclast xmm6,xmm5
movups xmm4,XMMWORD[((16-112))+rcx]
nop
paddd xmm2,xmm9
paddd xmm1,xmm8

dec rdx
movups XMMWORD[48+rdi*1+rsi],xmm6
lea rdi,[64+rdi]
jnz NEAR $L$oop_shaext

pshufd xmm2,xmm2,0xb1
pshufd xmm3,xmm1,0x1b
pshufd xmm1,xmm1,0xb1
punpckhqdq xmm1,xmm2
DB 102,15,58,15,211,8

movups XMMWORD[r8],xmm6
movdqu XMMWORD[r9],xmm1
movdqu XMMWORD[16+r9],xmm2
movaps xmm6,XMMWORD[rsp]
movaps xmm7,XMMWORD[16+rsp]
movaps xmm8,XMMWORD[32+rsp]
movaps xmm9,XMMWORD[48+rsp]
movaps xmm10,XMMWORD[64+rsp]
movaps xmm11,XMMWORD[80+rsp]
movaps xmm12,XMMWORD[96+rsp]
movaps xmm13,XMMWORD[112+rsp]
movaps xmm14,XMMWORD[128+rsp]
movaps xmm15,XMMWORD[144+rsp]
lea rsp,[((8+160))+rsp]
$L$epilogue_shaext:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret

$L$SEH_end_aesni_cbc_sha256_enc_shaext:
EXTERN __imp_RtlVirtualUnwind
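; se_handler: Win64 SEH unwind callback shared by all entry points. It
; locates the frame (with special cases for the shaext and avx2 layouts),
; stores the recovered callee-saved GPRs into the CONTEXT record, and copies
; the saved xmm6-xmm15 area back; DD 0xa548f3fc encodes "cld; rep movsq".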
ALIGN 16
se_handler:
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64

mov rax,QWORD[120+r8]
mov rbx,QWORD[248+r8]

mov rsi,QWORD[8+r9]
mov r11,QWORD[56+r9]

mov r10d,DWORD[r11]
lea r10,[r10*1+rsi]
cmp rbx,r10
jb NEAR $L$in_prologue

mov rax,QWORD[152+r8]

mov r10d,DWORD[4+r11]
lea r10,[r10*1+rsi]
cmp rbx,r10
jae NEAR $L$in_prologue
lea r10,[aesni_cbc_sha256_enc_shaext]
cmp rbx,r10
jb NEAR $L$not_in_shaext

lea rsi,[rax]
lea rdi,[512+r8]
mov ecx,20
DD 0xa548f3fc
lea rax,[168+rax]
jmp NEAR $L$in_prologue
$L$not_in_shaext:
lea r10,[$L$avx2_shortcut]
cmp rbx,r10
jb NEAR $L$not_in_avx2

and rax,-256*4
add rax,448
$L$not_in_avx2:
mov rsi,rax
mov rax,QWORD[((64+56))+rax]

mov rbx,QWORD[((-8))+rax]
mov rbp,QWORD[((-16))+rax]
mov r12,QWORD[((-24))+rax]
mov r13,QWORD[((-32))+rax]
mov r14,QWORD[((-40))+rax]
mov r15,QWORD[((-48))+rax]
mov QWORD[144+r8],rbx
mov QWORD[160+r8],rbp
mov QWORD[216+r8],r12
mov QWORD[224+r8],r13
mov QWORD[232+r8],r14
mov QWORD[240+r8],r15

lea rsi,[((64+64))+rsi]
lea rdi,[512+r8]
mov ecx,20
DD 0xa548f3fc

$L$in_prologue:
mov rdi,QWORD[8+rax]
mov rsi,QWORD[16+rax]
mov QWORD[152+r8],rax
mov QWORD[168+r8],rsi
mov QWORD[176+r8],rdi

mov rdi,QWORD[40+r9]
mov rsi,r8
mov ecx,154
DD 0xa548f3fc

mov rsi,r9
xor rcx,rcx
mov rdx,QWORD[8+rsi]
mov r8,QWORD[rsi]
mov r9,QWORD[16+rsi]
mov r10,QWORD[40+rsi]
lea r11,[56+rsi]
lea r12,[24+rsi]
mov QWORD[32+rsp],r10
mov QWORD[40+rsp],r11
mov QWORD[48+rsp],r12
mov QWORD[56+rsp],rcx
call QWORD[__imp_RtlVirtualUnwind]

mov eax,1
add rsp,64
popfq
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
DB 0F3h,0C3h ;repret
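; .pdata registers the SEH_begin/SEH_end range of each entry point; .xdata
; supplies the matching UNWIND_INFO (version 1, UNW_FLAG_EHANDLER) naming
; se_handler and the prologue/epilogue labels it keys off.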
section .pdata rdata align=4
DD $L$SEH_begin_aesni_cbc_sha256_enc_xop wrt ..imagebase
DD $L$SEH_end_aesni_cbc_sha256_enc_xop wrt ..imagebase
DD $L$SEH_info_aesni_cbc_sha256_enc_xop wrt ..imagebase

DD $L$SEH_begin_aesni_cbc_sha256_enc_avx wrt ..imagebase
DD $L$SEH_end_aesni_cbc_sha256_enc_avx wrt ..imagebase
DD $L$SEH_info_aesni_cbc_sha256_enc_avx wrt ..imagebase
DD $L$SEH_begin_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
DD $L$SEH_end_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
DD $L$SEH_info_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
DD $L$SEH_begin_aesni_cbc_sha256_enc_shaext wrt ..imagebase
DD $L$SEH_end_aesni_cbc_sha256_enc_shaext wrt ..imagebase
DD $L$SEH_info_aesni_cbc_sha256_enc_shaext wrt ..imagebase
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_aesni_cbc_sha256_enc_xop:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase

$L$SEH_info_aesni_cbc_sha256_enc_avx:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
$L$SEH_info_aesni_cbc_sha256_enc_avx2:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase
$L$SEH_info_aesni_cbc_sha256_enc_shaext:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$prologue_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase