mirror of https://github.com/acidanthera/audk.git
553 lines
14 KiB
ArmAsm
553 lines
14 KiB
ArmAsm
# WARNING: do not edit!
|
|
# Generated from openssl/crypto/aes/asm/aesni-mb-x86_64.pl
|
|
#
|
|
# Copyright 2013-2020 The OpenSSL Project Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the OpenSSL license (the "License"). You may not use
|
|
# this file except in compliance with the License. You can obtain a copy
|
|
# in the file LICENSE in the source distribution or at
|
|
# https://www.openssl.org/source/license.html
|
|
|
|
.text
|
|
|
|
|
|
|
|
#
# aesni_multi_cbc_encrypt -- AES-CBC encryption of up to four independent
# streams at a time using AES-NI, one stream per xmm "lane".
#
# Syntax: GNU as, AT&T operand order (source first, destination last).
#
# Registers on entry, as consumed by the code below:
#   %rdi  array of 40-byte stream descriptors, taken four at a time:
#           bytes  0-7    input pointer
#           bytes  8-15   output pointer
#           bytes 16-19   signed count of 16-byte blocks (<= 0: idle lane)
#           bytes 24-39   IV
#   %rsi  key schedule: 16-byte round keys starting at offset 0 and a
#         dword at offset 240 that selects the number of rounds
#   %edx  number of four-descriptor groups.  It is only tested after a
#         group has been handled, so it has to be at least 1.
# NOTE(review): presumably this is OpenSSL's (CIPH_DESC *, void *key,
# int num) prototype -- confirm against the C caller.
#
# Register roles inside the routine:
#   %r8-%r11    input pointers, lanes 0-3
#   %r12-%r15   output pointers, lanes 0-3
#   %rbx        byte offset into every stream (16 per inner iteration)
#   %rbp        "sink" pointer, i.e. %rsp+16 minus %rbx
#   %edx        inner iterations left (largest block count of the group)
#   %eax        round selector loaded from offset 240 of the key schedule
#   %xmm0/%xmm1 alternating round keys
#   %xmm2-%xmm5 cipher state, lanes 0-3
#   %xmm6-%xmm9 next input block, lanes 0-3
#   %xmm10      the four remaining-block counters, %xmm11 scratch mask
#   %xmm12      round key 0
#
# Frame (64-byte aligned %rsp, 48 bytes):
#   %rsp+0    16 bytes that absorb stores of lanes that have finished
#   %rsp+16   caller's %rsp; finished lanes also read 16 dummy bytes here
#   %rsp+24   saved group count
#   %rsp+32   four dword remaining-block counters
#
# %rbx, %rbp and %r12-%r15 are saved and restored; %xmm0-%xmm12 are
# overwritten.  Nothing is written back to the descriptors.
#
# Each ".byte 102,15,56,220,M" below is an aesenc and ".byte
# 102,15,56,221,M" an aesenclast, spelled as raw bytes; the decoded
# operands are given in the comment next to each one.
#
# Change relative to the generated text: the fourth input prefetch in
# .Loop_enc4x used %r10 a second time, leaving lane 3 (%r11) without a
# prefetch; it now uses %r11 like aesni_multi_cbc_decrypt does.  This
# file is marked as generated, so the generator needs the same change
# or it will be lost on regeneration.
#
.globl	aesni_multi_cbc_encrypt
.type	aesni_multi_cbc_encrypt,@function
.align	32
aesni_multi_cbc_encrypt:
.cfi_startproc
	movq	%rsp,%rax			# keep caller's %rsp for the epilogue
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$48,%rsp
	andq	$-64,%rsp			# 64-byte aligned frame
	movq	%rax,16(%rsp)			# remember caller's %rsp
# CFA expression: breg7(%rsp)+16, deref, plus_uconst 8
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Lenc4x_body:
	movdqu	(%rsi),%xmm12			# round key 0
	leaq	120(%rsi),%rsi			# bias key pointer by 120
	leaq	80(%rdi),%rdi			# bias descriptor pointer by 80

# Outer loop: one pass per group of four descriptors.
.Lenc4x_loop_grande:
	movl	%edx,24(%rsp)			# park the group count
	xorl	%edx,%edx			# %edx = running max of block counts

	# lane 0 (descriptor at biased offset -80)
	movl	-64(%rdi),%ecx			# block count
	movq	-80(%rdi),%r8			# input pointer
	cmpl	%edx,%ecx
	movq	-72(%rdi),%r12			# output pointer
	cmovgl	%ecx,%edx			# max = count if count > max
	testl	%ecx,%ecx
	movdqu	-56(%rdi),%xmm2			# IV
	movl	%ecx,32(%rsp)			# counter slot 0
	cmovleq	%rsp,%r8			# idle lane: read from the frame

	# lane 1 (descriptor at biased offset -40)
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-32(%rdi),%r13
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-16(%rdi),%xmm3
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9

	# lane 2 (descriptor at biased offset 0)
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r10
	cmpl	%edx,%ecx
	movq	8(%rdi),%r14
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	24(%rdi),%xmm4
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10

	# lane 3 (descriptor at biased offset 40)
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	48(%rdi),%r15
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	64(%rdi),%xmm5
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11

	testl	%edx,%edx
	jz	.Lenc4x_done			# no lane has work: leave

	# state = IV ^ round key 0 ^ first input block, per lane
	movups	16-120(%rsi),%xmm1		# round key at offset 16
	pxor	%xmm12,%xmm2
	movups	32-120(%rsi),%xmm0		# round key at offset 32
	pxor	%xmm12,%xmm3
	movl	240-120(%rsi),%eax		# round selector
	pxor	%xmm12,%xmm4
	movdqu	(%r8),%xmm6
	pxor	%xmm12,%xmm5
	movdqu	(%r9),%xmm7
	pxor	%xmm6,%xmm2
	movdqu	(%r10),%xmm8
	pxor	%xmm7,%xmm3
	movdqu	(%r11),%xmm9
	pxor	%xmm8,%xmm4
	pxor	%xmm9,%xmm5
	movdqa	32(%rsp),%xmm10			# four remaining-block counters
	xorq	%rbx,%rbx			# stream offset = 0
	jmp	.Loop_enc4x

# Inner loop: one 16-byte block per lane per iteration.
.align	32
.Loop_enc4x:
	addq	$16,%rbx
	leaq	16(%rsp),%rbp
	movl	$1,%ecx				# constant compared with the counters
	subq	%rbx,%rbp			# sink: (%rbp,%rbx) is %rsp+16

.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
	prefetcht0	31(%r8,%rbx,1)		# prefetch input, lanes 0-3
	prefetcht0	31(%r9,%rbx,1)
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
	prefetcht0	31(%r10,%rbx,1)
	prefetcht0	31(%r11,%rbx,1)		# was a second %r10 prefetch
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
	movups	48-120(%rsi),%xmm1		# round key at offset 48

	# Lane 0: with 1 >= counter the input pointer moves to the sink,
	# with 1 > counter the output pointer does as well.
	cmpl	32(%rsp),%ecx
.byte	102,15,56,220,208			# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			# aesenc %xmm0,%xmm4
	cmovgeq	%rbp,%r8
	cmovgq	%rbp,%r12
.byte	102,15,56,220,232			# aesenc %xmm0,%xmm5
	movups	-56(%rsi),%xmm0			# round key at offset 64

	cmpl	36(%rsp),%ecx			# lane 1, same test
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
	cmovgeq	%rbp,%r9
	cmovgq	%rbp,%r13
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
	movups	-40(%rsi),%xmm1			# round key at offset 80

	cmpl	40(%rsp),%ecx			# lane 2, same test
.byte	102,15,56,220,208			# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			# aesenc %xmm0,%xmm4
	cmovgeq	%rbp,%r10
	cmovgq	%rbp,%r14
.byte	102,15,56,220,232			# aesenc %xmm0,%xmm5
	movups	-24(%rsi),%xmm0			# round key at offset 96

	cmpl	44(%rsp),%ecx			# lane 3, same test
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
	cmovgeq	%rbp,%r11
	cmovgq	%rbp,%r15
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
	movups	-8(%rsi),%xmm1			# round key at offset 112

	movdqa	%xmm10,%xmm11
.byte	102,15,56,220,208			# aesenc %xmm0,%xmm2
	prefetcht0	15(%r12,%rbx,1)		# prefetch output, lanes 0-3
	prefetcht0	15(%r13,%rbx,1)
.byte	102,15,56,220,216			# aesenc %xmm0,%xmm3
	prefetcht0	15(%r14,%rbx,1)
	prefetcht0	15(%r15,%rbx,1)
.byte	102,15,56,220,224			# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232			# aesenc %xmm0,%xmm5
	movups	128-120(%rsi),%xmm0		# round key at offset 128
	pxor	%xmm12,%xmm12			# zero, borrowed for the compare

	# counters: subtract one from every lane that is still positive
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
	pcmpgtd	%xmm12,%xmm11			# -1 where counter > 0, else 0
	movdqu	-120(%rsi),%xmm12		# put round key 0 back
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
	paddd	%xmm11,%xmm10
	movdqa	%xmm10,32(%rsp)
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
	movups	144-120(%rsi),%xmm1		# round key at offset 144

	# Flags from this compare feed both the jb and the je further
	# down; nothing in between modifies them.
	cmpl	$11,%eax

.byte	102,15,56,220,208			# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232			# aesenc %xmm0,%xmm5
	movups	160-120(%rsi),%xmm0		# round key at offset 160

	jb	.Lenc4x_tail			# selector < 11: 10 rounds in total

.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
	movups	176-120(%rsi),%xmm1		# round key at offset 176

.byte	102,15,56,220,208			# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232			# aesenc %xmm0,%xmm5
	movups	192-120(%rsi),%xmm0		# round key at offset 192

	je	.Lenc4x_tail			# selector == 11: 12 rounds in total

.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
	movups	208-120(%rsi),%xmm1		# round key at offset 208

.byte	102,15,56,220,208			# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232			# aesenc %xmm0,%xmm5
	movups	224-120(%rsi),%xmm0		# round key at offset 224
	jmp	.Lenc4x_tail			# selector > 11: 14 rounds in total

# Last two rounds, output store, and set-up of the next block.
.align	32
.Lenc4x_tail:
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
	movdqu	(%r8,%rbx,1),%xmm6		# next input block, lane 0
	movdqu	16-120(%rsi),%xmm1		# reload round key at offset 16

.byte	102,15,56,221,208			# aesenclast %xmm0,%xmm2
	movdqu	(%r9,%rbx,1),%xmm7
	pxor	%xmm12,%xmm6			# next input ^ round key 0
.byte	102,15,56,221,216			# aesenclast %xmm0,%xmm3
	movdqu	(%r10,%rbx,1),%xmm8
	pxor	%xmm12,%xmm7
.byte	102,15,56,221,224			# aesenclast %xmm0,%xmm4
	movdqu	(%r11,%rbx,1),%xmm9
	pxor	%xmm12,%xmm8
.byte	102,15,56,221,232			# aesenclast %xmm0,%xmm5
	movdqu	32-120(%rsi),%xmm0		# reload round key at offset 32
	pxor	%xmm12,%xmm9

	# store this block's result, then chain it into the next state
	movups	%xmm2,-16(%r12,%rbx,1)
	pxor	%xmm6,%xmm2
	movups	%xmm3,-16(%r13,%rbx,1)
	pxor	%xmm7,%xmm3
	movups	%xmm4,-16(%r14,%rbx,1)
	pxor	%xmm8,%xmm4
	movups	%xmm5,-16(%r15,%rbx,1)
	pxor	%xmm9,%xmm5

	decl	%edx
	jnz	.Loop_enc4x

	movq	16(%rsp),%rax			# %eax held the round selector
.cfi_def_cfa	%rax,8
	movl	24(%rsp),%edx			# group count

	leaq	160(%rdi),%rdi			# next four descriptors
	decl	%edx
	jnz	.Lenc4x_loop_grande

.Lenc4x_done:
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lenc4x_epilogue:
.byte	0xf3,0xc3				# rep ret
.cfi_endproc
.size	aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt
|
|
|
|
#
# aesni_multi_cbc_decrypt -- AES-CBC decryption of up to four independent
# streams at a time using AES-NI, one stream per xmm "lane".
#
# Syntax: GNU as, AT&T operand order (source first, destination last).
#
# Registers on entry, as consumed by the code below:
#   %rdi  array of 40-byte stream descriptors, taken four at a time:
#           bytes  0-7    input pointer
#           bytes  8-15   output pointer
#           bytes 16-19   signed count of 16-byte blocks (<= 0: idle lane)
#           bytes 24-39   IV
#   %rsi  key schedule: 16-byte round keys starting at offset 0, applied
#         in ascending offset order, and a dword at offset 240 that
#         selects the number of rounds
#   %edx  number of four-descriptor groups.  It is only tested after a
#         group has been handled, so it has to be at least 1.
# NOTE(review): presumably this is OpenSSL's (CIPH_DESC *, void *key,
# int num) prototype -- confirm against the C caller.
#
# Register roles inside the routine:
#   %r8-%r11    input pointers, lanes 0-3
#   %r12-%r15   output pointers, lanes 0-3
#   %rbx        byte offset into every stream (16 per inner iteration)
#   %rbp        "sink" pointer, i.e. %rsp+16 minus %rbx
#   %edx        inner iterations left (largest block count of the group)
#   %eax        round selector loaded from offset 240 of the key schedule
#   %xmm0/%xmm1 alternating round keys
#   %xmm2-%xmm5 cipher state, lanes 0-3
#   %xmm6-%xmm9 chaining value (IV first), lanes 0-3
#   %xmm10      the four remaining-block counters, %xmm11 scratch mask
#   %xmm12      round key 0
#
# Frame (64-byte aligned %rsp, 48 bytes):
#   %rsp+0    16 bytes that absorb stores of lanes that have finished
#   %rsp+16   caller's %rsp; finished lanes also read 16 dummy bytes here
#   %rsp+24   saved group count
#   %rsp+32   four dword remaining-block counters
#
# %rbx, %rbp and %r12-%r15 are saved and restored; %xmm0-%xmm12 are
# overwritten.  Nothing is written back to the descriptors.
#
# Each ".byte 102,15,56,222,M" below is an aesdec and each ".byte
# 102,[65,]15,56,223,M" an aesdeclast, spelled as raw bytes; the decoded
# operands are given in the comment next to each one.
#
.globl	aesni_multi_cbc_decrypt
.type	aesni_multi_cbc_decrypt,@function
.align	32
aesni_multi_cbc_decrypt:
.cfi_startproc
	movq	%rsp,%rax			# keep caller's %rsp for the epilogue
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$48,%rsp
	andq	$-64,%rsp			# 64-byte aligned frame
	movq	%rax,16(%rsp)			# remember caller's %rsp
# CFA expression: breg7(%rsp)+16, deref, plus_uconst 8
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Ldec4x_body:
	movdqu	(%rsi),%xmm12			# round key 0
	leaq	120(%rsi),%rsi			# bias key pointer by 120
	leaq	80(%rdi),%rdi			# bias descriptor pointer by 80

# Outer loop: one pass per group of four descriptors.
.Ldec4x_loop_grande:
	movl	%edx,24(%rsp)			# park the group count
	xorl	%edx,%edx			# %edx = running max of block counts

	# lane 0 (descriptor at biased offset -80)
	movl	-64(%rdi),%ecx			# block count
	movq	-80(%rdi),%r8			# input pointer
	cmpl	%edx,%ecx
	movq	-72(%rdi),%r12			# output pointer
	cmovgl	%ecx,%edx			# max = count if count > max
	testl	%ecx,%ecx
	movdqu	-56(%rdi),%xmm6			# IV
	movl	%ecx,32(%rsp)			# counter slot 0
	cmovleq	%rsp,%r8			# idle lane: read from the frame

	# lane 1 (descriptor at biased offset -40)
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-32(%rdi),%r13
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-16(%rdi),%xmm7
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9

	# lane 2 (descriptor at biased offset 0)
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r10
	cmpl	%edx,%ecx
	movq	8(%rdi),%r14
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	24(%rdi),%xmm8
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10

	# lane 3 (descriptor at biased offset 40)
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	48(%rdi),%r15
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	64(%rdi),%xmm9
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11

	testl	%edx,%edx
	jz	.Ldec4x_done			# no lane has work: leave

	# state = first input block ^ round key 0, per lane
	movups	16-120(%rsi),%xmm1		# round key at offset 16
	movups	32-120(%rsi),%xmm0		# round key at offset 32
	movl	240-120(%rsi),%eax		# round selector
	movdqu	(%r8),%xmm2
	movdqu	(%r9),%xmm3
	pxor	%xmm12,%xmm2
	movdqu	(%r10),%xmm4
	pxor	%xmm12,%xmm3
	movdqu	(%r11),%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm12,%xmm5
	movdqa	32(%rsp),%xmm10			# four remaining-block counters
	xorq	%rbx,%rbx			# stream offset = 0
	jmp	.Loop_dec4x

# Inner loop: one 16-byte block per lane per iteration.
.align	32
.Loop_dec4x:
	addq	$16,%rbx
	leaq	16(%rsp),%rbp
	movl	$1,%ecx				# constant compared with the counters
	subq	%rbx,%rbp			# sink: (%rbp,%rbx) is %rsp+16

.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
	prefetcht0	31(%r8,%rbx,1)		# prefetch input, lanes 0-3
	prefetcht0	31(%r9,%rbx,1)
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
	prefetcht0	31(%r10,%rbx,1)
	prefetcht0	31(%r11,%rbx,1)
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
	movups	48-120(%rsi),%xmm1		# round key at offset 48

	# Lane 0: with 1 >= counter the input pointer moves to the sink,
	# with 1 > counter the output pointer does as well.
	cmpl	32(%rsp),%ecx
.byte	102,15,56,222,208			# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			# aesdec %xmm0,%xmm4
	cmovgeq	%rbp,%r8
	cmovgq	%rbp,%r12
.byte	102,15,56,222,232			# aesdec %xmm0,%xmm5
	movups	-56(%rsi),%xmm0			# round key at offset 64

	cmpl	36(%rsp),%ecx			# lane 1, same test
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
	cmovgeq	%rbp,%r9
	cmovgq	%rbp,%r13
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
	movups	-40(%rsi),%xmm1			# round key at offset 80

	cmpl	40(%rsp),%ecx			# lane 2, same test
.byte	102,15,56,222,208			# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			# aesdec %xmm0,%xmm4
	cmovgeq	%rbp,%r10
	cmovgq	%rbp,%r14
.byte	102,15,56,222,232			# aesdec %xmm0,%xmm5
	movups	-24(%rsi),%xmm0			# round key at offset 96

	cmpl	44(%rsp),%ecx			# lane 3, same test
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
	cmovgeq	%rbp,%r11
	cmovgq	%rbp,%r15
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
	movups	-8(%rsi),%xmm1			# round key at offset 112

	movdqa	%xmm10,%xmm11
.byte	102,15,56,222,208			# aesdec %xmm0,%xmm2
	prefetcht0	15(%r12,%rbx,1)		# prefetch output, lanes 0-3
	prefetcht0	15(%r13,%rbx,1)
.byte	102,15,56,222,216			# aesdec %xmm0,%xmm3
	prefetcht0	15(%r14,%rbx,1)
	prefetcht0	15(%r15,%rbx,1)
.byte	102,15,56,222,224			# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232			# aesdec %xmm0,%xmm5
	movups	128-120(%rsi),%xmm0		# round key at offset 128
	pxor	%xmm12,%xmm12			# zero, borrowed for the compare

	# counters: subtract one from every lane that is still positive
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
	pcmpgtd	%xmm12,%xmm11			# -1 where counter > 0, else 0
	movdqu	-120(%rsi),%xmm12		# put round key 0 back
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
	paddd	%xmm11,%xmm10
	movdqa	%xmm10,32(%rsp)
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
	movups	144-120(%rsi),%xmm1		# round key at offset 144

	# Flags from this compare feed both the jb and the je further
	# down; nothing in between modifies them.
	cmpl	$11,%eax

.byte	102,15,56,222,208			# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232			# aesdec %xmm0,%xmm5
	movups	160-120(%rsi),%xmm0		# round key at offset 160

	jb	.Ldec4x_tail			# selector < 11: 10 rounds in total

.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
	movups	176-120(%rsi),%xmm1		# round key at offset 176

.byte	102,15,56,222,208			# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232			# aesdec %xmm0,%xmm5
	movups	192-120(%rsi),%xmm0		# round key at offset 192

	je	.Ldec4x_tail			# selector == 11: 12 rounds in total

.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
	movups	208-120(%rsi),%xmm1		# round key at offset 208

.byte	102,15,56,222,208			# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232			# aesdec %xmm0,%xmm5
	movups	224-120(%rsi),%xmm0		# round key at offset 224
	jmp	.Ldec4x_tail			# selector > 11: 14 rounds in total

# Last two rounds with the chaining value folded into the final round
# key, output store, and set-up of the next block.
.align	32
.Ldec4x_tail:
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
	pxor	%xmm0,%xmm6			# chaining value ^ last round key
	pxor	%xmm0,%xmm7
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
	movdqu	16-120(%rsi),%xmm1		# reload round key at offset 16
	pxor	%xmm0,%xmm8
	pxor	%xmm0,%xmm9
	movdqu	32-120(%rsi),%xmm0		# reload round key at offset 32

.byte	102,15,56,223,214			# aesdeclast %xmm6,%xmm2
.byte	102,15,56,223,223			# aesdeclast %xmm7,%xmm3
	# New chaining value: the 16 bytes just before the next-input
	# position.  These loads come ahead of the output stores below.
	movdqu	-16(%r8,%rbx,1),%xmm6
	movdqu	-16(%r9,%rbx,1),%xmm7
.byte	102,65,15,56,223,224			# aesdeclast %xmm8,%xmm4
.byte	102,65,15,56,223,233			# aesdeclast %xmm9,%xmm5
	movdqu	-16(%r10,%rbx,1),%xmm8
	movdqu	-16(%r11,%rbx,1),%xmm9

	# store this block's result, then fetch next input ^ round key 0
	movups	%xmm2,-16(%r12,%rbx,1)
	movdqu	(%r8,%rbx,1),%xmm2
	movups	%xmm3,-16(%r13,%rbx,1)
	movdqu	(%r9,%rbx,1),%xmm3
	pxor	%xmm12,%xmm2
	movups	%xmm4,-16(%r14,%rbx,1)
	movdqu	(%r10,%rbx,1),%xmm4
	pxor	%xmm12,%xmm3
	movups	%xmm5,-16(%r15,%rbx,1)
	movdqu	(%r11,%rbx,1),%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm12,%xmm5

	decl	%edx
	jnz	.Loop_dec4x

	movq	16(%rsp),%rax			# %eax held the round selector
.cfi_def_cfa	%rax,8
	movl	24(%rsp),%edx			# group count

	leaq	160(%rdi),%rdi			# next four descriptors
	decl	%edx
	jnz	.Ldec4x_loop_grande

.Ldec4x_done:
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Ldec4x_epilogue:
.byte	0xf3,0xc3				# rep ret
.cfi_endproc
.size	aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
|