#if defined(lint) || defined(__lint)
#include <sys/types.h>
/*
 * Lint-only stub for aes_encrypt_intel().  The body is intentionally empty;
 * the real routine is the assembly entry point of the same name in the
 * non-lint half of this file.
 */
/* ARGSUSED */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
uint32_t ct[4]) {
}
/*
 * Lint-only stub for aes_decrypt_intel().  The body is intentionally empty;
 * the real routine is the assembly entry point of the same name in the
 * non-lint half of this file.
 */
/* ARGSUSED */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
uint32_t pt[4]) {
}
/*
 * Lint-only stub for rijndael_key_setup_enc_intel().  Always returns 0;
 * the real routine is the assembly entry point of the same name in the
 * non-lint half of this file.
 */
/* ARGSUSED */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
uint64_t keyBits) {
return (0);
}
/*
 * Lint-only stub for rijndael_key_setup_dec_intel().  Always returns 0;
 * the real routine is the assembly entry point of the same name in the
 * non-lint half of this file.
 */
/* ARGSUSED */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
uint64_t keyBits) {
return (0);
}
#else
#include <sys/asm_linkage.h>
#include <sys/controlregs.h>
#ifdef _KERNEL
#include <sys/machprivregs.h>
#endif
#ifdef _KERNEL
#ifdef __xpv
/*
 * PROTECTED_CLTS (__xpv build): run CLTS with %rsi pushed before it and
 * popped after it.
 * NOTE(review): presumably the __xpv flavour of CLTS can clobber %rsi --
 * confirm against the CLTS definition in <sys/machprivregs.h>.
 */
#define PROTECTED_CLTS \
push %rsi; \
CLTS; \
pop %rsi
#else
/* PROTECTED_CLTS (non-__xpv kernel build): CLTS with nothing saved. */
#define PROTECTED_CLTS \
CLTS
#endif
/*
 * CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
 *
 * Kernel-mode entry sequence for routines that use %xmm0 and %xmm1.
 * It sets up an %rbp frame, copies %cr0 into tmpreg and tests CR0_TS:
 *   - CR0_TS set:   runs PROTECTED_CLTS and saves no %xmm registers;
 *   - CR0_TS clear: rounds %rsp down to an XMM_ALIGN boundary, reserves
 *     2 * XMM_SIZE bytes, and stores %xmm0 at 16(%rsp) and %xmm1 at (%rsp).
 *
 * tmpreg must keep the captured %cr0 value, and %rsp must be back at the
 * value this macro leaves it at, when the matching
 * SET_TS_OR_POP_XMM0_XMM1(tmpreg) runs.
 * Uses numeric local labels 1 and 2 and modifies the flags.
 */
#define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
push %rbp; \
mov %rsp, %rbp; \
movq %cr0, tmpreg; \
testq $CR0_TS, tmpreg; \
jnz 1f; \
and $-XMM_ALIGN, %rsp; \
sub $[XMM_SIZE * 2], %rsp; \
movaps %xmm0, 16(%rsp); \
movaps %xmm1, (%rsp); \
jmp 2f; \
1: \
PROTECTED_CLTS; \
2:
/*
 * SET_TS_OR_POP_XMM0_XMM1(tmpreg)
 *
 * Kernel-mode exit sequence paired with CLEAR_TS_OR_PUSH_XMM0_XMM1.
 * tmpreg must still hold the %cr0 value captured by that macro:
 *   - CR0_TS set in tmpreg:   invokes STTS(tmpreg);
 *   - CR0_TS clear in tmpreg: reloads %xmm1 from (%rsp) and %xmm0 from
 *     16(%rsp), the slots written by the entry macro.
 * In both cases it then restores %rsp from %rbp and pops the saved %rbp.
 * Uses numeric local labels 1 and 2 and modifies the flags.
 */
#define SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
testq $CR0_TS, tmpreg; \
jnz 1f; \
movaps (%rsp), %xmm1; \
movaps 16(%rsp), %xmm0; \
jmp 2f; \
1: \
STTS(tmpreg); \
2: \
mov %rbp, %rsp; \
pop %rbp
/*
 * CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
 *
 * Kernel-mode entry sequence for routines that use %xmm0 through %xmm6.
 * It sets up an %rbp frame, copies %cr0 into tmpreg and tests CR0_TS:
 *   - CR0_TS set:   runs PROTECTED_CLTS and saves no %xmm registers;
 *   - CR0_TS clear: rounds %rsp down to an XMM_ALIGN boundary, reserves
 *     7 * XMM_SIZE bytes, and stores %xmm0 at 96(%rsp) down to %xmm6 at
 *     (%rsp), 16 bytes apart.
 *
 * tmpreg must keep the captured %cr0 value, and %rsp must be back at the
 * value this macro leaves it at, when the matching
 * SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) runs.
 * Uses numeric local labels 1 and 2 and modifies the flags.
 */
#define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
push %rbp; \
mov %rsp, %rbp; \
movq %cr0, tmpreg; \
testq $CR0_TS, tmpreg; \
jnz 1f; \
and $-XMM_ALIGN, %rsp; \
sub $[XMM_SIZE * 7], %rsp; \
movaps %xmm0, 96(%rsp); \
movaps %xmm1, 80(%rsp); \
movaps %xmm2, 64(%rsp); \
movaps %xmm3, 48(%rsp); \
movaps %xmm4, 32(%rsp); \
movaps %xmm5, 16(%rsp); \
movaps %xmm6, (%rsp); \
jmp 2f; \
1: \
PROTECTED_CLTS; \
2:
/*
 * SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
 *
 * Kernel-mode exit sequence paired with CLEAR_TS_OR_PUSH_XMM0_TO_XMM6.
 * tmpreg must still hold the %cr0 value captured by that macro:
 *   - CR0_TS set in tmpreg:   invokes STTS(tmpreg);
 *   - CR0_TS clear in tmpreg: reloads %xmm6 from (%rsp) up to %xmm0 from
 *     96(%rsp), the slots written by the entry macro.
 * In both cases it then restores %rsp from %rbp and pops the saved %rbp.
 * Uses numeric local labels 1 and 2 and modifies the flags.
 */
#define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
testq $CR0_TS, tmpreg; \
jnz 1f; \
movaps (%rsp), %xmm6; \
movaps 16(%rsp), %xmm5; \
movaps 32(%rsp), %xmm4; \
movaps 48(%rsp), %xmm3; \
movaps 64(%rsp), %xmm2; \
movaps 80(%rsp), %xmm1; \
movaps 96(%rsp), %xmm0; \
jmp 2f; \
1: \
STTS(tmpreg); \
2: \
mov %rbp, %rsp; \
pop %rbp
#else
/*
 * _KERNEL is not defined: the CR0.TS handling and %xmm save/restore
 * macros all expand to nothing, so the routines below use the %xmm
 * registers without saving them.
 */
#define PROTECTED_CLTS
#define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
#define SET_TS_OR_POP_XMM0_XMM1(tmpreg)
#define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
#define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
#endif
/*
 * _key_expansion_128 / _key_expansion_256a
 *
 * Shared helper (two labels, one body) that derives the next 128-bit round
 * key from the previous one and appends it to the key schedule.
 *
 * In:   %xmm0 = previous round key
 *       %xmm1 = aeskeygenassist result produced by the caller
 *       %xmm4 = scratch whose dword 0 must be zero; the key setup routine
 *               clears %xmm4 once and both shufps below keep dword 0 zero
 *       %rcx  = 16-byte aligned address for the new round key
 * Out:  %xmm0 = new round key, also stored at the incoming %rcx
 *       %rcx  = advanced by 0x10
 * Modifies %xmm1, %xmm4 and the flags.
 */
.align 16
_key_expansion_128:
_key_expansion_256a:
/* Replicate dword 3 of the keygen-assist result into all four dwords. */
pshufd $0b11111111, %xmm1, %xmm1
/*
 * The two shufps/pxor pairs leave every dword of %xmm0 XORed with all
 * lower-numbered dwords of the previous round key.
 */
shufps $0b00010000, %xmm0, %xmm4
pxor %xmm4, %xmm0
shufps $0b10001100, %xmm0, %xmm4
pxor %xmm4, %xmm0
/* Fold in the replicated keygen-assist dword. */
pxor %xmm1, %xmm0
/* Append the new round key to the schedule and advance the pointer. */
movaps %xmm0, (%rcx)
add $0x10, %rcx
ret
SET_SIZE(_key_expansion_128)
SET_SIZE(_key_expansion_256a)
/*
 * _key_expansion_192a
 *
 * AES-192 key schedule step that writes 32 bytes of schedule.
 *
 * In:   %xmm0 = 128-bit part of the running key state
 *       %xmm2 = remaining part of the running key state (low 64 bits are
 *               what gets stored)
 *       %xmm1 = aeskeygenassist result produced by the caller
 *       %xmm4 = scratch whose dword 0 must be zero (see the key setup
 *               routine, which clears %xmm4 before the first call)
 *       %rcx  = 16-byte aligned address for the output
 * Out:  %xmm0, %xmm2 = updated key state
 *       (%rcx)     = { low qword of incoming %xmm2, low qword of new %xmm0 }
 *       0x10(%rcx) = { high qword of new %xmm0, low qword of new %xmm2 }
 *       %rcx advanced by 0x20
 * Modifies %xmm1, %xmm3, %xmm4, %xmm5, %xmm6 and the flags.
 */
.align 16
_key_expansion_192a:
/* Replicate dword 1 of the keygen-assist result into all four dwords. */
pshufd $0b01010101, %xmm1, %xmm1
/*
 * The two shufps/pxor pairs leave every dword of %xmm0 XORed with all
 * lower-numbered dwords of its previous value.
 */
shufps $0b00010000, %xmm0, %xmm4
pxor %xmm4, %xmm0
shufps $0b10001100, %xmm0, %xmm4
pxor %xmm4, %xmm0
pxor %xmm1, %xmm0
/* Keep two copies of the incoming %xmm2: %xmm5 to shift, %xmm6 to store. */
movaps %xmm2, %xmm5
movaps %xmm2, %xmm6
/*
 * New %xmm2 = old %xmm2 ^ (old %xmm2 shifted left by 4 bytes)
 *             ^ (dword 3 of the new %xmm0 replicated).
 */
pslldq $4, %xmm5
pshufd $0b11111111, %xmm0, %xmm3
pxor %xmm3, %xmm2
pxor %xmm5, %xmm2
/* Pack and store the two 16-byte schedule entries described above. */
movaps %xmm0, %xmm1
shufps $0b01000100, %xmm0, %xmm6
movaps %xmm6, (%rcx)
shufps $0b01001110, %xmm2, %xmm1
movaps %xmm1, 0x10(%rcx)
add $0x20, %rcx
ret
SET_SIZE(_key_expansion_192a)
/*
 * _key_expansion_192b
 *
 * AES-192 key schedule step that writes 16 bytes of schedule.  It updates
 * %xmm0 and %xmm2 exactly as _key_expansion_192a does, but stores only the
 * new %xmm0.
 *
 * In:   %xmm0, %xmm2 = running key state
 *       %xmm1 = aeskeygenassist result produced by the caller
 *       %xmm4 = scratch whose dword 0 must be zero
 *       %rcx  = 16-byte aligned address for the output
 * Out:  %xmm0, %xmm2 = updated key state
 *       (%rcx) = new %xmm0; %rcx advanced by 0x10
 * Modifies %xmm1, %xmm3, %xmm4, %xmm5 and the flags.
 */
.align 16
_key_expansion_192b:
/* Replicate dword 1 of the keygen-assist result into all four dwords. */
pshufd $0b01010101, %xmm1, %xmm1
/*
 * The two shufps/pxor pairs leave every dword of %xmm0 XORed with all
 * lower-numbered dwords of its previous value.
 */
shufps $0b00010000, %xmm0, %xmm4
pxor %xmm4, %xmm0
shufps $0b10001100, %xmm0, %xmm4
pxor %xmm4, %xmm0
pxor %xmm1, %xmm0
/*
 * New %xmm2 = old %xmm2 ^ (old %xmm2 shifted left by 4 bytes)
 *             ^ (dword 3 of the new %xmm0 replicated).
 */
movaps %xmm2, %xmm5
pslldq $4, %xmm5
pshufd $0b11111111, %xmm0, %xmm3
pxor %xmm3, %xmm2
pxor %xmm5, %xmm2
/* Only %xmm0 is written here; %xmm2 is stored by the next 192a call. */
movaps %xmm0, (%rcx)
add $0x10, %rcx
ret
SET_SIZE(_key_expansion_192b)
/*
 * _key_expansion_256b
 *
 * AES-256 key schedule step operating on %xmm2 (the companion of
 * _key_expansion_256a, which operates on %xmm0).
 *
 * In:   %xmm2 = round key to be updated
 *       %xmm1 = aeskeygenassist result produced by the caller
 *       %xmm4 = scratch whose dword 0 must be zero
 *       %rcx  = 16-byte aligned address for the new round key
 * Out:  %xmm2 = new round key, also stored at the incoming %rcx
 *       %rcx  = advanced by 0x10
 * Modifies %xmm1, %xmm4 and the flags.
 */
.align 16
_key_expansion_256b:
/* Replicate dword 2 of the keygen-assist result into all four dwords. */
pshufd $0b10101010, %xmm1, %xmm1
/*
 * The two shufps/pxor pairs leave every dword of %xmm2 XORed with all
 * lower-numbered dwords of its previous value.
 */
shufps $0b00010000, %xmm2, %xmm4
pxor %xmm4, %xmm2
shufps $0b10001100, %xmm2, %xmm4
pxor %xmm4, %xmm2
pxor %xmm1, %xmm2
/* Append the new round key to the schedule and advance the pointer. */
movaps %xmm2, (%rcx)
add $0x10, %rcx
ret
SET_SIZE(_key_expansion_256b)
/*
 * Register aliases for the key setup routines.  The aliases are bare
 * register names; uses below prepend the % sign.
 */
#ifdef OPENSSL_INTERFACE
/* OpenSSL interface: export the key setup routines under intel_AES_* names. */
#define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key
#define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key
/* Argument order: user key (%rdi), key size in bits (%esi), schedule (%rdx). */
#define USERCIPHERKEY rdi
#define KEYSIZE32 esi
#define KEYSIZE64 rsi
#define AESKEY rdx
#else
/* Default interface: schedule (%rdi), user key (%rsi), key size in bits (%edx). */
#define AESKEY rdi
#define USERCIPHERKEY rsi
#define KEYSIZE32 edx
#define KEYSIZE64 rdx
#endif
/* The key size register is reused to hold the round count. */
#define ROUNDS32 KEYSIZE32
#define ROUNDS64 KEYSIZE64
/* The user key register is reused as the end-of-schedule pointer. */
#define ENDAESKEY USERCIPHERKEY
/*
 * rijndael_key_setup_enc_intel()
 * Expand the cipher key into the encryption key schedule.
 *
 * Default interface:
 *   int rijndael_key_setup_enc_intel(uint32_t rk[],
 *       const uint32_t cipherKey[], uint64_t keyBits);
 *   %rdi = rk (AESKEY), %rsi = cipherKey (USERCIPHERKEY),
 *   %edx = keyBits (KEYSIZE32).
 *   Returns the round count (10, 12 or 14) in %rax, or 0 when either
 *   pointer is NULL or keyBits is not 128, 192 or 256.
 *
 * OpenSSL interface (OPENSSL_INTERFACE defined):
 *   %rdi = user key, %esi = key size in bits, %rdx = key schedule.
 *   The round count is stored at offset 240 of the schedule and left in
 *   %esi.  Returns 0 on success, -1 for a NULL pointer, -2 for an
 *   unsupported key size.
 *
 * The schedule is written with movaps and must be 16-byte aligned; the
 * user key is read with unaligned loads.  %AESKEY is left unmodified.
 * Modifies %rcx, the flags, %r10 (kernel builds) and, where the entry
 * macro does not save them, %xmm0-%xmm6.
 */
ENTRY_NP(rijndael_key_setup_enc_intel)
/*
 * File-local alias of the entry point.  rijndael_key_setup_dec_intel
 * calls this label rather than the exported symbol, so the call is
 * resolved at assembly time: it needs no PLT or text relocation when this
 * code is linked into a shared object and cannot be interposed.
 */
.Lenc_key_setup_local:
CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10)
/ NULL pointer sanity check
test %USERCIPHERKEY, %USERCIPHERKEY
jz .Lenc_key_invalid_param
test %AESKEY, %AESKEY
jz .Lenc_key_invalid_param
movups (%USERCIPHERKEY), %xmm0 / user key (first 16 bytes)
movaps %xmm0, (%AESKEY)
lea 0x10(%AESKEY), %rcx / key addr
pxor %xmm4, %xmm4 / xmm4 is assumed 0 in _key_expansion_x
cmp $256, %KEYSIZE32
jnz .Lenc_key192
/ AES 256: 14 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
mov $14, %ROUNDS32
movl %ROUNDS32, 240(%AESKEY) / key.rounds = 14
#endif
movups 0x10(%USERCIPHERKEY), %xmm2 / other user key (2nd 16 bytes)
movaps %xmm2, (%rcx)
add $0x10, %rcx
/ 13 helper calls, one round key each: 256a updates xmm0, 256b updates xmm2
aeskeygenassist $0x1, %xmm2, %xmm1 / expand the key
call _key_expansion_256a
aeskeygenassist $0x1, %xmm0, %xmm1
call _key_expansion_256b
aeskeygenassist $0x2, %xmm2, %xmm1 / expand the key
call _key_expansion_256a
aeskeygenassist $0x2, %xmm0, %xmm1
call _key_expansion_256b
aeskeygenassist $0x4, %xmm2, %xmm1 / expand the key
call _key_expansion_256a
aeskeygenassist $0x4, %xmm0, %xmm1
call _key_expansion_256b
aeskeygenassist $0x8, %xmm2, %xmm1 / expand the key
call _key_expansion_256a
aeskeygenassist $0x8, %xmm0, %xmm1
call _key_expansion_256b
aeskeygenassist $0x10, %xmm2, %xmm1 / expand the key
call _key_expansion_256a
aeskeygenassist $0x10, %xmm0, %xmm1
call _key_expansion_256b
aeskeygenassist $0x20, %xmm2, %xmm1 / expand the key
call _key_expansion_256a
aeskeygenassist $0x20, %xmm0, %xmm1
call _key_expansion_256b
aeskeygenassist $0x40, %xmm2, %xmm1 / expand the key
call _key_expansion_256a
SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef OPENSSL_INTERFACE
xor %rax, %rax / return 0 (OK)
#else
mov $14, %rax / return # rounds = 14
#endif
ret
.align 4
.Lenc_key192:
cmp $192, %KEYSIZE32
jnz .Lenc_key128
/ AES 192: 12 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
mov $12, %ROUNDS32
movl %ROUNDS32, 240(%AESKEY) / key.rounds = 12
#endif
movq 0x10(%USERCIPHERKEY), %xmm2 / other user key
/ 192a writes 32 bytes and 192b writes 16, so each pair adds 3 round keys
aeskeygenassist $0x1, %xmm2, %xmm1 / expand the key
call _key_expansion_192a
aeskeygenassist $0x2, %xmm2, %xmm1 / expand the key
call _key_expansion_192b
aeskeygenassist $0x4, %xmm2, %xmm1 / expand the key
call _key_expansion_192a
aeskeygenassist $0x8, %xmm2, %xmm1 / expand the key
call _key_expansion_192b
aeskeygenassist $0x10, %xmm2, %xmm1 / expand the key
call _key_expansion_192a
aeskeygenassist $0x20, %xmm2, %xmm1 / expand the key
call _key_expansion_192b
aeskeygenassist $0x40, %xmm2, %xmm1 / expand the key
call _key_expansion_192a
aeskeygenassist $0x80, %xmm2, %xmm1 / expand the key
call _key_expansion_192b
SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef OPENSSL_INTERFACE
xor %rax, %rax / return 0 (OK)
#else
mov $12, %rax / return # rounds = 12
#endif
ret
.align 4
.Lenc_key128:
cmp $128, %KEYSIZE32
jnz .Lenc_key_invalid_key_bits
/ AES 128: 10 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
mov $10, %ROUNDS32
movl %ROUNDS32, 240(%AESKEY) / key.rounds = 10
#endif
/ 10 helper calls, one round key each
aeskeygenassist $0x1, %xmm0, %xmm1 / expand the key
call _key_expansion_128
aeskeygenassist $0x2, %xmm0, %xmm1 / expand the key
call _key_expansion_128
aeskeygenassist $0x4, %xmm0, %xmm1 / expand the key
call _key_expansion_128
aeskeygenassist $0x8, %xmm0, %xmm1 / expand the key
call _key_expansion_128
aeskeygenassist $0x10, %xmm0, %xmm1 / expand the key
call _key_expansion_128
aeskeygenassist $0x20, %xmm0, %xmm1 / expand the key
call _key_expansion_128
aeskeygenassist $0x40, %xmm0, %xmm1 / expand the key
call _key_expansion_128
aeskeygenassist $0x80, %xmm0, %xmm1 / expand the key
call _key_expansion_128
aeskeygenassist $0x1b, %xmm0, %xmm1 / expand the key
call _key_expansion_128
aeskeygenassist $0x36, %xmm0, %xmm1 / expand the key
call _key_expansion_128
SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef OPENSSL_INTERFACE
xor %rax, %rax / return 0 (OK)
#else
mov $10, %rax / return # rounds = 10
#endif
ret
.Lenc_key_invalid_param:
#ifdef OPENSSL_INTERFACE
SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
mov $-1, %rax / user key or AES key pointer is NULL
ret
#else
/ default interface: fall through and share the "return 0" error exit
#endif
.Lenc_key_invalid_key_bits:
SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef OPENSSL_INTERFACE
mov $-2, %rax / keysize is invalid
#else
xor %rax, %rax / a key pointer is NULL or invalid keysize
#endif
ret
SET_SIZE(rijndael_key_setup_enc_intel)
/*
 * rijndael_key_setup_dec_intel()
 * Expand the cipher key into the decryption key schedule.
 *
 * Takes the same arguments in the same registers as
 * rijndael_key_setup_enc_intel() and returns the same values.
 *
 * Steps:
 *   1. Build the encryption schedule by calling the encrypt key setup.
 *   2. Reverse the order of the round keys in place.
 *   3. Run aesimc over every round key except the first and the last.
 *
 * On the success path %AESKEY, %ROUNDS64, %ENDAESKEY, %rcx and the flags
 * are modified; %rax still holds the value returned by step 1.
 */
ENTRY_NP(rijndael_key_setup_dec_intel)
/ Generate round keys used for encryption
/ (called through the file-local label, not the exported symbol)
call .Lenc_key_setup_local
test %rax, %rax
#ifdef OPENSSL_INTERFACE
jnz .Ldec_key_exit / Failed if returned non-0
#else
jz .Ldec_key_exit / Failed if returned 0
#endif
CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
#ifndef OPENSSL_INTERFACE
mov %rax, %ROUNDS64 / set # rounds (10, 12, or 14)
/ (already set for OpenSSL)
#endif
lea 0x10(%AESKEY), %rcx / key addr
shl $4, %ROUNDS32 / byte offset of the last round key
add %AESKEY, %ROUNDS64 / address of the last round key
mov %ROUNDS64, %ENDAESKEY / remembered as the end of the aesimc loop
.align 4
.Ldec_key_reorder_loop:
/ Swap the round keys at the two ends and move both pointers inward
movaps (%AESKEY), %xmm0
movaps (%ROUNDS64), %xmm1
movaps %xmm0, (%ROUNDS64)
movaps %xmm1, (%AESKEY)
lea 0x10(%AESKEY), %AESKEY
lea -0x10(%ROUNDS64), %ROUNDS64
cmp %AESKEY, %ROUNDS64
ja .Ldec_key_reorder_loop
.align 4
.Ldec_key_inv_loop:
movaps (%rcx), %xmm0
/ Convert an encryption round key to a form usable for decryption
/ with the "AES Inverse Mix Columns" instruction
aesimc %xmm0, %xmm1
movaps %xmm1, (%rcx)
lea 0x10(%rcx), %rcx
cmp %ENDAESKEY, %rcx / stop before the last round key
jnz .Ldec_key_inv_loop
SET_TS_OR_POP_XMM0_XMM1(%r10)
.Ldec_key_exit:
/ OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
/ OpenSSL: rax = 0 for OK, or non-zero for error
ret
SET_SIZE(rijndael_key_setup_dec_intel)
/*
 * Register aliases for aes_encrypt_intel and aes_decrypt_intel.  The
 * aliases are bare register names; uses below prepend the % sign.
 */
#ifdef OPENSSL_INTERFACE
/* OpenSSL interface: export the block routines under intel_AES_* names. */
#define aes_encrypt_intel intel_AES_encrypt
#define aes_decrypt_intel intel_AES_decrypt
/* Argument order: input block (%rdi), output block (%rsi), schedule (%rdx). */
#define INP rdi
#define OUTP rsi
#define KEYP rdx
/* Round count is loaded into %ecx by the routine; compares use %cl. */
#define NROUNDS32 ecx
#define NROUNDS cl
#else
/* Default interface: schedule (%rdi), round count (%esi), in (%rdx), out (%rcx). */
#define KEYP rdi
#define NROUNDS esi
#define INP rdx
#define OUTP rcx
#endif
/* %xmm0 carries the block being processed, %xmm1 the current round key. */
#define STATE xmm0
#define KEY xmm1
/*
 * aes_encrypt_intel()
 * Encrypt one 16-byte block with the aesenc / aesenclast instructions.
 *
 * Default interface:
 *   void aes_encrypt_intel(const uint32_t rk[], int Nr,
 *       const uint32_t pt[4], uint32_t ct[4]);
 *   %rdi = rk (KEYP), %esi = Nr (NROUNDS), %rdx = pt (INP), %rcx = ct (OUTP)
 * OpenSSL interface:
 *   %rdi = input, %rsi = output, %rdx = key schedule; the round count is
 *   loaded from offset 240 of the schedule.
 *
 * A round count below 12 runs 10 rounds, exactly 12 runs 12 rounds, and
 * anything larger runs 14 rounds (unsigned compare).
 * The key schedule is read with movaps and must be 16-byte aligned; the
 * input and output blocks are accessed with movups.
 * Modifies KEYP, the flags, %r10 (kernel builds) and, where the entry
 * macro does not save them, %xmm0 and %xmm1.
 */
ENTRY_NP(aes_encrypt_intel)
CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
movups (%INP), %STATE / input
movaps (%KEYP), %KEY / key
#ifdef OPENSSL_INTERFACE
mov 240(%KEYP), %NROUNDS32 / round count
#else
#endif
pxor %KEY, %STATE / round 0
/*
 * Bias KEYP so the shared tail at .Lenc128 can use the same displacements
 * for every key size: +0x30 for 10 rounds, +0x50 for 12, +0x70 for 14.
 * lea leaves the flags alone, so the je below still sees the cmp result.
 */
lea 0x30(%KEYP), %KEYP
cmp $12, %NROUNDS
jb .Lenc128
lea 0x20(%KEYP), %KEYP
je .Lenc192
/ AES 256
lea 0x20(%KEYP), %KEYP
movaps -0x60(%KEYP), %KEY
aesenc %KEY, %STATE
movaps -0x50(%KEYP), %KEY
aesenc %KEY, %STATE
.align 4
.Lenc192:
/ AES 192 and 256
movaps -0x40(%KEYP), %KEY
aesenc %KEY, %STATE
movaps -0x30(%KEYP), %KEY
aesenc %KEY, %STATE
.align 4
.Lenc128:
/ AES 128, 192, and 256
movaps -0x20(%KEYP), %KEY
aesenc %KEY, %STATE
movaps -0x10(%KEYP), %KEY
aesenc %KEY, %STATE
movaps (%KEYP), %KEY
aesenc %KEY, %STATE
movaps 0x10(%KEYP), %KEY
aesenc %KEY, %STATE
movaps 0x20(%KEYP), %KEY
aesenc %KEY, %STATE
movaps 0x30(%KEYP), %KEY
aesenc %KEY, %STATE
movaps 0x40(%KEYP), %KEY
aesenc %KEY, %STATE
movaps 0x50(%KEYP), %KEY
aesenc %KEY, %STATE
movaps 0x60(%KEYP), %KEY
aesenc %KEY, %STATE
movaps 0x70(%KEYP), %KEY
aesenclast %KEY, %STATE / last round
movups %STATE, (%OUTP) / output
SET_TS_OR_POP_XMM0_XMM1(%r10)
ret
SET_SIZE(aes_encrypt_intel)
/*
 * aes_decrypt_intel()
 * Decrypt one 16-byte block with the aesdec / aesdeclast instructions.
 *
 * Default interface:
 *   void aes_decrypt_intel(const uint32_t rk[], int Nr,
 *       const uint32_t ct[4], uint32_t pt[4]);
 *   %rdi = rk (KEYP), %esi = Nr (NROUNDS), %rdx = ct (INP), %rcx = pt (OUTP)
 * OpenSSL interface:
 *   %rdi = input, %rsi = output, %rdx = key schedule; the round count is
 *   loaded from offset 240 of the schedule.
 *
 * Round keys are consumed from the start of the schedule upward.
 * NOTE(review): presumably the schedule is the one produced by
 * rijndael_key_setup_dec_intel (reversed, with aesimc applied) -- confirm
 * against the callers.
 *
 * A round count below 12 runs 10 rounds, exactly 12 runs 12 rounds, and
 * anything larger runs 14 rounds (unsigned compare).
 * The key schedule is read with movaps and must be 16-byte aligned; the
 * input and output blocks are accessed with movups.
 * Modifies KEYP, the flags, %r10 (kernel builds) and, where the entry
 * macro does not save them, %xmm0 and %xmm1.
 */
ENTRY_NP(aes_decrypt_intel)
CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
movups (%INP), %STATE / input
movaps (%KEYP), %KEY / key
#ifdef OPENSSL_INTERFACE
mov 240(%KEYP), %NROUNDS32 / round count
#else
#endif
pxor %KEY, %STATE / round 0
/*
 * Bias KEYP so the shared tail at .Ldec128 can use the same displacements
 * for every key size: +0x30 for 10 rounds, +0x50 for 12, +0x70 for 14.
 * lea leaves the flags alone, so the je below still sees the cmp result.
 */
lea 0x30(%KEYP), %KEYP
cmp $12, %NROUNDS
jb .Ldec128
lea 0x20(%KEYP), %KEYP
je .Ldec192
/ AES 256
lea 0x20(%KEYP), %KEYP
movaps -0x60(%KEYP), %KEY
aesdec %KEY, %STATE
movaps -0x50(%KEYP), %KEY
aesdec %KEY, %STATE
.align 4
.Ldec192:
/ AES 192 and 256
movaps -0x40(%KEYP), %KEY
aesdec %KEY, %STATE
movaps -0x30(%KEYP), %KEY
aesdec %KEY, %STATE
.align 4
.Ldec128:
/ AES 128, 192, and 256
movaps -0x20(%KEYP), %KEY
aesdec %KEY, %STATE
movaps -0x10(%KEYP), %KEY
aesdec %KEY, %STATE
movaps (%KEYP), %KEY
aesdec %KEY, %STATE
movaps 0x10(%KEYP), %KEY
aesdec %KEY, %STATE
movaps 0x20(%KEYP), %KEY
aesdec %KEY, %STATE
movaps 0x30(%KEYP), %KEY
aesdec %KEY, %STATE
movaps 0x40(%KEYP), %KEY
aesdec %KEY, %STATE
movaps 0x50(%KEYP), %KEY
aesdec %KEY, %STATE
movaps 0x60(%KEYP), %KEY
aesdec %KEY, %STATE
movaps 0x70(%KEYP), %KEY
aesdeclast %KEY, %STATE / last round
movups %STATE, (%OUTP) / output
SET_TS_OR_POP_XMM0_XMM1(%r10)
ret
SET_SIZE(aes_decrypt_intel)
#endif