e_padlock: add CTR mode.
This commit is contained in:
parent
d18762f7c9
commit
50452b2e60
3 changed files with 120 additions and 16 deletions
|
@ -183,7 +183,7 @@ my ($mode,$opcode) = @_;
|
|||
&set_label("${mode}_pic_point");
|
||||
&lea ($ctx,&DWP(16,$ctx)); # control word
|
||||
&xor ("eax","eax");
|
||||
if ($mode eq "ctr16") {
|
||||
if ($mode eq "ctr32") {
|
||||
&movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter
|
||||
} else {
|
||||
&xor ("ebx","ebx");
|
||||
|
@ -216,7 +216,7 @@ my ($mode,$opcode) = @_;
|
|||
&mov (&DWP(8,"ebp"),$len);
|
||||
&mov ($len,$chunk);
|
||||
&mov (&DWP(12,"ebp"),$chunk); # chunk
|
||||
if ($mode eq "ctr16") {
|
||||
if ($mode eq "ctr32") {
|
||||
&mov ("ecx",&DWP(-4,$ctx));
|
||||
&xor ($out,$out);
|
||||
&mov ("eax",&DWP(-8,$ctx)); # borrow $len
|
||||
|
@ -257,7 +257,7 @@ my ($mode,$opcode) = @_;
|
|||
}
|
||||
&mov ($out,&DWP(0,"ebp")); # restore parameters
|
||||
&mov ($chunk,&DWP(12,"ebp"));
|
||||
if ($mode eq "ctr16") {
|
||||
if ($mode eq "ctr32") {
|
||||
&mov ($inp,&DWP(4,"ebp"));
|
||||
&xor ($len,$len);
|
||||
&set_label("${mode}_xor");
|
||||
|
@ -284,7 +284,7 @@ my ($mode,$opcode) = @_;
|
|||
&sub ($len,$chunk);
|
||||
&mov ($chunk,$PADLOCK_CHUNK);
|
||||
&jnz (&label("${mode}_loop"));
|
||||
if ($mode ne "ctr16") {
|
||||
if ($mode ne "ctr32") {
|
||||
&test ($out,0x0f); # out_misaligned
|
||||
&jz (&label("${mode}_done"));
|
||||
}
|
||||
|
@ -296,7 +296,7 @@ my ($mode,$opcode) = @_;
|
|||
&data_byte(0xf3,0xab); # rep stosl
|
||||
&set_label("${mode}_done");
|
||||
&lea ("esp",&DWP(24,"ebp"));
|
||||
if ($mode ne "ctr16") {
|
||||
if ($mode ne "ctr32") {
|
||||
&jmp (&label("${mode}_exit"));
|
||||
|
||||
&set_label("${mode}_aligned",16);
|
||||
|
@ -311,7 +311,7 @@ my ($mode,$opcode) = @_;
|
|||
&set_label("${mode}_exit"); }
|
||||
&mov ("eax",1);
|
||||
&lea ("esp",&DWP(4,"esp")); # popf
|
||||
&emms () if ($mode eq "ctr16");
|
||||
&emms () if ($mode eq "ctr32");
|
||||
&set_label("${mode}_abort");
|
||||
&function_end("padlock_${mode}_encrypt");
|
||||
}
|
||||
|
@ -320,10 +320,11 @@ my ($mode,$opcode) = @_;
|
|||
&generate_mode("cbc",0xd0);
|
||||
&generate_mode("cfb",0xe0);
|
||||
&generate_mode("ofb",0xe8);
|
||||
&generate_mode("ctr16",0xc8); # yes, it implements own ctr with ecb opcode,
|
||||
# because hardware ctr was introduced later
|
||||
# and even has errata on certain CPU stepping.
|
||||
# own implementation *always* works...
|
||||
&generate_mode("ctr32",0xc8); # yes, it implements own CTR with ECB opcode,
|
||||
# because hardware CTR was introduced later
|
||||
# and even has errata on certain C7 stepping.
|
||||
# own implementation *always* works, though
|
||||
# ~15% slower than dedicated hardware...
|
||||
|
||||
&function_begin_B("padlock_xstore");
|
||||
&push ("edi");
|
||||
|
|
|
@ -9,7 +9,8 @@
|
|||
|
||||
# September 2011
|
||||
#
|
||||
# Assembler helpers for Padlock engine.
|
||||
# Assembler helpers for Padlock engine. See also e_padlock-x86.pl for
|
||||
# details.
|
||||
|
||||
$flavour = shift;
|
||||
$output = shift;
|
||||
|
@ -26,7 +27,7 @@ open STDOUT,"| $^X $xlate $flavour $output";
|
|||
|
||||
$code=".text\n";
|
||||
|
||||
$PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16
|
||||
$PADLOCK_CHUNK=512; # Must be a power of 2 between 32 and 2^20
|
||||
|
||||
$ctx="%rdx";
|
||||
$out="%rdi";
|
||||
|
@ -234,9 +235,23 @@ padlock_${mode}_encrypt:
|
|||
neg %rax
|
||||
and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK
|
||||
lea (%rax,%rbp),%rsp
|
||||
___
|
||||
$code.=<<___ if ($mode eq "ctr32");
|
||||
mov -4($ctx),%eax # pull 32-bit counter
|
||||
bswap %eax
|
||||
neg %eax
|
||||
and \$`$PADLOCK_CHUNK/16-1`,%eax
|
||||
jz .L${mode}_loop
|
||||
shl \$4,%eax
|
||||
cmp %rax,$len
|
||||
cmova %rax,$chunk # don't let counter cross PADLOCK_CHUNK
|
||||
___
|
||||
$code.=<<___;
|
||||
jmp .L${mode}_loop
|
||||
.align 16
|
||||
.L${mode}_loop:
|
||||
cmp $len,$chunk # ctr32 artefact
|
||||
cmova $len,$chunk # ctr32 artefact
|
||||
mov $out,%r8 # save parameters
|
||||
mov $inp,%r9
|
||||
mov $len,%r10
|
||||
|
@ -261,6 +276,16 @@ $code.=<<___ if ($mode !~ /ecb|ctr/);
|
|||
movdqa (%rax),%xmm0
|
||||
movdqa %xmm0,-16($ctx) # copy [or refresh] iv
|
||||
___
|
||||
$code.=<<___ if ($mode eq "ctr32");
|
||||
mov -4($ctx),%eax # pull 32-bit counter
|
||||
test \$0xffff0000,%eax
|
||||
jnz .L${mode}_no_corr
|
||||
bswap %eax
|
||||
add \$0x10000,%eax
|
||||
bswap %eax
|
||||
mov %eax,-4($ctx)
|
||||
.L${mode}_no_corr:
|
||||
___
|
||||
$code.=<<___;
|
||||
mov %r8,$out # restore parameters
|
||||
mov %r11,$chunk
|
||||
|
@ -295,6 +320,29 @@ $code.=<<___;
|
|||
|
||||
.align 16
|
||||
.L${mode}_aligned:
|
||||
___
|
||||
$code.=<<___ if ($mode eq "ctr32");
|
||||
mov -4($ctx),%eax # pull 32-bit counter
|
||||
mov \$`16*0x10000`,$chunk
|
||||
bswap %eax
|
||||
cmp $len,$chunk
|
||||
cmova $len,$chunk
|
||||
neg %eax
|
||||
and \$0xffff,%eax
|
||||
jz .L${mode}_aligned_loop
|
||||
shl \$4,%eax
|
||||
cmp %rax,$len
|
||||
cmova %rax,$chunk # don't let counter cross 2^16
|
||||
jmp .L${mode}_aligned_loop
|
||||
.align 16
|
||||
.L${mode}_aligned_loop:
|
||||
cmp $len,$chunk
|
||||
cmova $len,$chunk
|
||||
mov $len,%r10 # save parameters
|
||||
mov $chunk,$len
|
||||
mov $chunk,%r11
|
||||
___
|
||||
$code.=<<___;
|
||||
lea -16($ctx),%rax # ivp
|
||||
lea 16($ctx),%rbx # key
|
||||
shr \$4,$len # len/=AES_BLOCK_SIZE
|
||||
|
@ -304,6 +352,19 @@ $code.=<<___ if ($mode !~ /ecb|ctr/);
|
|||
movdqa (%rax),%xmm0
|
||||
movdqa %xmm0,-16($ctx) # copy [or refresh] iv
|
||||
___
|
||||
$code.=<<___ if ($mode eq "ctr32");
|
||||
mov -4($ctx),%eax # pull 32-bit counter
|
||||
bswap %eax
|
||||
add \$0x10000,%eax
|
||||
bswap %eax
|
||||
mov %eax,-4($ctx)
|
||||
|
||||
mov %r11,$chunk # restore parameters
|
||||
mov %r10,$len
|
||||
sub $chunk,$len
|
||||
mov \$`16*0x10000`,$chunk
|
||||
jnz .L${mode}_aligned_loop
|
||||
___
|
||||
$code.=<<___;
|
||||
.L${mode}_exit:
|
||||
mov \$1,%eax
|
||||
|
@ -320,7 +381,7 @@ ___
|
|||
&generate_mode("cbc",0xd0);
|
||||
&generate_mode("cfb",0xe0);
|
||||
&generate_mode("ofb",0xe8);
|
||||
&generate_mode("ctr16",0xd8);
|
||||
&generate_mode("ctr32",0xd8); # all 64-bit CPUs have working CTR...
|
||||
|
||||
$code.=<<___;
|
||||
.asciz "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
|
|
|
@ -76,6 +76,7 @@
|
|||
#endif
|
||||
#include <openssl/rand.h>
|
||||
#include <openssl/err.h>
|
||||
#include <openssl/modes.h>
|
||||
|
||||
#ifndef OPENSSL_NO_HW
|
||||
#ifndef OPENSSL_NO_HW_PADLOCK
|
||||
|
@ -337,16 +338,19 @@ static int padlock_cipher_nids[] = {
|
|||
NID_aes_128_cbc,
|
||||
NID_aes_128_cfb,
|
||||
NID_aes_128_ofb,
|
||||
NID_aes_128_ctr,
|
||||
|
||||
NID_aes_192_ecb,
|
||||
NID_aes_192_cbc,
|
||||
NID_aes_192_cfb,
|
||||
NID_aes_192_ofb,
|
||||
NID_aes_192_ctr,
|
||||
|
||||
NID_aes_256_ecb,
|
||||
NID_aes_256_cbc,
|
||||
NID_aes_256_cfb,
|
||||
NID_aes_256_ofb,
|
||||
NID_aes_256_ctr
|
||||
};
|
||||
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
|
||||
sizeof(padlock_cipher_nids[0]));
|
||||
|
@ -505,10 +509,35 @@ padlock_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Adapter around padlock_ctr32_encrypt(). It takes (in, out, blocks, ctx,
 * ivec) and forwards to padlock_ctr32_encrypt(out, in, ctx, byte_length),
 * i.e. it swaps the in/out argument order and converts the block count
 * into a byte count. padlock_ctr_cipher() casts this function to ctr128_f
 * and hands it to CRYPTO_ctr128_encrypt_ctr32().
 *
 *  in     - input data
 *  out    - output buffer
 *  blocks - number of AES_BLOCK_SIZE-byte blocks to process
 *  ctx    - Padlock cipher data; its iv field is overwritten on every call
 *  ivec   - counter block to use; AES_BLOCK_SIZE bytes are read from it
 */
static void padlock_ctr32_encrypt_glue(const unsigned char *in,
|
||||
unsigned char *out, size_t blocks,
|
||||
struct padlock_cipher_data *ctx,
|
||||
const unsigned char *ivec)
|
||||
{
|
||||
/* Stage the caller's counter block in ctx->iv before the call;
   presumably the assembler routine reads the counter from there
   (NOTE(review): confirm against the padlock_cipher_data layout). */
memcpy(ctx->iv,ivec,AES_BLOCK_SIZE);
|
||||
/* Note the (out,in) order and the length expressed in bytes. */
padlock_ctr32_encrypt(out,in,ctx,AES_BLOCK_SIZE*blocks);
|
||||
}
|
||||
|
||||
/*
 * CTR-mode cipher routine for the Padlock engine. Processes nbytes bytes
 * from in_arg into out_arg by delegating to CRYPTO_ctr128_encrypt_ctr32(),
 * with padlock_ctr32_encrypt_glue() as the block-processing callback.
 *
 *  ctx     - EVP cipher context; ctx->iv, ctx->buf and ctx->num are passed
 *            to the CTR helper, and ctx->num is written back afterwards
 *  out_arg - output buffer
 *  in_arg  - input data
 *  nbytes  - number of bytes to process
 *
 * Always returns 1; no error path exists in this routine.
 */
static int
|
||||
padlock_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
|
||||
const unsigned char *in_arg, size_t nbytes)
|
||||
{
|
||||
struct padlock_cipher_data *cdata = ALIGNED_CIPHER_DATA(ctx);
|
||||
/* Local copy of ctx->num, because the helper takes an unsigned int
   pointer; the value is stored back into ctx->num after the call. */
unsigned int num = ctx->num;
|
||||
|
||||
/* cdata is passed as the key argument and is what the glue callback
   receives as its ctx parameter. */
CRYPTO_ctr128_encrypt_ctr32(in_arg,out_arg,nbytes,
|
||||
cdata,ctx->iv,ctx->buf,&num,
|
||||
(ctr128_f)padlock_ctr32_encrypt_glue);
|
||||
|
||||
/* Save the updated value of num for the next call. */
ctx->num = (size_t)num;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
|
||||
#define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
|
||||
#define EVP_CIPHER_block_size_OFB 1
|
||||
#define EVP_CIPHER_block_size_CFB 1
|
||||
#define EVP_CIPHER_block_size_CTR 1
|
||||
|
||||
/* Declaring so many ciphers by hand would be a pain.
|
||||
Instead introduce a bit of preprocessor magic :-) */
|
||||
|
@ -533,16 +562,19 @@ DECLARE_AES_EVP(128,ecb,ECB);
|
|||
DECLARE_AES_EVP(128,cbc,CBC);
|
||||
DECLARE_AES_EVP(128,cfb,CFB);
|
||||
DECLARE_AES_EVP(128,ofb,OFB);
|
||||
DECLARE_AES_EVP(128,ctr,CTR);
|
||||
|
||||
DECLARE_AES_EVP(192,ecb,ECB);
|
||||
DECLARE_AES_EVP(192,cbc,CBC);
|
||||
DECLARE_AES_EVP(192,cfb,CFB);
|
||||
DECLARE_AES_EVP(192,ofb,OFB);
|
||||
DECLARE_AES_EVP(192,ctr,CTR);
|
||||
|
||||
DECLARE_AES_EVP(256,ecb,ECB);
|
||||
DECLARE_AES_EVP(256,cbc,CBC);
|
||||
DECLARE_AES_EVP(256,cfb,CFB);
|
||||
DECLARE_AES_EVP(256,ofb,OFB);
|
||||
DECLARE_AES_EVP(256,ctr,CTR);
|
||||
|
||||
static int
|
||||
padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
|
||||
|
@ -567,6 +599,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid
|
|||
case NID_aes_128_ofb:
|
||||
*cipher = &padlock_aes_128_ofb;
|
||||
break;
|
||||
case NID_aes_128_ctr:
|
||||
*cipher = &padlock_aes_128_ctr;
|
||||
break;
|
||||
|
||||
case NID_aes_192_ecb:
|
||||
*cipher = &padlock_aes_192_ecb;
|
||||
|
@ -580,6 +615,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid
|
|||
case NID_aes_192_ofb:
|
||||
*cipher = &padlock_aes_192_ofb;
|
||||
break;
|
||||
case NID_aes_192_ctr:
|
||||
*cipher = &padlock_aes_192_ctr;
|
||||
break;
|
||||
|
||||
case NID_aes_256_ecb:
|
||||
*cipher = &padlock_aes_256_ecb;
|
||||
|
@ -593,6 +631,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid
|
|||
case NID_aes_256_ofb:
|
||||
*cipher = &padlock_aes_256_ofb;
|
||||
break;
|
||||
case NID_aes_256_ctr:
|
||||
*cipher = &padlock_aes_256_ctr;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Sorry, we don't support this NID */
|
||||
|
@ -610,6 +651,7 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
|
|||
{
|
||||
struct padlock_cipher_data *cdata;
|
||||
int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
|
||||
unsigned long mode = EVP_CIPHER_CTX_mode(ctx);
|
||||
|
||||
if (key==NULL) return 0; /* ERROR */
|
||||
|
||||
|
@ -617,7 +659,7 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
|
|||
memset(cdata, 0, sizeof(struct padlock_cipher_data));
|
||||
|
||||
/* Prepare Control word. */
|
||||
if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
|
||||
if (mode == EVP_CIPH_OFB_MODE || mode == EVP_CIPH_CTR_MODE)
|
||||
cdata->cword.b.encdec = 0;
|
||||
else
|
||||
cdata->cword.b.encdec = (ctx->encrypt == 0);
|
||||
|
@ -640,8 +682,8 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
|
|||
and is listed as hardware errata. They most
|
||||
likely will fix it at some point and then
|
||||
a check for stepping would be due here. */
|
||||
if ((EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_ECB_MODE ||
|
||||
EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CBC_MODE)
|
||||
if ((mode == EVP_CIPH_ECB_MODE ||
|
||||
mode == EVP_CIPH_CBC_MODE)
|
||||
&& !enc)
|
||||
AES_set_decrypt_key(key, key_len, &cdata->ks);
|
||||
else
|
||||
|
|
Loading…
Reference in a new issue