bsaes-armv7.pl: add bsaes_cbc_encrypt and bsaes_ctr32_encrypt_blocks.
Submitted by: Ard Biesheuvel <ard.biesheuvel@linaro.org>.
The contributor claims ~50% improvement in CTR and ~9% in CBC decrypt on Cortex-A15.
This commit is contained in:
parent
75fe422323
commit
9575d1a91a
2 changed files with 440 additions and 1 deletions
|
@ -715,6 +715,8 @@ _bsaes_const:
|
|||
.quad 0x0304090e00050a0f, 0x01060b0c0207080d
|
||||
.LM0:
|
||||
.quad 0x02060a0e03070b0f, 0x0004080c0105090d
|
||||
.LREVM0SR:
|
||||
.quad 0x090d02060c030708, 0x00040b0f050a0e01
|
||||
.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 6
|
||||
.size _bsaes_const,.-_bsaes_const
|
||||
|
@ -727,6 +729,7 @@ _bsaes_encrypt8:
|
|||
sub $const,$const,#_bsaes_encrypt8-.LM0SR
|
||||
|
||||
vldmia $const!, {@XMM[8]} @ .LM0SR
|
||||
_bsaes_encrypt8_alt:
|
||||
veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key
|
||||
veor @XMM[11], @XMM[1], @XMM[9]
|
||||
vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
|
||||
|
@ -879,7 +882,7 @@ _bsaes_key_convert:
|
|||
___
|
||||
}
|
||||
|
||||
if (1) { # following four functions are unsupported interface
|
||||
if (0) { # following four functions are unsupported interface
|
||||
# used for benchmarking...
|
||||
$code.=<<___;
|
||||
.globl bsaes_enc_key_convert
|
||||
|
@ -981,6 +984,432 @@ bsaes_decrypt_128:
|
|||
.size bsaes_decrypt_128,.-bsaes_decrypt_128
|
||||
___
|
||||
}
|
||||
{
|
||||
# Emits bsaes_cbc_encrypt, a CBC entry point built on the 8-block
# _bsaes_decrypt8 core.
#
# Register roles assigned by the map below:
#   $inp=r0, $out=r1, $len=r2, $key=r3   (register arguments)
#   $ivp=r8     IV pointer, loaded from the first stack argument
#   $fp=r9      anchor for a 16-byte scratch area that carries the IV
#   $rounds=r10 round count, read from offset 240 of the key
#   $keysched   is sp: the bit-sliced key schedule lives on the stack
#
# Flow of the generated code:
#   * $len below 128 branches straight to AES_cbc_encrypt.
#   * Per the in-code note, the caller must guarantee enc == 0; only the
#     decrypt direction is implemented here.
#   * $len is converted from bytes to 16-byte blocks (lsr #4).
#   * rounds*128 - 96 bytes of stack are reserved and filled by
#     _bsaes_key_convert (r4=key, r5=rounds, r12=destination).
#   * .Lcbc_dec_loop handles 8 blocks per iteration; results are XORed with
#     the IV / preceding ciphertext and stored in register order
#     0,1,6,4,2,7,3,5, and the 8th ciphertext block becomes the next IV.
#   * 2..7 leftover blocks still call _bsaes_decrypt8 but only the needed
#     results are XORed and stored; a single leftover block is passed to
#     AES_decrypt using the scratch area at $fp as its output buffer.
#   * On exit the stack key schedule is zeroed 32 bytes at a time up to
#     $fp, the last ciphertext block is stored through $ivp, and d8-d15
#     and r4-r10 are restored.
my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10));
|
||||
my ($keysched)=("sp");
|
||||
|
||||
$code.=<<___;
|
||||
.extern AES_cbc_encrypt
|
||||
.extern AES_decrypt
|
||||
|
||||
.global bsaes_cbc_encrypt
|
||||
.type bsaes_cbc_encrypt,%function
|
||||
.align 5
|
||||
bsaes_cbc_encrypt:
|
||||
cmp $len, #128
|
||||
blo AES_cbc_encrypt
|
||||
|
||||
@ it is up to the caller to make sure we are called with enc == 0
|
||||
|
||||
stmdb sp!, {r4-r10, lr}
|
||||
vstmdb sp!, {d8-d15} @ ABI specification says so
|
||||
ldr $ivp, [sp, #0x60] @ IV is 1st arg on the stack
|
||||
mov $len, $len, lsr#4 @ len in 16 byte blocks
|
||||
sub sp, #0x10 @ scratch space to carry over the IV
|
||||
mov $fp, sp @ save sp
|
||||
|
||||
@ allocate the key schedule on the stack
|
||||
ldr $rounds, [$key, #240] @ get # of rounds
|
||||
sub sp, sp, $rounds, lsl#7 @ 128 bytes per inner round key
|
||||
add sp, sp, #`128-32` @ size of bit-sliced key schedule
|
||||
|
||||
@ populate the key schedule
|
||||
mov r4, $key @ pass key
|
||||
mov r5, $rounds @ pass # of rounds
|
||||
mov r12, $keysched @ pass key schedule
|
||||
bl _bsaes_key_convert
|
||||
vldmia $keysched, {@XMM[6]}
|
||||
vstmia r12, {@XMM[15]} @ save last round key
|
||||
veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
|
||||
vstmia $keysched, {@XMM[7]}
|
||||
|
||||
vld1.8 {@XMM[15]}, [$ivp] @ load IV
|
||||
b .Lcbc_dec_loop
|
||||
|
||||
.align 4
|
||||
.Lcbc_dec_loop:
|
||||
subs $len, $len, #0x8
|
||||
bmi .Lcbc_dec_loop_finish
|
||||
|
||||
vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
|
||||
vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
|
||||
mov r4, $keysched @ pass the key
|
||||
vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
|
||||
mov r5, $rounds
|
||||
vld1.8 {@XMM[6]-@XMM[7]}, [$inp]
|
||||
sub $inp, $inp, #0x60
|
||||
vstmia $fp, {@XMM[15]} @ put aside IV
|
||||
|
||||
bl _bsaes_decrypt8
|
||||
|
||||
vldmia $fp, {@XMM[14]} @ reload IV
|
||||
vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
|
||||
veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
|
||||
vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
|
||||
veor @XMM[1], @XMM[1], @XMM[8]
|
||||
veor @XMM[6], @XMM[6], @XMM[9]
|
||||
vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
|
||||
veor @XMM[4], @XMM[4], @XMM[10]
|
||||
veor @XMM[2], @XMM[2], @XMM[11]
|
||||
vld1.8 {@XMM[14]-@XMM[15]}, [$inp]!
|
||||
veor @XMM[7], @XMM[7], @XMM[12]
|
||||
vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
|
||||
veor @XMM[3], @XMM[3], @XMM[13]
|
||||
vst1.8 {@XMM[6]}, [$out]!
|
||||
veor @XMM[5], @XMM[5], @XMM[14]
|
||||
vst1.8 {@XMM[4]}, [$out]!
|
||||
vst1.8 {@XMM[2]}, [$out]!
|
||||
vst1.8 {@XMM[7]}, [$out]!
|
||||
vst1.8 {@XMM[3]}, [$out]!
|
||||
vst1.8 {@XMM[5]}, [$out]!
|
||||
|
||||
b .Lcbc_dec_loop
|
||||
|
||||
.Lcbc_dec_loop_finish:
|
||||
adds $len, $len, #8
|
||||
beq .Lcbc_dec_done
|
||||
|
||||
vld1.8 {@XMM[0]}, [$inp]! @ load input
|
||||
cmp $len, #2
|
||||
blo .Lcbc_dec_one
|
||||
vld1.8 {@XMM[1]}, [$inp]!
|
||||
mov r4, $keysched @ pass the key
|
||||
mov r5, $rounds
|
||||
vstmia $fp, {@XMM[15]} @ put aside IV
|
||||
beq .Lcbc_dec_two
|
||||
vld1.8 {@XMM[2]}, [$inp]!
|
||||
cmp $len, #4
|
||||
blo .Lcbc_dec_three
|
||||
vld1.8 {@XMM[3]}, [$inp]!
|
||||
beq .Lcbc_dec_four
|
||||
vld1.8 {@XMM[4]}, [$inp]!
|
||||
cmp $len, #6
|
||||
blo .Lcbc_dec_five
|
||||
vld1.8 {@XMM[5]}, [$inp]!
|
||||
beq .Lcbc_dec_six
|
||||
vld1.8 {@XMM[6]}, [$inp]!
|
||||
sub $inp, $inp, #0x70
|
||||
|
||||
bl _bsaes_decrypt8
|
||||
|
||||
vldmia $fp, {@XMM[14]} @ reload IV
|
||||
vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
|
||||
veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
|
||||
vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
|
||||
veor @XMM[1], @XMM[1], @XMM[8]
|
||||
veor @XMM[6], @XMM[6], @XMM[9]
|
||||
vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
|
||||
veor @XMM[4], @XMM[4], @XMM[10]
|
||||
veor @XMM[2], @XMM[2], @XMM[11]
|
||||
vld1.8 {@XMM[15]}, [$inp]!
|
||||
veor @XMM[7], @XMM[7], @XMM[12]
|
||||
vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
|
||||
veor @XMM[3], @XMM[3], @XMM[13]
|
||||
vst1.8 {@XMM[6]}, [$out]!
|
||||
vst1.8 {@XMM[4]}, [$out]!
|
||||
vst1.8 {@XMM[2]}, [$out]!
|
||||
vst1.8 {@XMM[7]}, [$out]!
|
||||
vst1.8 {@XMM[3]}, [$out]!
|
||||
b .Lcbc_dec_done
|
||||
.align 4
|
||||
.Lcbc_dec_six:
|
||||
sub $inp, $inp, #0x60
|
||||
bl _bsaes_decrypt8
|
||||
vldmia $fp,{@XMM[14]} @ reload IV
|
||||
vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
|
||||
veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
|
||||
vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
|
||||
veor @XMM[1], @XMM[1], @XMM[8]
|
||||
veor @XMM[6], @XMM[6], @XMM[9]
|
||||
vld1.8 {@XMM[12]}, [$inp]!
|
||||
veor @XMM[4], @XMM[4], @XMM[10]
|
||||
veor @XMM[2], @XMM[2], @XMM[11]
|
||||
vld1.8 {@XMM[15]}, [$inp]!
|
||||
veor @XMM[7], @XMM[7], @XMM[12]
|
||||
vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
|
||||
vst1.8 {@XMM[6]}, [$out]!
|
||||
vst1.8 {@XMM[4]}, [$out]!
|
||||
vst1.8 {@XMM[2]}, [$out]!
|
||||
vst1.8 {@XMM[7]}, [$out]!
|
||||
b .Lcbc_dec_done
|
||||
.align 4
|
||||
.Lcbc_dec_five:
|
||||
sub $inp, $inp, #0x50
|
||||
bl _bsaes_decrypt8
|
||||
vldmia $fp, {@XMM[14]} @ reload IV
|
||||
vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
|
||||
veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
|
||||
vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
|
||||
veor @XMM[1], @XMM[1], @XMM[8]
|
||||
veor @XMM[6], @XMM[6], @XMM[9]
|
||||
vld1.8 {@XMM[15]}, [$inp]!
|
||||
veor @XMM[4], @XMM[4], @XMM[10]
|
||||
vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
|
||||
veor @XMM[2], @XMM[2], @XMM[11]
|
||||
vst1.8 {@XMM[6]}, [$out]!
|
||||
vst1.8 {@XMM[4]}, [$out]!
|
||||
vst1.8 {@XMM[2]}, [$out]!
|
||||
b .Lcbc_dec_done
|
||||
.align 4
|
||||
.Lcbc_dec_four:
|
||||
sub $inp, $inp, #0x40
|
||||
bl _bsaes_decrypt8
|
||||
vldmia $fp, {@XMM[14]} @ reload IV
|
||||
vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
|
||||
veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
|
||||
vld1.8 {@XMM[10]}, [$inp]!
|
||||
veor @XMM[1], @XMM[1], @XMM[8]
|
||||
veor @XMM[6], @XMM[6], @XMM[9]
|
||||
vld1.8 {@XMM[15]}, [$inp]!
|
||||
veor @XMM[4], @XMM[4], @XMM[10]
|
||||
vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
|
||||
vst1.8 {@XMM[6]}, [$out]!
|
||||
vst1.8 {@XMM[4]}, [$out]!
|
||||
b .Lcbc_dec_done
|
||||
.align 4
|
||||
.Lcbc_dec_three:
|
||||
sub $inp, $inp, #0x30
|
||||
bl _bsaes_decrypt8
|
||||
vldmia $fp, {@XMM[14]} @ reload IV
|
||||
vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
|
||||
veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
|
||||
vld1.8 {@XMM[15]}, [$inp]!
|
||||
veor @XMM[1], @XMM[1], @XMM[8]
|
||||
veor @XMM[6], @XMM[6], @XMM[9]
|
||||
vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
|
||||
vst1.8 {@XMM[6]}, [$out]!
|
||||
b .Lcbc_dec_done
|
||||
.align 4
|
||||
.Lcbc_dec_two:
|
||||
sub $inp, $inp, #0x20
|
||||
bl _bsaes_decrypt8
|
||||
vldmia $fp, {@XMM[14]} @ reload IV
|
||||
vld1.8 {@XMM[8]}, [$inp]! @ reload input
|
||||
veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
|
||||
vld1.8 {@XMM[15]}, [$inp]! @ reload input
|
||||
veor @XMM[1], @XMM[1], @XMM[8]
|
||||
vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
|
||||
b .Lcbc_dec_done
|
||||
.align 4
|
||||
.Lcbc_dec_one:
|
||||
sub $inp, $inp, #0x10
|
||||
mov $rounds, $out @ save original out pointer
|
||||
mov $out, $fp @ use the iv scratch space as out buffer
|
||||
mov r2, $key
|
||||
vmov @XMM[4],@XMM[15] @ just in case ensure that IV
|
||||
vmov @XMM[5],@XMM[0] @ and input are preserved
|
||||
bl AES_decrypt
|
||||
vld1.8 {@XMM[0]}, [$fp,:64] @ load result
|
||||
veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV
|
||||
vmov @XMM[15], @XMM[5] @ @XMM[5] holds input
|
||||
vst1.8 {@XMM[0]}, [$rounds] @ write output
|
||||
|
||||
.Lcbc_dec_done:
|
||||
vmov.i32 q0, #0
|
||||
vmov.i32 q1, #0
|
||||
.Lcbc_dec_bzero: @ wipe key schedule [if any]
|
||||
vstmia $keysched!, {q0-q1}
|
||||
teq $keysched, $fp
|
||||
bne .Lcbc_dec_bzero
|
||||
|
||||
add sp, $fp, #0x10
|
||||
vst1.8 {@XMM[15]}, [$ivp] @ return IV
|
||||
vldmia sp!, {d8-d15}
|
||||
ldmia sp!, {r4-r10, pc}
|
||||
.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
|
||||
___
|
||||
}
|
||||
{
|
||||
# Emits bsaes_ctr32_encrypt_blocks, a CTR-mode routine with a 32-bit
# counter built on the 8-block _bsaes_encrypt8_alt entry point.
#
# Register roles assigned below:
#   $inp=r0, $out=r1, $len=r2 (counted in 16-byte blocks), $key=r3
#   $ctr=r8     counter pointer from the first stack argument; later reused
#               to hold the address of the .LREVM0SR constant
#   $fp=r9      anchor for a 16-byte scratch area holding the next counter
#   $rounds=r10 round count, read from offset 240 of the key
#   $const=r6   constants pointer handed to _bsaes_encrypt8_alt
#   $keysched   is sp: the bit-sliced key schedule lives on the stack
#
# Flow of the generated code:
#   * Fewer than 8 blocks: .Lctr_enc_short calls AES_encrypt once per block
#     on a stack copy of the counter, keeping the counter's last 32-bit
#     word in r8 (byte-reversed under __ARMEL__), incrementing it and
#     storing it back into the stack copy at sp+0x1c.
#   * Otherwise rounds*128 - 96 bytes of stack are reserved and filled by
#     _bsaes_key_convert; the high halves of the counter and of the round-0
#     key are byte-swapped per 32-bit word (vrev32.8) so vadd.u32 can step
#     the counter.
#   * Each .Lctr_enc_loop pass derives counters +0..+7, saves counter +8 at
#     $fp, and encrypts them; .LREVM0SR is loaded so the borrowed prologue
#     also flips the counter byte order (see the in-code note).
#   * Keystream blocks are consumed in register order 0,1,4,6,3,7,2,5.
#   * A final partial batch (1..7 blocks) is handled in
#     .Lctr_enc_loop_done, which XORs and stores only as many blocks as
#     remain.
#   * On exit the stack key schedule is zeroed up to $fp and callee-saved
#     registers are restored.  No store back through the caller's counter
#     pointer appears in either path.
my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10)));
|
||||
my $const = "r6"; # shared with _bsaes_encrypt8_alt
|
||||
my $keysched = "sp";
|
||||
|
||||
$code.=<<___;
|
||||
.extern AES_encrypt
|
||||
.global bsaes_ctr32_encrypt_blocks
|
||||
.type bsaes_ctr32_encrypt_blocks,%function
|
||||
.align 5
|
||||
bsaes_ctr32_encrypt_blocks:
|
||||
cmp $len, #8 @ use plain AES for
|
||||
blo .Lctr_enc_short @ small sizes
|
||||
|
||||
stmdb sp!, {r4-r10, lr}
|
||||
vstmdb sp!, {d8-d15} @ ABI specification says so
|
||||
ldr $ctr, [sp, #0x60] @ ctr is 1st arg on the stack
|
||||
sub sp, sp, #0x10 @ scratch space to carry over the ctr
|
||||
mov $fp, sp @ save sp
|
||||
|
||||
@ allocate the key schedule on the stack
|
||||
ldr $rounds, [$key, #240] @ get # of rounds
|
||||
sub sp, sp, $rounds, lsl#7 @ 128 bytes per inner round key
|
||||
add sp, sp, #`128-32` @ size of bit-sliced key schedule
|
||||
|
||||
@ populate the key schedule
|
||||
mov r4, $key @ pass key
|
||||
mov r5, $rounds @ pass # of rounds
|
||||
mov r12, $keysched @ pass key schedule
|
||||
bl _bsaes_key_convert
|
||||
veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
|
||||
vstmia r12, {@XMM[7]} @ save last round key
|
||||
|
||||
vld1.8 {@XMM[0]}, [$ctr] @ load counter
|
||||
add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr
|
||||
vldmia $keysched, {@XMM[4]} @ load round0 key
|
||||
|
||||
vmov.i32 `&Dhi("@XMM[8]")`,#1 @ compose 1<<96
|
||||
vmov.i32 `&Dlo("@XMM[8]")`,#0
|
||||
vrev32.8 `&Dhi("@XMM[0]")`,`&Dhi("@XMM[0]")`
|
||||
vshl.u64 `&Dhi("@XMM[8]")`,#32
|
||||
vrev32.8 `&Dhi("@XMM[4]")`,`&Dhi("@XMM[4]")`
|
||||
vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96
|
||||
vstmia $keysched, {@XMM[4]} @ save adjusted round0 key
|
||||
b .Lctr_enc_loop
|
||||
|
||||
.align 4
|
||||
.Lctr_enc_loop:
|
||||
vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96
|
||||
vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1
|
||||
vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2
|
||||
vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3
|
||||
vadd.u32 @XMM[4], @XMM[1], @XMM[10]
|
||||
vadd.u32 @XMM[5], @XMM[2], @XMM[10]
|
||||
vadd.u32 @XMM[6], @XMM[3], @XMM[10]
|
||||
vadd.u32 @XMM[7], @XMM[4], @XMM[10]
|
||||
vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter
|
||||
|
||||
@ Borrow prologue from _bsaes_encrypt8 to use the opportunity
|
||||
@ to flip byte order in 32-bit counter
|
||||
|
||||
vldmia $keysched, {@XMM[9]} @ load round0 key
|
||||
add r4, $keysched, #0x10 @ pass next round key
|
||||
vldmia $ctr, {@XMM[8]} @ .LREVM0SR
|
||||
mov r5, $rounds @ pass rounds
|
||||
vstmia $fp, {@XMM[10]} @ save next counter
|
||||
sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants
|
||||
|
||||
bl _bsaes_encrypt8_alt
|
||||
|
||||
subs $len, $len, #8
|
||||
blo .Lctr_enc_loop_done
|
||||
|
||||
vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input
|
||||
vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
|
||||
veor @XMM[0], @XMM[8]
|
||||
veor @XMM[1], @XMM[9]
|
||||
vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
|
||||
veor @XMM[4], @XMM[10]
|
||||
veor @XMM[6], @XMM[11]
|
||||
vld1.8 {@XMM[14]-@XMM[15]}, [$inp]!
|
||||
veor @XMM[3], @XMM[12]
|
||||
vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
|
||||
veor @XMM[7], @XMM[13]
|
||||
veor @XMM[2], @XMM[14]
|
||||
vst1.8 {@XMM[4]}, [$out]!
|
||||
veor @XMM[5], @XMM[15]
|
||||
vst1.8 {@XMM[6]}, [$out]!
|
||||
vmov.i32 `&Dhi("@XMM[8]")`,#1 @ compose 1<<96
|
||||
vst1.8 {@XMM[3]}, [$out]!
|
||||
vmov.i32 `&Dlo("@XMM[8]")`,#0
|
||||
vst1.8 {@XMM[7]}, [$out]!
|
||||
vshl.u64 `&Dhi("@XMM[8]")`,#32
|
||||
vst1.8 {@XMM[2]}, [$out]!
|
||||
vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96
|
||||
vst1.8 {@XMM[5]}, [$out]!
|
||||
vldmia $fp, {@XMM[0]} @ load counter
|
||||
|
||||
bne .Lctr_enc_loop
|
||||
b .Lctr_enc_done
|
||||
|
||||
.align 4
|
||||
.Lctr_enc_loop_done:
|
||||
add $len, $len, #8
|
||||
vld1.8 {@XMM[8]}, [$inp]! @ load input
|
||||
veor @XMM[0], @XMM[8]
|
||||
vst1.8 {@XMM[0]}, [$out]! @ write output
|
||||
cmp $len, #2
|
||||
blo .Lctr_enc_done
|
||||
vld1.8 {@XMM[9]}, [$inp]!
|
||||
veor @XMM[1], @XMM[9]
|
||||
vst1.8 {@XMM[1]}, [$out]!
|
||||
beq .Lctr_enc_done
|
||||
vld1.8 {@XMM[10]}, [$inp]!
|
||||
veor @XMM[4], @XMM[10]
|
||||
vst1.8 {@XMM[4]}, [$out]!
|
||||
cmp $len, #4
|
||||
blo .Lctr_enc_done
|
||||
vld1.8 {@XMM[11]}, [$inp]!
|
||||
veor @XMM[6], @XMM[11]
|
||||
vst1.8 {@XMM[6]}, [$out]!
|
||||
beq .Lctr_enc_done
|
||||
vld1.8 {@XMM[12]}, [$inp]!
|
||||
veor @XMM[3], @XMM[12]
|
||||
vst1.8 {@XMM[3]}, [$out]!
|
||||
cmp $len, #6
|
||||
blo .Lctr_enc_done
|
||||
vld1.8 {@XMM[13]}, [$inp]!
|
||||
veor @XMM[7], @XMM[13]
|
||||
vst1.8 {@XMM[7]}, [$out]!
|
||||
beq .Lctr_enc_done
|
||||
vld1.8 {@XMM[14]}, [$inp]
|
||||
veor @XMM[2], @XMM[14]
|
||||
vst1.8 {@XMM[2]}, [$out]!
|
||||
|
||||
.Lctr_enc_done:
|
||||
vmov.i32 q0, #0
|
||||
vmov.i32 q1, #0
|
||||
.Lctr_enc_bzero: @ wipe key schedule [if any]
|
||||
vstmia $keysched!, {q0-q1}
|
||||
teq $keysched, $fp
|
||||
bne .Lctr_enc_bzero
|
||||
|
||||
add sp, $fp, #0x10
|
||||
vldmia sp!, {d8-d15}
|
||||
ldmia sp!, {r4-r10, pc} @ return
|
||||
|
||||
.align 4
|
||||
.Lctr_enc_short:
|
||||
ldr ip, [sp] @ ctr pointer is passed on stack
|
||||
stmdb sp!, {r4-r8, lr}
|
||||
|
||||
mov r4, $inp @ copy arguments
|
||||
mov r5, $out
|
||||
mov r6, $len
|
||||
mov r7, $key
|
||||
ldr r8, [ip, #12] @ load counter LSW
|
||||
vld1.8 {@XMM[1]}, [ip] @ load whole counter value
|
||||
#ifdef __ARMEL__
|
||||
rev r8, r8
|
||||
#endif
|
||||
sub sp, sp, #0x10
|
||||
vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value
|
||||
sub sp, sp, #0x10
|
||||
|
||||
.Lctr_enc_short_loop:
|
||||
add r0, sp, #0x10 @ input counter value
|
||||
mov r1, sp @ output on the stack
|
||||
mov r2, r7 @ key
|
||||
|
||||
bl AES_encrypt
|
||||
|
||||
vld1.8 {@XMM[0]}, [r4]! @ load input
|
||||
vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter
|
||||
add r8, r8, #1
|
||||
#ifdef __ARMEL__
|
||||
rev r0, r8
|
||||
str r0, [sp, #0x1c] @ next counter value
|
||||
#else
|
||||
str r8, [sp, #0x1c] @ next counter value
|
||||
#endif
|
||||
veor @XMM[0],@XMM[0],@XMM[1]
|
||||
vst1.8 {@XMM[0]}, [r5]! @ store output
|
||||
subs r6, r6, #1
|
||||
bne .Lctr_enc_short_loop
|
||||
|
||||
add sp, sp, #0x20
|
||||
ldmia sp!, {r4-r8, pc}
|
||||
.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
|
||||
___
|
||||
}
|
||||
# Append a closing "#endif" to the generated assembly text.
# NOTE(review): the conditional it terminates is opened outside this
# excerpt -- presumably an architecture guard around the NEON code; confirm.
$code.=<<___;
|
||||
#endif
|
||||
___
|
||||
|
|
|
@ -892,6 +892,14 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \
|
|||
NULL,NULL,aes_##mode##_ctrl,NULL }; \
|
||||
const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
|
||||
{ return &aes_##keylen##_##mode; }
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * ARM definition of BSAES_CAPABLE.  Only when the build enables both AES_ASM
 * and BSAES_ASM on an ARM compiler, and __ARM_ARCH__ (from arm_arch.h) is at
 * least 7, is BSAES_CAPABLE defined; it then evaluates at run time to the
 * ARMV7_NEON bit of OPENSSL_armcap_P.  Otherwise this block leaves the macro
 * undefined, so code guarded by #ifdef BSAES_CAPABLE is compiled out unless
 * another definition exists elsewhere.
 */
#if defined(AES_ASM) && defined(BSAES_ASM) && (defined(__arm__) || defined(__arm))
|
||||
#include "arm_arch.h"
|
||||
#if __ARM_ARCH__>=7
|
||||
#define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \
|
||||
|
@ -1624,11 +1632,13 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
|
|||
xctx->stream = NULL;
|
||||
#endif
|
||||
/* key_len is two AES keys */
|
||||
#if !(defined(__arm__) || defined(__arm)) /* not yet? */
|
||||
#ifdef BSAES_CAPABLE
|
||||
if (BSAES_CAPABLE)
|
||||
xctx->stream = enc ? bsaes_xts_encrypt : bsaes_xts_decrypt;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#ifdef VPAES_CAPABLE
|
||||
if (VPAES_CAPABLE)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue