ARM assembly pack: get ARMv7 instruction endianness right.

Pointer out and suggested by: Ard Biesheuvel.
This commit is contained in:
Andy Polyakov 2014-06-06 21:27:18 +02:00
parent cd91fd7c32
commit 5dcf70a1c5
8 changed files with 89 additions and 47 deletions

View file

@ -715,8 +715,8 @@ _armv4_AES_set_encrypt_key:
.Ldone: mov r0,#0
ldmia sp!,{r4-r12,lr}
.Labrt:
#if defined(__thumb2__) && __ARM_ARCH__>=7
.short 0x4770 @ bx lr in Thumb2 encoding
#if __ARM_ARCH__>=5
ret @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
@ -1203,6 +1203,7 @@ _armv4_AES_decrypt:
___
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
$code =~ s/\bret\b/bx\tlr/gm;
open SELF,$0;
while(<SELF>) {

View file

@ -7,42 +7,46 @@
.global _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
.word 0xf26ee1fe @ vorr q15,q15,q15
.word 0xe12fff1e @ bx lr
.byte 0xf0,0x01,0x60,0xf2 @ vorr q8,q8,q8
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv7_neon_probe,.-_armv7_neon_probe
.global _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
mrrc p15,1,r0,r1,c14 @ CNTVCT
#if __ARM_ARCH__>=5
bx lr
#else
.word 0xe12fff1e @ bx lr
#endif
.size _armv7_tick,.-_armv7_tick
.global _armv8_aes_probe
.type _armv8_aes_probe,%function
_armv8_aes_probe:
.word 0xf3b00300 @ aese.8 q0,q0
.word 0xe12fff1e @ bx lr
.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_aes_probe,.-_armv8_aes_probe
.global _armv8_sha1_probe
.type _armv8_sha1_probe,%function
_armv8_sha1_probe:
.word 0xf2000c40 @ sha1c.32 q0,q0,q0
.word 0xe12fff1e @ bx lr
.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_sha1_probe,.-_armv8_sha1_probe
.global _armv8_sha256_probe
.type _armv8_sha256_probe,%function
_armv8_sha256_probe:
.word 0xf3000c40 @ sha256h.32 q0,q0,q0
.word 0xe12fff1e @ bx lr
.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_sha256_probe,.-_armv8_sha256_probe
.global _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
.word 0xf2a00e00 @ vmull.p64 q0,d0,d0
.word 0xe12fff1e @ bx lr
.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_pmull_probe,.-_armv8_pmull_probe
.align 5
@ -56,7 +60,7 @@ OPENSSL_atomic_add:
cmp r2,#0
bne .Ladd
mov r0,r3
.word 0xe12fff1e @ bx lr
bx lr
#else
stmdb sp!,{r4-r6,lr}
ldr r2,.Lspinlock
@ -109,9 +113,13 @@ OPENSSL_cleanse:
adds r1,r1,#4
bne .Little
.Lcleanse_done:
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.global OPENSSL_wipe_cpu
@ -125,41 +133,53 @@ OPENSSL_wipe_cpu:
eor ip,ip,ip
tst r0,#1
beq .Lwipe_done
.word 0xf3000150 @ veor q0, q0, q0
.word 0xf3022152 @ veor q1, q1, q1
.word 0xf3044154 @ veor q2, q2, q2
.word 0xf3066156 @ veor q3, q3, q3
.word 0xf34001f0 @ veor q8, q8, q8
.word 0xf34221f2 @ veor q9, q9, q9
.word 0xf34441f4 @ veor q10, q10, q10
.word 0xf34661f6 @ veor q11, q11, q11
.word 0xf34881f8 @ veor q12, q12, q12
.word 0xf34aa1fa @ veor q13, q13, q13
.word 0xf34cc1fc @ veor q14, q14, q14
.word 0xf34ee1fe @ veor q15, q15, q15
.byte 0x50,0x01,0x00,0xf3 @ veor q0, q0, q0
.byte 0x52,0x21,0x02,0xf3 @ veor q1, q1, q1
.byte 0x54,0x41,0x04,0xf3 @ veor q2, q2, q2
.byte 0x56,0x61,0x06,0xf3 @ veor q3, q3, q3
.byte 0xf0,0x01,0x40,0xf3 @ veor q8, q8, q8
.byte 0xf2,0x21,0x42,0xf3 @ veor q9, q9, q9
.byte 0xf4,0x41,0x44,0xf3 @ veor q10, q10, q10
.byte 0xf6,0x61,0x46,0xf3 @ veor q11, q11, q11
.byte 0xf8,0x81,0x48,0xf3 @ veor q12, q12, q12
.byte 0xfa,0xa1,0x4a,0xf3 @ veor q13, q13, q13
.byte 0xfc,0xc1,0x4c,0xf3 @ veor q14, q14, q14
.byte 0xfe,0xe1,0x4e,0xf3 @ veor q14, q14, q14
.Lwipe_done:
mov r0,sp
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
.global OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,%function
OPENSSL_instrument_bus:
eor r0,r0,r0
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
.global OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,%function
OPENSSL_instrument_bus2:
eor r0,r0,r0
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.align 5

View file

@ -202,7 +202,7 @@ bn_GF2m_mul_2x2:
veor $r, $r, $t2
vst1.32 {$r}, [r0]
bx lr
ret @ bx lr
.align 4
.Lialu:
#endif
@ -273,6 +273,7 @@ foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/geo;
s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
s/\bret\b/bx lr/go or
s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
print $_,"\n";

View file

@ -230,9 +230,14 @@ bn_mul_mont:
ldmia sp!,{r4-r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
.Labrt: tst lr,#1
.Labrt:
#if __ARM_ARCH__>=5
ret @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
.size bn_mul_mont,.-bn_mul_mont
___
{
@ -650,7 +655,7 @@ bn_mul8x_mont_neon:
sub sp,ip,#96
vldmia sp!,{d8-d15}
ldmia sp!,{r4-r11}
bx lr
ret @ bx lr
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
___
@ -665,5 +670,6 @@ ___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
$code =~ s/\bret\b/bx lr/gm;
print $code;
close STDOUT;

View file

@ -386,7 +386,7 @@ gcm_init_neon:
veor $IN,$IN,$t0 @ twisted H
vstmia r0,{$IN}
bx lr
ret @ bx lr
.size gcm_init_neon,.-gcm_init_neon
.global gcm_gmult_neon
@ -470,7 +470,7 @@ $code.=<<___;
vst1.64 $Xl#hi,[$Xi,:64]! @ write out Xi
vst1.64 $Xl#lo,[$Xi,:64]
bx lr
ret @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
#endif
___
@ -484,6 +484,7 @@ foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/geo;
s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
s/\bret\b/bx lr/go or
s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
print $_,"\n";

View file

@ -631,7 +631,7 @@ $code.=<<___;
vst1.32 {$E\[0]},[$ctx]
vldmia sp!,{d8-d15}
bx lr
ret @ bx lr
.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
#endif
___
@ -648,14 +648,19 @@ ___
sub unsha1 {
my ($mnemonic,$arg)=@_;
$arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o
&&
sprintf ".long\t0x%08x\t@ %s %s",
$opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
|(($2&7)<<17)|(($2&8)<<4)
|(($3&7)<<1) |(($3&8)<<2),
|(($3&7)<<1) |(($3&8)<<2);
# since ARMv7 instructions are always encoded little-endian.
# correct solution is to use .inst directive, but older
# assemblers don't implement it:-(
sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
$word&0xff,($word>>8)&0xff,
($word>>16)&0xff,($word>>24)&0xff,
$mnemonic,$arg;
}
}
}
foreach (split($/,$code)) {
@ -664,6 +669,7 @@ foreach (split($/,$code)) {
s/\b(sha1\w+)\s+(q.*)/unsha1($1,$2)/geo;
s/\bret\b/bx lr/o or
s/\bbx\s+lr\b/.word\t0xe12fff1e/o; # make it possible to compile with -march=armv4
print $_,$/;

View file

@ -608,7 +608,7 @@ $code.=<<___;
vst1.32 {$ABCD,$EFGH},[$ctx]
bx lr
ret @ bx lr
.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
___
@ -626,14 +626,19 @@ ___
sub unsha256 {
my ($mnemonic,$arg)=@_;
$arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o
&&
sprintf ".long\t0x%08x\t@ %s %s",
$opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
|(($2&7)<<17)|(($2&8)<<4)
|(($3&7)<<1) |(($3&8)<<2),
|(($3&7)<<1) |(($3&8)<<2);
# since ARMv7 instructions are always encoded little-endian.
# correct solution is to use .inst directive, but older
# assemblers don't implement it:-(
sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
$word&0xff,($word>>8)&0xff,
($word>>16)&0xff,($word>>24)&0xff,
$mnemonic,$arg;
}
}
}
foreach (split($/,$code)) {
@ -642,6 +647,7 @@ foreach (split($/,$code)) {
s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
s/\bret\b/bx lr/go or
s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
print $_,"\n";

View file

@ -584,7 +584,7 @@ $code.=<<___;
bne .Loop_neon
vldmia sp!,{d8-d15} @ epilogue
bx lr
ret @ bx lr
#endif
___
}
@ -597,5 +597,6 @@ ___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
$code =~ s/\bret\b/bx lr/gm;
print $code;
close STDOUT; # enforce flush