1e10aee2a7
PR: 3474
Reviewed-by: Kurt Roeckx <kurt@openssl.org>
(cherry picked from commit 6696203963
)
219 lines
5.2 KiB
ArmAsm
219 lines
5.2 KiB
ArmAsm
#include "arm_arch.h"
|
|
|
|
@ Everything below is emitted into the code section.
.text
|
|
@ Assemble the following instructions as 32-bit ARM rather than Thumb.
.code 32
|
|
|
|
@ Special note about using .byte directives to encode instructions.
|
|
@ Initial reason for hand-coding instructions was to allow module to
|
|
@ be compilable by legacy tool-chains. At later point it was pointed
|
|
@ out that since ARMv7, instructions are always encoded in little-endian
|
|
@ order, therefore one has to opt for endian-neutral presentation.
|
|
@ Contemporary tool-chains offer .inst directive for this purpose,
|
|
@ but not legacy ones. Therefore .byte. But there is an exception,
|
|
@ namely ARMv7-R profile still allows for big-endian encoding even for
|
|
@ instructions. This raises the question what if probe instructions
|
|
@ appear executable to such processor operating in big-endian order?
|
|
@ They have to be chosen in a way that avoids this problem. As failed
|
|
@ NEON probe disables a number of other probes we have to ensure that
|
|
@ only NEON probe instruction doesn't appear executable in big-endian
|
|
@ order, therefore 'vorr q8,q8,q8', and not some other register. The
|
|
@ only probe that is not bypassed on failed NEON probe is _armv7_tick,
|
|
@ where you'll spot 'mov r0,r6' that serves this purpose. Basic idea is
|
|
@ that if fetched in alternative byte order instruction should crash to
|
|
@ denote lack of probed capability...
|
|
|
|
@ _armv7_neon_probe: executes one NEON instruction and returns.
@ Reads no argument registers and sets no return value.
@ The instruction is spelled as raw bytes in little-endian order; q8 is
@ used because, per the note on .byte directives earlier in this file,
@ the same bytes must not look executable when fetched big-endian.
@ NOTE(review): presumably the caller traps the illegal-instruction
@ fault raised when NEON is absent - confirm against the C caller.
.align 5
|
|
.global _armv7_neon_probe
|
|
.type _armv7_neon_probe,%function
|
|
_armv7_neon_probe:
|
|
.byte 0xf0,0x01,0x60,0xf2 @ vorr q8,q8,q8
|
|
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
|
|
.size _armv7_neon_probe,.-_armv7_neon_probe
|
|
|
|
@ _armv7_tick: reads the 64-bit CNTVCT counter with mrrc, leaving the
@ two halves in r0 and r1, then returns.
@ The leading mov r0,r6 has no effect on the result (r0 is overwritten
@ by mrrc); per the note on .byte directives earlier in this file it is
@ there so that the byte sequence misbehaves if fetched big-endian.
.global _armv7_tick
|
|
.type _armv7_tick,%function
|
|
_armv7_tick:
|
|
.byte 0x06,0x00,0xa0,0xe1 @ mov r0,r6
|
|
.byte 0x1e,0x0f,0x51,0xec @ mrrc p15,1,r0,r1,c14 @ CNTVCT
|
|
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
|
|
@ Not reached by fall-through: it follows the unconditional return.
@ NOTE(review): looks like padding - confirm it is not needed otherwise.
nop
|
|
.size _armv7_tick,.-_armv7_tick
|
|
|
|
@ _armv8_aes_probe: executes one AES instruction (aese.8 q0,q0), emitted
@ as raw little-endian bytes, and returns. Reads no argument registers
@ and sets no return value; q0 is modified by the probe instruction.
@ NOTE(review): presumably the caller traps the illegal-instruction
@ fault when the AES extension is absent - confirm against the C caller.
.global _armv8_aes_probe
|
|
.type _armv8_aes_probe,%function
|
|
_armv8_aes_probe:
|
|
.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
|
|
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
|
|
.size _armv8_aes_probe,.-_armv8_aes_probe
|
|
|
|
@ _armv8_sha1_probe: executes one SHA-1 instruction (sha1c.32 q0,q0,q0),
@ emitted as raw little-endian bytes, and returns. Reads no argument
@ registers and sets no return value; q0 is modified by the probe.
@ NOTE(review): presumably the caller traps the illegal-instruction
@ fault when the SHA-1 extension is absent - confirm against the caller.
.global _armv8_sha1_probe
|
|
.type _armv8_sha1_probe,%function
|
|
_armv8_sha1_probe:
|
|
.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
|
|
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
|
|
.size _armv8_sha1_probe,.-_armv8_sha1_probe
|
|
|
|
@ _armv8_sha256_probe: executes one SHA-256 instruction
@ (sha256h.32 q0,q0,q0), emitted as raw little-endian bytes, and returns.
@ Reads no argument registers and sets no return value; q0 is modified.
@ NOTE(review): presumably the caller traps the illegal-instruction
@ fault when the SHA-256 extension is absent - confirm against the caller.
.global _armv8_sha256_probe
|
|
.type _armv8_sha256_probe,%function
|
|
_armv8_sha256_probe:
|
|
.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
|
|
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
|
|
.size _armv8_sha256_probe,.-_armv8_sha256_probe
|
|
@ _armv8_pmull_probe: executes one polynomial multiply instruction
@ (vmull.p64 q0,d0,d0), emitted as raw little-endian bytes, and returns.
@ Reads no argument registers and sets no return value; q0 is modified.
@ NOTE(review): presumably the caller traps the illegal-instruction
@ fault when the instruction is unsupported - confirm against the caller.
.global _armv8_pmull_probe
|
|
.type _armv8_pmull_probe,%function
|
|
_armv8_pmull_probe:
|
|
.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
|
|
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
|
|
.size _armv8_pmull_probe,.-_armv8_pmull_probe
|
|
|
|
.align	5
.global	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,%function
@ OPENSSL_atomic_add: atomically add r1 to the 32-bit word at [r0].
@ In:   r0 = address of the word, r1 = addend
@ Out:  r0 = the value written back (old value + addend), on both paths
@ ARMv6 and later: ldrex/strex retry loop; clobbers r2, r3 and flags.
@ Earlier cores:   the update is serialised through atomic_add_spinlock,
@                  taken with swp; sched_yield is called whenever the
@                  lock is found busy, so r4-r6 and lr are saved on the
@                  stack and the inputs are kept in r4/r5 across the call.
OPENSSL_atomic_add:
#if __ARM_ARCH__>=6
.Ladd:	ldrex	r2,[r0]
	add	r3,r2,r1
	strex	r2,r3,[r0]	@ r2 = 0 if the store succeeded
	cmp	r2,#0
	bne	.Ladd		@ reservation lost, start over
	mov	r0,r3		@ return the new value
	bx	lr
#else
	stmdb	sp!,{r4-r6,lr}
	ldr	r2,.Lspinlock	@ offset from .Lspinlock to the lock word
	adr	r3,.Lspinlock
	mov	r4,r0		@ r4 = target address (survives sched_yield)
	mov	r5,r1		@ r5 = addend
	add	r6,r3,r2	@ &spinlock
	b	.Lspin_try	@ first attempt goes straight to the swap
.Lspin:	bl	sched_yield
.Lspin_try:
	mov	r0,#-1
	swp	r0,r0,[r6]	@ r0 = previous lock value, lock := -1
	cmp	r0,#0
	bne	.Lspin		@ lock was already held, yield and retry

	ldr	r2,[r4]
	add	r2,r2,r5
	str	r2,[r4]
	str	r0,[r6]		@ release spinlock (r0 is 0 here)
	mov	r0,r2		@ return the new value, as the ARMv6+ path does;
				@ without this the function returned 0
	ldmia	sp!,{r4-r6,lr}
	tst	lr,#1		@ bit 0 of lr set: return address is Thumb
	moveq	pc,lr
	.word	0xe12fff1e	@ bx	lr
#endif
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
|
|
|
|
.global	OPENSSL_cleanse
.type	OPENSSL_cleanse,%function
@ OPENSSL_cleanse: overwrite a memory region with zero bytes.
@ In:       r0 = start address, r1 = length in bytes
@ On exit:  r0 points just past the region, r1 = 0
@ Clobbers: ip, flags
@ Regions shorter than 7 bytes are cleared one byte at a time. Longer
@ regions are cleared bytewise up to a 4-byte boundary, then by whole
@ words, then bytewise for the 0..3 bytes that remain.
OPENSSL_cleanse:
	mov	ip,#0			@ ip = zero used by every store
	cmp	r1,#7
	blo	.Lcleanse_short
	sub	r1,r1,#4		@ bias count by one word for the word loop
.Lcleanse_align:
	tst	r0,#3
	beq	.Lcleanse_words		@ address is word aligned
	strb	ip,[r0],#1
	sub	r1,r1,#1
	b	.Lcleanse_align
.Lcleanse_words:
	str	ip,[r0],#4
	subs	r1,r1,#4
	bhs	.Lcleanse_words		@ loop until the biased count borrows
	adds	r1,r1,#4		@ undo the bias: r1 = 0..3 bytes left
	beq	.Lcleanse_done
.Lcleanse_bytes:
	strb	ip,[r0],#1
	subs	r1,r1,#1
	bhi	.Lcleanse_bytes
	b	.Lcleanse_done
.Lcleanse_short:
	cmp	r1,#0
	bne	.Lcleanse_bytes		@ 1..6 bytes: byte loop only
.Lcleanse_done:
#if __ARM_ARCH__<5
	tst	lr,#1			@ bit 0 of lr set: return address is Thumb
	moveq	pc,lr
	.word	0xe12fff1e		@ bx	lr
#else
	bx	lr
#endif
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
|
|
|
|
@ OPENSSL_wipe_cpu: zeroes scratch registers and returns the current
@ stack pointer in r0.
@ Always clears r2, r3 and ip. If bit 0 of OPENSSL_armcap_P is set it
@ also clears NEON registers q0-q3 and q8-q15 using hand-encoded veor.
@ NOTE(review): bit 0 is presumably the NEON capability flag defined in
@ arm_arch.h - confirm there.
.global OPENSSL_wipe_cpu
|
|
.type OPENSSL_wipe_cpu,%function
|
|
OPENSSL_wipe_cpu:
|
|
@ r0 = OPENSSL_armcap_P, located through the PC-relative offset stored
@ at .LOPENSSL_armcap.
ldr r0,.LOPENSSL_armcap
|
|
adr r1,.LOPENSSL_armcap
|
|
ldr r0,[r1,r0]
|
|
eor r2,r2,r2
|
|
eor r3,r3,r3
|
|
eor ip,ip,ip
|
|
@ Skip the NEON part when capability bit 0 is clear.
tst r0,#1
|
|
beq .Lwipe_done
|
|
.byte 0x50,0x01,0x00,0xf3 @ veor q0, q0, q0
|
|
.byte 0x52,0x21,0x02,0xf3 @ veor q1, q1, q1
|
|
.byte 0x54,0x41,0x04,0xf3 @ veor q2, q2, q2
|
|
.byte 0x56,0x61,0x06,0xf3 @ veor q3, q3, q3
|
|
.byte 0xf0,0x01,0x40,0xf3 @ veor q8, q8, q8
|
|
.byte 0xf2,0x21,0x42,0xf3 @ veor q9, q9, q9
|
|
.byte 0xf4,0x41,0x44,0xf3 @ veor q10, q10, q10
|
|
.byte 0xf6,0x61,0x46,0xf3 @ veor q11, q11, q11
|
|
.byte 0xf8,0x81,0x48,0xf3 @ veor q12, q12, q12
|
|
.byte 0xfa,0xa1,0x4a,0xf3 @ veor q13, q13, q13
|
|
.byte 0xfc,0xc1,0x4c,0xf3 @ veor q14, q14, q14
|
|
.byte 0xfe,0xe1,0x4e,0xf3 @ veor q15, q15, q15
|
|
.Lwipe_done:
|
|
@ Return value: the stack pointer at the time of the call.
mov r0,sp
|
|
#if __ARM_ARCH__>=5
|
|
bx lr
|
|
#else
|
|
@ bit 0 of lr set means the return address is Thumb; moveq returns to ARM
@ callers, otherwise fall through to the hand-encoded bx lr.
tst lr,#1
|
|
moveq pc,lr
|
|
.word 0xe12fff1e @ bx lr
|
|
#endif
|
|
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
|
|
|
.global	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,%function
@ OPENSSL_instrument_bus: stub. Reads no argument registers and
@ returns 0 in r0.
OPENSSL_instrument_bus:
	mov	r0,#0			@ return value = 0
#if __ARM_ARCH__<5
	tst	lr,#1			@ bit 0 of lr set: return address is Thumb
	moveq	pc,lr
	.word	0xe12fff1e		@ bx	lr
#else
	bx	lr
#endif
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
|
|
|
|
.global	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,%function
@ OPENSSL_instrument_bus2: stub. Reads no argument registers and
@ returns 0 in r0.
OPENSSL_instrument_bus2:
	mov	r0,#0			@ return value = 0
#if __ARM_ARCH__<5
	tst	lr,#1			@ bit 0 of lr set: return address is Thumb
	moveq	pc,lr
	.word	0xe12fff1e		@ bx	lr
#else
	bx	lr
#endif
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
|
|
|
|
@ Literal area kept in the code section. Each entry holds the distance
@ from the entry itself to a data symbol, so code can form the address
@ by adding the entry value to the entry address (as OPENSSL_wipe_cpu
@ and OPENSSL_atomic_add do) without an absolute relocation.
.align 5
|
|
.LOPENSSL_armcap:
|
|
.word OPENSSL_armcap_P-.LOPENSSL_armcap
|
|
#if __ARM_ARCH__>=6
|
|
.align 5
|
|
#else
|
|
@ Only needed by the pre-ARMv6 path of OPENSSL_atomic_add.
.Lspinlock:
|
|
.word atomic_add_spinlock-.Lspinlock
|
|
.align 5
|
|
|
|
@ Lock word for the pre-ARMv6 OPENSSL_atomic_add: 0 means free, and the
@ swp in that routine stores -1 to take it.
.data
|
|
.align 2
|
|
atomic_add_spinlock:
|
|
.word 0
|
|
#endif
|
|
|
|
@ 4-byte, 4-byte-aligned common symbol holding the capability word read
@ by OPENSSL_wipe_cpu; marked hidden so it is not exported from the
@ linked object.
.comm OPENSSL_armcap_P,4,4
|
|
.hidden OPENSSL_armcap_P
|