ARM assembler pack: add platform run-time detection.
This commit is contained in:
parent
9fe51d5f73
commit
87873f4328
9 changed files with 252 additions and 14 deletions
|
@ -135,7 +135,7 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a
|
|||
my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::::::";
|
||||
my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o:::::::";
|
||||
my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o";
|
||||
my $armv4_asm=":bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void";
|
||||
my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void";
|
||||
my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:32";
|
||||
my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:64";
|
||||
my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o:::::::";
|
||||
|
|
4
TABLE
4
TABLE
|
@ -1032,7 +1032,7 @@ $thread_cflag = -D_REENTRANT
|
|||
$sys_id =
|
||||
$lflags = -ldl
|
||||
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
|
||||
$cpuid_obj =
|
||||
$cpuid_obj = armcap.o armv4cpuid.o
|
||||
$bn_obj = bn_asm.o armv4-mont.o armv4-gf2m.o
|
||||
$des_obj =
|
||||
$aes_obj = aes_cbc.o aes-armv4.o
|
||||
|
@ -3688,7 +3688,7 @@ $thread_cflag = -D_REENTRANT
|
|||
$sys_id =
|
||||
$lflags = -ldl
|
||||
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
|
||||
$cpuid_obj =
|
||||
$cpuid_obj = armcap.o armv4cpuid.o
|
||||
$bn_obj = bn_asm.o armv4-mont.o armv4-gf2m.o
|
||||
$des_obj =
|
||||
$aes_obj = aes_cbc.o aes-armv4.o
|
||||
|
|
1
config
1
config
|
@ -630,6 +630,7 @@ case "$GUESSOS" in
|
|||
options="$options -DB_ENDIAN -mschedule=$CPUSCHEDULE -march=$CPUARCH"
|
||||
OUT="linux-generic32" ;;
|
||||
armv[1-3]*-*-linux2) OUT="linux-generic32" ;;
|
||||
armv[7-9]*-*-linux2) OUT="linux-armv4"; options="$options -march=armv7-a" ;;
|
||||
arm*-*-linux2) OUT="linux-armv4" ;;
|
||||
sh*b-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;;
|
||||
sh*-*-linux2) OUT="linux-generic32"; options="$options -DL_ENDIAN" ;;
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
*/
|
||||
# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
|
||||
defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \
|
||||
defined(__ARM_ARCH_7EM)
|
||||
defined(__ARM_ARCH_7EM__)
|
||||
# define __ARM_ARCH__ 7
|
||||
# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
|
||||
defined(__ARM_ARCH_6K__)|| defined(__ARM_ARCH_6M__) || \
|
||||
|
@ -39,5 +39,12 @@
|
|||
#include <openssl/fipssyms.h>
|
||||
#endif
|
||||
|
||||
#if !__ASSEMBLER__
|
||||
extern unsigned int OPENSSL_armcap_P;
|
||||
|
||||
#define ARMV7_NEON (1<<0)
|
||||
#define ARMV7_TICK (1<<1)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
77
crypto/armcap.c
Normal file
77
crypto/armcap.c
Normal file
|
@ -0,0 +1,77 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <setjmp.h>
|
||||
#include <signal.h>
|
||||
#include <crypto.h>
|
||||
|
||||
#include "arm_arch.h"
|
||||
|
||||
unsigned int OPENSSL_armcap_P;
|
||||
|
||||
static sigset_t all_masked;
|
||||
|
||||
static sigjmp_buf ill_jmp;
|
||||
static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }
|
||||
|
||||
/*
|
||||
* Following subroutines could have been inlined, but it's not all
|
||||
* ARM compilers support inline assembler...
|
||||
*/
|
||||
void _armv7_neon_probe(void);
|
||||
unsigned int _armv7_tick(void);
|
||||
|
||||
unsigned int OPENSSL_rdtsc(void)
|
||||
{
|
||||
if (OPENSSL_armcap_P|ARMV7_TICK)
|
||||
return _armv7_tick();
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
void OPENSSL_cpuid_setup(void)
|
||||
{
|
||||
char *e;
|
||||
struct sigaction ill_oact,ill_act;
|
||||
sigset_t oset;
|
||||
static int trigger=0;
|
||||
|
||||
if (trigger) return;
|
||||
trigger=1;
|
||||
|
||||
if ((e=getenv("OPENSSL_armcap")))
|
||||
{
|
||||
OPENSSL_armcap_P=strtoul(e,NULL,0);
|
||||
return;
|
||||
}
|
||||
|
||||
sigfillset(&all_masked);
|
||||
sigdelset(&all_masked,SIGILL);
|
||||
sigdelset(&all_masked,SIGTRAP);
|
||||
sigdelset(&all_masked,SIGFPE);
|
||||
sigdelset(&all_masked,SIGBUS);
|
||||
sigdelset(&all_masked,SIGSEGV);
|
||||
|
||||
OPENSSL_armcap_P = 0;
|
||||
|
||||
memset(&ill_act,0,sizeof(ill_act));
|
||||
ill_act.sa_handler = ill_handler;
|
||||
ill_act.sa_mask = all_masked;
|
||||
|
||||
sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset);
|
||||
sigaction(SIGILL,&ill_act,&ill_oact);
|
||||
|
||||
if (sigsetjmp(ill_jmp,1) == 0)
|
||||
{
|
||||
_armv7_neon_probe();
|
||||
OPENSSL_armcap_P |= ARMV7_NEON;
|
||||
}
|
||||
if (sigsetjmp(ill_jmp,1) == 0)
|
||||
{
|
||||
_armv7_tick();
|
||||
OPENSSL_armcap_P |= ARMV7_TICK;
|
||||
}
|
||||
|
||||
sigaction (SIGILL,&ill_oact,NULL);
|
||||
sigprocmask(SIG_SETMASK,&oset,NULL);
|
||||
}
|
154
crypto/armv4cpuid.S
Normal file
154
crypto/armv4cpuid.S
Normal file
|
@ -0,0 +1,154 @@
|
|||
#include "arm_arch.h"
|
||||
|
||||
.text
|
||||
.code 32
|
||||
|
||||
.align 5
|
||||
.global _armv7_neon_probe
|
||||
.type _armv7_neon_probe,%function
|
||||
_armv7_neon_probe:
|
||||
.word 0xf26ee1fe @ vorr q15,q15,q15
|
||||
.word 0xe12fff1e @ bx lr
|
||||
.size _armv7_neon_probe,.-_armv7_neon_probe
|
||||
|
||||
.global _armv7_tick
|
||||
.type _armv7_tick,%function
|
||||
_armv7_tick:
|
||||
mrc p15,0,r0,c9,c13,0
|
||||
.word 0xe12fff1e @ bx lr
|
||||
.size _armv7_tick,.-_armv7_tick
|
||||
|
||||
.global OPENSSL_atomic_add
|
||||
.type OPENSSL_atomic_add,%function
|
||||
OPENSSL_atomic_add:
|
||||
#if __ARM_ARCH__>=6
|
||||
.Ladd: ldrex r2,[r0]
|
||||
add r3,r2,r1
|
||||
strex r2,r3,[r0]
|
||||
cmp r2,#0
|
||||
bne .Ladd
|
||||
mov r0,r3
|
||||
.word 0xe12fff1e @ bx lr
|
||||
#else
|
||||
stmdb sp!,{r4-r6,lr}
|
||||
ldr r2,.Lspinlock
|
||||
adr r3,.Lspinlock
|
||||
mov r4,r0
|
||||
mov r5,r1
|
||||
add r6,r3,r2 @ &spinlock
|
||||
b .+8
|
||||
.Lspin: bl sched_yield
|
||||
mov r0,#-1
|
||||
swp r0,r0,[r6]
|
||||
cmp r0,#0
|
||||
bne .Lspin
|
||||
|
||||
ldr r2,[r4]
|
||||
add r2,r5
|
||||
str r2,[r4]
|
||||
str r0,[r6] @ release spinlock
|
||||
ldmia sp!,{r4-r6,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr
|
||||
.word 0xe12fff1e @ bx lr
|
||||
#endif
|
||||
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
|
||||
|
||||
.global OPENSSL_cleanse
|
||||
.type OPENSSL_cleanse,%function
|
||||
OPENSSL_cleanse:
|
||||
eor ip,ip,ip
|
||||
cmp r1,#7
|
||||
subhs r1,#4
|
||||
bhs .Lot
|
||||
cmp r1,#0
|
||||
beq .Lcleanse_done
|
||||
.Little:
|
||||
strb ip,[r0],#1
|
||||
subs r1,#1
|
||||
bhi .Little
|
||||
b .Lcleanse_done
|
||||
|
||||
.Lot: tst r0,#3
|
||||
beq .Laligned
|
||||
strb ip,[r0],#1
|
||||
sub r1,#1
|
||||
b .Lot
|
||||
.Laligned:
|
||||
str ip,[r0],#4
|
||||
subs r1,#4
|
||||
bhs .Laligned
|
||||
adds r1,#4
|
||||
bne .Little
|
||||
.Lcleanse_done:
|
||||
tst lr,#1
|
||||
moveq pc,lr
|
||||
.word 0xe12fff1e @ bx lr
|
||||
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
||||
|
||||
.global OPENSSL_wipe_cpu
|
||||
.type OPENSSL_wipe_cpu,%function
|
||||
OPENSSL_wipe_cpu:
|
||||
ldr r0,.LOPENSSL_armcap
|
||||
adr r1,.LOPENSSL_armcap
|
||||
ldr r0,[r1,r0]
|
||||
eor r2,r2,r2
|
||||
eor r3,r3,r3
|
||||
eor ip,ip,ip
|
||||
tst r0,#1
|
||||
beq .Lwipe_done
|
||||
.word 0xf3000150 @ veor q0, q0, q0
|
||||
.word 0xf3022152 @ veor q1, q1, q1
|
||||
.word 0xf3044154 @ veor q2, q2, q2
|
||||
.word 0xf3066156 @ veor q3, q3, q3
|
||||
.word 0xf34001f0 @ veor q8, q8, q8
|
||||
.word 0xf34221f2 @ veor q9, q9, q9
|
||||
.word 0xf34441f4 @ veor q10, q10, q10
|
||||
.word 0xf34661f6 @ veor q11, q11, q11
|
||||
.word 0xf34881f8 @ veor q12, q12, q12
|
||||
.word 0xf34aa1fa @ veor q13, q13, q13
|
||||
.word 0xf34cc1fc @ veor q14, q14, q14
|
||||
.word 0xf34ee1fe @ veor q15, q15, q15
|
||||
.Lwipe_done:
|
||||
mov r0,sp
|
||||
tst lr,#1
|
||||
moveq pc,lr
|
||||
.word 0xe12fff1e @ bx lr
|
||||
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
||||
|
||||
.global OPENSSL_instrument_bus
|
||||
.type OPENSSL_instrument_bus,%function
|
||||
OPENSSL_instrument_bus:
|
||||
eor r0,r0,r0
|
||||
tst lr,#1
|
||||
moveq pc,lr
|
||||
.word 0xe12fff1e @ bx lr
|
||||
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
|
||||
|
||||
.global OPENSSL_instrument_bus2
|
||||
.type OPENSSL_instrument_bus2,%function
|
||||
OPENSSL_instrument_bus2:
|
||||
eor r0,r0,r0
|
||||
tst lr,#1
|
||||
moveq pc,lr
|
||||
.word 0xe12fff1e @ bx lr
|
||||
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
|
||||
|
||||
.align 5
|
||||
.LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-.LOPENSSL_armcap
|
||||
#if __ARM_ARCH__>=6
|
||||
.align 5
|
||||
#else
|
||||
.Lspinlock:
|
||||
.word atomic_add_spinlock-.Lspinlock
|
||||
.align 5
|
||||
|
||||
.data
|
||||
.align 2
|
||||
atomic_add_spinlock:
|
||||
.word 0
|
||||
#endif
|
||||
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
.hidden OPENSSL_armcap_P
|
|
@ -264,12 +264,12 @@ $code.=<<___;
|
|||
#if __ARM_ARCH__>=7
|
||||
.align 5
|
||||
.LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap-(.Lpic+8)
|
||||
.word OPENSSL_armcap_P-(.Lpic+8)
|
||||
#endif
|
||||
.asciz "GF2m Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 5
|
||||
|
||||
.comm OPENSSL_armcap,4,4
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
___
|
||||
|
||||
$code =~ s/\`([^\`]*)\`/eval $1/gem;
|
||||
|
|
|
@ -668,8 +668,6 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
|
|||
# if __ARM_ARCH__>=7
|
||||
# define GHASH_ASM_ARM
|
||||
# define GCM_FUNCREF_4BIT
|
||||
extern unsigned int OPENSSL_armcap;
|
||||
|
||||
void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
|
||||
void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
|
||||
# endif
|
||||
|
@ -715,7 +713,8 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
|
|||
#elif TABLE_BITS==4
|
||||
# if defined(GHASH_ASM_X86_OR_64)
|
||||
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
|
||||
if (OPENSSL_ia32cap_P[1]&(1<<1)) { /* check PCLMULQDQ bit */
|
||||
if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
|
||||
OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
|
||||
gcm_init_clmul(ctx->Htable,ctx->H.u);
|
||||
ctx->gmult = gcm_gmult_clmul;
|
||||
ctx->ghash = gcm_ghash_clmul;
|
||||
|
@ -736,7 +735,7 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
|
|||
ctx->ghash = gcm_ghash_4bit;
|
||||
# endif
|
||||
# elif defined(GHASH_ASM_ARM)
|
||||
if (OPENSSL_armcap & 1) {
|
||||
if (OPENSSL_armcap_P & ARMV7_NEON) {
|
||||
ctx->gmult = gcm_gmult_neon;
|
||||
ctx->ghash = gcm_ghash_neon;
|
||||
} else {
|
||||
|
|
|
@ -221,7 +221,7 @@ WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
|
|||
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
|
||||
.size K512,.-K512
|
||||
.LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap-sha512_block_data_order
|
||||
.word OPENSSL_armcap_P-sha512_block_data_order
|
||||
.skip 32-4
|
||||
|
||||
.global sha512_block_data_order
|
||||
|
@ -231,7 +231,7 @@ sha512_block_data_order:
|
|||
add $len,$inp,$len,lsl#7 @ len to point at the end of inp
|
||||
#if __ARM_ARCH__>=7
|
||||
ldr r12,.LOPENSSL_armcap
|
||||
ldr r12,[r3,r12] @ OPENSSL_armcap
|
||||
ldr r12,[r3,r12] @ OPENSSL_armcap_P
|
||||
tst r12,#1
|
||||
bne .LNEON
|
||||
#endif
|
||||
|
@ -573,7 +573,7 @@ $code.=<<___;
|
|||
.size sha512_block_data_order,.-sha512_block_data_order
|
||||
.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 2
|
||||
.comm OPENSSL_armcap,4,4
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
___
|
||||
|
||||
$code =~ s/\`([^\`]*)\`/eval $1/gem;
|
||||
|
|
Loading…
Reference in a new issue