ARM assembler pack: add platform run-time detection.

This commit is contained in:
Andy Polyakov 2011-07-17 17:40:29 +00:00
parent 9fe51d5f73
commit 87873f4328
9 changed files with 252 additions and 14 deletions

View file

@ -135,7 +135,7 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a
my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::::::";
my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o:::::::";
my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o";
my $armv4_asm=":bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void";
my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void";
my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:32";
my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:64";
my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o:::::::";

4
TABLE
View file

@ -1032,7 +1032,7 @@ $thread_cflag = -D_REENTRANT
$sys_id =
$lflags = -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
$cpuid_obj =
$cpuid_obj = armcap.o armv4cpuid.o
$bn_obj = bn_asm.o armv4-mont.o armv4-gf2m.o
$des_obj =
$aes_obj = aes_cbc.o aes-armv4.o
@ -3688,7 +3688,7 @@ $thread_cflag = -D_REENTRANT
$sys_id =
$lflags = -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
$cpuid_obj =
$cpuid_obj = armcap.o armv4cpuid.o
$bn_obj = bn_asm.o armv4-mont.o armv4-gf2m.o
$des_obj =
$aes_obj = aes_cbc.o aes-armv4.o

1
config
View file

@ -630,6 +630,7 @@ case "$GUESSOS" in
options="$options -DB_ENDIAN -mschedule=$CPUSCHEDULE -march=$CPUARCH"
OUT="linux-generic32" ;;
armv[1-3]*-*-linux2) OUT="linux-generic32" ;;
armv[7-9]*-*-linux2) OUT="linux-armv4"; options="$options -march=armv7-a" ;;
arm*-*-linux2) OUT="linux-armv4" ;;
sh*b-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;;
sh*-*-linux2) OUT="linux-generic32"; options="$options -DL_ENDIAN" ;;

View file

@ -18,7 +18,7 @@
*/
# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \
defined(__ARM_ARCH_7EM)
defined(__ARM_ARCH_7EM__)
# define __ARM_ARCH__ 7
# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
defined(__ARM_ARCH_6K__)|| defined(__ARM_ARCH_6M__) || \
@ -39,5 +39,12 @@
#include <openssl/fipssyms.h>
#endif
#if !__ASSEMBLER__
extern unsigned int OPENSSL_armcap_P;
#define ARMV7_NEON (1<<0)
#define ARMV7_TICK (1<<1)
#endif
#endif
#endif

77
crypto/armcap.c Normal file
View file

@ -0,0 +1,77 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <setjmp.h>
#include <signal.h>
#include <crypto.h>
#include "arm_arch.h"
unsigned int OPENSSL_armcap_P;
static sigset_t all_masked;
static sigjmp_buf ill_jmp;
static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }
/*
* Following subroutines could have been inlined, but it's not all
* ARM compilers support inline assembler...
*/
void _armv7_neon_probe(void);
unsigned int _armv7_tick(void);
unsigned int OPENSSL_rdtsc(void)
{
if (OPENSSL_armcap_P|ARMV7_TICK)
return _armv7_tick();
else
return 0;
}
void OPENSSL_cpuid_setup(void)
{
char *e;
struct sigaction ill_oact,ill_act;
sigset_t oset;
static int trigger=0;
if (trigger) return;
trigger=1;
if ((e=getenv("OPENSSL_armcap")))
{
OPENSSL_armcap_P=strtoul(e,NULL,0);
return;
}
sigfillset(&all_masked);
sigdelset(&all_masked,SIGILL);
sigdelset(&all_masked,SIGTRAP);
sigdelset(&all_masked,SIGFPE);
sigdelset(&all_masked,SIGBUS);
sigdelset(&all_masked,SIGSEGV);
OPENSSL_armcap_P = 0;
memset(&ill_act,0,sizeof(ill_act));
ill_act.sa_handler = ill_handler;
ill_act.sa_mask = all_masked;
sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset);
sigaction(SIGILL,&ill_act,&ill_oact);
if (sigsetjmp(ill_jmp,1) == 0)
{
_armv7_neon_probe();
OPENSSL_armcap_P |= ARMV7_NEON;
}
if (sigsetjmp(ill_jmp,1) == 0)
{
_armv7_tick();
OPENSSL_armcap_P |= ARMV7_TICK;
}
sigaction (SIGILL,&ill_oact,NULL);
sigprocmask(SIG_SETMASK,&oset,NULL);
}

154
crypto/armv4cpuid.S Normal file
View file

@ -0,0 +1,154 @@
#include "arm_arch.h"
.text
.code 32
.align 5
.global _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
.word 0xf26ee1fe @ vorr q15,q15,q15
.word 0xe12fff1e @ bx lr
.size _armv7_neon_probe,.-_armv7_neon_probe
.global _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
mrc p15,0,r0,c9,c13,0
.word 0xe12fff1e @ bx lr
.size _armv7_tick,.-_armv7_tick
.global OPENSSL_atomic_add
.type OPENSSL_atomic_add,%function
OPENSSL_atomic_add:
#if __ARM_ARCH__>=6
.Ladd: ldrex r2,[r0]
add r3,r2,r1
strex r2,r3,[r0]
cmp r2,#0
bne .Ladd
mov r0,r3
.word 0xe12fff1e @ bx lr
#else
stmdb sp!,{r4-r6,lr}
ldr r2,.Lspinlock
adr r3,.Lspinlock
mov r4,r0
mov r5,r1
add r6,r3,r2 @ &spinlock
b .+8
.Lspin: bl sched_yield
mov r0,#-1
swp r0,r0,[r6]
cmp r0,#0
bne .Lspin
ldr r2,[r4]
add r2,r5
str r2,[r4]
str r0,[r6] @ release spinlock
ldmia sp!,{r4-r6,lr}
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
.global OPENSSL_cleanse
.type OPENSSL_cleanse,%function
OPENSSL_cleanse:
eor ip,ip,ip
cmp r1,#7
subhs r1,#4
bhs .Lot
cmp r1,#0
beq .Lcleanse_done
.Little:
strb ip,[r0],#1
subs r1,#1
bhi .Little
b .Lcleanse_done
.Lot: tst r0,#3
beq .Laligned
strb ip,[r0],#1
sub r1,#1
b .Lot
.Laligned:
str ip,[r0],#4
subs r1,#4
bhs .Laligned
adds r1,#4
bne .Little
.Lcleanse_done:
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.global OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,%function
OPENSSL_wipe_cpu:
ldr r0,.LOPENSSL_armcap
adr r1,.LOPENSSL_armcap
ldr r0,[r1,r0]
eor r2,r2,r2
eor r3,r3,r3
eor ip,ip,ip
tst r0,#1
beq .Lwipe_done
.word 0xf3000150 @ veor q0, q0, q0
.word 0xf3022152 @ veor q1, q1, q1
.word 0xf3044154 @ veor q2, q2, q2
.word 0xf3066156 @ veor q3, q3, q3
.word 0xf34001f0 @ veor q8, q8, q8
.word 0xf34221f2 @ veor q9, q9, q9
.word 0xf34441f4 @ veor q10, q10, q10
.word 0xf34661f6 @ veor q11, q11, q11
.word 0xf34881f8 @ veor q12, q12, q12
.word 0xf34aa1fa @ veor q13, q13, q13
.word 0xf34cc1fc @ veor q14, q14, q14
.word 0xf34ee1fe @ veor q15, q15, q15
.Lwipe_done:
mov r0,sp
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
.global OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,%function
OPENSSL_instrument_bus:
eor r0,r0,r0
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
.global OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,%function
OPENSSL_instrument_bus2:
eor r0,r0,r0
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.LOPENSSL_armcap
#if __ARM_ARCH__>=6
.align 5
#else
.Lspinlock:
.word atomic_add_spinlock-.Lspinlock
.align 5
.data
.align 2
atomic_add_spinlock:
.word 0
#endif
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P

View file

@ -264,12 +264,12 @@ $code.=<<___;
#if __ARM_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap-(.Lpic+8)
.word OPENSSL_armcap_P-(.Lpic+8)
#endif
.asciz "GF2m Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
.comm OPENSSL_armcap,4,4
.comm OPENSSL_armcap_P,4,4
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;

View file

@ -668,8 +668,6 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
# if __ARM_ARCH__>=7
# define GHASH_ASM_ARM
# define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_armcap;
void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
# endif
@ -715,7 +713,8 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
#elif TABLE_BITS==4
# if defined(GHASH_ASM_X86_OR_64)
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
if (OPENSSL_ia32cap_P[1]&(1<<1)) { /* check PCLMULQDQ bit */
if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
gcm_init_clmul(ctx->Htable,ctx->H.u);
ctx->gmult = gcm_gmult_clmul;
ctx->ghash = gcm_ghash_clmul;
@ -736,7 +735,7 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
ctx->ghash = gcm_ghash_4bit;
# endif
# elif defined(GHASH_ASM_ARM)
if (OPENSSL_armcap & 1) {
if (OPENSSL_armcap_P & ARMV7_NEON) {
ctx->gmult = gcm_gmult_neon;
ctx->ghash = gcm_ghash_neon;
} else {

View file

@ -221,7 +221,7 @@ WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size K512,.-K512
.LOPENSSL_armcap:
.word OPENSSL_armcap-sha512_block_data_order
.word OPENSSL_armcap_P-sha512_block_data_order
.skip 32-4
.global sha512_block_data_order
@ -231,7 +231,7 @@ sha512_block_data_order:
add $len,$inp,$len,lsl#7 @ len to point at the end of inp
#if __ARM_ARCH__>=7
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
tst r12,#1
bne .LNEON
#endif
@ -573,7 +573,7 @@ $code.=<<___;
.size sha512_block_data_order,.-sha512_block_data_order
.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
.comm OPENSSL_armcap,4,4
.comm OPENSSL_armcap_P,4,4
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;