SHA1 for PowerPC.
This commit is contained in:
parent
2c5d4daac5
commit
a9c32ace06
6 changed files with 333 additions and 11 deletions
18
Configure
18
Configure
|
@ -314,7 +314,7 @@ my %table=(
|
|||
# *-generic* is endian-neutral target, but ./config is free to
|
||||
# throw in -D[BL]_ENDIAN, whichever appropriate...
|
||||
"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc32.o linix_ppc32-mont.o:::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc32.o linix_ppc32-mont.o:::::sha1-ppc_linux32.o::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
#### IA-32 targets...
|
||||
"linux-ia32-icc", "icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-elf", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
|
@ -322,7 +322,7 @@ my %table=(
|
|||
####
|
||||
"linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
# -bpowerpc64-linux is transient option, -m64 should be the one to use...
|
||||
"linux-ppc64", "gcc:-bpowerpc64-linux -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o linux_ppc64-mont.o:::::::::::dlfcn:linux-shared:-fPIC:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ppc64", "gcc:-bpowerpc64-linux -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o linux_ppc64-mont.o:::::sha1-ppc_linux64.o::::::dlfcn:linux-shared:-fPIC:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
|
@ -407,12 +407,12 @@ my %table=(
|
|||
|
||||
#### IBM's AIX.
|
||||
"aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::",
|
||||
"aix-gcc", "gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::::::::dlfcn:",
|
||||
"aix64-gcc","gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::::::::dlfcn::::::-X64",
|
||||
"aix-gcc", "gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o::::::dlfcn:",
|
||||
"aix64-gcc","gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o::::::dlfcn::::::-X64",
|
||||
# Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE
|
||||
# at build time. $OBJECT_MODE is respected at ./config stage!
|
||||
"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384::-qthreaded:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
|
||||
"aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64",
|
||||
"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384::-qthreaded:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
|
||||
"aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64",
|
||||
|
||||
#
|
||||
# Cray T90 and similar (SDSC)
|
||||
|
@ -504,10 +504,10 @@ my %table=(
|
|||
|
||||
##### MacOS X (a.k.a. Rhapsody or Darwin) setup
|
||||
"rhapsody-ppc-cc","cc:-O3 -DB_ENDIAN::(unknown):MACOSX_RHAPSODY::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}::",
|
||||
"darwin-ppc-cc","cc:-O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
"darwin64-ppc-cc","cc:-m64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc64.o osx_ppc64-mont.o:::::::::::dlfcn:darwin-shared:-fPIC -fno-common:-m64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
"darwin-ppc-cc","cc:-O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
"darwin64-ppc-cc","cc:-m64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc64.o osx_ppc64-mont.o:::::sha1-ppc_osx64.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-m64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
"darwin-i386-cc","cc:-O3 -fomit-frame-pointer -DL_ENDIAN::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
"debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
"debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
|
||||
##### A/UX
|
||||
"aux3-gcc","gcc:-O2 -DTERMIO::(unknown):AUX:-lbsd:RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:::",
|
||||
|
|
|
@ -206,7 +206,8 @@
|
|||
: "cc"); \
|
||||
ret; \
|
||||
})
|
||||
# elif defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
|
||||
# elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
|
||||
defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
|
||||
# define ROTATE(a,n) ({ register unsigned int ret; \
|
||||
asm ( \
|
||||
"rlwinm %0,%1,%2,0,31" \
|
||||
|
|
|
@ -71,6 +71,11 @@ sha256-x86_64.s: asm/sha512-x86_64.pl
|
|||
sha512-x86_64.s: asm/sha512-x86_64.pl
|
||||
$(PERL) asm/sha512-x86_64.pl $@
|
||||
|
||||
sha1-ppc_aix32.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $@
|
||||
sha1-ppc_aix64.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $@
|
||||
# non-AIX targets are believed to be armed with GNU make
|
||||
sha1-ppc_%.s: asm/sha1-ppc.pl; $(PERL) $< $@
|
||||
|
||||
files:
|
||||
$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
|
||||
|
||||
|
|
309
crypto/sha/asm/sha1-ppc.pl
Executable file
309
crypto/sha/asm/sha1-ppc.pl
Executable file
|
@ -0,0 +1,309 @@
|
|||
#!/usr/bin/env perl
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
# project. Rights for redistribution and usage in source and binary
|
||||
# forms are granted according to the OpenSSL license.
|
||||
# ====================================================================
|
||||
|
||||
# I let hardware handle unaligned input, except on page boundaries
|
||||
# (see below for details). Otherwise straightforward implementation
|
||||
# with X vector in register bank. The module is big-endian [which is
|
||||
# not a big deal as there are no little-endian targets left around].
|
||||
|
||||
# gcc-4.0.0 -m64 -m32
|
||||
# --------------------------
|
||||
# sha1 +76% +59%
|
||||
|
||||
# First command-line argument: name of the assembly file to generate.
# Its name also selects the 32- or 64-bit flavour below.
$output = shift;
|
||||
|
||||
# Choose pointer size and the matching compare / store-with-update /
# load / store mnemonics. The match is a plain substring test for
# "64.s" or "32.s"; any other name aborts the script.
# NOTE(review): $RZONE is presumably the ABI red-zone size - confirm;
# here it is only used as extra space in the frame allocation.
if ($output =~ /64\.s/) {
|
||||
$SIZE_T =8;
|
||||
$RZONE =288;
|
||||
$UCMP ="cmpld";
|
||||
$STU ="stdu";
|
||||
$POP ="ld";
|
||||
$PUSH ="std";
|
||||
} elsif ($output =~ /32\.s/) {
|
||||
$SIZE_T =4;
|
||||
$RZONE =224;
|
||||
$UCMP ="cmplw";
|
||||
$STU ="stwu";
|
||||
$POP ="lwz";
|
||||
$PUSH ="stw";
|
||||
} else { die "nonsense $output"; }
|
||||
|
||||
# If a second command-line argument is present, STDOUT is left as is.
# Otherwise STDOUT is re-opened as a pipe into ../perlasm/ppc-xlate.pl,
# run with the current interpreter ($^X) and given $output as argument.
# NOTE(review): $output is interpolated into a shell command line.
( defined shift || open STDOUT,"| $^X ../perlasm/ppc-xlate.pl $output" ) ||
|
||||
die "can't call ../perlasm/ppc-xlate.pl: $!";
|
||||
|
||||
# Base size used for the register save area: 24 pointer-sized slots.
$FRAME=24*$SIZE_T;
|
||||
|
||||
# Symbolic names for the PowerPC registers used in the templates below.
# $K holds the current round constant (loaded with lis/ori per group).
$K ="r0";
|
||||
# $sp is the register used as stack pointer by the prologue/epilogue.
$sp ="r1";
|
||||
# $toc is assigned here but not referenced by any template in this script.
$toc="r2";
|
||||
# $ctx: address the five 32-bit state words are loaded from / stored to.
$ctx="r3";
|
||||
# $inp: address of the input data.
$inp="r4";
|
||||
# $num: number of 64-byte blocks to process (moved into CTR).
$num="r5";
|
||||
# $t0/$t1: scratch registers for the per-round boolean function.
$t0 ="r15";
|
||||
$t1 ="r6";
|
||||
|
||||
# Five state words plus one spare ($T) that receives each round's result.
$A ="r7";
|
||||
$B ="r8";
|
||||
$C ="r9";
|
||||
$D ="r10";
|
||||
$E ="r11";
|
||||
$T ="r12";
|
||||
|
||||
# @V is rotated by one position after every round, so the register that
# just received the new value becomes the first argument of the next round.
@V=($A,$B,$C,$D,$E,$T);
|
||||
# Sixteen registers holding the message words, indexed modulo 16.
@X=("r16","r17","r18","r19","r20","r21","r22","r23",
|
||||
"r24","r25","r26","r27","r28","r29","r30","r31");
|
||||
|
||||
# Append the assembly for one round of rounds 0..19 to $code.
#
# Arguments: ($i,$a,$b,$c,$d,$e,$f) - the round number followed by six
# register names. The emitted instructions compute
#     $f = $K + $e + rotl($a,5) + X[$i] + (($c & $b) | ($d & ~$b))
# using $e as scratch for rotl($a,5), and rotate $b left by 30 in place.
# Reads the globals $K, $t0, $t1, $inp and @X; returns nothing useful.
sub BODY_00_19 {
|
||||
my ($i,$a,$b,$c,$d,$e,$f)=@_;
|
||||
# index of the message word prepared for the following round
my $j=$i+1;
|
||||
# Round 0 only: load the first message word from the input.
$code.=<<___ if ($i==0);
|
||||
lwz @X[$i],$i*4($inp)
|
||||
___
|
||||
# Rounds 0..14: the next message word is loaded straight from the input
# while the current round is computed.
$code.=<<___ if ($i<15);
|
||||
lwz @X[$j],$j*4($inp)
|
||||
add $f,$K,$e
|
||||
rotlwi $e,$a,5
|
||||
add $f,$f,@X[$i]
|
||||
and $t0,$c,$b
|
||||
add $f,$f,$e
|
||||
andc $t1,$d,$b
|
||||
rotlwi $b,$b,30
|
||||
or $t0,$t0,$t1
|
||||
add $f,$f,$t0
|
||||
___
|
||||
# Rounds 15..19: the next message word is derived in place as
# X[j] = rotl(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices modulo 16.
$code.=<<___ if ($i>=15);
|
||||
add $f,$K,$e
|
||||
rotlwi $e,$a,5
|
||||
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
|
||||
add $f,$f,@X[$i%16]
|
||||
and $t0,$c,$b
|
||||
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
|
||||
add $f,$f,$e
|
||||
andc $t1,$d,$b
|
||||
rotlwi $b,$b,30
|
||||
or $t0,$t0,$t1
|
||||
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
|
||||
add $f,$f,$t0
|
||||
rotlwi @X[$j%16],@X[$j%16],1
|
||||
___
|
||||
}
|
||||
|
||||
# Append the assembly for one round that uses the XOR function to $code.
# The driver loops call this for rounds 20..39 and again for 60..79.
#
# Arguments: ($i,$a,$b,$c,$d,$e,$f) - the round number followed by six
# register names. The emitted instructions compute
#     $f = $K + $e + rotl($a,5) + X[$i] + ($b ^ $c ^ $d)
# using $e as scratch for rotl($a,5), and rotate $b left by 30 in place.
# Reads the globals $K, $t0, $ctx and @X.
sub BODY_20_39 {
|
||||
my ($i,$a,$b,$c,$d,$e,$f)=@_;
|
||||
# index of the message word prepared for the following round
my $j=$i+1;
|
||||
# Every round but the last also derives the next message word in place:
# X[j] = rotl(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices modulo 16.
$code.=<<___ if ($i<79);
|
||||
add $f,$K,$e
|
||||
rotlwi $e,$a,5
|
||||
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
|
||||
add $f,$f,@X[$i%16]
|
||||
xor $t0,$b,$c
|
||||
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
|
||||
add $f,$f,$e
|
||||
rotlwi $b,$b,30
|
||||
xor $t0,$t0,$d
|
||||
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
|
||||
add $f,$f,$t0
|
||||
rotlwi @X[$j%16],@X[$j%16],1
|
||||
___
|
||||
# Round 79: no further message word is needed, so the slots are used to
# load the five context words from $ctx into r16..r20 instead.
$code.=<<___ if ($i==79);
|
||||
add $f,$K,$e
|
||||
rotlwi $e,$a,5
|
||||
lwz r16,0($ctx)
|
||||
add $f,$f,@X[$i%16]
|
||||
xor $t0,$b,$c
|
||||
lwz r17,4($ctx)
|
||||
add $f,$f,$e
|
||||
rotlwi $b,$b,30
|
||||
lwz r18,8($ctx)
|
||||
xor $t0,$t0,$d
|
||||
lwz r19,12($ctx)
|
||||
add $f,$f,$t0
|
||||
lwz r20,16($ctx)
|
||||
___
|
||||
}
|
||||
|
||||
# Append the assembly for one round of rounds 40..59 to $code.
#
# Arguments: ($i,$a,$b,$c,$d,$e,$f) - the round number followed by six
# register names. The emitted instructions compute
#     $f = $K + $e + rotl($a,5) + X[$i] + (($b & $c) | (($b | $c) & $d))
# using $e as scratch for rotl($a,5), rotate $b left by 30 in place, and
# derive the next message word in place:
#     X[j] = rotl(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices modulo 16.
# Reads the globals $K, $t0, $t1 and @X.
sub BODY_40_59 {
|
||||
my ($i,$a,$b,$c,$d,$e,$f)=@_;
|
||||
# index of the message word prepared for the following round
my $j=$i+1;
|
||||
$code.=<<___;
|
||||
add $f,$K,$e
|
||||
rotlwi $e,$a,5
|
||||
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
|
||||
add $f,$f,@X[$i%16]
|
||||
and $t0,$b,$c
|
||||
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
|
||||
add $f,$f,$e
|
||||
or $t1,$b,$c
|
||||
rotlwi $b,$b,30
|
||||
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
|
||||
and $t1,$t1,$d
|
||||
or $t0,$t0,$t1
|
||||
rotlwi @X[$j%16],@X[$j%16],1
|
||||
add $f,$f,$t0
|
||||
___
|
||||
}
|
||||
|
||||
# Template for the exported entry point .sha1_block_asm_data_order.
# Prologue: copies LR to r0, allocates $FRAME+64+$RZONE bytes with the
# store-with-update instruction and saves r0 and r15..r31 in slots
# counted down from $FRAME. It then loads the five state words from
# $ctx into $A..$E. If the low two bits of $inp are non-zero it branches
# to Lunaligned; otherwise $num goes to CTR and Lsha1_block_private is
# called. Ldone restores the saved registers and LR, releases the frame
# and returns. Text between backticks is evaluated as Perl at the end of
# the script.
$code=<<___;
|
||||
.text
|
||||
|
||||
.globl .sha1_block_asm_data_order
|
||||
.align 4
|
||||
.sha1_block_asm_data_order:
|
||||
mflr r0
|
||||
$STU $sp,`-($FRAME+64+$RZONE)`($sp)
|
||||
$PUSH r0,`$FRAME-$SIZE_T*18`($sp)
|
||||
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
|
||||
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
|
||||
$PUSH r17,`$FRAME-$SIZE_T*15`($sp)
|
||||
$PUSH r18,`$FRAME-$SIZE_T*14`($sp)
|
||||
$PUSH r19,`$FRAME-$SIZE_T*13`($sp)
|
||||
$PUSH r20,`$FRAME-$SIZE_T*12`($sp)
|
||||
$PUSH r21,`$FRAME-$SIZE_T*11`($sp)
|
||||
$PUSH r22,`$FRAME-$SIZE_T*10`($sp)
|
||||
$PUSH r23,`$FRAME-$SIZE_T*9`($sp)
|
||||
$PUSH r24,`$FRAME-$SIZE_T*8`($sp)
|
||||
$PUSH r25,`$FRAME-$SIZE_T*7`($sp)
|
||||
$PUSH r26,`$FRAME-$SIZE_T*6`($sp)
|
||||
$PUSH r27,`$FRAME-$SIZE_T*5`($sp)
|
||||
$PUSH r28,`$FRAME-$SIZE_T*4`($sp)
|
||||
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
|
||||
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
|
||||
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
|
||||
lwz $A,0($ctx)
|
||||
lwz $B,4($ctx)
|
||||
lwz $C,8($ctx)
|
||||
lwz $D,12($ctx)
|
||||
lwz $E,16($ctx)
|
||||
andi. r0,$inp,3
|
||||
bne Lunaligned
|
||||
Laligned:
|
||||
mtctr $num
|
||||
bl Lsha1_block_private
|
||||
Ldone:
|
||||
$POP r0,`$FRAME-$SIZE_T*18`($sp)
|
||||
$POP r15,`$FRAME-$SIZE_T*17`($sp)
|
||||
$POP r16,`$FRAME-$SIZE_T*16`($sp)
|
||||
$POP r17,`$FRAME-$SIZE_T*15`($sp)
|
||||
$POP r18,`$FRAME-$SIZE_T*14`($sp)
|
||||
$POP r19,`$FRAME-$SIZE_T*13`($sp)
|
||||
$POP r20,`$FRAME-$SIZE_T*12`($sp)
|
||||
$POP r21,`$FRAME-$SIZE_T*11`($sp)
|
||||
$POP r22,`$FRAME-$SIZE_T*10`($sp)
|
||||
$POP r23,`$FRAME-$SIZE_T*9`($sp)
|
||||
$POP r24,`$FRAME-$SIZE_T*8`($sp)
|
||||
$POP r25,`$FRAME-$SIZE_T*7`($sp)
|
||||
$POP r26,`$FRAME-$SIZE_T*6`($sp)
|
||||
$POP r27,`$FRAME-$SIZE_T*5`($sp)
|
||||
$POP r28,`$FRAME-$SIZE_T*4`($sp)
|
||||
$POP r29,`$FRAME-$SIZE_T*3`($sp)
|
||||
$POP r30,`$FRAME-$SIZE_T*2`($sp)
|
||||
$POP r31,`$FRAME-$SIZE_T*1`($sp)
|
||||
mtlr r0
|
||||
addi $sp,$sp,`$FRAME+64+$RZONE`
|
||||
blr
|
||||
___
|
||||
|
||||
# The PowerPC specification allows an implementation to be ill-behaved
|
||||
# upon an unaligned access which crosses a page boundary. The "better
|
||||
# safe than sorry" principle makes me treat it specially. I don't look
|
||||
# for the particular offending word, but rather for the 64-byte input
|
||||
# block which crosses the boundary. Once found, that block is copied
|
||||
# to an aligned spot on the stack and hashed separately...
|
||||
# Template for the unaligned-input path.
# Lunaligned: $t1 = ((4096 - $inp) & 4095) >> 6, i.e. the number of whole
# 64-byte blocks before the next 4096-byte boundary. If that is zero the
# code goes to Lcross_page. If $num <= $t1 it branches to Laligned and
# the remaining input is hashed directly. Otherwise $t1 blocks are hashed
# directly and subtracted from $num.
# Lcross_page: copies 64 bytes (16 iterations of 4 bytes) one byte at a
# time from $inp to $sp+$FRAME, saves the advanced $inp in the frame,
# hashes the copy as a single block, restores $inp and decrements $num.
# It loops back to Lunaligned while $num is non-zero, then jumps to Ldone.
$code.=<<___;
|
||||
.align 4
|
||||
Lunaligned:
|
||||
li $t1,4096
|
||||
subf $t1,$inp,$t1
|
||||
andi. $t1,$t1,4095 ; distance to closest page boundary
|
||||
srwi. $t1,$t1,6 ; t1/=64
|
||||
beq Lcross_page
|
||||
$UCMP $num,$t1
|
||||
ble- Laligned ; didn't cross the page boundary
|
||||
mtctr $t1
|
||||
subf $num,$t1,$num
|
||||
bl Lsha1_block_private
|
||||
Lcross_page:
|
||||
li $t1,16
|
||||
mtctr $t1
|
||||
addi r20,$sp,$FRAME ; spot below the frame
|
||||
Lmemcpy:
|
||||
lbz r16,0($inp)
|
||||
lbz r17,1($inp)
|
||||
lbz r18,2($inp)
|
||||
lbz r19,3($inp)
|
||||
addi $inp,$inp,4
|
||||
stb r16,0(r20)
|
||||
stb r17,1(r20)
|
||||
stb r18,2(r20)
|
||||
stb r19,3(r20)
|
||||
addi r20,r20,4
|
||||
bdnz Lmemcpy
|
||||
|
||||
$PUSH $inp,`$FRAME-$SIZE_T*19`($sp)
|
||||
li $t1,1
|
||||
addi $inp,$sp,$FRAME
|
||||
mtctr $t1
|
||||
bl Lsha1_block_private
|
||||
$POP $inp,`$FRAME-$SIZE_T*19`($sp)
|
||||
addic. $num,$num,-1
|
||||
bne- Lunaligned
|
||||
b Ldone
|
||||
___
|
||||
|
||||
# This is a private block function which uses a tailored calling
|
||||
# interface: upon entry the SHA_CTX state is pre-loaded into the given
|
||||
# registers and the counter register contains the number of chunks to
|
||||
# digest...
|
||||
# Emit the label of the internal block function; the 80 rounds follow.
$code.=<<___;
|
||||
.align 4
|
||||
Lsha1_block_private:
|
||||
___
|
||||
# Each group of 20 rounds starts by loading its 32-bit constant into $K
# (upper half with lis, lower half with ori).
$code.=<<___; # load K_00_19
|
||||
lis $K,0x5a82
|
||||
ori $K,$K,0x7999
|
||||
___
|
||||
# One call per round. After each call the last element of @V is moved to
# the front, so the register that received the round's result is passed
# as $a to the next round. $i deliberately carries over between loops.
for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
|
||||
$code.=<<___; # load K_20_39
|
||||
lis $K,0x6ed9
|
||||
ori $K,$K,0xeba1
|
||||
___
|
||||
for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
|
||||
$code.=<<___; # load K_40_59
|
||||
lis $K,0x8f1b
|
||||
ori $K,$K,0xbcdc
|
||||
___
|
||||
for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
|
||||
$code.=<<___; # load K_60_79
|
||||
lis $K,0xca62
|
||||
ori $K,$K,0xc1d6
|
||||
___
|
||||
# Rounds 60..79 reuse the XOR round body; its round-79 variant also loads
# the context words into r16..r20.
for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
|
||||
# After 80 rotations @V is ($E,$T,$A,$B,$C,$D), so the five working
# values are in $E,$T,$A,$B,$C. They are added to the context words in
# r16..r20, stored back to $ctx and copied into $A..$E for the next
# block. $inp advances by 64 bytes and the loop repeats while CTR, after
# being decremented by bdnz, is non-zero.
$code.=<<___;
|
||||
add r16,r16,$E
|
||||
add r17,r17,$T
|
||||
add r18,r18,$A
|
||||
add r19,r19,$B
|
||||
add r20,r20,$C
|
||||
stw r16,0($ctx)
|
||||
mr $A,r16
|
||||
stw r17,4($ctx)
|
||||
mr $B,r17
|
||||
stw r18,8($ctx)
|
||||
mr $C,r18
|
||||
stw r19,12($ctx)
|
||||
mr $D,r19
|
||||
stw r20,16($ctx)
|
||||
mr $E,r20
|
||||
addi $inp,$inp,`16*4`
|
||||
bdnz- Lsha1_block_private
|
||||
blr
|
||||
___
|
||||
|
||||
# Replace every `...` span in the generated text with the result of
# evaluating its contents as a Perl expression (used for the computed
# stack offsets and frame sizes in the templates).
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# Write the finished assembly to STDOUT, which may be a pipe into the
# ppc-xlate.pl translator.
print $code;

# Check the close: buffered write errors, and a failing translator at the
# other end of the pipe, only surface here. Ignoring them could leave a
# truncated or empty .s file behind while the script still exits 0.
close STDOUT or die "error closing STDOUT: $!";
|
|
@ -330,7 +330,7 @@ static const SHA_LONG64 K512[80] = {
|
|||
: "0"(p[1]),"1"(p[0])); \
|
||||
((SHA_LONG64)hi)<<32|lo; })
|
||||
# endif
|
||||
# elif defined(_ARCH_PPC) && defined(__64BIT__)
|
||||
# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
|
||||
# define ROTR(a,n) ({ unsigned long ret; \
|
||||
asm ("rotrdi %0,%1,%2" \
|
||||
: "=r"(ret) \
|
||||
|
|
|
@ -127,6 +127,13 @@
|
|||
# define DONT_IMPLEMENT_BLOCK_HOST_ORDER
|
||||
# define sha1_block_data_order sha1_block_asm_data_order
|
||||
# define DONT_IMPLEMENT_BLOCK_DATA_ORDER
|
||||
# elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
|
||||
defined(__ppc) || defined(__ppc__) || defined(__powerpc) || \
|
||||
defined(__ppc64) || defined(__ppc64__) || defined(__powerpc64)
|
||||
# define sha1_block_host_order sha1_block_asm_data_order
|
||||
# define DONT_IMPLEMENT_BLOCK_HOST_ORDER
|
||||
# define sha1_block_data_order sha1_block_asm_data_order
|
||||
# define DONT_IMPLEMENT_BLOCK_DATA_ORDER
|
||||
# endif
|
||||
# endif
|
||||
void sha1_block_host_order (SHA_CTX *c, const void *p,size_t num);
|
||||
|
|
Loading…
Reference in a new issue