MIPS assembly pack: jumbo update from HEAD.

This commit is contained in:
Andy Polyakov 2012-09-19 20:59:18 +00:00
parent 9a7f80c869
commit 988037fe18
7 changed files with 1100 additions and 278 deletions

View file

@ -4,6 +4,10 @@
Changes between 1.0.1 and 1.0.2 [xx XXX xxxx]
*) MIPS assembly pack updates: support for MIPS32r2 and SmartMIPS ASE,
platform support for Linux and Android.
[Andy Polyakov]
*) Call OCSP Stapling callback after ciphersuite has been chosen, so
the right response is stapled. Also change current certificate to
the certificate actually sent.

View file

@ -348,6 +348,13 @@ my %table=(
# It's believed that majority of ARM toolchains predefine appropriate -march.
# If your compiler does not, do complement config command line with one!
"linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# Configure script adds minimally required -march for assembly support,
# if no -march was specified at command line. mips32 and mips64 below
# refer to contemporary MIPS Architecture specifications, MIPS32 and
# MIPS64, rather than to kernel bitness.
"linux-mips32", "gcc:-mabi=32 -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips32_asm}:o32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-mips64", "gcc:-mabi=n32 -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips64_asm}:n32:dlfcn:linux-shared:-fPIC:-mabi=n32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::32",
"linux64-mips64", "gcc:-mabi=64 -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips64_asm}:64:dlfcn:linux-shared:-fPIC:-mabi=64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
#### IA-32 targets...
"linux-ia32-icc", "icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-elf", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@ -406,6 +413,7 @@ my %table=(
"android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"android-x86","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:".eval{my $asm=${x86_elf_asm};$asm=~s/:elf/:android/;$asm}.":dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"android-armv7","gcc:-march=armv7-a -mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"android-mips","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips32_asm}:o32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
#### *BSD [do see comment about ${BSDthreads} above!]
"BSD-generic32","gcc:-DTERMIOS -O3 -fomit-frame-pointer -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_INDEX DES_INT DES_UNROLL:${no_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@ -1202,6 +1210,12 @@ if ($target =~ /^mingw/ && `$cc --target-help 2>&1` !~ m/\-mno\-cygwin/m)
$shared_ldflag =~ s/\-mno\-cygwin\s*//;
}
# MIPS Linux targets only: when assembly modules are enabled and $flags
# carries neither a -mips* nor a -march= option, prepend the lowest ISA
# level the assembly modules need. Targets whose name does not match
# /linux.*-mips/ (e.g. "android-mips") are not touched here.
# NOTE(review): $flags is presumably the options given on the Configure
# command line -- confirm against where it is populated.
if ($target =~ /linux.*\-mips/ && !$no_asm && $flags !~ /\-m(ips|arch=)/) {
# minimally required architecture flags for assembly modules
$cflags="-mips2 $cflags" if ($target =~ /mips32/);
$cflags="-mips3 $cflags" if ($target =~ /mips64/);
}
my $no_shared_warn=0;
my $no_user_cflags=0;

132
TABLE
View file

@ -1089,6 +1089,39 @@ $ranlib =
$arflags =
$multilib =
*** android-mips
$cc = gcc
$cflags = -mandroid -I$(ANDROID_DEV)/include -B$(ANDROID_DEV)/lib -O3 -Wall
$unistd =
$thread_cflag = -D_REENTRANT
$sys_id =
$lflags = -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
$cpuid_obj =
$bn_obj = bn-mips.o mips-mont.o
$des_obj =
$aes_obj = aes_cbc.o aes-mips.o
$bf_obj =
$md5_obj =
$sha1_obj = sha1-mips.o sha256-mips.o
$cast_obj =
$rc4_obj =
$rmd160_obj =
$rc5_obj =
$wp_obj =
$cmll_obj =
$modes_obj =
$engines_obj =
$perlasm_scheme = o32
$dso_scheme = dlfcn
$shared_target= linux-shared
$shared_cflag = -fPIC
$shared_ldflag =
$shared_extension = .so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
$ranlib =
$arflags =
$multilib =
*** android-x86
$cc = gcc
$cflags = -mandroid -I$(ANDROID_DEV)/include -B$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall
@ -4191,6 +4224,72 @@ $ranlib =
$arflags =
$multilib =
*** linux-mips32
$cc = gcc
$cflags = -mabi=32 -DTERMIO -O3 -Wall
$unistd =
$thread_cflag = -D_REENTRANT
$sys_id =
$lflags = -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
$cpuid_obj =
$bn_obj = bn-mips.o mips-mont.o
$des_obj =
$aes_obj = aes_cbc.o aes-mips.o
$bf_obj =
$md5_obj =
$sha1_obj = sha1-mips.o sha256-mips.o
$cast_obj =
$rc4_obj =
$rmd160_obj =
$rc5_obj =
$wp_obj =
$cmll_obj =
$modes_obj =
$engines_obj =
$perlasm_scheme = o32
$dso_scheme = dlfcn
$shared_target= linux-shared
$shared_cflag = -fPIC
$shared_ldflag =
$shared_extension = .so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
$ranlib =
$arflags =
$multilib =
*** linux-mips64
$cc = gcc
$cflags = -mabi=n32 -DTERMIO -O3 -Wall
$unistd =
$thread_cflag = -D_REENTRANT
$sys_id =
$lflags = -ldl
$bn_ops = SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
$cpuid_obj =
$bn_obj = bn-mips.o mips-mont.o
$des_obj =
$aes_obj = aes_cbc.o aes-mips.o
$bf_obj =
$md5_obj =
$sha1_obj = sha1-mips.o sha256-mips.o sha512-mips.o
$cast_obj =
$rc4_obj =
$rmd160_obj =
$rc5_obj =
$wp_obj =
$cmll_obj =
$modes_obj =
$engines_obj =
$perlasm_scheme = n32
$dso_scheme = dlfcn
$shared_target= linux-shared
$shared_cflag = -fPIC
$shared_ldflag = -mabi=n32
$shared_extension = .so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
$ranlib =
$arflags =
$multilib = 32
*** linux-ppc
$cc = gcc
$cflags = -DB_ENDIAN -DTERMIO -O3 -Wall
@ -4422,6 +4521,39 @@ $ranlib =
$arflags =
$multilib = /highgprs
*** linux64-mips64
$cc = gcc
$cflags = -mabi=64 -DTERMIO -O3 -Wall
$unistd =
$thread_cflag = -D_REENTRANT
$sys_id =
$lflags = -ldl
$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
$cpuid_obj =
$bn_obj = bn-mips.o mips-mont.o
$des_obj =
$aes_obj = aes_cbc.o aes-mips.o
$bf_obj =
$md5_obj =
$sha1_obj = sha1-mips.o sha256-mips.o sha512-mips.o
$cast_obj =
$rc4_obj =
$rmd160_obj =
$rc5_obj =
$wp_obj =
$cmll_obj =
$modes_obj =
$engines_obj =
$perlasm_scheme = 64
$dso_scheme = dlfcn
$shared_target= linux-shared
$shared_cflag = -fPIC
$shared_ldflag = -mabi=64
$shared_extension = .so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
$ranlib =
$arflags =
$multilib = 64
*** linux64-s390x
$cc = gcc
$cflags = -m64 -DB_ENDIAN -DTERMIO -O3 -Wall

10
config
View file

@ -596,6 +596,16 @@ case "$GUESSOS" in
OUT="linux-ppc"
;;
ppc-*-linux2) OUT="linux-ppc" ;;
# 64-bit MIPS Linux: select the "linux-mips64" target by default and warn
# that a 64-bit library requires an explicit './Configure linux64-mips64'.
mips64*-*-linux2)
echo "WARNING! If you wish to build 64-bit library, then you have to"
echo " invoke './Configure linux64-mips64' *manually*."
# Pause only when $TEST is "false" and stdout (fd 1) is a terminal.
if [ "$TEST" = "false" -a -t 1 ]; then
echo " You have about 5 seconds to press Ctrl-C to abort."
# Subshell: save tty settings and restore them on INT (2) or exit (0),
# then do a non-canonical read that times out (time 50 = 5 seconds).
(trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1
fi
OUT="linux-mips64"
;;
mips*-*-linux2) OUT="linux-mips32" ;;
ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;;
ppcgen-*-vxworks*) OUT="vxworks-ppcgen" ;;
pentium-*-vxworks*) OUT="vxworks-pentium" ;;

File diff suppressed because it is too large Load diff

View file

@ -15,6 +15,10 @@
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...
# September 2012.
#
# Add MIPS32r2 code (>25% fewer instructions).
######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
@ -95,6 +99,10 @@ sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if (!$big_endian);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
wsbh @X[$i],@X[$i] # byte swap($i)
rotr @X[$i],@X[$i],16
#else
srl $t0,@X[$i],24 # byte swap($i)
srl $t1,@X[$i],8
andi $t2,@X[$i],0xFF00
@ -104,8 +112,22 @@ $code.=<<___ if (!$big_endian);
or @X[$i],$t0
or $t1,$t2
or @X[$i],$t1
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
addu $e,$K # $i
xor $t0,$c,$d
rotr $t1,$a,27
lwl @X[$j],$j*4+$MSB($inp)
and $t0,$b
addu $e,$t1
lwr @X[$j],$j*4+$LSB($inp)
xor $t0,$d
addu $e,@X[$i]
rotr $b,$b,2
addu $e,$t0
#else
lwl @X[$j],$j*4+$MSB($inp)
sll $t0,$a,5 # $i
addu $e,$K
@ -121,6 +143,7 @@ $code.=<<___;
addu $e,@X[$i]
or $b,$t2
addu $e,$t0
#endif
___
}
@ -129,6 +152,10 @@ my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if (!$big_endian && $i==15);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
wsbh @X[$i],@X[$i] # byte swap($i)
rotr @X[$i],@X[$i],16
#else
srl $t0,@X[$i],24 # byte swap($i)
srl $t1,@X[$i],8
andi $t2,@X[$i],0xFF00
@ -138,8 +165,24 @@ $code.=<<___ if (!$big_endian && $i==15);
or @X[$i],$t0
or @X[$i],$t1
or @X[$i],$t2
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
addu $e,$K # $i
xor @X[$j%16],@X[($j+2)%16]
xor $t0,$c,$d
rotr $t1,$a,27
xor @X[$j%16],@X[($j+8)%16]
and $t0,$b
addu $e,$t1
xor @X[$j%16],@X[($j+13)%16]
xor $t0,$d
addu $e,@X[$i%16]
rotr @X[$j%16],@X[$j%16],31
rotr $b,$b,2
addu $e,$t0
#else
xor @X[$j%16],@X[($j+2)%16]
sll $t0,$a,5 # $i
addu $e,$K
@ -159,6 +202,7 @@ $code.=<<___;
addu $e,@X[$i%16]
or $b,$t2
addu $e,$t0
#endif
___
}
@ -166,6 +210,20 @@ sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
xor @X[$j%16],@X[($j+2)%16]
addu $e,$K # $i
rotr $t1,$a,27
xor @X[$j%16],@X[($j+8)%16]
xor $t0,$c,$d
addu $e,$t1
xor @X[$j%16],@X[($j+13)%16]
xor $t0,$b
addu $e,@X[$i%16]
rotr @X[$j%16],@X[$j%16],31
rotr $b,$b,2
addu $e,$t0
#else
xor @X[$j%16],@X[($j+2)%16]
sll $t0,$a,5 # $i
addu $e,$K
@ -184,8 +242,24 @@ $code.=<<___ if ($i<79);
or @X[$j%16],$t1
or $b,$t2
addu $e,$t0
#endif
___
$code.=<<___ if ($i==79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
lw @X[0],0($ctx)
addu $e,$K # $i
lw @X[1],4($ctx)
rotr $t1,$a,27
lw @X[2],8($ctx)
xor $t0,$c,$d
addu $e,$t1
lw @X[3],12($ctx)
xor $t0,$b
addu $e,@X[$i%16]
lw @X[4],16($ctx)
rotr $b,$b,2
addu $e,$t0
#else
lw @X[0],0($ctx)
sll $t0,$a,5 # $i
addu $e,$K
@ -203,6 +277,7 @@ $code.=<<___ if ($i==79);
addu $e,@X[$i%16]
or $b,$t2
addu $e,$t0
#endif
___
}
@ -210,6 +285,22 @@ sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
addu $e,$K # $i
and $t0,$c,$d
xor @X[$j%16],@X[($j+2)%16]
rotr $t1,$a,27
addu $e,$t0
xor @X[$j%16],@X[($j+8)%16]
xor $t0,$c,$d
addu $e,$t1
xor @X[$j%16],@X[($j+13)%16]
and $t0,$b
addu $e,@X[$i%16]
rotr @X[$j%16],@X[$j%16],31
rotr $b,$b,2
addu $e,$t0
#else
xor @X[$j%16],@X[($j+2)%16]
sll $t0,$a,5 # $i
addu $e,$K
@ -230,6 +321,7 @@ $code.=<<___ if ($i<79);
addu $e,@X[$i%16]
or $b,$t2
addu $e,$t0
#endif
___
}
@ -241,6 +333,10 @@ $code=<<___;
# include <openssl/fipssyms.h>
#endif
#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
#define _MIPS_ARCH_MIPS32R2
#endif
.text
.set noat

View file

@ -1,7 +1,7 @@
#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@ -17,6 +17,10 @@
# ~17%, but it comes for free, because it's same instruction sequence.
# Improvement coefficients are for aligned input.
# September 2012.
#
# Add MIPS[32|64]R2 code (>25% fewer instructions).
######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
@ -45,7 +49,7 @@
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) {
$PTR_ADD="dadd"; # incidentally works even on n32
@ -83,6 +87,7 @@ if ($output =~ /512/) {
$SLL="dsll"; # shift left logical
$SRL="dsrl"; # shift right logical
$ADDU="daddu";
$ROTR="drotr";
@Sigma0=(28,34,39);
@Sigma1=(14,18,41);
@sigma0=( 7, 1, 8); # right shift first
@ -97,6 +102,7 @@ if ($output =~ /512/) {
$SLL="sll"; # shift left logical
$SRL="srl"; # shift right logical
$ADDU="addu";
$ROTR="rotr";
@Sigma0=( 2,13,22);
@Sigma1=( 6,11,25);
@sigma0=( 3, 7,18); # right shift first
@ -124,6 +130,10 @@ $code.=<<___ if ($i<15);
${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp)
___
$code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
wsbh @X[0],@X[0] # byte swap($i)
rotr @X[0],@X[0],16
#else
srl $tmp0,@X[0],24 # byte swap($i)
srl $tmp1,@X[0],8
andi $tmp2,@X[0],0xFF00
@ -133,8 +143,13 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
or @X[0],$tmp0
or $tmp1,$tmp2
or @X[0],$tmp1
#endif
___
$code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
#if defined(_MIPS_ARCH_MIPS64R2)
dsbh @X[0],@X[0] # byte swap($i)
dshd @X[0],@X[0]
#else
ori $tmp0,$zero,0xFF
dsll $tmp2,$tmp0,32
or $tmp0,$tmp2 # 0x000000FF000000FF
@ -153,8 +168,31 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
dsrl $tmp1,@X[0],32
dsll @X[0],32
or @X[0],$tmp1
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
xor $tmp2,$f,$g # $i
$ROTR $tmp0,$e,@Sigma1[0]
$ADDU $T1,$X[0],$h
$ROTR $tmp1,$e,@Sigma1[1]
and $tmp2,$e
$ROTR $h,$e,@Sigma1[2]
xor $tmp0,$tmp1
$ROTR $tmp1,$a,@Sigma0[0]
xor $tmp2,$g # Ch(e,f,g)
xor $tmp0,$h # Sigma1(e)
$ROTR $h,$a,@Sigma0[1]
$ADDU $T1,$tmp2
$LD $tmp2,`$i*$SZ`($Ktbl) # K[$i]
xor $h,$tmp1
$ROTR $tmp1,$a,@Sigma0[2]
$ADDU $T1,$tmp0
and $tmp0,$b,$c
xor $h,$tmp1 # Sigma0(a)
xor $tmp1,$b,$c
#else
$ADDU $T1,$X[0],$h # $i
$SRL $h,$e,@Sigma1[0]
xor $tmp2,$f,$g
@ -184,16 +222,15 @@ $code.=<<___;
xor $h,$tmp1
$SLL $tmp1,$a,`$SZ*8-@Sigma0[0]`
xor $h,$tmp0
$ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer
and $tmp0,$b,$c
xor $h,$tmp1 # Sigma0(a)
or $tmp0,$a,$b
and $tmp1,$a,$b
and $tmp0,$c
or $tmp1,$tmp0 # Maj(a,b,c)
xor $tmp1,$b,$c
#endif
$ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer
$ADDU $h,$tmp0
and $tmp1,$a
$ADDU $T1,$tmp2 # +=K[$i]
$ADDU $h,$tmp1
$ADDU $h,$tmp1 # +=Maj(a,b,c)
$ADDU $d,$T1
$ADDU $h,$T1
___
@ -207,6 +244,20 @@ my $i=@_[0];
my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]);
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
$SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i)
$ROTR $tmp0,@X[1],@sigma0[1]
$ADDU @X[0],@X[9] # +=X[i+9]
xor $tmp2,$tmp0
$ROTR $tmp0,@X[1],@sigma0[2]
$SRL $tmp3,@X[14],@sigma1[0]
$ROTR $tmp1,@X[14],@sigma1[1]
xor $tmp2,$tmp0 # sigma0(X[i+1])
$ROTR $tmp0,@X[14],@sigma1[2]
xor $tmp3,$tmp1
$ADDU @X[0],$tmp2
#else
$SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i)
$ADDU @X[0],@X[9] # +=X[i+9]
$SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]`
@ -227,7 +278,7 @@ $code.=<<___;
xor $tmp3,$tmp0
$SRL $tmp0,@X[14],@sigma1[2]
xor $tmp3,$tmp1
#endif
xor $tmp3,$tmp0 # sigma1(X[i+14])
$ADDU @X[0],$tmp3
___
@ -242,9 +293,13 @@ $code.=<<___;
# include <openssl/fipssyms.h>
#endif
#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
#define _MIPS_ARCH_MIPS32R2
#endif
.text
.set noat
#if !defined(__vxworks) || defined(__pic__)
#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
.option pic2
#endif