f8927c89d0
PR: 2335
322 lines
5.8 KiB
Raku
322 lines
5.8 KiB
Raku
#!/usr/bin/env perl
|
|
|
|
# ====================================================================
|
|
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
|
# project. The module is, however, dual licensed under OpenSSL and
|
|
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
|
# details see http://www.openssl.org/~appro/cryptogams/.
|
|
# ====================================================================
|
|
|
|
# SHA1 block procedure for Alpha.
|
|
|
|
# On 21264 performance is 33% better than code generated by vendor
|
|
# compiler, and 75% better than GCC [3.4], and in absolute terms is
|
|
# 8.7 cycles per processed byte. Implementation features vectorized
|
|
# byte swap, but not Xupdate.
|
|
|
|
@X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7",
|
|
"\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15");
|
|
$ctx="a0"; # $16
|
|
$inp="a1";
|
|
$num="a2";
|
|
$A="a3";
|
|
$B="a4"; # 20
|
|
$C="a5";
|
|
$D="t8";
|
|
$E="t9"; @V=($A,$B,$C,$D,$E);
|
|
$t0="t10"; # 24
|
|
$t1="t11";
|
|
$t2="ra";
|
|
$t3="t12";
|
|
$K="AT"; # 28
|
|
|
|
sub BODY_00_19 {
|
|
my ($i,$a,$b,$c,$d,$e)=@_;
|
|
my $j=$i+1;
|
|
$code.=<<___ if ($i==0);
|
|
ldq_u @X[0],0+0($inp)
|
|
ldq_u @X[1],0+7($inp)
|
|
___
|
|
$code.=<<___ if (!($i&1) && $i<14);
|
|
ldq_u @X[$i+2],($i+2)*4+0($inp)
|
|
ldq_u @X[$i+3],($i+2)*4+7($inp)
|
|
___
|
|
$code.=<<___ if (!($i&1) && $i<15);
|
|
extql @X[$i],$inp,@X[$i]
|
|
extqh @X[$i+1],$inp,@X[$i+1]
|
|
|
|
or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched
|
|
|
|
srl @X[$i],24,$t0 # vectorized byte swap
|
|
srl @X[$i],8,$t2
|
|
|
|
sll @X[$i],8,$t3
|
|
sll @X[$i],24,@X[$i]
|
|
zapnot $t0,0x11,$t0
|
|
zapnot $t2,0x22,$t2
|
|
|
|
zapnot @X[$i],0x88,@X[$i]
|
|
or $t0,$t2,$t0
|
|
zapnot $t3,0x44,$t3
|
|
sll $a,5,$t1
|
|
|
|
or @X[$i],$t0,@X[$i]
|
|
addl $K,$e,$e
|
|
and $b,$c,$t2
|
|
zapnot $a,0xf,$a
|
|
|
|
or @X[$i],$t3,@X[$i]
|
|
srl $a,27,$t0
|
|
bic $d,$b,$t3
|
|
sll $b,30,$b
|
|
|
|
extll @X[$i],4,@X[$i+1] # extract upper half
|
|
or $t2,$t3,$t2
|
|
addl @X[$i],$e,$e
|
|
|
|
addl $t1,$e,$e
|
|
srl $b,32,$t3
|
|
zapnot @X[$i],0xf,@X[$i]
|
|
|
|
addl $t0,$e,$e
|
|
addl $t2,$e,$e
|
|
or $t3,$b,$b
|
|
___
|
|
$code.=<<___ if (($i&1) && $i<15);
|
|
sll $a,5,$t1
|
|
addl $K,$e,$e
|
|
and $b,$c,$t2
|
|
zapnot $a,0xf,$a
|
|
|
|
srl $a,27,$t0
|
|
addl @X[$i%16],$e,$e
|
|
bic $d,$b,$t3
|
|
sll $b,30,$b
|
|
|
|
or $t2,$t3,$t2
|
|
addl $t1,$e,$e
|
|
srl $b,32,$t3
|
|
zapnot @X[$i],0xf,@X[$i]
|
|
|
|
addl $t0,$e,$e
|
|
addl $t2,$e,$e
|
|
or $t3,$b,$b
|
|
___
|
|
$code.=<<___ if ($i>=15); # with forward Xupdate
|
|
sll $a,5,$t1
|
|
addl $K,$e,$e
|
|
and $b,$c,$t2
|
|
xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
|
|
|
|
zapnot $a,0xf,$a
|
|
addl @X[$i%16],$e,$e
|
|
bic $d,$b,$t3
|
|
xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
|
|
|
|
srl $a,27,$t0
|
|
addl $t1,$e,$e
|
|
or $t2,$t3,$t2
|
|
xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
|
|
|
|
sll $b,30,$b
|
|
addl $t0,$e,$e
|
|
srl @X[$j%16],31,$t1
|
|
|
|
addl $t2,$e,$e
|
|
srl $b,32,$t3
|
|
addl @X[$j%16],@X[$j%16],@X[$j%16]
|
|
|
|
or $t3,$b,$b
|
|
zapnot @X[$i%16],0xf,@X[$i%16]
|
|
or $t1,@X[$j%16],@X[$j%16]
|
|
___
|
|
}
|
|
|
|
sub BODY_20_39 {
|
|
my ($i,$a,$b,$c,$d,$e)=@_;
|
|
my $j=$i+1;
|
|
$code.=<<___ if ($i<79); # with forward Xupdate
|
|
sll $a,5,$t1
|
|
addl $K,$e,$e
|
|
zapnot $a,0xf,$a
|
|
xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
|
|
|
|
sll $b,30,$t3
|
|
addl $t1,$e,$e
|
|
xor $b,$c,$t2
|
|
xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
|
|
|
|
srl $b,2,$b
|
|
addl @X[$i%16],$e,$e
|
|
xor $d,$t2,$t2
|
|
xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
|
|
|
|
srl @X[$j%16],31,$t1
|
|
addl $t2,$e,$e
|
|
srl $a,27,$t0
|
|
addl @X[$j%16],@X[$j%16],@X[$j%16]
|
|
|
|
or $t3,$b,$b
|
|
addl $t0,$e,$e
|
|
or $t1,@X[$j%16],@X[$j%16]
|
|
___
|
|
$code.=<<___ if ($i<77);
|
|
zapnot @X[$i%16],0xf,@X[$i%16]
|
|
___
|
|
$code.=<<___ if ($i==79); # with context fetch
|
|
sll $a,5,$t1
|
|
addl $K,$e,$e
|
|
zapnot $a,0xf,$a
|
|
ldl @X[0],0($ctx)
|
|
|
|
sll $b,30,$t3
|
|
addl $t1,$e,$e
|
|
xor $b,$c,$t2
|
|
ldl @X[1],4($ctx)
|
|
|
|
srl $b,2,$b
|
|
addl @X[$i%16],$e,$e
|
|
xor $d,$t2,$t2
|
|
ldl @X[2],8($ctx)
|
|
|
|
srl $a,27,$t0
|
|
addl $t2,$e,$e
|
|
ldl @X[3],12($ctx)
|
|
|
|
or $t3,$b,$b
|
|
addl $t0,$e,$e
|
|
ldl @X[4],16($ctx)
|
|
___
|
|
}
|
|
|
|
sub BODY_40_59 {
|
|
my ($i,$a,$b,$c,$d,$e)=@_;
|
|
my $j=$i+1;
|
|
$code.=<<___; # with forward Xupdate
|
|
sll $a,5,$t1
|
|
addl $K,$e,$e
|
|
zapnot $a,0xf,$a
|
|
xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
|
|
|
|
srl $a,27,$t0
|
|
and $b,$c,$t2
|
|
and $b,$d,$t3
|
|
xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
|
|
|
|
sll $b,30,$b
|
|
addl $t1,$e,$e
|
|
xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
|
|
|
|
srl @X[$j%16],31,$t1
|
|
addl $t0,$e,$e
|
|
or $t2,$t3,$t2
|
|
and $c,$d,$t3
|
|
|
|
or $t2,$t3,$t2
|
|
srl $b,32,$t3
|
|
addl @X[$i%16],$e,$e
|
|
addl @X[$j%16],@X[$j%16],@X[$j%16]
|
|
|
|
or $t3,$b,$b
|
|
addl $t2,$e,$e
|
|
or $t1,@X[$j%16],@X[$j%16]
|
|
zapnot @X[$i%16],0xf,@X[$i%16]
|
|
___
|
|
}
|
|
|
|
$code=<<___;
|
|
#ifdef __linux__
|
|
#include <asm/regdef.h>
|
|
#else
|
|
#include <asm.h>
|
|
#include <regdef.h>
|
|
#endif
|
|
|
|
.text
|
|
|
|
.set noat
|
|
.set noreorder
|
|
.globl sha1_block_data_order
|
|
.align 5
|
|
.ent sha1_block_data_order
|
|
sha1_block_data_order:
|
|
lda sp,-64(sp)
|
|
stq ra,0(sp)
|
|
stq s0,8(sp)
|
|
stq s1,16(sp)
|
|
stq s2,24(sp)
|
|
stq s3,32(sp)
|
|
stq s4,40(sp)
|
|
stq s5,48(sp)
|
|
stq fp,56(sp)
|
|
.mask 0x0400fe00,-64
|
|
.frame sp,64,ra
|
|
.prologue 0
|
|
|
|
ldl $A,0($ctx)
|
|
ldl $B,4($ctx)
|
|
sll $num,6,$num
|
|
ldl $C,8($ctx)
|
|
ldl $D,12($ctx)
|
|
ldl $E,16($ctx)
|
|
addq $inp,$num,$num
|
|
|
|
.Lloop:
|
|
.set noreorder
|
|
ldah $K,23170(zero)
|
|
zapnot $B,0xf,$B
|
|
lda $K,31129($K) # K_00_19
|
|
___
|
|
for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
|
|
|
|
$code.=<<___;
|
|
ldah $K,28378(zero)
|
|
lda $K,-5215($K) # K_20_39
|
|
___
|
|
for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
|
|
|
|
$code.=<<___;
|
|
ldah $K,-28900(zero)
|
|
lda $K,-17188($K) # K_40_59
|
|
___
|
|
for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
|
|
|
|
$code.=<<___;
|
|
ldah $K,-13725(zero)
|
|
lda $K,-15914($K) # K_60_79
|
|
___
|
|
for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
|
|
|
|
$code.=<<___;
|
|
addl @X[0],$A,$A
|
|
addl @X[1],$B,$B
|
|
addl @X[2],$C,$C
|
|
addl @X[3],$D,$D
|
|
addl @X[4],$E,$E
|
|
stl $A,0($ctx)
|
|
stl $B,4($ctx)
|
|
addq $inp,64,$inp
|
|
stl $C,8($ctx)
|
|
stl $D,12($ctx)
|
|
stl $E,16($ctx)
|
|
cmpult $inp,$num,$t1
|
|
bne $t1,.Lloop
|
|
|
|
.set noreorder
|
|
ldq ra,0(sp)
|
|
ldq s0,8(sp)
|
|
ldq s1,16(sp)
|
|
ldq s2,24(sp)
|
|
ldq s3,32(sp)
|
|
ldq s4,40(sp)
|
|
ldq s5,48(sp)
|
|
ldq fp,56(sp)
|
|
lda sp,64(sp)
|
|
ret (ra)
|
|
.end sha1_block_data_order
|
|
.ascii "SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
|
|
.align 2
|
|
___
|
|
$output=shift and open STDOUT,">$output";
|
|
print $code;
|
|
close STDOUT;
|