Add support for Windows CE and C64+ to FIPS module.

This commit is contained in:
Dr. Stephen Henson 2012-10-04 13:27:11 +00:00
parent 7b899c10cd
commit c616200172
47 changed files with 4749 additions and 97 deletions

View file

@ -610,12 +610,14 @@ my %table=(
"uClinux-dist","$ENV{'CC'}:\$(CFLAGS)::-D_REENTRANT::\$(LDFLAGS) \$(LDLIBS):BN_LLONG:${no_asm}:$ENV{'LIBSSL_dlfcn'}:linux-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):$ENV{'RANLIB'}::",
"uClinux-dist64","$ENV{'CC'}:\$(CFLAGS)::-D_REENTRANT::\$(LDFLAGS) \$(LDLIBS):SIXTY_FOUR_BIT_LONG:${no_asm}:$ENV{'LIBSSL_dlfcn'}:linux-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):$ENV{'RANLIB'}::",
"c64xplus","cl6x:-mv6400+ -o2 -ox -ms -pden -DNO_SYS_TYPES_H -DGETPID_IS_MEANINGLESS -DMD32_REG_T=int -DOPENSSL_SMALL_FOOTPRINT:<c6x.h>::DSPBIOS::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:",
);
my @MK1MF_Builds=qw(VC-WIN64I VC-WIN64A
debug-VC-WIN64I debug-VC-WIN64A
VC-NT VC-CE VC-WIN32 debug-VC-WIN32
BC-32
BC-32 c64xplus
netware-clib netware-clib-bsdsock
netware-libc netware-libc-bsdsock);

View file

@ -186,7 +186,7 @@ SHARED_LDFLAGS=
GENERAL= Makefile
BASENAME= openssl
NAME= $(BASENAME)-$(VERSION)
TARFILE= openssl-fips-2.0-test.tar
TARFILE= openssl-fips-2.0.tar
WTARFILE= $(NAME)-win.tar
EXHEADER= e_os2.h
HEADER= e_os.h

7
c6x/do_fips Normal file
View file

@ -0,0 +1,7 @@
#!/bin/sh
# Configure the tree for the c64xplus target, regenerate MINFO and
# c6x/fips.mak, then run the two makefiles. All paths are relative, so
# the script has to be started from the top of the source tree.

# Stop at the first failing step; otherwise a failed Configure or a
# failed makefile generation would be followed by a build that uses
# stale or truncated files.
set -e

perl Configure c64xplus fipscanisteronly no-engine
perl util/mkfiles.pl > MINFO
perl util/mk1mf.pl auto > c6x/fips.mak
make -f c6x/fips.mak
make -f c6x/fips_algvs.mak

7
c6x/env Normal file
View file

@ -0,0 +1,7 @@
# Environment for the C64x+ build. The file only exports variables, so
# presumably it is meant to be sourced into the build shell rather than
# executed -- confirm against the build instructions.
#
# MSYS-style PATH
# (prepends the CCStudio cgtools bin directory and an ActivePerl bin
# directory to whatever PATH already holds)
export PATH=/c/CCStudio_v3.3/c6000/cgtools/bin:/c/Program\ Files/ActivePerl58/bin:$PATH
# Windows-style variables
# C6X_C_DIR lists the cgtools include and lib directories, separated by ';'.
export C6X_C_DIR='C:\CCStudio_v3.3\c6000\cgtools\include;C:\CCStudio_v3.3\c6000\cgtools\lib'
# NOTE(review): presumably this is where the CCS_SCRIPTING_PERL module
# loaded by c6x/run6x is installed -- confirm.
export PERL5LIB=C:/CCStudio_v3.3/bin/utilities/ccs_scripting

32
c6x/fips_standalone_sha1 Normal file
View file

@ -0,0 +1,32 @@
#!/usr/bin/env perl
#
# Print the HMAC-SHA1 of a file as "HMAC-SHA1(file)= <hex>", keyed with
# the fixed key used below. With -verify the digest is additionally
# compared with the one recorded in "<file>.sha1": the script exits with
# status 0 when they agree and dies with "signature mismatch" otherwise.
#
# usage: fips_standalone_sha1 [-verify] file

# hmac_sha1.pl is loaded from the directory this script was started
# from. When $0 carries no directory part the match fails and $1 must
# not be used, so fall back to the current directory explicitly.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "./";
unshift(@INC,$dir);
require "hmac_sha1.pl";

# At least one argument is required and the last one has to name an
# existing plain file.
(@ARGV && -f $ARGV[$#ARGV]) || die "usage: $0 [-verify] file";
$verify=shift if ($ARGV[0] eq "-verify");
my $file = $ARGV[0];

# Hash the file in 4KB chunks.
sysopen(FD,$file,0) || die "$!";	# mode 0: read-only
binmode(FD);
my $ctx = HMAC->Init("etaonrishdlcupfm");
while (read(FD,$blob,4*1024)) { $ctx->Update($blob); }
close(FD);

my $signature = unpack("H*",$ctx->Final());
print "HMAC-SHA1($file)= $signature\n";

if ($verify) {
    # Only the first line of the .sha1 file is examined.
    open(FD,"<","$file.sha1") || die "$!";
    $line = <FD>;
    close(FD);
    # The pattern accepts hex digits of either case, while unpack("H*")
    # yields lower case, so normalise before comparing.
    exit(0) if ($line =~ /HMAC\-SHA1\([^\)]*\)=\s*([0-9a-f]+)/i &&
		lc($1) eq $signature);
    die "signature mismatch";
}

19
c6x/fipscanister.cmd Normal file
View file

@ -0,0 +1,19 @@
/*
 * Layout of the .text and .const output sections: in each of them the
 * ":start" input section is listed first and the ":end" input section
 * last, so everything named in between is placed between those two.
 * NOTE(review): presumably these markers are what the FIPS_text_start/
 * FIPS_text_end and FIPS_rodata_start/FIPS_rodata_end symbols looked up
 * by c6x/incore6x resolve to -- confirm against their definitions.
 */
SECTIONS
{
.text:
{
*(.fips_text:start)
*(.text)
/* read-only tables and switch tables are collected into .text as well */
*(.const:aes_asm)
*(.const:sha_asm)
*(.const:des_sptrans)
*(.switch)
*(.fips_text:end)
}
.const:
{
*(.fips_const:start)
*(.const)
*(.fips_const:end)
}
}

196
c6x/hmac_sha1.pl Normal file
View file

@ -0,0 +1,196 @@
#!/usr/bin/env perl
#
# Copyright (c) 2011 The OpenSSL Project.
#
######################################################################
#
# SHA1 and HMAC in Perl by <appro@openssl.org>.
#
{ package SHA1;
# SHA-1 in plain Perl. An object is a hash with three members: {H} holds
# the five chaining words, {N} the number of bytes passed to Update so
# far and {buf} the input that has not yet filled a 64-byte block.
# "use integer" is in effect for the whole package.
use integer;
{
################################### SHA1 block code generator
# The compression function is not written out by hand. The subs below
# return Perl source text for one round each; the text is concatenated
# into $sha1_impl and compiled with eval() at the end of this scope.
#
# Names of the working variables in the roles a,b,c,d,e for the round
# being generated. The list is rotated after every round, so the
# generated code never has to move values between variables.
my @V = ('$A','$B','$C','$D','$E');
# Number of the round being generated; shared between the subs below and
# the loops that drive them.
my $i;
# Returns the text that recomputes the message schedule word $W[$i%16]
# from four earlier words, rotated left by one bit. The return value is
# that of the last statement executed, i.e. one of the two ".=" below.
sub XUpdate {
my $ret;
$ret="(\$T=\$W[($i-16)%16]^\$W[($i-14)%16]^\$W[($i-8)%16]^\$W[($i-3)%16],\n\t";
# (1<<31)<<1 is non-zero only when integers are wider than 32 bits; in
# that case the result has to be truncated to 32 bits explicitly.
if ((1<<31)<<1) {
$ret.=" \$W[$i%16]=((\$T<<1)|(\$T>>31))&0xffffffff)\n\t ";
} else {
# 32-bit integers: only bit 0 of the shifted-down value is kept
$ret.=" \$W[$i%16]=(\$T<<1)|((\$T>>31)&1))\n\t ";
}
}
# Returns the text that ends every round: the "a rotated left by 5" term
# that closes the "$e+=..." expression, followed by the statement that
# rotates b left by 30.
sub tail {
my ($a,$b,$c,$d,$e)=@V;
my $ret;
if ((1<<31)<<1) {
# wide integers: e is truncated to 32 bits after the addition
$ret.="(($a<<5)|($a>>27));\n\t";
$ret.="$b=($b<<30)|($b>>2); $e&=0xffffffff; #$b&=0xffffffff;\n\t";
} else {
# 32-bit integers: the masks keep only the bits that belong to the
# rotated-in part of the right-shifted value
$ret.="(($a<<5)|($a>>27)&0x1f);\n\t";
$ret.="$b=($b<<30)|($b>>2)&0x3fffffff;\n\t";
}
$ret;
}
# Rounds 0..15: the schedule word is taken from $W[$i] as unpacked.
sub BODY_00_15 {
my ($a,$b,$c,$d,$e)=@V;
"$e+=\$W[$i]+0x5a827999+((($c^$d)&$b)^$d)+".tail();
}
# Rounds 16..19: same function and constant as 0..15, but the schedule
# word is produced by XUpdate().
sub BODY_16_19 {
my ($a,$b,$c,$d,$e)=@V;
"$e+=".XUpdate()."+0x5a827999+((($c^$d)&$b)^$d)+".tail();
}
# Rounds 20..39.
sub BODY_20_39 {
my ($a,$b,$c,$d,$e)=@V;
"$e+=".XUpdate()."+0x6ed9eba1+($b^$c^$d)+".tail();
}
# Rounds 40..59.
sub BODY_40_59 {
my ($a,$b,$c,$d,$e)=@V;
"$e+=".XUpdate()."+0x8f1bbcdc+(($b&$c)|(($b|$c)&$d))+".tail();
}
# Rounds 60..79.
sub BODY_60_79 {
my ($a,$b,$c,$d,$e)=@V;
"$e+=".XUpdate()."+0xca62c1d6+($b^$c^$d)+".tail();
}
# Prologue of the generated method: block($self,$data) unpacks $data
# into sixteen 32-bit big-endian words and loads the chaining value.
my $sha1_impl =
'sub block {
my $self = @_[0];
my @W = unpack("N16",@_[1]);
my ($A,$B,$C,$D,$E,$T) = @{$self->{H}};
';
# With wide integers A and B are truncated to 32 bits on entry.
$sha1_impl.='
$A &= 0xffffffff;
$B &= 0xffffffff;
' if ((1<<31)<<1);
# Emit the 80 rounds; unshift(@V,pop(@V)) rotates the variable roles.
for($i=0;$i<16;$i++){ $sha1_impl.=BODY_00_15(); unshift(@V,pop(@V)); }
for(;$i<20;$i++) { $sha1_impl.=BODY_16_19(); unshift(@V,pop(@V)); }
for(;$i<40;$i++) { $sha1_impl.=BODY_20_39(); unshift(@V,pop(@V)); }
for(;$i<60;$i++) { $sha1_impl.=BODY_40_59(); unshift(@V,pop(@V)); }
for(;$i<80;$i++) { $sha1_impl.=BODY_60_79(); unshift(@V,pop(@V)); }
# Epilogue: add the working variables back into the chaining value.
$sha1_impl.='
$self->{H}[0]+=$A; $self->{H}[1]+=$B; $self->{H}[2]+=$C;
$self->{H}[3]+=$D; $self->{H}[4]+=$E; }';
#print $sha1_impl,"\n";
eval($sha1_impl); # generate code
}
# SHA1->Init() returns a new context with the initial chaining value and
# a zero byte count.
sub Init {
my $class = shift; # multiple instances...
my $self = {};
bless $self,$class;
$self->{H} = [0x67452301,0xefcdab89,0x98badcfe,0x10325476,0xc3d2e1f0];
$self->{N} = 0;
return $self;
}
# $ctx->Update(@strings) absorbs every string in turn and returns $ctx,
# so calls can be chained.
sub Update {
my $self = shift;
my $msg;
foreach $msg (@_) {
my $len = length($msg);
my $num = length($self->{buf});
my $off = 0;
$self->{N} += $len;
# not enough for a full block yet: just remember the data
if (($num+$len)<64)
{ $self->{buf} .= $msg; next; }
# complete the buffered partial block first
elsif ($num)
{ $self->{buf} .= substr($msg,0,($off=64-$num));
$self->block($self->{buf});
}
# process all remaining whole blocks straight from $msg
while(($off+64) <= $len)
{ $self->block(substr($msg,$off,64));
$off += 64;
}
# keep the leftover (possibly empty) for the next call
$self->{buf} = substr($msg,$off);
}
return $self;
}
# $ctx->Final() pads the buffered data, processes the last block(s) and
# returns the five chaining words packed as 20 big-endian bytes.
sub Final {
my $self = shift;
my $num = length($self->{buf});
$self->{buf} .= chr(0x80); $num++;
# no room left for the 8-byte length: fill up and flush this block
if ($num>56)
{ $self->{buf} .= chr(0)x(64-$num);
$self->block($self->{buf});
$self->{buf}=undef;
$num=0;
}
$self->{buf} .= chr(0)x(56-$num);
# message length in bits as two 32-bit words, high word first
$self->{buf} .= pack("N2",($self->{N}>>29)&0x7,$self->{N}<<3);
$self->block($self->{buf});
return pack("N*",@{$self->{H}});
}
# Dies unless the digests of two fixed messages match the expected
# values; the third (one million 'a') check is commented out.
sub Selftest {
my $hash;
$hash=SHA1->Init()->Update('abc')->Final();
die "SHA1 test#1" if (unpack("H*",$hash) ne 'a9993e364706816aba3e25717850c26c9cd0d89d');
$hash=SHA1->Init()->Update('abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq')->Final();
die "SHA1 test#2" if (unpack("H*",$hash) ne '84983e441c3bd26ebaae4aa1f95129e5e54670f1');
#$hash=SHA1->Init()->Update('a'x1000000)->Final();
#die "SHA1 test#3" if (unpack("H*",$hash) ne '34aa973cd4c4daa4f61eeb2bdbad27316534016f');
}
}
{ package HMAC;
    # HMAC on top of the SHA1 package. A context keeps the running inner
    # hash in {hash} and the already padded outer key block in {okey}.

    # HMAC->Init($key) returns a context that has absorbed the inner
    # key block.
    sub Init {
	my ($class,$key) = @_;
	my $self = bless({},$class);

	# keys longer than one 64-byte block are replaced by their digest,
	# then the key is zero-padded to exactly one block
	$key = SHA1->Init()->Update($key)->Final() if (length($key)>64);
	$key .= chr(0x00)x(64-length($key));

	my @inner = map { $_ ^ 0x36 } unpack("C*",$key);
	my @outer = map { $_ ^ 0x36 ^ 0x5c } @inner;	# undo 0x36, apply 0x5c

	$self->{hash} = SHA1->Init();
	$self->{hash}->Update(pack("C*",@inner));
	$self->{okey} = pack("C*",@outer);
	return $self;
    }

    # $ctx->Update(@strings) feeds data to the inner hash; returns $ctx.
    sub Update {
	my ($self,@chunks) = @_;
	$self->{hash}->Update(@chunks);
	return $self;
    }

    # $ctx->Final() returns the MAC: the digest of the outer key block
    # followed by the inner digest.
    sub Final {
	my ($self) = @_;
	my $inner_digest = $self->{hash}->Final();
	my $outer = SHA1->Init();
	$outer->Update($self->{okey},$inner_digest);
	return $outer->Final();
    }

    # Dies with "HMAC test" unless one fixed key/message pair yields the
    # expected MAC.
    sub Selftest {
	my $ctx = HMAC->Init('0123456789:;<=>?@ABC');
	$ctx->Update('Sample #2');
	my $hex = unpack("H*",$ctx->Final());
	die "HMAC test" if ($hex ne '0922d3405faa3d194f82a45830737d5cc6c75d24');
    }
}
1;

241
c6x/incore6x Normal file
View file

@ -0,0 +1,241 @@
#!/usr/bin/env perl
#
# Copyright (c) 2011 The OpenSSL Project.
#
# The script embeds fingerprint into TI-COFF executable object.
# hmac_sha1.pl is loaded from the directory this script was started from.
# When $0 carries no directory part the match fails and $1 must not be
# used, so fall back to the current directory explicitly.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "./";
unshift(@INC,$dir);
require "hmac_sha1.pl";
######################################################################
#
# COFF symbol table parser by <appro@openssl.org>. The table entries
# are extended with offset within executable file...
#
{ package COFF;
# Reader for the section and symbol tables of a TI-COFF file. After
# Load() the object holds, under {symbols}, one hash per accepted symbol,
# keyed by symbol name and extended with st_offset (position of the
# symbol within the file) and st_section (name of its section).
use FileHandle;
# Returns a reference to a fresh hash built from the key/value list passed.
sub dup { my %copy=map {$_} @_; return \%copy; }
# COFF->Load($filename) parses the file and returns the new object. Any
# I/O failure or a file that does not carry the expected version id ends
# the script through die.
sub Load {
my $class = shift;
my $self = {};
my $FD = FileHandle->new(); # autoclose
bless $self,$class;
sysopen($FD,shift,0) or die "$!";
binmode($FD);
#################################################
# read and parse COFF header...
#
# 22 bytes: two 16-bit, three 32-bit and three 16-bit little-endian fields
read($FD,my $coff,22) or die "$!";
my %coff_header;
@coff_header{version,nsects,date,syms_off,nsyms,opt,flags,magic}=
unpack("v2V3v3",$coff);
$!=42; # signal fipsld to revert to two-step link
die "not TI-COFF file" if ($coff_header{version} != 0xC2);
# NOTE(review): $big_endian is not referenced again in this package; all
# unpack templates here are little-endian.
my $big_endian = ($coff_header{flags}>>9)&1; # 0 or 1
my $strings;
my $symsize;
#################################################
# load strings table
#
# it is read from right behind the symbol table (18 bytes per entry)
seek($FD,$coff_header{syms_off}+18*$coff_header{nsyms},0) or die "$!";
read($FD,$strings,4) or die "$!";
$symsize = unpack("V",$strings);
# the table body is appended after the 4 size bytes already in $strings,
# so offsets into $strings count from the start of the size field
read($FD,$strings,$symsize,4) or die "$!";
#################################################
# read sections
#
my $i;
my @sections;
# seek to section headers
seek($FD,22+@coff_header{opt},0) or die "$!";
for ($i=0;$i<$coff_header{nsects};$i++) {
my %coff_shdr;
my $name;
# each header is 48 bytes; the template below consumes the first 44
read($FD,my $section,48) or die "$!";
@coff_shdr{sh_name,sh_phaddr,sh_vaddr,
sh_size,sh_offset,sh_relocs,sh_reserved,
sh_relocoff,sh_lines,sh_flags} =
unpack("a8V9",$section);
$name = $coff_shdr{sh_name};
# see if sh_name is an offset in $strings
my ($hi,$lo) = unpack("V2",$name);
if ($hi==0 && $lo<$symsize) {
# at most 64 bytes are taken; the name ends at the first NUL
$name = substr($strings,$lo,64);
}
$coff_shdr{sh_name} = (split(chr(0),$name))[0];
push(@sections,dup(%coff_shdr));
}
#################################################
# load symbols table
#
seek($FD,$coff_header{syms_off},0) or die "$!";
for ($i=0;$i<$coff_header{nsyms};$i++) {
my %coff_sym;
my $name;
read($FD,my $blob,18) or die "$!";
@coff_sym{st_name,st_value,st_shndx,reserved,class,aux} =
unpack("a8Vv2C2",$blob);
# skip aux entries
# (they count as symbol table entries, hence the adjustment of $i)
if ($coff_sym{aux}) {
seek($FD,18*$coff_sym{aux},1) or die "$!";
$i+=$coff_sym{aux};
}
$name = $coff_sym{st_name};
# see if st_name is an offset in $strings
my ($hi,$lo) = unpack("V2",$name);
if ($hi==0 && $lo<$symsize) {
$name = substr($strings,$lo,64);
}
$coff_sym{st_name} = $name = (split(chr(0),$name))[0];
# st_shndx is 1-based; keep only symbols that live in a known section
# with a non-zero file offset and whose name is "_" followed by a letter
my $st_secn = $coff_sym{st_shndx}-1;
if ($st_secn>=0 && $st_secn<=$#sections
&& @sections[$st_secn]->{sh_offset}
&& $name =~ m/^_[a-z]+/i) {
# synthesize st_offset, ...
$coff_sym{st_offset} = $coff_sym{st_value}
- @sections[$st_secn]->{sh_vaddr}
+ @sections[$st_secn]->{sh_offset};
$coff_sym{st_section} = @sections[$st_secn]->{sh_name};
# ... and add to lookup table
$self->{symbols}{$name} = dup(%coff_sym);
}
}
return $self;
}
# $obj->Lookup($name) returns the hash recorded for symbol "_$name" (the
# underscore is prepended here), or undef when there is none.
sub Lookup {
my $self = shift;
my $name = shift;
return $self->{symbols}{"_$name"};
}
# $obj->Traverse($coderef) calls the code reference once per recorded
# symbol, passing the symbol's hash; anything but a code reference is
# ignored.
sub Traverse {
my $self = shift;
my $code = shift;
if (ref($code) eq 'CODE') {
for (keys(%{$self->{symbols}})) { &$code($self->{symbols}{$_}); }
}
}
}
######################################################################
#
# main()
#
# Command line: exactly one argument (the binary), or several arguments of
# which the first is -dso or -exe; the latter selects legacy mode. The
# binary is always the last argument.
my $legacy_mode;
if ($#ARGV<0 || ($#ARGV>0 && !($legacy_mode=(@ARGV[0] =~ /^\-(dso|exe)$/)))) {
print STDERR "usage: $0 [-dso|-exe] ti-coff-binary\n";
exit(1);
}
$exe = COFF->Load(@ARGV[$#ARGV]);
# These five symbols are mandatory; the script dies if one is missing.
$FIPS_text_start = $exe->Lookup("FIPS_text_start") or die;
$FIPS_text_end = $exe->Lookup("FIPS_text_end") or die;
$FIPS_rodata_start = $exe->Lookup("FIPS_rodata_start") or die;
$FIPS_rodata_end = $exe->Lookup("FIPS_rodata_end") or die;
$FIPS_signature = $exe->Lookup("FIPS_signature") or die;
# new cross-compile support
$FIPS_text_startX = $exe->Lookup("FIPS_text_startX");
$FIPS_text_endX = $exe->Lookup("FIPS_text_endX");
if (!$legacy_mode) {
# outside legacy mode the X symbols are required as well
if (!$FIPS_text_startX || !$FIPS_text_endX) {
print STDERR "@ARGV[$#ARGV] is not cross-compiler aware.\n";
exit(42); # signal fipsld to revert to two-step link
}
# optional; when present the fingerprint is later written there as hex
$FINGERPRINT_ascii_value
= $exe->Lookup("FINGERPRINT_ascii_value");
}
# when both X symbols exist they replace the plain text boundaries
if ($FIPS_text_startX && $FIPS_text_endX) {
$FIPS_text_start = $FIPS_text_startX;
$FIPS_text_end = $FIPS_text_endX;
}
# legacy mode only reads the binary, otherwise it is patched in place
sysopen(FD,@ARGV[$#ARGV],$legacy_mode?0:2) or die "$!"; # 2 is read/write
binmode(FD);
# HMAC_Update(\$ctx,$off,$len) feeds $len bytes, read from the global
# handle FD starting at file offset $off, to the HMAC context that the
# first argument refers to.
#
# An empty region is skipped: read() returns 0 for it, which the former
# "read(...) or die" treated as a failure. A region that cannot be read
# completely is now fatal, as hashing a truncated region would silently
# produce a wrong fingerprint.
sub HMAC_Update {
    my ($hmac,$off,$len) = @_;
    my $blob;

    return if ($len == 0);

    seek(FD,$off,0) or die "$!";
    my $got = read(FD,$blob,$len);
    die "$!" unless (defined($got));
    die "short read at offset $off: wanted $len bytes, got $got"
	if ($got != $len);
    $$hmac->Update($blob);
}
# fips/fips.c:FIPS_incore_fingerprint's Perl twin
#
# Computes the HMAC over the text and read-only data regions of the file
# open on FD, using the st_offset values of the symbols looked up by the
# main program, and returns the raw digest. The 20 bytes at the signature
# symbol are left out when they fall inside the hashed data region.
sub FIPS_incore_fingerprint {
    my ($text_lo,$text_hi) = ($FIPS_text_start->{st_offset},
			      $FIPS_text_end->{st_offset});
    my ($data_lo,$data_hi) = ($FIPS_rodata_start->{st_offset},
			      $FIPS_rodata_end->{st_offset});
    my $sig_off = $FIPS_signature->{st_offset};
    my $hmac = HMAC->Init("etaonrishdlcupfm");

    # Overlapping regions are merged into a single data region that
    # starts at the lower start offset and ends at the higher end offset;
    # the text region is then dropped.
    my $overlap = ($text_lo<=$data_lo && $text_hi>=$data_lo)
	       || ($data_lo<=$text_lo && $data_hi>=$text_lo);
    if ($overlap) {
	$data_lo = $text_lo if ($text_lo<$data_lo);
	$data_hi = $text_hi if ($text_hi>$data_hi);
	$text_lo = $text_hi = 0;
    }

    HMAC_Update(\$hmac,$text_lo,$text_hi-$text_lo) if ($text_lo);

    if ($sig_off>=$data_lo && $sig_off<$data_hi) {
	# hash up to the signature, then continue 20 bytes further on
	HMAC_Update(\$hmac,$data_lo,$sig_off-$data_lo);
	$data_lo = $sig_off+20;
    }
    HMAC_Update(\$hmac,$data_lo,$data_hi-$data_lo);

    return $hmac->Final();
}
# Compute the fingerprint and deliver it: in legacy mode it is printed as
# hex on STDOUT; otherwise it is written into the binary, as hex text at
# FINGERPRINT_ascii_value when that symbol exists and as raw bytes at
# FIPS_signature when it does not.
$fingerprint = FIPS_incore_fingerprint();
if ($legacy_mode) {
    print unpack("H*",$fingerprint);
} elsif ($FINGERPRINT_ascii_value) {
    seek(FD,$FINGERPRINT_ascii_value->{st_offset},0) or die "$!";
    print FD unpack("H*",$fingerprint) or die "$!";
} else {
    seek(FD,$FIPS_signature->{st_offset},0) or die "$!";
    print FD $fingerprint or die "$!";
}
# The writes above are buffered, so a failure to get them onto disk may
# only be reported when the handle is closed.
close(FD) or die "$!";

43
c6x/run6x Normal file
View file

@ -0,0 +1,43 @@
#!/usr/bin/env perl
use CCS_SCRIPTING_PERL;

# The program to load is named by the first argument; ".out" is appended
# when no file of exactly that name exists. Give up if that does not
# exist either.
$exe = $ARGV[0];
$exe .= ".out" unless (-f $exe);
die unless (-f $exe);

my $studio = CCS_SCRIPTING_PERL::CCS_Scripting->new();
$studio->CCSOpenNamed("*","*",1); # connect to board
$studio->TargetReset();
print "loading $exe\n";
$studio->ProgramLoad($exe);
# write_string($studio,$addr,$str) stores $str in target memory one byte
# per MemoryWrite call, starting at $addr, and appends a zero byte.
# Returns the number of bytes written, terminator included.
sub write_string {
    my ($studio,$addr,$str) = @_;
    my $len = length($str);

    foreach my $idx (0..$len-1) {
	$studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr+$idx,8,vec($str,$idx,8));
    }
    # terminating zero byte right behind the last character
    $studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr+$len,8,0);

    return $len+1;
}
# Pass the command line to the loaded program through its __c_args area,
# which is filled in as follows (all words are 32 bits wide):
#   word 0              number of arguments
#   words 1..n          addresses of the argument strings
#   word n+1            0, terminating the address list
#   from word n+2 on    the zero-terminated strings, back to back
$addr= $studio->SymbolGetAddress("__c_args");
printf "setting up __c_args at 0x%X\n",$addr;#\n";
$studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr,32,$#ARGV+1);
for ($i=0,$strings=$addr+($#ARGV+3)*4; $i<=$#ARGV; $i++) {
    $off = write_string($studio,$strings,$ARGV[$i]);
    $studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr+4*($i+1),32,$strings);
    $strings += $off;
}
# Terminator of the address list. The page argument used to be spelled
# $SCC_SCRIPTING_PERL::PAGE_DATA, a variable that does not exist, so this
# one write was issued with an undefined page instead of PAGE_DATA.
$studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr+4*($i+1),32,0);
print "running...\n";
$studio->TargetRun();

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,333 @@
;;====================================================================
;; Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
;; project.
;;
;; Rights for redistribution and usage in source and binary forms are
;; granted according to the OpenSSL license. Warranty of any kind is
;; disclaimed.
;;====================================================================
;; Compiler-generated multiply-n-add SPLOOP runs at 12*n cycles, n
;; being the number of 32-bit words, addition - 8*n. Corresponding 4x
;; unrolled SPLOOP-free loops - at ~8*n and ~5*n. Below assembler
;; SPLOOPs spin at ... 2*n cycles [plus epilogue].
;;====================================================================
.text
.asg B3,RA
.asg A4,ARG0
.asg B4,ARG1
.asg A6,ARG2
.asg B6,ARG3
.asg A8,ARG4
.asg B8,ARG5
.asg A4,RET
.asg A15,FP
.asg B14,DP
.asg B15,SP
;; _bn_mul_add_words: ARG0 = rp, ARG1 = ap, ARG2 = number of words,
;; ARG3 = multiplier. For every word, ap[i] times the multiplier plus
;; rp[i] plus the carried high part is formed; the low word is stored
;; to rp[i] and the high part is carried on in A19. Returns the last
;; high part in RET, or 0 straight away when the word count is zero.
.global _bn_mul_add_words
_bn_mul_add_words:
.asmfunc
MV ARG2,B0
[!B0] BNOP RA
||[!B0] MVK 0,RET
[B0] MVC B0,ILC
[B0] ZERO A19 ; high part of accumulator
|| [B0] MV ARG0,A2
|| [B0] MV ARG3,A3
NOP 3
SPLOOP 2 ; 2*n+10
;;====================================================================
LDW *ARG1++,B7 ; ap[i]
NOP 3
LDW *ARG0++,A7 ; rp[i]
MPY32U B7,A3,A17:A16
NOP 3 ; [2,0] in epilogue
ADDU A16,A7,A21:A20
ADDU A19,A21:A20,A19:A18
|| MV.S A17,A23
SPKERNEL 2,1 ; leave slot for "return value"
|| STW A18,*A2++ ; rp[i]
|| ADD A19,A23,A19
;;====================================================================
BNOP RA,4
MV A19,RET ; return value
.endasmfunc
;; _bn_mul_words: ARG0 = rp, ARG1 = ap, ARG2 = number of words,
;; ARG3 = multiplier. For every word, ap[i] times the multiplier plus
;; the carried high part is formed; the low word is stored to rp[i]
;; and the high part is carried on in A19. Returns the last high part
;; in RET, or 0 straight away when the word count is zero.
.global _bn_mul_words
_bn_mul_words:
.asmfunc
MV ARG2,B0
[!B0] BNOP RA
||[!B0] MVK 0,RET
[B0] MVC B0,ILC
[B0] ZERO A19 ; high part of accumulator
NOP 3
SPLOOP 2 ; 2*n+10
;;====================================================================
LDW *ARG1++,A7 ; ap[i]
NOP 4
MPY32U A7,ARG3,A17:A16
NOP 4 ; [2,0] in epilogue
ADDU A19,A16,A19:A18
|| MV.S A17,A21
SPKERNEL 2,1 ; leave slot for "return value"
|| STW A18,*ARG0++ ; rp[i]
|| ADD.L A19,A21,A19
;;====================================================================
BNOP RA,4
MV A19,RET ; return value
.endasmfunc
;; _bn_sqr_words: ARG0 = rp, ARG1 = ap, ARG2 = number of words.
;; Every ap[i] is multiplied by itself; the low word of the product is
;; stored to rp[2*i] (through B2) and the high word to rp[2*i+1]
;; (through ARG0, advanced by 4 beforehand). RET is set to 0 only on
;; the early exit taken for a zero word count.
.global _bn_sqr_words
_bn_sqr_words:
.asmfunc
MV ARG2,B0
[!B0] BNOP RA
||[!B0] MVK 0,RET
[B0] MVC B0,ILC
[B0] MV ARG0,B2
|| [B0] ADD 4,ARG0,ARG0
NOP 3
SPLOOP 2 ; 2*n+10
;;====================================================================
LDW *ARG1++,B7 ; ap[i]
NOP 4
MPY32U B7,B7,B1:B0
NOP 3 ; [2,0] in epilogue
STW B0,*B2++(8) ; rp[2*i]
MV B1,A1
SPKERNEL 2,0 ; fully overlap BNOP RA,5
|| STW A1,*ARG0++(8) ; rp[2*i+1]
;;====================================================================
BNOP RA,5
.endasmfunc
;; _bn_add_words: ARG0 = rp (copied to A3), ARG1 = ap, ARG2 = bp,
;; ARG3 = number of words. For every word the sum of ap[i], bp[i] and
;; the carry in A1 is formed; the low word is stored through A3 and
;; the new carry is kept in A1 and mirrored into RET. Returns the last
;; carry, or 0 straight away when the word count is zero.
.global _bn_add_words
_bn_add_words:
.asmfunc
MV ARG3,B0
[!B0] BNOP RA
||[!B0] MVK 0,RET
[B0] MVC B0,ILC
[B0] ZERO A1 ; carry flag
|| [B0] MV ARG0,A3
NOP 3
SPLOOP 2 ; 2*n+6
;;====================================================================
LDW *ARG2++,A7 ; bp[i]
|| LDW *ARG1++,B7 ; ap[i]
NOP 4
ADDU A7,B7,A9:A8
ADDU A1,A9:A8,A1:A0
SPKERNEL 0,0 ; fully overlap BNOP RA,5
|| STW A0,*A3++ ; write result
|| MV A1,RET ; keep carry flag in RET
;;====================================================================
BNOP RA,5
.endasmfunc
;; _bn_sub_words: ARG0 = rp (copied to A3), ARG1 = ap, ARG2 = bp,
;; ARG3 = number of words. For every word ap[i] minus bp[i] is formed
;; and 1 more is subtracted when the borrow flag in A2 is set; the low
;; word is stored through A3 and bit 0 of the high part becomes the
;; next borrow. Returns the last borrow in RET, or 0 straight away
;; when the word count is zero.
.global _bn_sub_words
_bn_sub_words:
.asmfunc
MV ARG3,B0
[!B0] BNOP RA
||[!B0] MVK 0,RET
[B0] MVC B0,ILC
[B0] ZERO A2 ; borrow flag
|| [B0] MV ARG0,A3
NOP 3
SPLOOP 2 ; 2*n+6
;;====================================================================
LDW *ARG2++,A7 ; bp[i]
|| LDW *ARG1++,B7 ; ap[i]
NOP 4
SUBU B7,A7,A1:A0
[A2] SUB A1:A0,1,A1:A0
SPKERNEL 0,1 ; leave slot for "return borrow flag"
|| STW A0,*A3++ ; write result
|| AND 1,A1,A2 ; pass on borrow flag
;;====================================================================
BNOP RA,4
AND 1,A1,RET ; return borrow flag
.endasmfunc
;; _bn_div_words: rearranges its three arguments and hands over to
;; __divull. In a single execute packet ARG0 goes to A5 and ARG1 to A4
;; (forming the pair A5:A4), ARG2 goes to B4 and B5 is cleared (forming
;; the pair B5:B4).
;; NOTE(review): CALLP is given A3 rather than RA (B3) as its return
;; address register, so RA is left as the caller set it -- presumably
;; so that __divull returns directly to our caller; confirm.
.global _bn_div_words
.global __divull
_bn_div_words:
.asmfunc
CALLP __divull,A3 ; jump to rts64plus.lib
|| MV ARG0,A5
|| MV ARG1,ARG0
|| MV ARG2,ARG1
|| ZERO B5
.endasmfunc
;;====================================================================
;; Not really Comba algorithm, just straightforward NxM... Dedicated
;; fully unrolled real Comba implementations are asymptotically 2x
;; faster, but naturally larger undertaking. Purpose of this exercise
;; was rather to learn to master nested SPLOOPs...
;;====================================================================
;; _bn_mul_comba8: ARG0 = rp, ARG1 = ap, ARG2 = bp, with N = M = 8.
;; _bn_sqr_comba8 copies ARG1 into ARG2 and falls through into
;; _bn_mul_comba8. The code from sploopNxM? on takes N, M and the
;; pointers from registers set up in the two packets before it.
.global _bn_sqr_comba8
.global _bn_mul_comba8
_bn_sqr_comba8:
MV ARG1,ARG2
_bn_mul_comba8:
.asmfunc
MVK 8,B0 ; N, RILC
|| MVK 8,A0 ; M, outer loop counter
|| MV ARG1,A5 ; copy ap
|| MV ARG0,B4 ; copy rp
|| ZERO B19 ; high part of accumulator
MVC B0,RILC
|| SUB B0,2,B1 ; N-2, initial ILC
|| SUB B0,1,B2 ; const B2=N-1
|| LDW *A5++,B6 ; ap[0]
|| MV A0,A3 ; const A3=M
sploopNxM?: ; for best performance arrange M<=N
[A0] SPLOOPD 2 ; 2*n+10
|| MVC B1,ILC
|| ADDAW B4,B0,B5
|| ZERO B7
|| LDW *A5++,A9 ; pre-fetch ap[1]
|| ZERO A1
|| SUB A0,1,A0
;;====================================================================
;; SPLOOP from bn_mul_add_words, but with flipped A<>B register files.
;; This is because of Advisory 15 from TI publication SPRZ247I.
;; A1 is zero during the first pass, so rp[i] is not loaded and the
;; B7 cleared above is added instead; A1 is set to 1 in outer? below.
LDW *ARG2++,A7 ; bp[i]
NOP 3
[A1] LDW *B5++,B7 ; rp[i]
MPY32U A7,B6,B17:B16
NOP 3
ADDU B16,B7,B21:B20
ADDU B19,B21:B20,B19:B18
|| MV.S B17,B23
SPKERNEL
|| STW B18,*B4++ ; rp[i]
|| ADD.S B19,B23,B19
;;====================================================================
outer?: ; m*2*(n+1)+10
SUBAW ARG2,A3,ARG2 ; rewind bp to bp[0]
SPMASKR
|| CMPGT A0,1,A2 ; done pre-fetching ap[i+1]?
MVD A9,B6 ; move through .M unit(*)
[A2] LDW *A5++,A9 ; pre-fetch ap[i+1]
SUBAW B5,B2,B5 ; rewind rp to rp[1]
MVK 1,A1
[A0] BNOP.S1 outer?,4
|| [A0] SUB.L A0,1,A0
STW B19,*B4--[B2] ; rewind rp to rp[1]
|| ZERO.S B19 ; high part of accumulator
;; end of outer?
BNOP RA,5 ; return
.endasmfunc
;; (*) It should be noted that B6 is used as input to MPY32U in
;; chronologically next cycle in *preceding* SPLOOP iteration.
;; Normally such arrangement would require DINT, but at this
;; point SPLOOP is draining and interrupts are disabled
;; implicitly.
;; _bn_mul_comba4: ARG0 = rp, ARG1 = ap, ARG2 = bp; multiplies two
;; 4-word numbers and stores the 8 result words rp[0]..rp[7].
;; _bn_sqr_comba4 copies ARG1 into ARG2 and falls through into
;; _bn_mul_comba4. Of the two variants below only the ".else" one is
;; assembled, the ".if 0" one is kept for reference.
.global _bn_sqr_comba4
.global _bn_mul_comba4
_bn_sqr_comba4:
MV ARG1,ARG2
_bn_mul_comba4:
.asmfunc
.if 0
BNOP sploopNxM?,3
;; Above mentioned m*2*(n+1)+10 does not apply in n=m=4 case,
;; because of read-after-write penalties, it's rather
;; n*2*(n+3)+10, or 66 cycles [plus various overheads]...
MVK 4,B0 ; N, RILC
|| MVK 4,A0 ; M, outer loop counter
|| MV ARG1,A5 ; copy ap
|| MV ARG0,B4 ; copy rp
|| ZERO B19 ; high part of accumulator
MVC B0,RILC
|| SUB B0,2,B1 ; first ILC
|| SUB B0,1,B2 ; const B2=N-1
|| LDW *A5++,B6 ; ap[0]
|| MV A0,A3 ; const A3=M
.else
;; This alternative is exercise in fully unrolled Comba
;; algorithm implementation that operates at n*(n+1)+12, or
;; as little as 32 cycles...
;; a[0..3] are loaded into B16..B19 and b[0..3] into A16..A19;
;; the low words of the products are summed on the A side and
;; the high words on the B side.
LDW *ARG1[0],B16 ; a[0]
|| LDW *ARG2[0],A16 ; b[0]
LDW *ARG1[1],B17 ; a[1]
|| LDW *ARG2[1],A17 ; b[1]
LDW *ARG1[2],B18 ; a[2]
|| LDW *ARG2[2],A18 ; b[2]
LDW *ARG1[3],B19 ; a[3]
|| LDW *ARG2[3],A19 ; b[3]
NOP
MPY32U A16,B16,A1:A0 ; a[0]*b[0]
MPY32U A17,B16,A23:A22 ; a[0]*b[1]
MPY32U A16,B17,A25:A24 ; a[1]*b[0]
MPY32U A16,B18,A27:A26 ; a[2]*b[0]
STW A0,*ARG0[0]
|| MPY32U A17,B17,A29:A28 ; a[1]*b[1]
MPY32U A18,B16,A31:A30 ; a[0]*b[2]
|| ADDU A22,A1,A1:A0
MV A23,B0
|| MPY32U A19,B16,A21:A20 ; a[3]*b[0]
|| ADDU A24,A1:A0,A1:A0
ADDU A25,B0,B1:B0
|| STW A0,*ARG0[1]
|| MPY32U A18,B17,A23:A22 ; a[2]*b[1]
|| ADDU A26,A1,A9:A8
ADDU A27,B1,B9:B8
|| MPY32U A17,B18,A25:A24 ; a[1]*b[2]
|| ADDU A28,A9:A8,A9:A8
ADDU A29,B9:B8,B9:B8
|| MPY32U A16,B19,A27:A26 ; a[0]*b[3]
|| ADDU A30,A9:A8,A9:A8
ADDU A31,B9:B8,B9:B8
|| ADDU B0,A9:A8,A9:A8
STW A8,*ARG0[2]
|| ADDU A20,A9,A1:A0
ADDU A21,B9,B1:B0
|| MPY32U A19,B17,A21:A20 ; a[3]*b[1]
|| ADDU A22,A1:A0,A1:A0
ADDU A23,B1:B0,B1:B0
|| MPY32U A18,B18,A23:A22 ; a[2]*b[2]
|| ADDU A24,A1:A0,A1:A0
ADDU A25,B1:B0,B1:B0
|| MPY32U A17,B19,A25:A24 ; a[1]*b[3]
|| ADDU A26,A1:A0,A1:A0
ADDU A27,B1:B0,B1:B0
|| ADDU B8,A1:A0,A1:A0
STW A0,*ARG0[3]
|| MPY32U A19,B18,A27:A26 ; a[3]*b[2]
|| ADDU A20,A1,A9:A8
ADDU A21,B1,B9:B8
|| MPY32U A18,B19,A29:A28 ; a[2]*b[3]
|| ADDU A22,A9:A8,A9:A8
ADDU A23,B9:B8,B9:B8
|| MPY32U A19,B19,A31:A30 ; a[3]*b[3]
|| ADDU A24,A9:A8,A9:A8
ADDU A25,B9:B8,B9:B8
|| ADDU B0,A9:A8,A9:A8
STW A8,*ARG0[4]
|| ADDU A26,A9,A1:A0
ADDU A27,B9,B1:B0
|| ADDU A28,A1:A0,A1:A0
ADDU A29,B1:B0,B1:B0
|| BNOP RA
|| ADDU B8,A1:A0,A1:A0
STW A0,*ARG0[5]
|| ADDU A30,A1,A9:A8
ADD A31,B1,B8
ADDU B0,A9:A8,A9:A8 ; removed || to avoid cross-path stall below
ADD B8,A9,A9
|| STW A8,*ARG0[6]
STW A9,*ARG0[7]
.endif
.endasmfunc

View file

@ -0,0 +1,146 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication
# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
# C for the time being... The subroutine runs in 37 cycles, which is
# 4.5x faster than compiler-generated code. Though comparison is
# totally unfair, because this module utilizes Galois Field Multiply
# instruction.
# The output file is the first command line argument that looks like
# "name.ext"; arguments in front of it are skipped.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# Redirect STDOUT to that file and stop if it cannot be created. When no
# file name was given STDOUT is left alone, so the generated code goes to
# wherever STDOUT already points instead of being lost in a failed open.
if ($output) {
    open(STDOUT,">",$output) || die "can't open $output: $!";
}

# Register assignment. The names hold register names as strings, which
# are interpolated into the assembly text generated below.
($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8"); # argument vector
# low halfword of the A operand and its four partial products (A side)
($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20));
# high halfword of the A operand and its four partial products (B side)
($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20));
# the four bytes of the B operand
($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7");
# operands of the 2nd and 3rd multiplication share registers with the
# values above
($A,$B)=($Alo,$B_1);
# register loaded with the constant 0xFF by the generated code
$xFF="B1";
# mul_1x1_upper($A,$B) appends the first half of one word-by-word
# multiplication to $code: the register named by $B is split into four
# bytes, the one named by $A into two halfwords, and the eight XORMPY
# partial products are started. Nothing is combined here; that is left
# to mul_1x1_merged or mul_1x1_lower. The parameters shadow the
# file-level $A and $B.
sub mul_1x1_upper {
my ($A,$B)=@_;
$code.=<<___;
EXTU $B,8,24,$B_2 ; smash $B to 4 bytes
|| AND $B,$xFF,$B_0
|| SHRU $B,24,$B_3
SHRU $A,16, $Ahi ; smash $A to two halfwords
|| EXTU $A,16,16,$Alo
XORMPY $Alo,$B_2,$Alox2 ; 16x8 bits muliplication
|| XORMPY $Ahi,$B_2,$Ahix2
|| EXTU $B,16,24,$B_1
XORMPY $Alo,$B_0,$Alox0
|| XORMPY $Ahi,$B_0,$Ahix0
XORMPY $Alo,$B_3,$Alox3
|| XORMPY $Ahi,$B_3,$Ahix3
XORMPY $Alo,$B_1,$Alox1
|| XORMPY $Ahi,$B_1,$Ahix1
___
}
# mul_1x1_merged($OUTlo,$OUThi,$A,$B) appends code that does two things
# at once: it combines the partial products of the multiplication that
# was started before into the register pair $OUThi:$OUTlo, and it starts
# the next multiplication of the registers named by $A and $B. The new
# product for $Alox0 is parked in A1 and only moved into place by the
# final MV, because the old value of $Alox0 is still read in between.
sub mul_1x1_merged {
my ($OUTlo,$OUThi,$A,$B)=@_;
$code.=<<___;
EXTU $B,8,24,$B_2 ; smash $B to 4 bytes
|| AND $B,$xFF,$B_0
|| SHRU $B,24,$B_3
SHRU $A,16, $Ahi ; smash $A to two halfwords
|| EXTU $A,16,16,$Alo
XOR $Ahix0,$Alox2,$Ahix0
|| MV $Ahix2,$OUThi
|| XORMPY $Alo,$B_2,$Alox2
XORMPY $Ahi,$B_2,$Ahix2
|| EXTU $B,16,24,$B_1
|| XORMPY $Alo,$B_0,A1 ; $Alox0
XOR $Ahix1,$Alox3,$Ahix1
|| SHL $Ahix0,16,$OUTlo
|| SHRU $Ahix0,16,$Ahix0
XOR $Alox0,$OUTlo,$OUTlo
|| XOR $Ahix0,$OUThi,$OUThi
|| XORMPY $Ahi,$B_0,$Ahix0
|| XORMPY $Alo,$B_3,$Alox3
|| SHL $Alox1,8,$Alox1
|| SHL $Ahix3,8,$Ahix3
XOR $Alox1,$OUTlo,$OUTlo
|| XOR $Ahix3,$OUThi,$OUThi
|| XORMPY $Ahi,$B_3,$Ahix3
|| SHL $Ahix1,24,$Alox1
|| SHRU $Ahix1,8, $Ahix1
XOR $Alox1,$OUTlo,$OUTlo
|| XOR $Ahix1,$OUThi,$OUThi
|| XORMPY $Alo,$B_1,$Alox1
|| XORMPY $Ahi,$B_1,$Ahix1
|| MV A1,$Alox0
___
}
# mul_1x1_lower($OUTlo,$OUThi) appends the second half of one
# multiplication to $code: the eight partial products left in the
# $Alox*/$Ahix* registers are shifted into position and XORed together
# into the register pair $OUThi:$OUTlo. No new multiplication is started.
sub mul_1x1_lower {
my ($OUTlo,$OUThi)=@_;
$code.=<<___;
;NOP
XOR $Ahix0,$Alox2,$Ahix0
|| MV $Ahix2,$OUThi
NOP
XOR $Ahix1,$Alox3,$Ahix1
|| SHL $Ahix0,16,$OUTlo
|| SHRU $Ahix0,16,$Ahix0
XOR $Alox0,$OUTlo,$OUTlo
|| XOR $Ahix0,$OUThi,$OUThi
|| SHL $Alox1,8,$Alox1
|| SHL $Ahix3,8,$Ahix3
XOR $Alox1,$OUTlo,$OUTlo
|| XOR $Ahix3,$OUThi,$OUThi
|| SHL $Ahix1,24,$Alox1
|| SHRU $Ahix1,8, $Ahix1
XOR $Alox1,$OUTlo,$OUTlo
|| XOR $Ahix1,$OUThi,$OUThi
___
}
# Assemble _bn_GF2m_mul_2x2 from three word multiplications: a0·b0,
# a1·b1 and (a0+a1)·(b0+b1). Each heredoc below continues the execute
# packet that the preceding generator call left open, which is why
# several of them begin with "||".
$code.=<<___;
.text
.global _bn_GF2m_mul_2x2
_bn_GF2m_mul_2x2:
.asmfunc
MVK 0xFF,$xFF
___
&mul_1x1_upper($a0,$b0); # a0·b0
# load the operands of the second multiplication
$code.=<<___;
|| MV $b1,$B
MV $a1,$A
___
&mul_1x1_merged("A28","B28",$A,$B); # a0·b0/a1·b1
# form the operands of the third multiplication
$code.=<<___;
|| XOR $b0,$b1,$B
XOR $a0,$a1,$A
___
&mul_1x1_merged("A31","B31",$A,$B); # a1·b1/(a0+a1)·(b0+b1)
$code.=<<___;
XOR A28,A31,A29
|| XOR B28,B31,B29 ; a0·b0+a1·b1
___
&mul_1x1_lower("A30","B30"); # (a0+a1)·(b0+b1)
# combine the three products, store the four result words and return
$code.=<<___;
|| BNOP B3
XOR A29,A30,A30
|| XOR B29,B30,B30 ; (a0+a1)·(b0+b1)-a0·b0-a1·b1
XOR B28,A30,A30
|| STW A28,*${rp}[0]
XOR B30,A31,A31
|| STW A30,*${rp}[1]
STW A31,*${rp}[2]
STW B31,*${rp}[3]
.endasmfunc
___
# Write out the generated assembly. Output is buffered, so a write error
# may only show up when the handle is closed; check both steps so that a
# missing or truncated output file is not mistaken for success.
print STDOUT $code or die "can't write generated code: $!";
close(STDOUT) or die "can't close output: $!";

View file

@ -366,6 +366,10 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
# endif
#endif /* BN_BITS2 != 64 */
#if defined(_TMS320C6X) && defined(NIST_INT64)
# undef NIST_INT64 /* compiler bug */
# pragma diag_suppress 177
#endif
#define nist_set_192(to, from, a1, a2, a3) \
{ \
@ -1047,6 +1051,11 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
return 1;
}
#ifdef _WIN32_WCE
/* Workaround for compiler bug under CE */
#pragma optimize( "", off )
#endif
#define BN_NIST_521_RSHIFT (521%BN_BITS2)
#define BN_NIST_521_LSHIFT (BN_BITS2-BN_NIST_521_RSHIFT)
#define BN_NIST_521_TOP_MASK ((BN_ULONG)BN_MASK2>>BN_NIST_521_LSHIFT)
@ -1113,6 +1122,10 @@ int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
return 1;
}
#ifdef _WIN32_WCE
#pragma optimize( "", on )
#endif
int (*BN_nist_mod_func(const BIGNUM *p))(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx)
{
if (BN_ucmp(&_bignum_nist_p_192, p) == 0)

246
crypto/c64xpluscpuid.pl Normal file
View file

@ -0,0 +1,246 @@
#!/usr/bin/env perl
#
# The output file is the first command line argument that looks like
# "name.ext"; arguments in front of it are skipped.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# Redirect STDOUT to that file and stop if it cannot be created. When no
# file name was given STDOUT is left alone, so the generated code goes to
# wherever STDOUT already points instead of being lost in a failed open.
if ($output) {
    open(STDOUT,">",$output) || die "can't open $output: $!";
}
$code.=<<___;
.text
.asg B3,RA
.global _OPENSSL_rdtsc
_OPENSSL_rdtsc:
.asmfunc
B RA
MVC TSCL,B0
MVC TSCH,B1
[!B0] MVC B0,TSCL ; start TSC
MV B0,A4
MV B1,A5
.endasmfunc
.global _OPENSSL_cleanse
_OPENSSL_cleanse:
.asmfunc
ZERO A3:A2
|| ZERO B2
|| SHRU B4,3,B0 ; is length >= 8
|| ADD 1,A4,B6
[!B0] BNOP RA
|| ZERO A1
|| ZERO B1
[B0] MVC B0,ILC
||[!B0] CMPLT 0,B4,A1
||[!B0] CMPLT 1,B4,B1
[A1] STB A2,*A4++[2]
|| [B1] STB B2,*B6++[2]
||[!B0] CMPLT 2,B4,A1
||[!B0] CMPLT 3,B4,B1
[A1] STB A2,*A4++[2]
|| [B1] STB B2,*B6++[2]
||[!B0] CMPLT 4,B4,A1
||[!B0] CMPLT 5,B4,B1
[A1] STB A2,*A4++[2]
|| [B1] STB B2,*B6++[2]
||[!B0] CMPLT 6,B4,A1
[A1] STB A2,*A4++[2]
SPLOOP 1
STNDW A3:A2,*A4++
|| SUB B4,8,B4
SPKERNEL
MV B4,B0 ; remaining bytes
|| ADD 1,A4,B6
|| BNOP RA
[B0] CMPLT 0,B0,A1
|| [B0] CMPLT 1,B0,B1
[A1] STB A2,*A4++[2]
|| [B1] STB B2,*B6++[2]
|| [B0] CMPLT 2,B0,A1
|| [B0] CMPLT 3,B0,B1
[A1] STB A2,*A4++[2]
|| [B1] STB B2,*B6++[2]
|| [B0] CMPLT 4,B0,A1
|| [B0] CMPLT 5,B0,B1
[A1] STB A2,*A4++[2]
|| [B1] STB B2,*B6++[2]
|| [B0] CMPLT 6,B0,A1
[A1] STB A2,*A4++[2]
.endasmfunc
.global _OPENSSL_atomic_add
_OPENSSL_atomic_add:
.asmfunc
MV A4,B0
atomic_add?:
LL *B0,B5
NOP 4
ADD B4,B5,B5
SL B5,*B0
CMTL *B0,B1
NOP 4
[!B1] B atomic_add?
[B1] BNOP RA,4
MV B5,A4
.endasmfunc
.global _OPENSSL_wipe_cpu
_OPENSSL_wipe_cpu:
.asmfunc
ZERO A0
|| ZERO B0
|| ZERO A1
|| ZERO B1
ZERO A3:A2
|| MVD B0,B2
|| ZERO A4
|| ZERO B4
|| ZERO A5
|| ZERO B5
|| BNOP RA
ZERO A7:A6
|| ZERO B7:B6
|| ZERO A8
|| ZERO B8
|| ZERO A9
|| ZERO B9
ZERO A17:A16
|| ZERO B17:B16
|| ZERO A18
|| ZERO B18
|| ZERO A19
|| ZERO B19
ZERO A21:A20
|| ZERO B21:B20
|| ZERO A22
|| ZERO B22
|| ZERO A23
|| ZERO B23
ZERO A25:A24
|| ZERO B25:B24
|| ZERO A26
|| ZERO B26
|| ZERO A27
|| ZERO B27
ZERO A29:A28
|| ZERO B29:B28
|| ZERO A30
|| ZERO B30
|| ZERO A31
|| ZERO B31
.endasmfunc
CLFLUSH .macro CONTROL,ADDR,LEN
B passthrough?
|| STW ADDR,*CONTROL[0]
STW LEN,*CONTROL[1]
spinlock?:
LDW *CONTROL[1],A0
NOP 3
passthrough?:
NOP
[A0] BNOP spinlock?,5
.endm
.global _OPENSSL_instrument_bus
_OPENSSL_instrument_bus:
.asmfunc
MV B4,B0 ; reassign sizeof(output)
|| MV A4,B4 ; reassign output
|| MVK 0x00004030,A3
MV B0,A4 ; return value
|| MVK 1,A1
|| MVKH 0x01840000,A3 ; L1DWIBAR
MVC TSCL,B8 ; collect 1st tick
|| MVK 0x00004010,A5
MV B8,B9 ; lasttick = tick
|| MVK 0,B7 ; lastdiff = 0
|| MVKH 0x01840000,A5 ; L2WIBAR
CLFLUSH A3,B4,A1 ; write-back and invalidate L1D line
CLFLUSH A5,B4,A1 ; write-back and invalidate L2 line
LL *B4,B5
NOP 4
ADD B7,B5,B5
SL B5,*B4
CMTL *B4,B1
NOP 4
STW B5,*B4
bus_loop1?:
MVC TSCL,B8
|| [B0] SUB B0,1,B0
SUB B8,B9,B7 ; lastdiff = tick - lasttick
|| MV B8,B9 ; lasttick = tick
CLFLUSH A3,B4,A1 ; write-back and invalidate L1D line
CLFLUSH A5,B4,A1 ; write-back and invalidate L2 line
LL *B4,B5
NOP 4
ADD B7,B5,B5
SL B5,*B4
CMTL *B4,B1
STW B5,*B4 ; [!B1] is removed to flatten samples
|| ADDK 4,B4
|| [B0] BNOP bus_loop1?,5
BNOP RA,5
.endasmfunc
.global _OPENSSL_instrument_bus2
_OPENSSL_instrument_bus2:
.asmfunc
MV A6,B0 ; reassign max
|| MV B4,A6 ; reassing sizeof(output)
|| MVK 0x00004030,A3
MV A4,B4 ; reassign output
|| MVK 0,A4 ; return value
|| MVK 1,A1
|| MVKH 0x01840000,A3 ; L1DWIBAR
MVC TSCL,B8 ; collect 1st tick
|| MVK 0x00004010,A5
MV B8,B9 ; lasttick = tick
|| MVK 0,B7 ; lastdiff = 0
|| MVKH 0x01840000,A5 ; L2WIBAR
CLFLUSH A3,B4,A1 ; write-back and invalidate L1D line
CLFLUSH A5,B4,A1 ; write-back and invalidate L2 line
LL *B4,B5
NOP 4
ADD B7,B5,B5
SL B5,*B4
CMTL *B4,B1
NOP 4
STW B5,*B4
MVC TSCL,B8 ; collect 1st diff
SUB B8,B9,B7 ; lastdiff = tick - lasttick
|| MV B8,B9 ; lasttick = tick
|| SUB B0,1,B0
bus_loop2?:
CLFLUSH A3,B4,A1 ; write-back and invalidate L1D line
CLFLUSH A5,B4,A1 ; write-back and invalidate L2 line
LL *B4,B5
NOP 4
ADD B7,B5,B5
SL B5,*B4
CMTL *B4,B1
STW B5,*B4 ; [!B1] is removed to flatten samples
||[!B0] BNOP bus_loop2_done?,2
|| SUB B0,1,B0
MVC TSCL,B8
SUB B8,B9,B8
|| MV B8,B9
CMPEQ B8,B7,B2
|| MV B8,B7
[!B2] ADDAW B4,1,B4
||[!B2] ADDK 1,A4
CMPEQ A4,A6,A2
[!A2] BNOP bus_loop2?,5
bus_loop2_done?:
BNOP RA,5
.endasmfunc
___
# Write out the generated assembly. Output is buffered, so a write error
# may only show up when the handle is closed; check both steps so that a
# missing or truncated output file is not mistaken for success.
print STDOUT $code or die "can't write generated code: $!";
close(STDOUT) or die "can't close output: $!";

View file

@ -143,7 +143,8 @@ int CMAC_CTX_copy(CMAC_CTX *out, const CMAC_CTX *in)
int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen,
const EVP_CIPHER *cipher, ENGINE *impl)
{
static unsigned char zero_iv[EVP_MAX_BLOCK_LENGTH];
__fips_constseg
static const unsigned char zero_iv[EVP_MAX_BLOCK_LENGTH] = {0};
/* All zeros means restart */
if (!key && !cipher && !impl && keylen == 0)
{

View file

@ -382,7 +382,9 @@ void OpenSSLDie(const char *file,int line,const char *assertion)
abort();
#else
/* Win32 abort() customarily shows a dialog, but we just did that... */
#ifdef SIGABRT
raise(SIGABRT);
#endif
_exit(3);
#endif
}

View file

@ -56,6 +56,9 @@
* [including the GNU Public Licence.]
*/
#ifdef _TMS320C6X
# pragma DATA_SECTION(DES_SPtrans,".const:des_sptrans")
#endif
__fips_constseg
OPENSSL_GLOBAL const DES_LONG DES_SPtrans[8][64]={
{

View file

@ -0,0 +1,231 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# December 2011
#
# The module implements GCM GHASH function and underlying single
# multiplication operation in GF(2^128). Even though subroutines
# have _4bit suffix, they are not using any tables, but rely on
# hardware Galois Field Multiply support. Streamed GHASH processes
# a byte in ~7 cycles, which is >6x faster than "4-bit" table-driven
# code compiled with TI's cl6x 6.0 with -mv6400+ -o2 flags. We are
# comparing apples vs. oranges, but compiler surely could have done
# better, because theoretical [though not necessarily achievable]
# estimate for "4-bit" table-driven implementation is ~12 cycles.
# Scan the command line for the first argument that looks like a file
# name ("name.ext"); everything in front of it is discarded.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# Redirect STDOUT to that file. Without a file name the generated code
# keeps going to the inherited STDOUT, exactly as before. The guard is
# required because a three-argument open with an undefined name would
# open an anonymous temporary file. A file that cannot be created is now
# a fatal error instead of being silently ignored.
if (defined($output) && length($output)) {
    open(STDOUT,'>',$output) or die "can't open $output: $!";
}

# Symbolic names for C64x+ registers; they are interpolated into the
# assembly template that follows.
($Xip,$Htable,$inp,$len)=("A4","B4","A6","B6"); # arguments
# A16..A27: accumulator Z, the four words of H, per-byte products
($Z0,$Z1,$Z2,$Z3, $H0, $H1, $H2, $H3,
$H0x,$H1x,$H2x,$H3x)=map("A$_",(16..27));
# B16..B27: upper-byte helpers and their products
($H01u,$H01y,$H2u,$H3u, $H0y,$H1y,$H2y,$H3y,
$H0z,$H1z,$H2z,$H3z)=map("B$_",(16..27));
# constants named after the values the template loads into them
($FF000000,$E10000)=("B30","B31");
($xip,$x0,$x1,$xib)=map("B$_",(6..9)); # $xip zaps $len
$xia="A9";
($rem,$res)=("B4","B5"); # $rem zaps $Htable
# First part of the assembly template: section set-up, a disabled
# (.if 0) _gcm_gmult_1bit stub, _gcm_gmult_4bit, and the prologue of
# _gcm_ghash_4bit up to the SPLOOPD set-up packet at ghash_loop?.
#
# Both entry points read H from the two doublewords just below the
# $Htable pointer and build the 0xE1<<16 and 0xFF<<24 constants.
# _gcm_gmult_4bit has no loop or return of its own: it sets B0 to 1
# ("take a single spin") and branches to ghash_loop?, reusing the loop
# and tail that belong to _gcm_ghash_4bit.
$code.=<<___;
.text
.asg B3,RA
.if 0
.global _gcm_gmult_1bit
_gcm_gmult_1bit:
ADDAD $Htable,2,$Htable
.endif
.global _gcm_gmult_4bit
_gcm_gmult_4bit:
.asmfunc
LDDW *${Htable}[-1],$H1:$H0 ; H.lo
LDDW *${Htable}[-2],$H3:$H2 ; H.hi
|| MV $Xip,${xip} ; reassign Xi
|| MVK 15,B1 ; SPLOOPD constant
MVK 0xE1,$E10000
|| LDBU *++${xip}[15],$x1 ; Xi[15]
MVK 0xFF,$FF000000
|| LDBU *--${xip},$x0 ; Xi[14]
SHL $E10000,16,$E10000 ; [pre-shifted] reduction polynomial
SHL $FF000000,24,$FF000000 ; upper byte mask
|| BNOP ghash_loop?
|| MVK 1,B0 ; take a single spin
PACKH2 $H0,$H1,$xia ; pack H0' and H1's upper bytes
AND $H2,$FF000000,$H2u ; H2's upper byte
AND $H3,$FF000000,$H3u ; H3's upper byte
|| SHRU $H2u,8,$H2u
SHRU $H3u,8,$H3u
|| ZERO $Z1:$Z0
SHRU2 $xia,8,$H01u
|| ZERO $Z3:$Z2
.endasmfunc
.global _gcm_ghash_4bit
_gcm_ghash_4bit:
.asmfunc
LDDW *${Htable}[-1],$H1:$H0 ; H.lo
|| SHRU $len,4,B0 ; reassign len
LDDW *${Htable}[-2],$H3:$H2 ; H.hi
|| MV $Xip,${xip} ; reassign Xi
|| MVK 15,B1 ; SPLOOPD constant
MVK 0xE1,$E10000
|| [B0] LDNDW *${inp}[1],$H1x:$H0x
MVK 0xFF,$FF000000
|| [B0] LDNDW *${inp}++[2],$H3x:$H2x
SHL $E10000,16,$E10000 ; [pre-shifted] reduction polynomial
|| LDDW *${xip}[1],$Z1:$Z0
SHL $FF000000,24,$FF000000 ; upper byte mask
|| LDDW *${xip}[0],$Z3:$Z2
PACKH2 $H0,$H1,$xia ; pack H0' and H1's upper bytes
AND $H2,$FF000000,$H2u ; H2's upper byte
AND $H3,$FF000000,$H3u ; H3's upper byte
|| SHRU $H2u,8,$H2u
SHRU $H3u,8,$H3u
SHRU2 $xia,8,$H01u
|| [B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp
|| [B0] XOR $H1x,$Z1,$Z1
.if .LITTLE_ENDIAN
[B0] XOR $H2x,$Z2,$Z2
|| [B0] XOR $H3x,$Z3,$Z3
|| [B0] SHRU $Z1,24,$xia ; Xi[15], avoid cross-path stall
STDW $Z1:$Z0,*${xip}[1]
|| [B0] SHRU $Z1,16,$x0 ; Xi[14]
|| [B0] ZERO $Z1:$Z0
.else
[B0] XOR $H2x,$Z2,$Z2
|| [B0] XOR $H3x,$Z3,$Z3
|| [B0] MV $Z0,$xia ; Xi[15], avoid cross-path stall
STDW $Z1:$Z0,*${xip}[1]
|| [B0] SHRU $Z0,8,$x0 ; Xi[14]
|| [B0] ZERO $Z1:$Z0
.endif
STDW $Z3:$Z2,*${xip}[0]
|| [B0] ZERO $Z3:$Z2
|| [B0] MV $xia,$x1
[B0] ADDK 14,${xip}
ghash_loop?:
SPLOOPD 6 ; 6*16+7
|| MVC B1,ILC
|| [B0] SUB B0,1,B0
|| ZERO A0
|| ADD $x1,$x1,$xib ; SHL $x1,1,$xib
|| SHL $x1,1,$xia
___
########____________________________
# 0 D2. M1 M2 |
# 1 M1 |
# 2 M1 M2 |
# 3 D1. M1 M2 |
# 4 S1. L1 |
# 5 S2 S1x L1 D2 L2 |____________________________
# 6/0 L1 S1 L2 S2x |D2. M1 M2 |
# 7/1 L1 S1 D1x S2 M2 | M1 |
# 8/2 S1 L1x S2 | M1 M2 |
# 9/3 S1 L1x | D1. M1 M2 |
# 10/4 D1x | S1. L1 |
# 11/5 |S2 S1x L1 D2 L2 |____________
# 12/6/0 D1x __| L1 S1 L2 S2x |D2. ....
# 7/1 L1 S1 D1x S2 M2 | ....
# 8/2 S1 L1x S2 | ....
#####... ................|............
# Second part of the template: the software-pipelined loop body whose
# schedule is drawn in the diagram above, followed by the per-block
# tail. The tail pre-fetches the next 16 input bytes while B0 is
# non-zero, byte-swaps Z on little-endian builds, stores Xi, and then
# either returns through RA (B0 == 0) or branches back to ghash_loop?
# with the next block already XORed into Xi.
$code.=<<___;
XORMPY $H0,$xia,$H0x ; 0 ; H·Xi[i]
|| XORMPY $H01u,$xib,$H01y
|| [A0] LDBU *--${xip},$x0
XORMPY $H1,$xia,$H1x ; 1
XORMPY $H2,$xia,$H2x ; 2
|| XORMPY $H2u,$xib,$H2y
XORMPY $H3,$xia,$H3x ; 3
|| XORMPY $H3u,$xib,$H3y
||[!A0] MVK.D 15,A0 ; *--${xip} counter
XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H·Xi[i]
|| [A0] SUB.S A0,1,A0
XOR.L $H1x,$Z1,$Z1 ; 5
|| AND.D $H01y,$FF000000,$H0z
|| SWAP2.L $H01y,$H1y ; ; SHL $H01y,16,$H1y
|| SHL $x0,1,$xib
|| SHL $x0,1,$xia
XOR.L $H2x,$Z2,$Z2 ; 6/0 ; [0,0] in epilogue
|| SHL $Z0,1,$rem ; ; rem=Z<<1
|| SHRMB.S $Z1,$Z0,$Z0 ; ; Z>>=8
|| AND.L $H1y,$FF000000,$H1z
XOR.L $H3x,$Z3,$Z3 ; 7/1
|| SHRMB.S $Z2,$Z1,$Z1
|| XOR.D $H0z,$Z0,$Z0 ; merge upper byte products
|| AND.S $H2y,$FF000000,$H2z
|| XORMPY $E10000,$rem,$res ; ; implicit rem&0x1FE
XOR.L $H1z,$Z1,$Z1 ; 8/2
|| SHRMB.S $Z3,$Z2,$Z2
|| AND.S $H3y,$FF000000,$H3z
XOR.L $H2z,$Z2,$Z2 ; 9/3
|| SHRU $Z3,8,$Z3
XOR.D $H3z,$Z3,$Z3 ; 10/4
NOP ; 11/5
SPKERNEL 0,2
|| XOR.D $res,$Z3,$Z3 ; 12/6/0; Z^=res
; input pre-fetch is possible where D1 slot is available...
[B0] LDNDW *${inp}[1],$H1x:$H0x ; 8/-
[B0] LDNDW *${inp}++[2],$H3x:$H2x ; 9/-
NOP ; 10/-
.if .LITTLE_ENDIAN
SWAP2 $Z0,$Z1 ; 11/-
|| SWAP4 $Z1,$Z0
SWAP4 $Z1,$Z1 ; 12/-
|| SWAP2 $Z0,$Z0
SWAP2 $Z2,$Z3
|| SWAP4 $Z3,$Z2
||[!B0] BNOP RA
SWAP4 $Z3,$Z3
|| SWAP2 $Z2,$Z2
|| [B0] BNOP ghash_loop?
[B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp
|| [B0] XOR $H1x,$Z1,$Z1
[B0] XOR $H2x,$Z2,$Z2
|| [B0] XOR $H3x,$Z3,$Z3
|| [B0] SHRU $Z1,24,$xia ; Xi[15], avoid cross-path stall
STDW $Z1:$Z0,*${xip}[1]
|| [B0] SHRU $Z1,16,$x0 ; Xi[14]
|| [B0] ZERO $Z1:$Z0
.else
[!B0] BNOP RA ; 11/-
[B0] BNOP ghash_loop? ; 12/-
[B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp
|| [B0] XOR $H1x,$Z1,$Z1
[B0] XOR $H2x,$Z2,$Z2
|| [B0] XOR $H3x,$Z3,$Z3
|| [B0] MV $Z0,$xia ; Xi[15], avoid cross-path stall
STDW $Z1:$Z0,*${xip}[1]
|| [B0] SHRU $Z0,8,$x0 ; Xi[14]
|| [B0] ZERO $Z1:$Z0
.endif
STDW $Z3:$Z2,*${xip}[0]
|| [B0] ZERO $Z3:$Z2
|| [B0] MV $xia,$x1
[B0] ADDK 14,${xip}
.endasmfunc
.sect .const
.cstring "GHASH for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
.align 4
___
# Write the accumulated template to STDOUT (the output file, if one was
# named on the command line) and flush it.
print $code;
close STDOUT;

View file

@ -674,6 +674,8 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
# endif
# elif defined(_TMS320C6400_PLUS)
# define GHASH_ASM_C64Xplus
# endif
#endif
@ -746,6 +748,10 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
ctx->gmult = gcm_gmult_4bit;
ctx->ghash = gcm_ghash_4bit;
}
# elif defined(GHASH_ASM_C64Xplus)
/* C64x+ assembler doesn't use tables, skip gcm_init_4bit.
* This is likely to trigger "function never referenced"
* warning and code being eliminated. */
# else
gcm_init_4bit(ctx->Htable,ctx->H.u);
# endif

View file

@ -0,0 +1,323 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# SHA1 for C64x+.
#
# November 2011
#
# If compared to compiler-generated code with similar characteristics,
# i.e. compiled with OPENSSL_SMALL_FOOTPRINT and utilizing SPLOOPs,
# this implementation is 25% smaller and >2x faster. In absolute terms
# performance is (quite impressive) ~6.5 cycles per processed byte.
# Fully unrolled assembler would be ~5x larger and is likely to be
# ~15% faster. It would be free from references to intermediate ring
# buffer, but put more pressure on L1P [both because the code would be
# larger and won't be using SPLOOP buffer]. There are no plans to
# realize fully unrolled variant though...
#
# !!! Note that this module uses AMR, which means that all interrupt
# service routines are expected to preserve it and, for their own
# well-being, zero it upon entry.
# Scan the command line for the first argument that looks like a file
# name ("name.ext"); everything in front of it is discarded.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# Redirect STDOUT to that file. Without a file name the generated code
# keeps going to the inherited STDOUT, exactly as before. The guard is
# required because a three-argument open with an undefined name would
# open an anonymous temporary file. A file that cannot be created is now
# a fatal error instead of being silently ignored.
if (defined($output) && length($output)) {
    open(STDOUT,'>',$output) or die "can't open $output: $!";
}

# Symbolic names for C64x+ registers; they are interpolated into the
# assembly template that follows.
($CTX,$INP,$NUM) = ("A4","B4","A6"); # arguments
# working variables A..E in A16..A20, round temporaries in A21..A25
($A,$B,$C,$D,$E, $Arot,$F,$F0,$T,$K) = map("A$_",(16..20, 21..25));
# message-schedule operands, named after the X[] offsets they carry
($X0,$X2,$X8,$X13) = ("A26","B26","A27","B27");
($TX0,$TX1,$TX2,$TX3) = map("B$_",(28..31));
($XPA,$XPB) = ("A5","B5"); # X circular buffer
# copies of the incoming hash state, added back at the end of a block
($Actx,$Bctx,$Cctx,$Dctx,$Ectx) = map("A$_",(3,6..9)); # zaps $NUM
# Start of the assembly template (plain assignment: this is the first
# piece of $code). It emits:
#   - register aliases RA/FP/SP and, on big-endian builds, SWAP2/SWAP4
#     redefined as MV so the input byte swap becomes a plain move;
#   - the prologue of _sha1_block_data_order: early return when $NUM is
#     zero, frame-pointer save, a 64-byte aligned buffer on the stack,
#     AMR set-up for circular addressing through $XPA/$XPB, load of A-E;
#   - loop?: the per-block loop head, then rounds 0..13 (SPLOOPD),
#     14 and 15 (straight-line) and 16..19 (SPLOOPD);
#   - the K_20_39 constant and loop count for the next group of rounds.
$code=<<___;
.text
.asg B3,RA
.asg A15,FP
.asg B15,SP
.if .BIG_ENDIAN
.asg MV,SWAP2
.asg MV,SWAP4
.endif
.global _sha1_block_data_order
_sha1_block_data_order:
.asmfunc stack_usage(64)
MV $NUM,A0 ; reassign $NUM
|| MVK -64,B0
[!A0] BNOP RA ; if ($NUM==0) return;
|| [A0] STW FP,*SP--[16] ; save frame pointer and alloca(64)
|| [A0] MV SP,FP
[A0] LDW *${CTX}[0],$A ; load A-E...
|| [A0] AND B0,SP,SP ; align stack at 64 bytes
[A0] LDW *${CTX}[1],$B
|| [A0] SUBAW SP,2,SP ; reserve two words above buffer
[A0] LDW *${CTX}[2],$C
|| [A0] MVK 0x00404,B0
[A0] LDW *${CTX}[3],$D
|| [A0] MVKH 0x50000,B0 ; 0x050404, 64 bytes for $XP[AB]
[A0] LDW *${CTX}[4],$E
|| [A0] MVC B0,AMR ; setup circular addressing
LDNW *${INP}++,$TX1 ; pre-fetch input
NOP 1
loop?:
MVK 0x00007999,$K
|| ADDAW SP,2,$XPA
|| SUB A0,1,A0
|| MVK 13,B0
MVKH 0x5a820000,$K ; K_00_19
|| ADDAW SP,2,$XPB
|| MV $A,$Actx
|| MV $B,$Bctx
;;==================================================
SPLOOPD 5 ; BODY_00_13
|| MV $C,$Cctx
|| MV $D,$Dctx
|| MV $E,$Ectx
|| MVC B0,ILC
ROTL $A,5,$Arot
|| AND $C,$B,$F
|| ANDN $D,$B,$F0
|| ADD $K,$E,$T ; T=E+K
XOR $F0,$F,$F ; F_00_19(B,C,D)
|| MV $D,$E ; E=D
|| MV $C,$D ; D=C
|| SWAP2 $TX1,$TX2
|| LDNW *${INP}++,$TX1
ADD $F,$T,$T ; T+=F_00_19(B,C,D)
|| ROTL $B,30,$C ; C=ROL(B,30)
|| SWAP4 $TX2,$TX3 ; byte swap
ADD $Arot,$T,$T ; T+=ROL(A,5)
|| MV $A,$B ; B=A
ADD $TX3,$T,$A ; A=T+Xi
|| STW $TX3,*${XPB}++
SPKERNEL
;;==================================================
ROTL $A,5,$Arot ; BODY_14
|| AND $C,$B,$F
|| ANDN $D,$B,$F0
|| ADD $K,$E,$T ; T=E+K
XOR $F0,$F,$F ; F_00_19(B,C,D)
|| MV $D,$E ; E=D
|| MV $C,$D ; D=C
|| SWAP2 $TX1,$TX2
|| LDNW *${INP}++,$TX1
ADD $F,$T,$T ; T+=F_00_19(B,C,D)
|| ROTL $B,30,$C ; C=ROL(B,30)
|| SWAP4 $TX2,$TX2 ; byte swap
|| LDW *${XPA}++,$X0 ; fetches from X ring buffer are
|| LDW *${XPB}[4],$X2 ; 2 iterations ahead
ADD $Arot,$T,$T ; T+=ROL(A,5)
|| MV $A,$B ; B=A
|| LDW *${XPA}[7],$X8
|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13
|| MV $TX2,$TX3
ADD $TX2,$T,$A ; A=T+Xi
|| STW $TX2,*${XPB}++
;;==================================================
ROTL $A,5,$Arot ; BODY_15
|| AND $C,$B,$F
|| ANDN $D,$B,$F0
|| ADD $K,$E,$T ; T=E+K
XOR $F0,$F,$F ; F_00_19(B,C,D)
|| MV $D,$E ; E=D
|| MV $C,$D ; D=C
|| SWAP2 $TX1,$TX2
ADD $F,$T,$T ; T+=F_00_19(B,C,D)
|| ROTL $B,30,$C ; C=ROL(B,30)
|| SWAP4 $TX2,$TX2 ; byte swap
|| XOR $X0,$X2,$TX0 ; Xupdate XORs are 1 iteration ahead
|| LDW *${XPA}++,$X0
|| LDW *${XPB}[4],$X2
ADD $Arot,$T,$T ; T+=ROL(A,5)
|| MV $A,$B ; B=A
|| XOR $X8,$X13,$TX1
|| LDW *${XPA}[7],$X8
|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13
|| MV $TX2,$TX3
ADD $TX2,$T,$A ; A=T+Xi
|| STW $TX2,*${XPB}++
|| XOR $TX0,$TX1,$TX1
|| MVK 3,B0
;;==================================================
SPLOOPD 5 ; BODY_16_19
|| MVC B0,ILC
ROTL $A,5,$Arot
|| AND $C,$B,$F
|| ANDN $D,$B,$F0
|| ADD $K,$E,$T ; T=E+K
|| ROTL $TX1,1,$TX2 ; Xupdate output
XOR $F0,$F,$F ; F_00_19(B,C,D)
|| MV $D,$E ; E=D
|| MV $C,$D ; D=C
ADD $F,$T,$T ; T+=F_00_19(B,C,D)
|| ROTL $B,30,$C ; C=ROL(B,30)
|| XOR $X0,$X2,$TX0
|| LDW *${XPA}++,$X0
|| LDW *${XPB}[4],$X2
ADD $Arot,$T,$T ; T+=ROL(A,5)
|| MV $A,$B ; B=A
|| XOR $X8,$X13,$TX1
|| LDW *${XPA}[7],$X8
|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13
|| MV $TX2,$TX3
ADD $TX2,$T,$A ; A=T+Xi
|| STW $TX2,*${XPB}++
|| XOR $TX0,$TX1,$TX1
SPKERNEL
MVK 0xffffeba1,$K
|| MVK 19,B0
MVKH 0x6ed90000,$K ; K_20_39
___
# Append one software-pipelined group of "parity" rounds (F = B^C^D) to
# $code. The SPLOOPD iteration count is taken from B0, which the
# surrounding template loads just before each use, so the same text
# serves rounds 20..39 and rounds 60..78.
#
# Argument: a single optional flag. When it is false or absent, the
# K_40_59 constant load for the following rounds is appended as well;
# when it is true, nothing is emitted after SPKERNEL.
sub BODY_20_39 {
    my ($omit_next_k) = @_;

    $code .= <<___;
;;==================================================
SPLOOPD 5 ; BODY_20_39
|| MVC B0,ILC
ROTL $A,5,$Arot
|| XOR $B,$C,$F
|| ADD $K,$E,$T ; T=E+K
|| ROTL $TX1,1,$TX2 ; Xupdate output
XOR $D,$F,$F ; F_20_39(B,C,D)
|| MV $D,$E ; E=D
|| MV $C,$D ; D=C
ADD $F,$T,$T ; T+=F_20_39(B,C,D)
|| ROTL $B,30,$C ; C=ROL(B,30)
|| XOR $X0,$X2,$TX0
|| LDW *${XPA}++,$X0
|| LDW *${XPB}[4],$X2
ADD $Arot,$T,$T ; T+=ROL(A,5)
|| MV $A,$B ; B=A
|| XOR $X8,$X13,$TX1
|| LDW *${XPA}[7],$X8
|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13
|| MV $TX2,$TX3
ADD $TX2,$T,$A ; A=T+Xi
|| STW $TX2,*${XPB}++ ; last one is redundant
|| XOR $TX0,$TX1,$TX1
SPKERNEL
___

    unless ($omit_next_k) {
        $code .= <<___;
MVK 0xffffbcdc,$K
MVKH 0x8f1b0000,$K ; K_40_59
___
    }
}

# Rounds 20..39, followed by the K_40_59 load.
BODY_20_39();
# Rounds 40..59 as one SPLOOPD group. F is built from the three pairwise
# ANDs of B, C and D combined with XOR; the first two ANDs for the next
# iteration are issued in the last packet of the current one. After the
# loop the K_60_79 constant and the count for the final group (18) are
# loaded.
$code.=<<___;
;;==================================================
SPLOOPD 5 ; BODY_40_59
|| MVC B0,ILC
|| AND $B,$C,$F
|| AND $B,$D,$F0
ROTL $A,5,$Arot
|| XOR $F0,$F,$F
|| AND $C,$D,$F0
|| ADD $K,$E,$T ; T=E+K
|| ROTL $TX1,1,$TX2 ; Xupdate output
XOR $F0,$F,$F ; F_40_59(B,C,D)
|| MV $D,$E ; E=D
|| MV $C,$D ; D=C
ADD $F,$T,$T ; T+=F_40_59(B,C,D)
|| ROTL $B,30,$C ; C=ROL(B,30)
|| XOR $X0,$X2,$TX0
|| LDW *${XPA}++,$X0
|| LDW *${XPB}[4],$X2
ADD $Arot,$T,$T ; T+=ROL(A,5)
|| MV $A,$B ; B=A
|| XOR $X8,$X13,$TX1
|| LDW *${XPA}[7],$X8
|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13
|| MV $TX2,$TX3
ADD $TX2,$T,$A ; A=T+Xi
|| STW $TX2,*${XPB}++
|| XOR $TX0,$TX1,$TX1
|| AND $B,$C,$F
|| AND $B,$D,$F0
SPKERNEL
MVK 0xffffc1d6,$K
|| MVK 18,B0
MVKH 0xca620000,$K ; K_60_79
___
# Reuse the parity-round loop for rounds 60..78; the true argument
# suppresses the K_40_59 load that the first use appended.
&BODY_20_39(-1); # BODY_60_78
# Final piece of the template: round 79 written out straight-line and
# merged with the addition of the saved context ($Actx..$Ectx), the
# conditional branch back to loop? while blocks remain (A0 != 0), and
# the epilogue that restores SP/FP, stores A-E back to the context and
# clears AMR. The identification string goes into .const.
$code.=<<___;
;;==================================================
[A0] B loop?
|| ROTL $A,5,$Arot ; BODY_79
|| XOR $B,$C,$F
|| ROTL $TX1,1,$TX2 ; Xupdate output
[A0] LDNW *${INP}++,$TX1 ; pre-fetch input
|| ADD $K,$E,$T ; T=E+K
|| XOR $D,$F,$F ; F_20_39(B,C,D)
ADD $F,$T,$T ; T+=F_20_39(B,C,D)
|| ADD $Ectx,$D,$E ; E=D,E+=Ectx
|| ADD $Dctx,$C,$D ; D=C,D+=Dctx
|| ROTL $B,30,$C ; C=ROL(B,30)
ADD $Arot,$T,$T ; T+=ROL(A,5)
|| ADD $Bctx,$A,$B ; B=A,B+=Bctx
ADD $TX2,$T,$A ; A=T+Xi
ADD $Actx,$A,$A ; A+=Actx
|| ADD $Cctx,$C,$C ; C+=Cctx
;; end of loop?
BNOP RA ; return
|| MV FP,SP ; restore stack pointer
|| LDW *FP[0],FP ; restore frame pointer
STW $A,*${CTX}[0] ; emit A-E...
|| MVK 0,B0
STW $B,*${CTX}[1]
|| MVC B0,AMR ; clear AMR
STW $C,*${CTX}[2]
STW $D,*${CTX}[3]
STW $E,*${CTX}[4]
.endasmfunc
.sect .const
.cstring "SHA1 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
.align 4
___
# Write the accumulated template to STDOUT (the output file, if one was
# named on the command line) and flush it.
print $code;
close STDOUT;

View file

@ -0,0 +1,292 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# SHA256 for C64x+.
#
# January 2012
#
# Performance is just below 10 cycles per processed byte, which is
# almost 40% faster than compiler-generated code. Unroll is unlikely
# to give more than ~8% improvement...
#
# !!! Note that this module uses AMR, which means that all interrupt
# service routines are expected to preserve it and, for their own
# well-being, zero it upon entry.
# Scan the command line for the first argument that looks like a file
# name ("name.ext"); everything in front of it is discarded.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# Redirect STDOUT to that file. Without a file name the generated code
# keeps going to the inherited STDOUT, exactly as before. The guard is
# required because a three-argument open with an undefined name would
# open an anonymous temporary file. A file that cannot be created is now
# a fatal error instead of being silently ignored.
if (defined($output) && length($output)) {
    open(STDOUT,'>',$output) or die "can't open $output: $!";
}

# Symbolic names for C64x+ registers; they are interpolated into the
# assembly template that follows.
($CTXA,$INP,$NUM) = ("A4","B4","A6"); # arguments
$K256="A3";
# A16..A31: a..d with their saved copies, plus A-side temporaries
($A,$Actx,$B,$Bctx,$C,$Cctx,$D,$Dctx,$T2,$S0,$s1,$t0a,$t1a,$t2a,$X9,$X14)
=map("A$_",(16..31));
# B16..B31: e..h with their saved copies, plus B-side temporaries
($E,$Ectx,$F,$Fctx,$G,$Gctx,$H,$Hctx,$T1,$S1,$s0,$t0e,$t1e,$t2e,$X1,$X15)
=map("B$_",(16..31));
($Xia,$Xib)=("A5","B5"); # circular/ring buffer
$CTXB=$t2e; # B-side copy of the ctx pointer shares a register with $t2e
($Xn,$X0,$K)=("B7","B8","B9");
($Maj,$Ch)=($T2,"B6"); # Maj is computed in place in $T2
# Whole assembly template for _sha256_block_data_order plus the K256
# table. Structure: prologue (early return when $NUM is zero, frame
# pointer save, 64-byte aligned ring buffer on the stack, AMR set-up for
# circular addressing via $Xia/$Xib, PC-relative address of K256, ctx
# load), outerloop? (one pass per input block) containing rounds 0..14
# as an SPLOOPD, round 15 straight-line, rounds 16..63 as a second
# SPLOOPD, then ctx accumulation and either a branch back or the
# epilogue that stores ctx and clears AMR.
#
# .asg takes the replacement text first and the symbol second (compare
# ".asg B3,RA" below). The big-endian aliases therefore have to read
# ".asg MV,SWAP2" / ".asg MV,SWAP4" so that the byte swaps of the input
# words degrade to plain moves. The operands used to be the other way
# round, which would have redefined MV itself and left the swaps active.
$code.=<<___;
.text
.asg B3,RA
.asg A15,FP
.asg B15,SP
.if .BIG_ENDIAN
; input words are already in native order: make the swaps plain moves
.asg MV,SWAP2
.asg MV,SWAP4
.endif
.global _sha256_block_data_order
_sha256_block_data_order:
.asmfunc stack_usage(64)
MV $NUM,A0 ; reassign $NUM
|| MVK -64,B0
[!A0] BNOP RA ; if ($NUM==0) return;
|| [A0] STW FP,*SP--[16] ; save frame pointer and alloca(64)
|| [A0] MV SP,FP
[A0] ADDKPC _sha256_block_data_order,B2
|| [A0] AND B0,SP,SP ; align stack at 64 bytes
[A0] MVK 0x00404,B1
|| [A0] MVKL (K256-_sha256_block_data_order),$K256
[A0] MVKH 0x50000,B1
|| [A0] MVKH (K256-_sha256_block_data_order),$K256
[A0] MVC B1,AMR ; setup circular addressing
|| [A0] MV SP,$Xia
[A0] MV SP,$Xib
|| [A0] ADD B2,$K256,$K256
|| [A0] MV $CTXA,$CTXB
|| [A0] SUBAW SP,2,SP ; reserve two words above buffer
LDW *${CTXA}[0],$A ; load ctx
|| LDW *${CTXB}[4],$E
LDW *${CTXA}[1],$B
|| LDW *${CTXB}[5],$F
LDW *${CTXA}[2],$C
|| LDW *${CTXB}[6],$G
LDW *${CTXA}[3],$D
|| LDW *${CTXB}[7],$H
LDNW *$INP++,$Xn ; pre-fetch input
LDW *$K256++,$K ; pre-fetch K256[0]
MVK 14,B0 ; loop counters
MVK 47,B1
|| ADDAW $Xia,9,$Xia
outerloop?:
SUB A0,1,A0
|| MV $A,$Actx
|| MV $E,$Ectx
|| MVD $B,$Bctx
|| MVD $F,$Fctx
MV $C,$Cctx
|| MV $G,$Gctx
|| MVD $D,$Dctx
|| MVD $H,$Hctx
|| SWAP4 $Xn,$X0
SPLOOPD 8 ; BODY_00_14
|| MVC B0,ILC
|| SWAP2 $X0,$X0
LDNW *$INP++,$Xn
|| ROTL $A,30,$S0
|| OR $A,$B,$Maj
|| AND $A,$B,$t2a
|| ROTL $E,26,$S1
|| AND $F,$E,$Ch
|| ANDN $G,$E,$t2e
ROTL $A,19,$t0a
|| AND $C,$Maj,$Maj
|| ROTL $E,21,$t0e
|| XOR $t2e,$Ch,$Ch ; Ch(e,f,g) = (e&f)^(~e&g)
ROTL $A,10,$t1a
|| OR $t2a,$Maj,$Maj ; Maj(a,b,c) = ((a|b)&c)|(a&b)
|| ROTL $E,7,$t1e
|| ADD $K,$H,$T1 ; T1 = h + K256[i]
ADD $X0,$T1,$T1 ; T1 += X[i];
|| STW $X0,*$Xib++
|| XOR $t0a,$S0,$S0
|| XOR $t0e,$S1,$S1
XOR $t1a,$S0,$S0 ; Sigma0(a)
|| XOR $t1e,$S1,$S1 ; Sigma1(e)
|| LDW *$K256++,$K ; pre-fetch K256[i+1]
|| ADD $Ch,$T1,$T1 ; T1 += Ch(e,f,g)
ADD $S1,$T1,$T1 ; T1 += Sigma1(e)
|| ADD $S0,$Maj,$T2 ; T2 = Sigma0(a) + Maj(a,b,c)
|| ROTL $G,0,$H ; h = g
|| MV $F,$G ; g = f
|| MV $X0,$X14
|| SWAP4 $Xn,$X0
SWAP2 $X0,$X0
|| MV $E,$F ; f = e
|| ADD $D,$T1,$E ; e = d + T1
|| MV $C,$D ; d = c
MV $B,$C ; c = b
|| MV $A,$B ; b = a
|| ADD $T1,$T2,$A ; a = T1 + T2
SPKERNEL
ROTL $A,30,$S0 ; BODY_15
|| OR $A,$B,$Maj
|| AND $A,$B,$t2a
|| ROTL $E,26,$S1
|| AND $F,$E,$Ch
|| ANDN $G,$E,$t2e
|| LDW *${Xib}[1],$Xn ; modulo-scheduled
ROTL $A,19,$t0a
|| AND $C,$Maj,$Maj
|| ROTL $E,21,$t0e
|| XOR $t2e,$Ch,$Ch ; Ch(e,f,g) = (e&f)^(~e&g)
|| LDW *${Xib}[2],$X1 ; modulo-scheduled
ROTL $A,10,$t1a
|| OR $t2a,$Maj,$Maj ; Maj(a,b,c) = ((a|b)&c)|(a&b)
|| ROTL $E,7,$t1e
|| ADD $K,$H,$T1 ; T1 = h + K256[i]
ADD $X0,$T1,$T1 ; T1 += X[i];
|| STW $X0,*$Xib++
|| XOR $t0a,$S0,$S0
|| XOR $t0e,$S1,$S1
XOR $t1a,$S0,$S0 ; Sigma0(a)
|| XOR $t1e,$S1,$S1 ; Sigma1(e)
|| LDW *$K256++,$K ; pre-fetch K256[i+1]
|| ADD $Ch,$T1,$T1 ; T1 += Ch(e,f,g)
ADD $S1,$T1,$T1 ; T1 += Sigma1(e)
|| ADD $S0,$Maj,$T2 ; T2 = Sigma0(a) + Maj(a,b,c)
|| ROTL $G,0,$H ; h = g
|| MV $F,$G ; g = f
|| MV $X0,$X15
MV $E,$F ; f = e
|| ADD $D,$T1,$E ; e = d + T1
|| MV $C,$D ; d = c
|| MV $Xn,$X0 ; modulo-scheduled
|| LDW *$Xia,$X9 ; modulo-scheduled
|| ROTL $X1,25,$t0e ; modulo-scheduled
|| ROTL $X14,15,$t0a ; modulo-scheduled
SHRU $X1,3,$s0 ; modulo-scheduled
|| SHRU $X14,10,$s1 ; modulo-scheduled
|| ROTL $B,0,$C ; c = b
|| MV $A,$B ; b = a
|| ADD $T1,$T2,$A ; a = T1 + T2
SPLOOPD 10 ; BODY_16_63
|| MVC B1,ILC
|| ROTL $X1,14,$t1e ; modulo-scheduled
|| ROTL $X14,13,$t1a ; modulo-scheduled
XOR $t0e,$s0,$s0
|| XOR $t0a,$s1,$s1
|| MV $X15,$X14
|| MV $X1,$Xn
XOR $t1e,$s0,$s0 ; sigma0(X[i+1])
|| XOR $t1a,$s1,$s1 ; sigma1(X[i+14])
|| LDW *${Xib}[2],$X1 ; modulo-scheduled
ROTL $A,30,$S0
|| OR $A,$B,$Maj
|| AND $A,$B,$t2a
|| ROTL $E,26,$S1
|| AND $F,$E,$Ch
|| ANDN $G,$E,$t2e
|| ADD $X9,$X0,$X0 ; X[i] += X[i+9]
ROTL $A,19,$t0a
|| AND $C,$Maj,$Maj
|| ROTL $E,21,$t0e
|| XOR $t2e,$Ch,$Ch ; Ch(e,f,g) = (e&f)^(~e&g)
|| ADD $s0,$X0,$X0 ; X[i] += sigma0(X[i+1])
ROTL $A,10,$t1a
|| OR $t2a,$Maj,$Maj ; Maj(a,b,c) = ((a|b)&c)|(a&b)
|| ROTL $E,7,$t1e
|| ADD $H,$K,$T1 ; T1 = h + K256[i]
|| ADD $s1,$X0,$X0 ; X[i] += sigma1(X[i+14])
XOR $t0a,$S0,$S0
|| XOR $t0e,$S1,$S1
|| ADD $X0,$T1,$T1 ; T1 += X[i]
|| STW $X0,*$Xib++
XOR $t1a,$S0,$S0 ; Sigma0(a)
|| XOR $t1e,$S1,$S1 ; Sigma1(e)
|| ADD $Ch,$T1,$T1 ; T1 += Ch(e,f,g)
|| MV $X0,$X15
|| ROTL $G,0,$H ; h = g
|| LDW *$K256++,$K ; pre-fetch K256[i+1]
ADD $S1,$T1,$T1 ; T1 += Sigma1(e)
|| ADD $S0,$Maj,$T2 ; T2 = Sigma0(a) + Maj(a,b,c)
|| MV $F,$G ; g = f
|| MV $Xn,$X0 ; modulo-scheduled
|| LDW *++$Xia,$X9 ; modulo-scheduled
|| ROTL $X1,25,$t0e ; modulo-scheduled
|| ROTL $X14,15,$t0a ; modulo-scheduled
ROTL $X1,14,$t1e ; modulo-scheduled
|| ROTL $X14,13,$t1a ; modulo-scheduled
|| MV $E,$F ; f = e
|| ADD $D,$T1,$E ; e = d + T1
|| MV $C,$D ; d = c
|| MV $B,$C ; c = b
MV $A,$B ; b = a
|| ADD $T1,$T2,$A ; a = T1 + T2
|| SHRU $X1,3,$s0 ; modulo-scheduled
|| SHRU $X14,10,$s1 ; modulo-scheduled
SPKERNEL
[A0] B outerloop?
|| [A0] LDNW *$INP++,$Xn ; pre-fetch input
|| [A0] ADDK -260,$K256 ; rewind K256
|| ADD $Actx,$A,$A ; accumulate ctx
|| ADD $Ectx,$E,$E
|| ADD $Bctx,$B,$B
ADD $Fctx,$F,$F
|| ADD $Cctx,$C,$C
|| ADD $Gctx,$G,$G
|| ADD $Dctx,$D,$D
|| ADD $Hctx,$H,$H
|| [A0] LDW *$K256++,$K ; pre-fetch K256[0]
[!A0] BNOP RA
||[!A0] MV $CTXA,$CTXB
[!A0] MV FP,SP ; restore stack pointer
||[!A0] LDW *FP[0],FP ; restore frame pointer
[!A0] STW $A,*${CTXA}[0] ; save ctx
||[!A0] STW $E,*${CTXB}[4]
||[!A0] MVK 0,B0
[!A0] STW $B,*${CTXA}[1]
||[!A0] STW $F,*${CTXB}[5]
||[!A0] MVC B0,AMR ; clear AMR
STW $C,*${CTXA}[2]
|| STW $G,*${CTXB}[6]
STW $D,*${CTXA}[3]
|| STW $H,*${CTXB}[7]
.endasmfunc
.sect ".const:sha_asm"
.align 128
K256:
.uword 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.uword 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.uword 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.uword 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.uword 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.uword 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.uword 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.uword 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.uword 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.uword 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.uword 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.uword 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.uword 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.uword 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.uword 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.uword 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
.cstring "SHA256 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
.align 4
___
# Write the accumulated template to STDOUT (the output file, if one was
# named on the command line) and close it explicitly so the output is
# flushed here, as the sibling C64x+ generator scripts do.
print $code;
close STDOUT;

View file

@ -0,0 +1,410 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# SHA512 for C64x+.
#
# January 2012
#
# Performance is 19 cycles per processed byte. Compared to block
# transform function from sha512.c compiled with cl6x with -mv6400+
# -o2 -DOPENSSL_SMALL_FOOTPRINT it's almost 7x faster and 2x smaller.
# Unrolling the loop won't make this implementation any faster, because
# it's effectively dominated by SHRU||SHL pairs and you can't schedule
# more of them.
#
# !!! Note that this module uses AMR, which means that all interrupt
# service routines are expected to preserve it and, for their own
# well-being, zero it upon entry.
# Scan the command line for the first argument that looks like a file
# name ("name.ext"); everything in front of it is discarded.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# Redirect STDOUT to that file. Without a file name the generated code
# keeps going to the inherited STDOUT, exactly as before. The guard is
# required because a three-argument open with an undefined name would
# open an anonymous temporary file. A file that cannot be created is now
# a fatal error instead of being silently ignored.
if (defined($output) && length($output)) {
    open(STDOUT,'>',$output) or die "can't open $output: $!";
}

# Symbolic names for C64x+ registers; they are interpolated into the
# assembly template that follows. Every 64-bit value is split into a
# "hi" half on the A side and a "lo" half on the B side.
($CTXA,$INP,$NUM) = ("A4","B4","A6"); # arguments
$K512="A3";
# a..h interleaved with their saved copies: A16..A31 (hi), B16..B31 (lo)
($Ahi,$Actxhi,$Bhi,$Bctxhi,$Chi,$Cctxhi,$Dhi,$Dctxhi,
$Ehi,$Ectxhi,$Fhi,$Fctxhi,$Ghi,$Gctxhi,$Hhi,$Hctxhi)=map("A$_",(16..31));
($Alo,$Actxlo,$Blo,$Bctxlo,$Clo,$Cctxlo,$Dlo,$Dctxlo,
$Elo,$Ectxlo,$Flo,$Fctxlo,$Glo,$Gctxlo,$Hlo,$Hctxlo)=map("B$_",(16..31));
# round temporaries in A10..A13 / B10..B13
($S1hi,$CHhi,$S0hi,$t0hi)=map("A$_",(10..13));
($S1lo,$CHlo,$S0lo,$t0lo)=map("B$_",(10..13));
($T1hi, $T2hi)= ("A6","A7"); # $T1hi reuses A6, the register $NUM arrives in
($T1lo,$T1carry,$T2lo,$T2carry)=("B6","B7","B8","B9");
($Khi,$Klo)=("A9","A8");
($MAJhi,$MAJlo)=($T2hi,$T2lo); # Maj is computed in place in T2
($t1hi,$t1lo)=($Khi,"B2"); # $t1hi shares A9 with $Khi
$CTXB=$t1lo; # B-side copy of the ctx pointer shares B2 with $t1lo
($Xihi,$Xilo)=("A5","B5"); # circular/ring buffer
# Whole assembly template for _sha512_block_data_order plus the K512
# table. Every 64-bit quantity is kept as a hi/lo register pair (A side /
# B side, per the register maps above); 64-bit additions are built from
# ADDU on the low halves, with the accumulated carry added into the high
# half afterwards. KHI/KLO are assembler aliases for the two registers of
# the K pair, assigned the opposite way round on big- and little-endian
# builds.
#
# Labels:
#   outerloop?  - once per input block: reload counters, save ctx copies
#   loop0_15?   - rounds that consume input words (byte-swapped on
#                 little-endian builds)
#   loop16_79?  - the shared round body; the message-schedule code after
#                 it branches back here while B1 is non-zero
#   break?      - add the saved ctx back in, then either branch to
#                 outerloop? (A0 != 0) or store ctx, restore the saved
#                 A10-A13/B10-B13 and FP, clear AMR and return
$code.=<<___;
.text
.asg B3,RA
.asg A15,FP
.asg B15,SP
.if .BIG_ENDIAN
.asg $Khi,KHI
.asg $Klo,KLO
.else
.asg $Khi,KLO
.asg $Klo,KHI
.endif
.global _sha512_block_data_order
_sha512_block_data_order:
.asmfunc stack_usage(40+128)
MV $NUM,A0 ; reassign $NUM
|| MVK -128,B0
[!A0] BNOP RA ; if ($NUM==0) return;
|| [A0] STW FP,*SP--(40) ; save frame pointer
|| [A0] MV SP,FP
[A0] STDW B13:B12,*SP[4]
|| [A0] MVK 0x00404,B1
[A0] STDW B11:B10,*SP[3]
|| [A0] STDW A13:A12,*FP[-3]
|| [A0] MVKH 0x60000,B1
[A0] STDW A11:A10,*SP[1]
|| [A0] MVC B1,AMR ; setup circular addressing
|| [A0] ADD B0,SP,SP ; alloca(128)
[A0] AND B0,SP,SP ; align stack at 128 bytes
|| [A0] ADDKPC _sha512_block_data_order,B1
|| [A0] MVKL (K512-_sha512_block_data_order),$K512
[A0] MVKH (K512-_sha512_block_data_order),$K512
|| [A0] SUBAW SP,2,SP ; reserve two words above buffer
ADDAW SP,3,$Xilo
ADDAW SP,2,$Xihi
|| MV $CTXA,$CTXB
LDW *${CTXA}[0^.LITTLE_ENDIAN],$Ahi ; load ctx
|| LDW *${CTXB}[1^.LITTLE_ENDIAN],$Alo
|| ADD B1,$K512,$K512
LDW *${CTXA}[2^.LITTLE_ENDIAN],$Bhi
|| LDW *${CTXB}[3^.LITTLE_ENDIAN],$Blo
LDW *${CTXA}[4^.LITTLE_ENDIAN],$Chi
|| LDW *${CTXB}[5^.LITTLE_ENDIAN],$Clo
LDW *${CTXA}[6^.LITTLE_ENDIAN],$Dhi
|| LDW *${CTXB}[7^.LITTLE_ENDIAN],$Dlo
LDW *${CTXA}[8^.LITTLE_ENDIAN],$Ehi
|| LDW *${CTXB}[9^.LITTLE_ENDIAN],$Elo
LDW *${CTXA}[10^.LITTLE_ENDIAN],$Fhi
|| LDW *${CTXB}[11^.LITTLE_ENDIAN],$Flo
LDW *${CTXA}[12^.LITTLE_ENDIAN],$Ghi
|| LDW *${CTXB}[13^.LITTLE_ENDIAN],$Glo
LDW *${CTXA}[14^.LITTLE_ENDIAN],$Hhi
|| LDW *${CTXB}[15^.LITTLE_ENDIAN],$Hlo
LDNDW *$INP++,B11:B10 ; pre-fetch input
LDDW *$K512++,$Khi:$Klo ; pre-fetch K512[0]
outerloop?:
MVK 15,B0 ; loop counters
|| MVK 64,B1
|| SUB A0,1,A0
MV $Ahi,$Actxhi
|| MV $Alo,$Actxlo
|| MV $Bhi,$Bctxhi
|| MV $Blo,$Bctxlo
|| MV $Chi,$Cctxhi
|| MV $Clo,$Cctxlo
|| MVD $Dhi,$Dctxhi
|| MVD $Dlo,$Dctxlo
MV $Ehi,$Ectxhi
|| MV $Elo,$Ectxlo
|| MV $Fhi,$Fctxhi
|| MV $Flo,$Fctxlo
|| MV $Ghi,$Gctxhi
|| MV $Glo,$Gctxlo
|| MVD $Hhi,$Hctxhi
|| MVD $Hlo,$Hctxlo
loop0_15?:
.if .BIG_ENDIAN
MV B11,$T1hi
|| MV B10,$T1lo
.else
SWAP4 B10,$T1hi
|| SWAP4 B11,$T1lo
SWAP2 $T1hi,$T1hi
|| SWAP2 $T1lo,$T1lo
.endif
loop16_79?:
STW $T1hi,*$Xihi++[2]
|| STW $T1lo,*$Xilo++[2] ; X[i] = T1
|| ADD $Hhi,$T1hi,$T1hi
|| ADDU $Hlo,$T1lo,$T1carry:$T1lo ; T1 += h
|| SHRU $Ehi,14,$S1hi
|| SHL $Ehi,32-14,$S1lo
XOR $Fhi,$Ghi,$CHhi
|| XOR $Flo,$Glo,$CHlo
|| ADD KHI,$T1hi,$T1hi
|| ADDU KLO,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += K512[i]
|| SHRU $Elo,14,$t0lo
|| SHL $Elo,32-14,$t0hi
XOR $t0hi,$S1hi,$S1hi
|| XOR $t0lo,$S1lo,$S1lo
|| AND $Ehi,$CHhi,$CHhi
|| AND $Elo,$CHlo,$CHlo
|| ROTL $Ghi,0,$Hhi
|| ROTL $Glo,0,$Hlo ; h = g
|| SHRU $Ehi,18,$t0hi
|| SHL $Ehi,32-18,$t0lo
XOR $t0hi,$S1hi,$S1hi
|| XOR $t0lo,$S1lo,$S1lo
|| XOR $Ghi,$CHhi,$CHhi
|| XOR $Glo,$CHlo,$CHlo ; Ch(e,f,g) = ((f^g)&e)^g
|| ROTL $Fhi,0,$Ghi
|| ROTL $Flo,0,$Glo ; g = f
|| SHRU $Elo,18,$t0lo
|| SHL $Elo,32-18,$t0hi
XOR $t0hi,$S1hi,$S1hi
|| XOR $t0lo,$S1lo,$S1lo
|| OR $Ahi,$Bhi,$MAJhi
|| OR $Alo,$Blo,$MAJlo
|| ROTL $Ehi,0,$Fhi
|| ROTL $Elo,0,$Flo ; f = e
|| SHRU $Ehi,41-32,$t0lo
|| SHL $Ehi,64-41,$t0hi
XOR $t0hi,$S1hi,$S1hi
|| XOR $t0lo,$S1lo,$S1lo
|| AND $Chi,$MAJhi,$MAJhi
|| AND $Clo,$MAJlo,$MAJlo
|| ROTL $Dhi,0,$Ehi
|| ROTL $Dlo,0,$Elo ; e = d
|| SHRU $Elo,41-32,$t0hi
|| SHL $Elo,64-41,$t0lo
XOR $t0hi,$S1hi,$S1hi
|| XOR $t0lo,$S1lo,$S1lo ; Sigma1(e)
|| AND $Ahi,$Bhi,$t1hi
|| AND $Alo,$Blo,$t1lo
|| ROTL $Chi,0,$Dhi
|| ROTL $Clo,0,$Dlo ; d = c
|| SHRU $Ahi,28,$S0hi
|| SHL $Ahi,32-28,$S0lo
OR $t1hi,$MAJhi,$MAJhi
|| OR $t1lo,$MAJlo,$MAJlo ; Maj(a,b,c) = ((a|b)&c)|(a&b)
|| ADD $CHhi,$T1hi,$T1hi
|| ADDU $CHlo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += Ch(e,f,g)
|| ROTL $Bhi,0,$Chi
|| ROTL $Blo,0,$Clo ; c = b
|| SHRU $Alo,28,$t0lo
|| SHL $Alo,32-28,$t0hi
XOR $t0hi,$S0hi,$S0hi
|| XOR $t0lo,$S0lo,$S0lo
|| ADD $S1hi,$T1hi,$T1hi
|| ADDU $S1lo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += Sigma1(e)
|| ROTL $Ahi,0,$Bhi
|| ROTL $Alo,0,$Blo ; b = a
|| SHRU $Ahi,34-32,$t0lo
|| SHL $Ahi,64-34,$t0hi
XOR $t0hi,$S0hi,$S0hi
|| XOR $t0lo,$S0lo,$S0lo
|| ADD $MAJhi,$T1hi,$T2hi
|| ADDU $MAJlo,$T1carry:$T1lo,$T2carry:$T2lo ; T2 = T1+Maj(a,b,c)
|| SHRU $Alo,34-32,$t0hi
|| SHL $Alo,64-34,$t0lo
XOR $t0hi,$S0hi,$S0hi
|| XOR $t0lo,$S0lo,$S0lo
|| ADD $Ehi,$T1hi,$T1hi
|| ADDU $Elo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += e
|| [B0] BNOP loop0_15?
|| SHRU $Ahi,39-32,$t0lo
|| SHL $Ahi,64-39,$t0hi
XOR $t0hi,$S0hi,$S0hi
|| XOR $t0lo,$S0lo,$S0lo
|| [B0] LDNDW *$INP++,B11:B10 ; pre-fetch input
||[!B1] BNOP break?
|| SHRU $Alo,39-32,$t0hi
|| SHL $Alo,64-39,$t0lo
XOR $t0hi,$S0hi,$S0hi
|| XOR $t0lo,$S0lo,$S0lo ; Sigma0(a)
|| ADD $T1carry,$T1hi,$Ehi
|| MV $T1lo,$Elo ; e = T1
||[!B0] LDW *${Xihi}[28],$T1hi
||[!B0] LDW *${Xilo}[28],$T1lo ; X[i+14]
ADD $S0hi,$T2hi,$T2hi
|| ADDU $S0lo,$T2carry:$T2lo,$T2carry:$T2lo ; T2 += Sigma0(a)
|| [B1] LDDW *$K512++,$Khi:$Klo ; pre-fetch K512[i]
NOP ; avoid cross-path stall
ADD $T2carry,$T2hi,$Ahi
|| MV $T2lo,$Alo ; a = T2
|| [B0] SUB B0,1,B0
;;===== branch to loop00_15? is taken here
NOP
;;===== branch to break? is taken here
LDW *${Xihi}[2],$T2hi
|| LDW *${Xilo}[2],$T2lo ; X[i+1]
|| SHRU $T1hi,19,$S1hi
|| SHL $T1hi,32-19,$S1lo
SHRU $T1lo,19,$t0lo
|| SHL $T1lo,32-19,$t0hi
XOR $t0hi,$S1hi,$S1hi
|| XOR $t0lo,$S1lo,$S1lo
|| SHRU $T1hi,61-32,$t0lo
|| SHL $T1hi,64-61,$t0hi
XOR $t0hi,$S1hi,$S1hi
|| XOR $t0lo,$S1lo,$S1lo
|| SHRU $T1lo,61-32,$t0hi
|| SHL $T1lo,64-61,$t0lo
XOR $t0hi,$S1hi,$S1hi
|| XOR $t0lo,$S1lo,$S1lo
|| SHRU $T1hi,6,$t0hi
|| SHL $T1hi,32-6,$t0lo
XOR $t0hi,$S1hi,$S1hi
|| XOR $t0lo,$S1lo,$S1lo
|| SHRU $T1lo,6,$t0lo
|| LDW *${Xihi}[18],$T1hi
|| LDW *${Xilo}[18],$T1lo ; X[i+9]
XOR $t0lo,$S1lo,$S1lo ; sigma1(Xi[i+14])
|| LDW *${Xihi}[0],$CHhi
|| LDW *${Xilo}[0],$CHlo ; X[i]
|| SHRU $T2hi,1,$S0hi
|| SHL $T2hi,32-1,$S0lo
SHRU $T2lo,1,$t0lo
|| SHL $T2lo,32-1,$t0hi
XOR $t0hi,$S0hi,$S0hi
|| XOR $t0lo,$S0lo,$S0lo
|| SHRU $T2hi,8,$t0hi
|| SHL $T2hi,32-8,$t0lo
XOR $t0hi,$S0hi,$S0hi
|| XOR $t0lo,$S0lo,$S0lo
|| SHRU $T2lo,8,$t0lo
|| SHL $T2lo,32-8,$t0hi
XOR $t0hi,$S0hi,$S0hi
|| XOR $t0lo,$S0lo,$S0lo
|| ADD $S1hi,$T1hi,$T1hi
|| ADDU $S1lo,$T1lo,$T1carry:$T1lo ; T1 = X[i+9]+sigma1()
|| [B1] BNOP loop16_79?
|| SHRU $T2hi,7,$t0hi
|| SHL $T2hi,32-7,$t0lo
XOR $t0hi,$S0hi,$S0hi
|| XOR $t0lo,$S0lo,$S0lo
|| ADD $CHhi,$T1hi,$T1hi
|| ADDU $CHlo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += X[i]
|| SHRU $T2lo,7,$t0lo
XOR $t0lo,$S0lo,$S0lo ; sigma0(Xi[i+1]
ADD $S0hi,$T1hi,$T1hi
|| ADDU $S0lo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += sigma0()
|| [B1] SUB B1,1,B1
NOP ; avoid cross-path stall
ADD $T1carry,$T1hi,$T1hi
;;===== branch to loop16_79? is taken here
break?:
ADD $Ahi,$Actxhi,$Ahi ; accumulate ctx
|| ADDU $Alo,$Actxlo,$Actxlo:$Alo
|| [A0] LDNDW *$INP++,B11:B10 ; pre-fetch input
|| [A0] ADDK -640,$K512 ; rewind pointer to K512
ADD $Bhi,$Bctxhi,$Bhi
|| ADDU $Blo,$Bctxlo,$Bctxlo:$Blo
|| [A0] LDDW *$K512++,$Khi:$Klo ; pre-fetch K512[0]
ADD $Chi,$Cctxhi,$Chi
|| ADDU $Clo,$Cctxlo,$Cctxlo:$Clo
|| ADD $Actxlo,$Ahi,$Ahi
||[!A0] MV $CTXA,$CTXB
ADD $Dhi,$Dctxhi,$Dhi
|| ADDU $Dlo,$Dctxlo,$Dctxlo:$Dlo
|| ADD $Bctxlo,$Bhi,$Bhi
||[!A0] STW $Ahi,*${CTXA}[0^.LITTLE_ENDIAN] ; save ctx
||[!A0] STW $Alo,*${CTXB}[1^.LITTLE_ENDIAN]
ADD $Ehi,$Ectxhi,$Ehi
|| ADDU $Elo,$Ectxlo,$Ectxlo:$Elo
|| ADD $Cctxlo,$Chi,$Chi
|| [A0] BNOP outerloop?
||[!A0] STW $Bhi,*${CTXA}[2^.LITTLE_ENDIAN]
||[!A0] STW $Blo,*${CTXB}[3^.LITTLE_ENDIAN]
ADD $Fhi,$Fctxhi,$Fhi
|| ADDU $Flo,$Fctxlo,$Fctxlo:$Flo
|| ADD $Dctxlo,$Dhi,$Dhi
||[!A0] STW $Chi,*${CTXA}[4^.LITTLE_ENDIAN]
||[!A0] STW $Clo,*${CTXB}[5^.LITTLE_ENDIAN]
ADD $Ghi,$Gctxhi,$Ghi
|| ADDU $Glo,$Gctxlo,$Gctxlo:$Glo
|| ADD $Ectxlo,$Ehi,$Ehi
||[!A0] STW $Dhi,*${CTXA}[6^.LITTLE_ENDIAN]
||[!A0] STW $Dlo,*${CTXB}[7^.LITTLE_ENDIAN]
ADD $Hhi,$Hctxhi,$Hhi
|| ADDU $Hlo,$Hctxlo,$Hctxlo:$Hlo
|| ADD $Fctxlo,$Fhi,$Fhi
||[!A0] STW $Ehi,*${CTXA}[8^.LITTLE_ENDIAN]
||[!A0] STW $Elo,*${CTXB}[9^.LITTLE_ENDIAN]
ADD $Gctxlo,$Ghi,$Ghi
||[!A0] STW $Fhi,*${CTXA}[10^.LITTLE_ENDIAN]
||[!A0] STW $Flo,*${CTXB}[11^.LITTLE_ENDIAN]
ADD $Hctxlo,$Hhi,$Hhi
||[!A0] STW $Ghi,*${CTXA}[12^.LITTLE_ENDIAN]
||[!A0] STW $Glo,*${CTXB}[13^.LITTLE_ENDIAN]
;;===== branch to outerloop? is taken here
STW $Hhi,*${CTXA}[14^.LITTLE_ENDIAN]
|| STW $Hlo,*${CTXB}[15^.LITTLE_ENDIAN]
|| MVK -40,B0
ADD FP,B0,SP ; destroy circular buffer
|| LDDW *FP[-4],A11:A10
LDDW *SP[2],A13:A12
|| LDDW *FP[-2],B11:B10
LDDW *SP[4],B13:B12
|| BNOP RA
LDW *++SP(40),FP ; restore frame pointer
MVK 0,B0
MVC B0,AMR ; clear AMR
NOP 2 ; wait till FP is committed
.endasmfunc
.sect ".const:sha_asm"
.align 128
K512:
.uword 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
.uword 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
.uword 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
.uword 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
.uword 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
.uword 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
.uword 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
.uword 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
.uword 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
.uword 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
.uword 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
.uword 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
.uword 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
.uword 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
.uword 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
.uword 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
.uword 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
.uword 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
.uword 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
.uword 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
.uword 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
.uword 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
.uword 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
.uword 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
.uword 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
.uword 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
.uword 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
.uword 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
.uword 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
.uword 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
.uword 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
.uword 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
.uword 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
.uword 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
.uword 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
.uword 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
.uword 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
.uword 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
.uword 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
.uword 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
.cstring "SHA512 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
.align 4
___
# Write the accumulated template to STDOUT (the output file, if one was
# named on the command line) and flush it.
print $code;
close STDOUT;

View file

@ -65,7 +65,7 @@ int OPENSSL_issetugid(void)
return issetugid();
}
#elif defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VXWORKS) || defined(OPENSSL_SYS_NETWARE)
#elif defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VXWORKS) || defined(OPENSSL_SYS_NETWARE) || defined(_TMS320C6X)
int OPENSSL_issetugid(void)
{

2
e_os.h
View file

@ -668,7 +668,7 @@ extern char *sys_errlist[]; extern int sys_nerr;
#if defined(OPENSSL_SYS_WINDOWS)
# define strcasecmp _stricmp
# define strncasecmp _strnicmp
#elif defined(OPENSSL_SYS_VMS)
#elif defined(OPENSSL_SYS_VMS) || defined(OPENSSL_SYS_DSPBIOS)
/* VMS below version 7.0 doesn't have strcasecmp() */
# include "o_str.h"
# define strcasecmp OPENSSL_strcasecmp

View file

@ -99,7 +99,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx,
{
const EVP_CIPHER *cipher = NULL;
if (strcasecmp(amode, "CBC") == 0)
if (fips_strcasecmp(amode, "CBC") == 0)
{
switch (akeysz)
{
@ -117,7 +117,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx,
}
}
else if (strcasecmp(amode, "ECB") == 0)
else if (fips_strcasecmp(amode, "ECB") == 0)
{
switch (akeysz)
{
@ -134,7 +134,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx,
break;
}
}
else if (strcasecmp(amode, "CFB128") == 0)
else if (fips_strcasecmp(amode, "CFB128") == 0)
{
switch (akeysz)
{
@ -169,7 +169,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx,
break;
}
}
else if(!strcasecmp(amode,"CFB1"))
else if(!fips_strcasecmp(amode,"CFB1"))
{
switch (akeysz)
{
@ -186,7 +186,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx,
break;
}
}
else if(!strcasecmp(amode,"CFB8"))
else if(!fips_strcasecmp(amode,"CFB8"))
{
switch (akeysz)
{
@ -215,7 +215,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx,
}
if (FIPS_cipherinit(ctx, cipher, aKey, iVec, dir) <= 0)
return 0;
if(!strcasecmp(amode,"CFB1"))
if(!fips_strcasecmp(amode,"CFB1"))
M_EVP_CIPHER_CTX_set_flags(ctx, EVP_CIPH_FLAG_LENGTH_BITS);
if (dir)
FIPS_cipher(ctx, ciphertext, plaintext, len);
@ -635,10 +635,8 @@ static int proc_file(char *rqfile, char *rspfile)
char *xp, *pp = ibuf+2;
int n;
if (akeysz)
{ /* insert current time & date */
time_t rtim = time(0);
fputs("# ", rfp);
copy_line(ctime(&rtim), rfp);
{
copy_line(ibuf, rfp);
}
else
{
@ -876,11 +874,11 @@ int main(int argc, char **argv)
if (argc > 1)
{
if (strcasecmp(argv[1], "-d") == 0)
if (fips_strcasecmp(argv[1], "-d") == 0)
{
d_opt = 1;
}
else if (strcasecmp(argv[1], "-f") == 0)
else if (fips_strcasecmp(argv[1], "-f") == 0)
{
d_opt = 0;
}

View file

@ -75,10 +75,11 @@ int main(int argc, char **argv)
#include "fips_utl.h"
static char buf[204800];
static char lbuf[204800];
static void gcmtest(FILE *in, FILE *out, int encrypt)
{
char buf[2048];
char lbuf[2048];
char *keyword, *value;
int keylen = -1, ivlen = -1, aadlen = -1, taglen = -1, ptlen = -1;
int rv;
@ -266,8 +267,6 @@ static void gcmtest(FILE *in, FILE *out, int encrypt)
static void xtstest(FILE *in, FILE *out)
{
char buf[204800];
char lbuf[204800];
char *keyword, *value;
int inlen = 0;
int encrypt = 0;
@ -340,8 +339,6 @@ static void xtstest(FILE *in, FILE *out)
static void ccmtest(FILE *in, FILE *out)
{
char buf[200048];
char lbuf[200048];
char *keyword, *value;
long l;
unsigned char *Key = NULL, *Nonce = NULL;

View file

@ -356,10 +356,8 @@ static int tproc_file(char *rqfile, char *rspfile)
char *xp, *pp = ibuf+2;
int n;
if(*amode)
{ /* insert current time & date */
time_t rtim = time(0);
fputs("# ", rfp);
copy_line(ctime(&rtim), rfp);
{
copy_line(ibuf, rfp);
}
else
{

View file

@ -286,7 +286,7 @@ int main(int argc, char **argv)
return 0;
parse_error:
fprintf(stderr, "Error Parsing request file\n");
exit(1);
return 1;
}
#endif

View file

@ -62,8 +62,10 @@
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#ifndef NO_SYS_TYPES_H
#include <sys/types.h>
#include <sys/stat.h>
#endif
#include "e_os.h"

View file

@ -81,7 +81,7 @@ static int fips_started = 0;
static int fips_is_owning_thread(void);
static int fips_set_owning_thread(void);
static int fips_clear_owning_thread(void);
static unsigned char *fips_signature_witness(void);
static const unsigned char *fips_signature_witness(void);
#define fips_w_lock() CRYPTO_w_lock(CRYPTO_LOCK_FIPS)
#define fips_w_unlock() CRYPTO_w_unlock(CRYPTO_LOCK_FIPS)
@ -148,6 +148,9 @@ void fips_set_selftest_fail(void)
extern const void *FIPS_text_start(), *FIPS_text_end();
extern const unsigned char FIPS_rodata_start[], FIPS_rodata_end[];
#ifdef _TMS320C6X
const
#endif
unsigned char FIPS_signature [20] = { 0 };
__fips_constseg
static const char FIPS_hmac_key[]="etaonrishdlcupfm";
@ -413,9 +416,8 @@ int fips_clear_owning_thread(void)
return ret;
}
unsigned char *fips_signature_witness(void)
const unsigned char *fips_signature_witness(void)
{
extern unsigned char FIPS_signature[];
return FIPS_signature;
}

View file

@ -35,6 +35,7 @@ const void *FIPS_text_end(void);
(defined(__linux) && ((defined(__PPC__) && !defined(__PPC64__)) || \
defined(__arm__) || defined(__arm))) || \
(defined(__APPLE__) /* verified on all MacOS X & iOS flavors */)|| \
(defined(_TMS320C6X)) || \
(defined(_WIN32) && defined(_MSC_VER))
# define FIPS_REF_POINT_IS_CROSS_COMPILER_AWARE
# endif
@ -70,6 +71,10 @@ const unsigned int FIPS_text_startX[]=
# pragma const_seg("fipsro$a")
# pragma const_seg()
__declspec(allocate("fipsro$a"))
# elif defined(_TMS320C6X)
# pragma CODE_SECTION(instruction_pointer,".fips_text:start")
# pragma CODE_SECTION(FIPS_ref_point,".fips_text:start")
# pragma DATA_SECTION(FIPS_rodata_start,".fips_const:start")
# endif
const unsigned int FIPS_rodata_start[]=
{ 0x46495053, 0x5f726f64, 0x6174615f, 0x73746172 };
@ -87,6 +92,10 @@ const unsigned int FIPS_text_endX[]=
# pragma const_seg("fipsro$z")
# pragma const_seg()
__declspec(allocate("fipsro$z"))
# elif defined(_TMS320C6X)
# pragma CODE_SECTION(instruction_pointer,".fips_text:end")
# pragma CODE_SECTION(FIPS_ref_point,".fips_text:end")
# pragma DATA_SECTION(FIPS_rodata_end,".fips_const:end")
# endif
const unsigned int FIPS_rodata_end[]=
{ 0x46495053, 0x5f726f64, 0x6174615f, 0x656e645b };

View file

@ -53,6 +53,12 @@
int lib$initialize();
globaldef int (*lib_init_ref)() = lib$initialize;
# pragma __standard
#elif defined(_TMS320C6X)
# if defined(__TI_EABI__)
asm("\t.sect \".init_array\"\n\t.align 4\n\t.field FINGERPRINT_premain,32");
# else
asm("\t.sect \".pinit\"\n\t.align 4\n\t.field _FINGERPRINT_premain,32");
# endif
#elif 0
The rest has to be taken care of through command line:

View file

@ -1 +1 @@
HMAC-SHA1(fips_premain.c)= 1eaf66f76187877ff403708a2948d240f92736a0
HMAC-SHA1(fips_premain.c)= 65b20c3cec235cec85af848e1cd2dfdfa101804a

View file

@ -495,6 +495,7 @@ my $onedir = 0;
my $filter = "";
my $tvdir;
my $tprefix;
my $sfprefix = "";
my $debug = 0;
my $quiet = 0;
my $notest = 0;
@ -615,6 +616,9 @@ foreach (@ARGV) {
elsif (/--script-tprefix=(.*)$/) {
$stprefix = $1;
}
elsif (/--script-fprefix=(.*)$/) {
$sfprefix = $1;
}
elsif (/--mkdir=(.*)$/) {
$mkcmd = $1;
}
@ -1017,6 +1021,10 @@ END
$out =~ s|/req/(\S+)\.req|/$rspdir/$1.rsp|;
my $outdir = $out;
$outdir =~ s|/[^/]*$||;
if ( !-d $outdir && ($outfile eq "" || $minimal_script)) {
print STDERR "DEBUG: Creating directory $outdir\n" if $debug;
mkdir($outdir) || die "Can't create directory $outdir";
}
if ($outfile ne "") {
if ($win32) {
$outdir =~ tr|/|\\|;
@ -1039,12 +1047,9 @@ END
}
$lastdir = $outdir;
}
} elsif ( !-d $outdir ) {
print STDERR "DEBUG: Creating directory $outdir\n" if $debug;
mkdir($outdir) || die "Can't create directory $outdir";
}
}
my $cmd = "$tcmd \"$req\" \"$out\"";
my $cmd = "$tcmd \"$sfprefix$req\" \"$sfprefix$out\"";
print STDERR "DEBUG: running test $tname\n" if ( $debug && !$verify );
if ($outfile ne "") {
if ($minimal_script) {

View file

@ -589,6 +589,7 @@
#define AES_encrypt fips_aes_encrypt
#define AES_set_decrypt_key fips_aes_set_decrypt_key
#define AES_set_encrypt_key fips_aes_set_encrypt_key
#define AES_ctr32_encrypt fips_aes_ctr32_encrypt
#define BN_from_montgomery fips_bn_from_montgomery
#define BN_num_bits_word FIPS_bn_num_bits_word
#define DES_SPtrans fips_des_sptrans

View file

@ -66,7 +66,7 @@
#include <openssl/aes.h>
#include <openssl/err.h>
#include <openssl/fips_rand.h>
#if !(defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VXWORKS))
#if !(defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VXWORKS) || defined(OPENSSL_SYSNAME_DSPBIOS))
# include <sys/time.h>
#endif
#if defined(OPENSSL_SYS_VXWORKS)
@ -232,8 +232,13 @@ void FIPS_get_timevec(unsigned char *buf, unsigned long *pctr)
{
#ifdef OPENSSL_SYS_WIN32
FILETIME ft;
#ifdef _WIN32_WCE
SYSTEMTIME t;
#endif
#elif defined(OPENSSL_SYS_VXWORKS)
struct timespec ts;
#elif defined(OPENSSL_SYSNAME_DSPBIOS)
unsigned long long TSC, OPENSSL_rdtsc();
#else
struct timeval tv;
#endif
@ -243,7 +248,12 @@ void FIPS_get_timevec(unsigned char *buf, unsigned long *pctr)
#endif
#ifdef OPENSSL_SYS_WIN32
#ifdef _WIN32_WCE
GetSystemTime(&t);
SystemTimeToFileTime(&t, &ft);
#else
GetSystemTimeAsFileTime(&ft);
#endif
buf[0] = (unsigned char) (ft.dwHighDateTime & 0xff);
buf[1] = (unsigned char) ((ft.dwHighDateTime >> 8) & 0xff);
buf[2] = (unsigned char) ((ft.dwHighDateTime >> 16) & 0xff);
@ -262,6 +272,16 @@ void FIPS_get_timevec(unsigned char *buf, unsigned long *pctr)
buf[5] = (unsigned char) ((ts.tv_nsec >> 8) & 0xff);
buf[6] = (unsigned char) ((ts.tv_nsec >> 16) & 0xff);
buf[7] = (unsigned char) ((ts.tv_nsec >> 24) & 0xff);
#elif defined(OPENSSL_SYSNAME_DSPBIOS)
TSC = OPENSSL_rdtsc();
buf[0] = (unsigned char) (TSC & 0xff);
buf[1] = (unsigned char) ((TSC >> 8) & 0xff);
buf[2] = (unsigned char) ((TSC >> 16) & 0xff);
buf[3] = (unsigned char) ((TSC >> 24) & 0xff);
buf[4] = (unsigned char) ((TSC >> 32) & 0xff);
buf[5] = (unsigned char) ((TSC >> 40) & 0xff);
buf[6] = (unsigned char) ((TSC >> 48) & 0xff);
buf[7] = (unsigned char) ((TSC >> 56) & 0xff);
#else
gettimeofday(&tv,NULL);
buf[0] = (unsigned char) (tv.tv_sec & 0xff);

View file

@ -1,7 +1,10 @@
@echo off
rem @echo off
SET ASM=%1
SET EXARG=
SET MFILE=ntdll.mak
if NOT X%OSVERSION% == X goto wince
if NOT X%PROCESSOR_ARCHITECTURE% == X goto defined
@ -42,6 +45,14 @@ SET TARGET=VC-WIN64A
if x%ASM% == xno-asm goto compile
SET ASM=nasm
goto compile
:wince
echo Auto Configuring for WinCE
SET TARGET=VC-CE
SET MFILE=cedll.mak
:compile
if x%ASM% == xno-asm SET EXARG=no-asm
@ -52,13 +63,13 @@ echo on
perl util\mkfiles.pl >MINFO
@if ERRORLEVEL 1 goto error
perl util\mk1mf.pl dll %ASM% %TARGET% >ms\ntdll.mak
perl util\mk1mf.pl dll %ASM% %TARGET% >ms\%MFILE%
@if ERRORLEVEL 1 goto error
nmake -f ms\ntdll.mak clean
nmake -f ms\ntdll.mak
nmake -f ms\%MFILE% clean
nmake -f ms\%MFILE%
@if ERRORLEVEL 1 goto error
nmake -f ms\ntdll.mak install
nmake -f ms\%MFILE% install
@if ERRORLEVEL 1 goto error
@echo.

View file

@ -89,6 +89,7 @@ extern int fips_rsavtest_main(int argc, char **argv);
extern int fips_shatest_main(int argc, char **argv);
extern int fips_test_suite_main(int argc, char **argv);
#if !defined(_TMS320C6400_PLUS)
#include "fips_aesavs.c"
#include "fips_cmactest.c"
#include "fips_desmovs.c"
@ -106,6 +107,28 @@ extern int fips_test_suite_main(int argc, char **argv);
#include "fips_shatest.c"
#include "fips_test_suite.c"
#else
#include "aes/fips_aesavs.c"
#include "cmac/fips_cmactest.c"
#include "des/fips_desmovs.c"
#include "dh/fips_dhvs.c"
#include "rand/fips_drbgvs.c"
#include "dsa/fips_dssvs.c"
#include "ecdh/fips_ecdhvs.c"
#include "ecdsa/fips_ecdsavs.c"
#include "aes/fips_gcmtest.c"
#include "hmac/fips_hmactest.c"
#include "rand/fips_rngvs.c"
#include "rsa/fips_rsagtest.c"
#include "rsa/fips_rsastest.c"
#include "rsa/fips_rsavtest.c"
#include "sha/fips_shatest.c"
#include "fips_test_suite.c"
#pragma DATA_SECTION(aucCmBootDspLoad, "BootDspSection");
volatile unsigned char aucCmBootDspLoad[8*1024];
#endif
typedef struct
{
const char *name;
@ -221,7 +244,7 @@ static int run_prg(int argc, char **argv)
int main(int argc, char **argv)
{
char buf[1024];
static char buf[1024];
char **args = argv + 1;
const char *sname = "fipstests.sh";
ARGS arg;
@ -238,6 +261,10 @@ int main(int argc, char **argv)
CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON);
#endif
#if defined(_TMS320C6400_PLUS)
SysInit();
#endif
if (*args && *args[0] != '-')
{
rv = run_prg(argc - 1, args);

32
util/fips_standalone_sha1 Normal file
View file

@ -0,0 +1,32 @@
#!/usr/bin/env perl
#
# Print, and optionally verify, the HMAC-SHA1 fingerprint of a file using the
# pure-Perl implementation from hmac_sha1.pl.
#
#   fips_standalone_sha1 [-verify] file
#
# The fingerprint is always printed as "HMAC-SHA1(file)= <hex>". With -verify
# it is additionally compared with the digest recorded on the first line of
# "file.sha1": the script exits 0 on a match and dies otherwise.

# Look for hmac_sha1.pl in the directory this script was started from. When
# $0 carries no directory part fall back to the current directory explicitly,
# because newer perls no longer search "." on their own.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "./";
unshift(@INC,$dir);
require "hmac_sha1.pl";

# The last argument has to name an existing plain file.
(@ARGV && -f $ARGV[$#ARGV]) || die "usage: $0 [-verify] file";

$verify=shift if ($ARGV[0] eq "-verify");
my $file = $ARGV[0];

sysopen(FD,$file,0) || die "$!";	# mode 0: read-only
binmode(FD);

my $ctx = HMAC->Init("etaonrishdlcupfm");
my $got;
while ($got = read(FD,$blob,4*1024)) { $ctx->Update($blob); }
# read() yields 0 at end of file but undef on error; do not fingerprint a
# truncated read silently.
defined($got) || die "$!";
close(FD);

my $signature = unpack("H*",$ctx->Final());
print "HMAC-SHA1($file)= $signature\n";

if ($verify) {
    open(FD,"<","$file.sha1") || die "$!";
    $line = <FD>;
    close(FD);
    # The pattern accepts hex digits of either case, so fold the recorded
    # value to lower case before comparing with our lower-case digest.
    exit(0) if ($line =~ /HMAC\-SHA1\([^\)]*\)=\s*([0-9a-f]+)/i &&
                lc($1) eq $signature);
    die "signature mismatch";
}

View file

@ -8,9 +8,6 @@ my @ARGS = @ARGV;
my $top = shift @ARGS;
my $target = shift @ARGS;
my $tmptarg = $target;
$tmptarg =~ s/\.[^\\\/\.]+$/.tmp/;
my $runasm = 1;
@ -40,43 +37,31 @@ while (<IN>)
last if (/assembler/)
}
# Store all renames.
# Store all renames [noting minimal length].
my $minlen=0x10000;
while (<IN>)
{
if (/^#define\s+(\w+)\s+(\w+)\b/)
if (/^#define\s+_?(\w+)\s+_?(\w+)\b/)
{
$edits{$1} = $2;
my $len = length($1);
$minlen = $len if ($len<$minlen);
}
}
my ($from, $to);
open(IN,"$target") || die "Can't open $target for reading";
#delete any temp file lying around
@code = <IN>; # suck in whole file
unlink $tmptarg;
close IN;
#rename target temporarily
my $rencnt = 0;
# On windows the previous file doesn't always close straight away
# so retry the rename operation a few times if it fails.
while (!rename($target, $tmptarg))
{
sleep 2;
die "Can't rename $target" if ($rencnt++ > 10);
}
open(OUT,">$target") || die "Can't open $target for writing";
#edit target
open(IN,$tmptarg) || die "Can't open temporary file";
open(OUT, ">$target") || die "Can't open output file $target";
while (<IN>)
{
while (($from, $to) = each %edits)
{
s/(\b_*)$from(\b)/$1$to$2/g;
}
print OUT $_;
}
foreach $line (@code)
{
$line =~ s/\b(_?)(\w{$minlen,})\b/$1.($edits{$2} or $2)/geo;
print OUT $line;
}
close OUT;
@ -87,14 +72,5 @@ if ($runasm)
my $rv = $?;
# restore target
unlink $target;
rename $tmptarg, $target;
die "Error executing assembler!" if $rv != 0;
}
else
{
# Don't care about target
unlink $tmptarg;
}

View file

@ -58,7 +58,7 @@ while (<STDIN>)
}
else
{
next unless (/^(fips\/|crypto|util|test|include|ms)/);
next unless (/^(fips\/|crypto|util|test|include|ms|c6x)/);
}
if (/^crypto\/([^\/]+)/)
{

View file

@ -27,6 +27,19 @@ if (exists $ENV{"PREMAIN_DSO_EXE"})
$fips_premain_dso = "";
}
my $fips_sig = $ENV{"FIPS_SIG"};
if (defined $fips_sig)
{
if ($fips_premain_dso ne "")
{
$fips_premain_dso = "$fips_sig -dso";
}
else
{
$fips_premain_dso = "$fips_sig -exe";
}
}
check_hash($sha1_exe, "fips_premain.c");
check_hash($sha1_exe, "fipscanister.lib");

196
util/hmac_sha1.pl Executable file
View file

@ -0,0 +1,196 @@
#!/usr/bin/env perl
#
# Copyright (c) 2011 The OpenSSL Project.
#
######################################################################
#
# SHA1 and HMAC in Perl by <appro@openssl.org>.
#
{ package SHA1;
# Pure-Perl SHA-1 with an Init/Update/Final interface. Update returns the
# object so calls can be chained; Final returns the digest as a binary string.
use integer;
{
################################### SHA1 block code generator
# The per-block routine SHA1::block is not written out by hand. The helpers
# below emit its Perl source one round at a time and the assembled text is
# eval'ed further down.
#
# @V holds the names of the five working variables. It is rotated after every
# round so each round template sees the variables in the roles it expects.
my @V = ('$A','$B','$C','$D','$E');
my $i;
# Return source text that computes the message-schedule word for round $i,
# stores it in $W[$i%16] and evaluates to it (XOR of four earlier words,
# rotated left by one). The (1<<31)<<1 test is true only when integer shifts
# keep more than 32 bits; in that case the emitted code masks to 32 bits.
sub XUpdate {
my $ret;
$ret="(\$T=\$W[($i-16)%16]^\$W[($i-14)%16]^\$W[($i-8)%16]^\$W[($i-3)%16],\n\t";
if ((1<<31)<<1) {
$ret.=" \$W[$i%16]=((\$T<<1)|(\$T>>31))&0xffffffff)\n\t ";
} else {
$ret.=" \$W[$i%16]=(\$T<<1)|((\$T>>31)&1))\n\t ";
}
}
# Return the source text shared by the end of every round: the rotate-left-5
# term that closes the "$e+=" expression, followed by the rotate of $b by 30.
# The wide-integer variant additionally masks $e to 32 bits.
sub tail {
my ($a,$b,$c,$d,$e)=@V;
my $ret;
if ((1<<31)<<1) {
$ret.="(($a<<5)|($a>>27));\n\t";
$ret.="$b=($b<<30)|($b>>2); $e&=0xffffffff; #$b&=0xffffffff;\n\t";
} else {
$ret.="(($a<<5)|($a>>27)&0x1f);\n\t";
$ret.="$b=($b<<30)|($b>>2)&0x3fffffff;\n\t";
}
$ret;
}
# Round templates. Each returns the source of one round for the current
# variable assignment in @V; they differ in the round constant, the boolean
# function and whether the schedule word is read directly or via XUpdate.
sub BODY_00_15 {
my ($a,$b,$c,$d,$e)=@V;
"$e+=\$W[$i]+0x5a827999+((($c^$d)&$b)^$d)+".tail();
}
sub BODY_16_19 {
my ($a,$b,$c,$d,$e)=@V;
"$e+=".XUpdate()."+0x5a827999+((($c^$d)&$b)^$d)+".tail();
}
sub BODY_20_39 {
my ($a,$b,$c,$d,$e)=@V;
"$e+=".XUpdate()."+0x6ed9eba1+($b^$c^$d)+".tail();
}
sub BODY_40_59 {
my ($a,$b,$c,$d,$e)=@V;
"$e+=".XUpdate()."+0x8f1bbcdc+(($b&$c)|(($b|$c)&$d))+".tail();
}
sub BODY_60_79 {
my ($a,$b,$c,$d,$e)=@V;
"$e+=".XUpdate()."+0xca62c1d6+($b^$c^$d)+".tail();
}
# Prologue of the generated routine: unpack the 64-byte block into sixteen
# big-endian 32-bit words and load the chaining state into $A..$E.
my $sha1_impl =
'sub block {
my $self = @_[0];
my @W = unpack("N16",@_[1]);
my ($A,$B,$C,$D,$E,$T) = @{$self->{H}};
';
# Only emitted when integers are wider than 32 bits (same test as above).
$sha1_impl.='
$A &= 0xffffffff;
$B &= 0xffffffff;
' if ((1<<31)<<1);
# Emit all 80 rounds, rotating the variable names after each one.
for($i=0;$i<16;$i++){ $sha1_impl.=BODY_00_15(); unshift(@V,pop(@V)); }
for(;$i<20;$i++) { $sha1_impl.=BODY_16_19(); unshift(@V,pop(@V)); }
for(;$i<40;$i++) { $sha1_impl.=BODY_20_39(); unshift(@V,pop(@V)); }
for(;$i<60;$i++) { $sha1_impl.=BODY_40_59(); unshift(@V,pop(@V)); }
for(;$i<80;$i++) { $sha1_impl.=BODY_60_79(); unshift(@V,pop(@V)); }
# Epilogue: add the working variables back into the chaining state.
$sha1_impl.='
$self->{H}[0]+=$A; $self->{H}[1]+=$B; $self->{H}[2]+=$C;
$self->{H}[3]+=$D; $self->{H}[4]+=$E; }';
#print $sha1_impl,"\n";
eval($sha1_impl); # generate code
}
# Init(): create a new hash object with the initial chaining values in {H}
# and the running byte count {N} set to zero.
sub Init {
my $class = shift; # multiple instances...
my $self = {};
bless $self,$class;
$self->{H} = [0x67452301,0xefcdab89,0x98badcfe,0x10325476,0xc3d2e1f0];
$self->{N} = 0;
return $self;
}
# Update(LIST): feed each string in LIST into the hash. Complete 64-byte
# blocks are processed immediately; a trailing partial block is kept in
# {buf} for the next call. Returns the object.
sub Update {
my $self = shift;
my $msg;
foreach $msg (@_) {
my $len = length($msg);
my $num = length($self->{buf});
my $off = 0;
$self->{N} += $len;
# Not enough data to complete a block yet: just buffer it.
if (($num+$len)<64)
{ $self->{buf} .= $msg; next; }
# Top up the pending partial block from the start of $msg and process it.
elsif ($num)
{ $self->{buf} .= substr($msg,0,($off=64-$num));
$self->block($self->{buf});
}
# Process the remaining whole blocks straight from $msg.
while(($off+64) <= $len)
{ $self->block(substr($msg,$off,64));
$off += 64;
}
# Whatever is left (possibly nothing) becomes the new pending data.
$self->{buf} = substr($msg,$off);
}
return $self;
}
# Final(): append the 0x80 marker, zero padding and the message length in
# bits (as two 32-bit words), process the last block(s) and return the five
# state words packed big-endian.
sub Final {
my $self = shift;
my $num = length($self->{buf});
$self->{buf} .= chr(0x80); $num++;
# No room left for the 8-byte length: pad out this block, process it and
# start an empty one.
if ($num>56)
{ $self->{buf} .= chr(0)x(64-$num);
$self->block($self->{buf});
$self->{buf}=undef;
$num=0;
}
$self->{buf} .= chr(0)x(56-$num);
# {N} counts bytes; split N*8 into high and low 32-bit words.
$self->{buf} .= pack("N2",($self->{N}>>29)&0x7,$self->{N}<<3);
$self->block($self->{buf});
return pack("N*",@{$self->{H}});
}
# Selftest(): hash two fixed messages and die if either digest differs from
# the expected value. A third, one-million-byte test is left commented out.
sub Selftest {
my $hash;
$hash=SHA1->Init()->Update('abc')->Final();
die "SHA1 test#1" if (unpack("H*",$hash) ne 'a9993e364706816aba3e25717850c26c9cd0d89d');
$hash=SHA1->Init()->Update('abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq')->Final();
die "SHA1 test#2" if (unpack("H*",$hash) ne '84983e441c3bd26ebaae4aa1f95129e5e54670f1');
#$hash=SHA1->Init()->Update('a'x1000000)->Final();
#die "SHA1 test#3" if (unpack("H*",$hash) ne '34aa973cd4c4daa4f61eeb2bdbad27316534016f');
}
}
{ package HMAC;
    # HMAC layered on the SHA1 package defined earlier in this file. The
    # interface mirrors SHA1: Init(KEY), chainable Update(LIST), Final().

    # Init(KEY): begin a keyed computation. The object keeps the running
    # inner hash in {hash} and the outer-padded key block in {okey}.
    sub Init {
        my ($class, $key) = @_;
        my $self = bless {}, $class;

        # A key longer than one 64-byte block is replaced by its digest;
        # every key is then zero-padded to exactly one block.
        $key = SHA1->Init()->Update($key)->Final() if (length($key)>64);
        $key .= chr(0x00)x(64-length($key));

        my @key_bytes = unpack("C*",$key);
        my @inner_pad = map { $_ ^ 0x36 } @key_bytes;
        my @outer_pad = map { $_ ^ 0x5c } @key_bytes;

        # The inner hash starts with the inner-padded key block.
        $self->{hash} = SHA1->Init();
        $self->{hash}->Update(pack("C*",@inner_pad));
        $self->{okey} = pack("C*",@outer_pad);

        return $self;
    }

    # Update(LIST): pass the message data through to the inner hash.
    sub Update {
        my ($self, @chunks) = @_;
        $self->{hash}->Update(@chunks);
        return $self;
    }

    # Final(): finish the inner hash, then hash the outer-padded key block
    # followed by the inner digest and return that result.
    sub Final {
        my ($self) = @_;
        my $inner_digest = $self->{hash}->Final();
        my $outer = SHA1->Init();
        $outer->Update($self->{okey},$inner_digest);
        return $outer->Final();
    }

    # Selftest(): compute one fixed key/message pair and die on a mismatch.
    sub Selftest {
        my $mac = HMAC->Init('0123456789:;<=>?@ABC')->Update('Sample #2')->Final();
        die "HMAC test" if (unpack("H*",$mac) ne '0922d3405faa3d194f82a45830737d5cc6c75d24');
    }
}
1;

View file

@ -23,6 +23,7 @@ local $fips_canister_path = "";
my $fips_premain_dso_exe_path = "";
my $fips_premain_c_path = "";
my $fips_sha1_exe_path = "";
my $fips_sha1_exe_build = 1;
local $fipscanisterbuild = 0;
@ -248,6 +249,10 @@ elsif (($platform eq "netware-clib") || ($platform eq "netware-libc") ||
$BSDSOCK=1 if ($platform eq "netware-libc-bsdsock") || ($platform eq "netware-clib-bsdsock");
require 'netware.pl';
}
elsif ($platform eq "c64xplus")
{
require "TI_CGTOOLS.pl";
}
else
{
require "unix.pl";
@ -500,8 +505,16 @@ if ($fips)
{
if ($fips_sha1_exe_path eq "")
{
$fips_sha1_exe_path =
"\$(BIN_D)${o}fips_standalone_sha1$exep";
$fips_sha1_exe_path = $ENV{"FIPS_SHA1_PATH"};
if (defined $fips_sha1_exe_path)
{
$fips_sha1_exe_build = 0;
}
else
{
$fips_sha1_exe_path =
"\$(BIN_D)${o}fips_standalone_sha1$exep";
}
}
}
else
@ -545,7 +558,7 @@ if ($fips)
if ($fipscanisteronly)
{
$build_targets = "\$(O_FIPSCANISTER) \$(T_EXE)";
$build_targets = "\$(O_FIPSCANISTER)";
$libs_dep = "";
}
@ -567,9 +580,14 @@ if ($fipscanisteronly)
\$(CP) \"fips${o}fips_premain.c.sha1\" \"\$(INSTALLTOP)${o}lib\"
\$(CP) \"\$(INCO_D)${o}fips.h\" \"\$(INSTALLTOP)${o}include${o}openssl\"
\$(CP) \"\$(INCO_D)${o}fips_rand.h\" \"\$(INSTALLTOP)${o}include${o}openssl\"
\$(CP) "\$(BIN_D)${o}fips_standalone_sha1$exep" \"\$(INSTALLTOP)${o}bin\"
\$(CP) \"util${o}fipslink.pl\" \"\$(INSTALLTOP)${o}bin\"
EOF
if ($fips_sha1_exe_build)
{
$extra_install .= <<"EOF";
\$(CP) "\$(BIN_D)${o}fips_standalone_sha1$exep" \"\$(INSTALLTOP)${o}bin\"
EOF
}
}
elsif ($shlib)
{
@ -716,7 +734,7 @@ LIBS_DEP=$libs_dep
EOF
$rules=<<"EOF";
all: banner \$(TMP_D) \$(BIN_D) \$(TEST_D) \$(LIB_D) \$(INCO_D) headers \$(FIPS_SHA1_EXE) $build_targets
all: banner \$(TMP_D) \$(BIN_D) \$(TEST_D) \$(LIB_D) \$(INCO_D) headers $build_targets
banner:
$banner
@ -744,7 +762,11 @@ headers: \$(HEADER) \$(EXHEADER)
lib: \$(LIBS_DEP) \$(E_SHLIB)
exe: \$(T_EXE) \$(BIN_D)$o\$(E_EXE)$exep
exe: \$(BIN_D)$o\$(E_EXE)$exep
build_tests: \$(T_EXE)
build_algvs: \$(T_SRC) \$(BIN_D)${o}fips_algvs$exep
install: all
\$(MKDIR) \"\$(INSTALLTOP)\"
@ -846,6 +868,9 @@ if ($fips)
$rules.=&cc_compile_target("\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj",
"fips${o}fips_premain.c",
"-DFINGERPRINT_PREMAIN_DSO_LOAD \$(SHLIB_CFLAGS)");
$rules.=&cc_compile_target("\$(OBJ_D)${o}fips_algvs$obj",
"test${o}fips_algvs.c",
"\$(SHLIB_CFLAGS)");
}
foreach (values %lib_nam)
@ -878,6 +903,7 @@ EOF
}
$defs.=&do_defs("T_EXE",$test,"\$(TEST_D)",$exep);
$defs.=&do_defs("T_SRC",$test,"\$(TMP_D)",".c");
foreach (split(/\s+/,$test))
{
my $t_libs;
@ -899,8 +925,11 @@ foreach (split(/\s+/,$test))
$tt="\$(OBJ_D)${o}$t${obj}";
$rules.=&do_link_rule("\$(TEST_D)$o$t$exep",$tt,"\$(LIBS_DEP)","$t_libs \$(EX_LIBS)", $ltype);
$rules.=&do_copy_rule("\$(TMP_D)",$_,".c");
}
$rules.=&do_link_rule("\$(TEST_D)${o}fips_algvs$exep","\$(OBJ_D)${o}fips_algvs$obj","\$(LIBS_DEP)","\$(O_FIPSCANISTER) \$(EX_LIBS)", 2) if $fips;
$defs.=&do_defs("E_SHLIB",$engines . $otherlibs,"\$(ENG_D)",$shlibp);
foreach (split(/\s+/,$engines))
@ -955,20 +984,20 @@ if ($fips)
"\$(OBJ_D)${o}fips_start$obj",
"\$(FIPSOBJ)",
"\$(OBJ_D)${o}fips_end$obj",
"\$(FIPS_SHA1_EXE)", "");
"");
# FIXME
$rules.=&do_link_rule("\$(FIPS_SHA1_EXE)",
"\$(OBJ_D)${o}fips_standalone_sha1$obj \$(OBJ_D)${o}sha1dgst$obj $sha1_asm_obj",
"","\$(EX_LIBS)", 1);
"","\$(EX_LIBS)", 1) if $fips_sha1_exe_build;
}
else
{
$rules.=&do_link_rule("\$(FIPS_SHA1_EXE)",
"\$(OBJ_D)${o}fips_standalone_sha1$obj \$(O_FIPSCANISTER)",
"","", 1);
"","", 1) if $fips_sha1_exe_build;
}
$rules.=&do_link_rule("\$(PREMAIN_DSO_EXE)","\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj \$(CRYPTOOBJ) \$(O_FIPSCANISTER)","","\$(EX_LIBS)", 1);
$rules.=&do_link_rule("\$(PREMAIN_DSO_EXE)","\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj \$(CRYPTOOBJ) \$(O_FIPSCANISTER)","","\$(EX_LIBS)", 1) unless defined $ENV{"FIPS_SIG"};
}
@ -1192,6 +1221,10 @@ sub do_compile_rule
{
$ret.=&Sasm_compile_target("$to${o}$n$obj",$s,$n);
}
elsif (-f ($s="${d}${o}asm${o}${n}.asm"))
{
$ret.=&cc_compile_target("$to${o}$n$obj","$s",$ex);
}
else { die "no rule for $_"; }
}
return($ret);

169
util/msincore Executable file
View file

@ -0,0 +1,169 @@
#!/usr/bin/env perl
#
# Copyright (c) 2012 The OpenSSL Project.
#
# The script embeds fingerprint into Microsoft PE-COFF executable object.
# Look for hmac_sha1.pl in the directory this script was started from. When
# $0 carries no directory part fall back to the current directory explicitly,
# because newer perls no longer search "." on their own.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "./";
unshift(@INC,$dir);
require "hmac_sha1.pl";
######################################################################
#
# PE-COFF segment table parser by <appro@openssl.org>.
#
{ package PECOFF;
    use FileHandle;

    # dup(LIST): return a reference to a new hash built from the key/value
    # LIST, so each stored section header is an independent copy.
    sub dup { my %copy=map {$_} @_; return \%copy; }

    # Load(FILE): parse the headers of the PE-COFF image FILE and return an
    # object whose {sections} hash maps every section name to a hash of that
    # section's header fields (sh_name, sh_vsize, sh_vaddr, sh_rawsize,
    # sh_offset, sh_relocs, sh_lines, sh_nrelocls, sh_nlines, sh_flags).
    # Dies on I/O errors and when FILE is not a PE-COFF image; $! is preset
    # to 42 for the latter case (see the inline notes).
    sub Load {
        my $class = shift;
        my $self = {};
        my $FD = FileHandle->new();	# autoclose
        my $file = shift;

        bless $self,$class;

        sysopen($FD,$file,0) or die "$!";
        binmode($FD);

        #################################################
        # read IMAGE_DOS_HEADER
        #
        read($FD,my $mz,64) or die "$!";
        my @dos_header=unpack("a2C58V",$mz);

        $!=42;		# signal fipsld to revert to two-step link
        die "$file is not PE-COFF image" if ($dos_header[0] ne "MZ");

        # The last header field is the file offset of the PE signature.
        my $e_lfanew=pop(@dos_header);
        seek($FD,$e_lfanew,0) or die "$!";
        read($FD,my $magic,4) or die "$!";

        $!=42;		# signal fipsld to revert to two-step link
        die "$file is not PE-COFF image" if (unpack("V",$magic)!=0x4550);

        #################################################
        # read and parse COFF header...
        #
        read($FD,my $coff,20) or die "$!";

        my %coff_header;
        @coff_header{qw(machine nsects date syms_off nsyms opt flags)}=
            unpack("v2V3v2",$coff);

        my $strings;
        my $symsize;

        #################################################
        # load strings table
        #
        if ($coff_header{syms_off}) {
            # The string table follows the symbol table (18 bytes per entry).
            seek($FD,$coff_header{syms_off}+18*$coff_header{nsyms},0) or die "$!";
            read($FD,$strings,4) or die "$!";
            $symsize = unpack("V",$strings);
            # The size field counts its own four bytes, which have just been
            # consumed, so only $symsize-4 bytes of string data remain. An
            # empty table (size 4) must not be treated as a read failure.
            # The data is stored at offset 4 so that table offsets can be
            # used directly as indexes into $strings.
            if ($symsize>4) {
                read($FD,$strings,$symsize-4,4) or die "$!";
            }
        }

        #################################################
        # read sections
        #
        my $i;

        # seek to section headers
        seek($FD,$e_lfanew+24+$coff_header{opt},0) or die "$!";

        for ($i=0;$i<$coff_header{nsects};$i++) {
            my %coff_shdr;
            my $name;

            read($FD,my $section,40) or die "$!";

            @coff_shdr{qw(sh_name sh_vsize sh_vaddr
                          sh_rawsize sh_offset sh_relocs sh_lines
                          sh_nrelocls sh_nlines sh_flags)} =
                unpack("a8V6v2V",$section);

            $name = $coff_shdr{sh_name};
            # see if sh_name is an offset in $strings
            my ($hi,$lo) = unpack("V2",$name);
            if ($hi==0 && $lo<$symsize) {
                $name = substr($strings,$lo,64);
            }
            # Keep only the part before the first NUL byte.
            $name = (split(chr(0),$name))[0];
            $coff_shdr{sh_name} = $name;

            $self->{sections}{$name} = dup(%coff_shdr);
        }

        return $self;
    }

    # Lookup(NAME): return the header hash of section NAME, or undef when
    # the image has no such section.
    sub Lookup {
        my $self = shift;
        my $name = shift;
        return $self->{sections}{$name};
    }
}
######################################################################
#
# main()
#
# Usage: msincore [-dso|-exe] pe-coff-binary
#
# With -dso or -exe ("legacy mode") the fingerprint is only printed in hex on
# standard output. Without a flag it is written into the binary itself, at
# the start of the "fipsro" section.
my $legacy_mode;

if ($#ARGV<0 || ($#ARGV>0 && !($legacy_mode=($ARGV[0] =~ /^\-(dso|exe)$/)))) {
    print STDERR "usage: $0 [-dso|-exe] pe-coff-binary\n";
    exit(1);
}

$exe = PECOFF->Load($ARGV[$#ARGV]);

# Legacy mode only reads the binary; otherwise it is patched in place.
sysopen(FD,$ARGV[$#ARGV],$legacy_mode?0:2) or die "$!";	# 2 is read/write
binmode(FD);

# Compute the fingerprint over the file contents of the "fipstx" and
# "fipsro" sections, each bracketed by its start and end marker strings.
# Reads from the global handle FD and section table $exe; dies when a
# section or marker is missing. Returns the binary HMAC value.
sub FIPS_incore_fingerprint {
    my $ctx = HMAC->Init("etaonrishdlcupfm");
    my ($beg,$end);
    my $sect;
    my $blob;

    $sect = $exe->Lookup("fipstx") or die "no fipstx section";

    seek(FD,$sect->{sh_offset},0) or die "$!";
    read(FD,$blob,$sect->{sh_vsize}) or die "$!";

    ($beg = index($blob,"SPIFxet_ts_tXtra")) >= 0
        or die "no FIPS_text_startX";
    ($end = rindex($blob,"SPIFxet_ne_t][Xd")) >= 0
        or die "no FIPS_text_endX";

    $ctx->Update(substr($blob,$beg,$end-$beg));

    $sect = $exe->Lookup("fipsro") or die "no fipsro section";

    seek(FD,$sect->{sh_offset},0) or die "$!";
    read(FD,$blob,$sect->{sh_vsize}) or die "$!";

    # The search starts 40 bytes into the section.
    # NOTE(review): presumably this skips the slot the hex fingerprint is
    # written to below -- confirm against the fipsro layout.
    ($beg = index($blob,"SPIFdor__atarats",40)) >= 0
        or die "no FIPS_rodata_start";
    ($end = rindex($blob,"SPIFdor__ata[dne")) >= 0
        or die "no FIPS_rodata_end";

    $ctx->Update(substr($blob,$beg,$end-$beg));

    return $ctx->Final();
}

$fingerprint = FIPS_incore_fingerprint();

if ($legacy_mode) {
    print unpack("H*",$fingerprint);
} else {
    my $sect = $exe->Lookup("fipsro");
    seek(FD,$sect->{sh_offset},0) or die "$!";
    print FD unpack("H*",$fingerprint) or die "$!";
}

# The write above is buffered; a failure to flush it only shows up here, and
# must not be mistaken for a successfully fingerprinted binary.
close(FD) or die "$!";

274
util/pl/TI_CGTOOLS.pl Normal file
View file

@ -0,0 +1,274 @@
#!/usr/local/bin/perl
#
# TI_CGTOOLS.pl, Texas Instruments CGTOOLS under Unix or MSYS.
#
# Platform settings for the TI code generation tools. These are plain global
# assignments; the variables are presumably consumed by the script that
# require's this file (mk1mf.pl) -- only some of them are used below.
$ssl= "ssl";
$crypto="crypto";
# A non-shared FIPS build names the crypto library "fips" and adds a
# compatibility library; otherwise the default name is kept.
if ($fips && !$shlib)
{
$crypto="fips";
$crypto_compat = "cryptocompat.lib";
}
else
{
$crypto="crypto";
}
# Output path of the FIPS canister object when it is being built.
if ($fipscanisterbuild)
{
$fips_canister_path = "\$(LIB_D)/fipscanister.obj";
}
# $o is the path separator interpolated into the generated rules below.
$o='/';
$cp='cp';
$cp2='$(PERL) util/copy.pl -stripcr';
$mkdir='$(PERL) util/mkdir-p.pl';
$rm='rm -f';
$zlib_lib="zlib1.lib";
# Sanitize -L options for ms link: rewrite them to the /libpath: form
$l_flags =~ s/-L("\[^"]+")/\/libpath:$1/g;
$l_flags =~ s/-L(\S+)/\/libpath:$1/g;
# C compiler stuff
$cc='cl6x';
$base_cflags= " $mf_cflag";
# $f is never assigned, so it contributes nothing to $dbg_cflags.
my $f;
$opt_cflags='';
$dbg_cflags=$f.' -g -DDEBUG -D_DEBUG';
$lflags='';
# cc_compile_target(TARGET, SOURCE, EX_FLAGS): return the make rule that
# compiles $(SRC_D)/SOURCE into TARGET. The compiler is told only the output
# directory, so when the object name it derives from SOURCE differs from
# TARGET's file name an extra "mv" step renames the result.
*::cc_compile_target = sub {
    my ($target,$source,$ex_flags)=@_;
    my $src_path = "\$(SRC_D)$o$source";
    my @recipe;

    $ex_flags.=" -DMK1MF_BUILD" if ($source =~/cversion/);

    # Assembly sources of a FIPS canister build are first run through
    # fipsas.pl in "norunasm" mode.
    push(@recipe,"\$(PERL) util${o}fipsas.pl . \$< norunasm \$(CFLAG)")
        if ($fipscanisterbuild && $source=~/\.asm$/);
    push(@recipe,"\$(CC) --obj_directory=\$(OBJ_D) $ex_flags -c $src_path");

    # File name of the requested target, and the object name the compiler
    # produces (source base name plus $obj).
    (my $wanted = $target) =~ s/.*${o}([^${o}]+)/$1/;
    (my $built = $source) =~ s/.*${o}([^${o}\.]+)\..*/$1${obj}/;
    push(@recipe,"mv \$(OBJ_D)${o}$built \$(OBJ_D)${o}$wanted")
        if ($wanted ne $built);

    return("$target: $src_path\n" . join("", map { "\t$_\n" } @recipe) . "\n");
};
# perlasm_compile_target(TARGET, SOURCE, BNAME): return make rules for a
# perlasm module: one that runs the generator script SOURCE to produce
# $(TMP_D)/BNAME.asm, and one that assembles that file into TARGET.
*::perlasm_compile_target = sub {
my ($target,$source,$bname)=@_;
my $ret;
# Drop a trailing one-character extension from the base name, if any.
$bname =~ s/(.*)\.[^\.]$/$1/;
# Rule 1: generate the assembly file from the perl script.
$ret=<<___;
\$(TMP_D)$o$bname.asm: $source
\$(PERL) $source \$\@
___
# In a FIPS canister build the generated file is additionally passed through
# fipsas.pl in "norunasm" mode.
$ret .= "\t\$(PERL) util${o}fipsas.pl . \$@ norunasm \$(CFLAG)\n" if $fipscanisterbuild;
# Rule 2: assemble the generated file. This append is the last statement, so
# its value (the complete rule text) is what the sub returns.
$ret.=<<___;
$target: \$(TMP_D)$o$bname.asm
\$(ASM) --obj_directory=\$(OBJ_D) \$(TMP_D)$o$bname.asm
___
};
$mlflags='';
# Build output directories, all below c6x/.
$out_def ="c6x";
$tmp_def ="$out_def/tmp";
$inc_def="$out_def/inc";
# Pick debug or optimised flags and append the flags collected in
# $base_cflags.
if ($debug)
{
$cflags=$dbg_cflags.$base_cflags;
}
else
{
$cflags=$opt_cflags.$base_cflags;
}
# File name suffixes for objects and assembly sources.
$obj='.obj';
$asm_suffix='.asm';
$ofile="";
# EXE linking stuff
# Linking goes through the compiler driver with -z; $efile is the
# output-file option placed in front of the target name.
$link='$(CC) -z';
$efile="-o ";
$exep='.out';
$ex_libs='';
# static library stuff
$mklib='ar6x';
$ranlib='';
$plib="";
$libp=".lib";
$shlibp=($shlib)?".dll":".lib";
$lfile='-o ';
$shlib_ex_obj="";
# Assembly sources are assembled through the compiler driver as well.
$asm='$(CC) $(CFLAG) -c';
$bn_asm_obj='';
$bn_asm_src='';
$des_enc_obj='';
$des_enc_src='';
$bf_enc_obj='';
$bf_enc_src='';
# Unless assembly is disabled, turn each per-algorithm list of assembler
# modules ($mf_*_asm) into matching object and source lists via import_asm().
if (!$no_asm)
{
import_asm($mf_bn_asm, "bn", \$bn_asm_obj, \$bn_asm_src);
import_asm($mf_aes_asm, "aes", \$aes_asm_obj, \$aes_asm_src);
import_asm($mf_des_asm, "des", \$des_enc_obj, \$des_enc_src);
import_asm($mf_bf_asm, "bf", \$bf_enc_obj, \$bf_enc_src);
import_asm($mf_cast_asm, "cast", \$cast_enc_obj, \$cast_enc_src);
import_asm($mf_rc4_asm, "rc4", \$rc4_enc_obj, \$rc4_enc_src);
import_asm($mf_rc5_asm, "rc5", \$rc5_enc_obj, \$rc5_enc_src);
import_asm($mf_md5_asm, "md5", \$md5_asm_obj, \$md5_asm_src);
import_asm($mf_sha_asm, "sha", \$sha1_asm_obj, \$sha1_asm_src);
import_asm($mf_rmd_asm, "ripemd", \$rmd160_asm_obj, \$rmd160_asm_src);
import_asm($mf_wp_asm, "whrlpool", \$whirlpool_asm_obj, \$whirlpool_asm_src);
import_asm($mf_modes_asm, "modes", \$modes_asm_obj, \$modes_asm_src);
# The cpuid module lives directly in crypto/, hence the empty name.
import_asm($mf_cpuid_asm, "", \$cpuid_asm_obj, \$cpuid_asm_src);
$perl_asm = 1;
}
# do_lib_rule(OBJS, TARGET, NAME, SHLIB, IGN, BASE_ADDR): return the make
# rule that builds library TARGET from the object list OBJS.
#
# Without SHLIB the objects are archived with $(MKLIB). With SHLIB a link
# rule is produced instead; when building the crypto library of a FIPS build
# that link goes through $(FIPSLINK). NAME, when non-empty, is upper-cased
# and turned into a "/def:" option, and BASE_ADDR into a "/base:" option.
# IGN is accepted but not used.
sub do_lib_rule
{
    my($objs,$target,$name,$shlib,$ign,$base_addr) = @_;
    local($ret);

    # Use the platform path separator in the target name. (This statement
    # previously named a misspelled variable and therefore did nothing.)
    $target =~ s/\//$o/g if $o ne '/';
    my $base_arg;
    if ($base_addr ne "")
    {
        $base_arg= " /base:$base_addr";
    }
    else
    {
        $base_arg = "";
    }
    if ($name ne "")
    {
        $name =~ tr/a-z/A-Z/;
        $name = "/def:ms/${name}.def";
    }

#	$target="\$(LIB_D)$o$target";
#	$ret.="$target: $objs\n";
    if (!$shlib)
    {
#		$ret.="\t\$(RM) \$(O_$Name)\n";
        $ret.="$target: $objs\n";
        $ret.="\t\$(MKLIB) $lfile$target $objs\n";
    }
    else
    {
        # Everything except the crypto library itself links against it.
        local($ex)=($target =~ /O_CRYPTO/)?'':' $(L_CRYPTO)';
        $ex.=" $zlib_lib" if $zlib_opt == 1 && $target =~ /O_CRYPTO/;

        if ($fips && $target =~ /O_CRYPTO/)
        {
            # The FIPS link script is driven through environment variables
            # set on the same (continued) command line.
            $ret.="$target: $objs \$(PREMAIN_DSO_EXE)";
            $ret.="\n\tFIPS_LINK=\"\$(LINK)\" \\\n";
            $ret.="\tFIPS_CC=\$(CC)\\\n";
            $ret.="\tFIPS_CC_ARGS=/Fo\$(OBJ_D)${o}fips_premain.obj \$(SHLIB_CFLAGS) -c\\\n";
            $ret.="\tPREMAIN_DSO_EXE=\$(PREMAIN_DSO_EXE)\\\n";
            $ret.="\tFIPS_SHA1_EXE=\$(FIPS_SHA1_EXE)\\\n";
            $ret.="\tFIPS_TARGET=$target\\\n";
            $ret.="\tFIPSLIB_D=\$(FIPSLIB_D)\\\n";
            $ret.="\t\$(FIPSLINK) \$(MLFLAGS) /map $base_arg $efile$target ";
            $ret.="$name \$(SHLIB_EX_OBJ) $objs \$(EX_LIBS) ";
            $ret.="\$(OBJ_D)${o}fips_premain.obj $ex\n";
        }
        else
        {
            $ret.="$target: $objs";
            $ret.="\n\t\$(LINK) \$(MLFLAGS) $efile$target $name \$(SHLIB_EX_OBJ) $objs $ex \$(EX_LIBS)\n";
        }

        $ret.="\tIF EXIST \$@.manifest mt -nologo -manifest \$@.manifest -outputresource:\$@;2\n\n";
    }
    $ret.="\n";
    return($ret);
}
# do_link_rule(TARGET, FILES, DEP_LIBS, LIBS, STANDALONE): return the make
# rule that links executable TARGET from FILES.
#
# STANDALONE selects the recipe:
#   1     - plain link of FILES and LIBS; $(EX_LIBS) is added in front when
#           FILES mention O_FIPSCANISTER outside a canister build;
#   2     - link against $(O_FIPSCANISTER) using the application linker
#           command file, then run incore6x on the result;
#   other - link with $(APP_EX_OBJ) ahead of FILES and LIBS.
sub do_link_rule
{
    my($target,$files,$dep_libs,$libs,$standalone)=@_;
    local($ret,$_);

    # Both statements below previously named misspelled variables, so the
    # separator was never applied and bname() was handed an undefined value.
    $files =~ s/\//$o/g if $o ne '/';
    $n=&bname($target);
    $ret.="$target: $files $dep_libs\n";
    if ($standalone == 1)
    {
        # Recipe lines must begin with a tab for make; the other branches
        # already do, this one used a blank.
        $ret.="\t\$(LINK) \$(LFLAGS) $efile$target ";
        $ret.= "\$(EX_LIBS) " if ($files =~ /O_FIPSCANISTER/ && !$fipscanisterbuild);
        $ret.="$files $libs\n";
    }
    elsif ($standalone == 2)
    {
        $ret.="\t\$(LINK) \$(LFLAGS) $efile$target $files \$(O_FIPSCANISTER) $out_def/application.cmd\n";
        $ret.="\t$out_def/incore6x $target\n\n";
    }
    else
    {
        $ret.="\t\$(LINK) \$(LFLAGS) $efile$target ";
        $ret.="\t\$(APP_EX_OBJ) $files $libs\n";
    }
    return($ret);
}
# do_rlink_rule(TARGET, RL_START, RL_MID, RL_END, DEP_LIBS, LIBS): return the
# make rule that builds the relocatable object TARGET from the start, middle
# and end object lists (linked in that order with "-r" and the canister
# linker command file). The recipe then records TARGET's fingerprint in
# TARGET.sha1 and copies fips_premain.c and its .sha1 file into $(LIB_D).
# LIBS is accepted but not used.
sub do_rlink_rule
{
    local($target,$rl_start, $rl_mid, $rl_end,$dep_libs,$libs)=@_;
    local($ret,$_);
    my $files = "$rl_start $rl_mid $rl_end";

    # Use the platform path separator in the object list. (This statement
    # previously named a misspelled variable and therefore did nothing.)
    $files =~ s/\//$o/g if $o ne '/';
    $n=&bname($target);
    $ret.="$target: $files $dep_libs\n";
    $ret.="\t\$(LINK) -r $lfile$target $files $out_def/fipscanister.cmd\n";
    $ret.="\t\$(PERL) $out_def${o}fips_standalone_sha1 $target > ${target}.sha1\n";
    $ret.="\t\$(PERL) util${o}copy.pl -stripcr fips${o}fips_premain.c \$(LIB_D)${o}fips_premain.c\n";
    $ret.="\t\$(CP) fips${o}fips_premain.c.sha1 \$(LIB_D)${o}fips_premain.c.sha1\n";
    $ret.="\n";
    return($ret);
}
# import_asm(MF_VAR, ASM_NAME, OREF, SREF): translate a space-separated list
# of assembler object names (MF_VAR, e.g. "foo.o bar.o") into
#   $$SREF - the matching ".asm" source paths, located in crypto/ASM_NAME/asm/
#            or directly in crypto/ when ASM_NAME is empty, and
#   $$OREF - the matching ".obj" paths below $(TMP_D).
# Both results are space-separated with no trailing blank.
sub import_asm
{
    my ($mf_var, $asm_name, $oref, $sref) = @_;
    my $asm_dir;
    if ($asm_name eq "")
    {
        $asm_dir = "crypto$o";
    }
    else
    {
        # The braces matter: "$oasm" would be read as one (undefined)
        # variable and silently drop the "asm" directory from the path.
        $asm_dir = "crypto${o}${asm_name}${o}asm${o}";
    }

    $$oref = "";
    $$sref = "";
    $mf_var =~ s/\.o//g;

    foreach (split(/ /, $mf_var))
    {
        $$sref .= $asm_dir . $_ . ".asm ";
    }
    foreach (split(/ /, $mf_var))
    {
        # NOTE(review): the object paths use a literal backslash and ".obj"
        # rather than $o and $obj -- confirm this is intended for this
        # toolchain before relying on these lists.
        $$oref .= "\$(TMP_D)\\" . $_ . ".obj ";
    }
    $$oref =~ s/ $//;
    $$sref =~ s/ $//;
}
1;

View file

@ -123,7 +123,7 @@ elsif ($FLAVOR =~ /CE/)
}
$cc='$(CC)';
$base_cflags=' /W3 /WX /GF /Gy /nologo -DUNICODE -D_UNICODE -DOPENSSL_SYSNAME_WINCE -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DDSO_WIN32 -DNO_CHMOD -DOPENSSL_SMALL_FOOTPRINT';
$base_cflags=' /W3 /GF /Gy /nologo -DUNICODE -D_UNICODE -DOPENSSL_SYSNAME_WINCE -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DDSO_WIN32 -DNO_CHMOD -DOPENSSL_SMALL_FOOTPRINT';
$base_cflags.=" $wcecdefs";
$base_cflags.=' -I$(WCECOMPAT)/include' if (defined($ENV{'WCECOMPAT'}));
$base_cflags.=' -I$(PORTSDK_LIBPATH)/../../include' if (defined($ENV{'PORTSDK_LIBPATH'}));
@ -174,12 +174,12 @@ $rsc="rc";
$efile="/out:";
$exep='.exe';
if ($no_sock) { $ex_libs=''; }
elsif ($FLAVOR =~ /CE/) { $ex_libs='winsock.lib'; }
elsif ($FLAVOR =~ /CE/) { $ex_libs='ws2.lib'; }
else { $ex_libs='ws2_32.lib'; }
if ($FLAVOR =~ /CE/)
{
$ex_libs.=' $(WCECOMPAT)/lib/wcecompatex.lib' if (defined($ENV{'WCECOMPAT'}));
$ex_libs.=' $(WCECOMPAT)/lib/wcecompatex.lib crypt32.lib coredll.lib corelibc.lib' if (defined($ENV{'WCECOMPAT'}));
$ex_libs.=' $(PORTSDK_LIBPATH)/portlib.lib' if (defined($ENV{'PORTSDK_LIBPATH'}));
$ex_libs.=' /nodefaultlib:oldnames.lib coredll.lib corelibc.lib' if ($ENV{'TARGETCPU'} eq "X86");
}
@ -389,8 +389,9 @@ sub do_rlink_rule
$file =~ s/\//$o/g if $o ne '/';
$n=&bname($targer);
$ret.="$target: $files $dep_libs \$(FIPS_SHA1_EXE)\n";
$ret.="\t\$(PERL) ms\\segrenam.pl \$\$a $rl_start\n";
$ret.="$target: $files $dep_libs";
$ret.=" \$(FIPS_SHA1_EXE)" unless defined $ENV{"FIPS_SHA1_PATH"};
$ret.="\n\t\$(PERL) ms\\segrenam.pl \$\$a $rl_start\n";
$ret.="\t\$(PERL) ms\\segrenam.pl \$\$b $rl_mid\n";
$ret.="\t\$(PERL) ms\\segrenam.pl \$\$c $rl_end\n";
$ret.="\t\$(MKLIB) $lfile$target @<<\n\t$files\n<<\n";