416b19c6fc
following would happen on Solaris: m4 -B 8192 asm/des_enc.m4 > asm/des_enc-sparc.S gcc -DOPENSSL_SYSNAME_ULTRASPARC -DOPENSSL_NO_STATIC_ENGINE -fPIC -DOPENSSL_PIC -DOPENSSL_THREADS -D_REENTRANT -DDSO_DLFCN -DHAVE_DLFCN_H -DOPENSSL_NO_KRB5 -m32 -mcpu=ultrasparc -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W -DMD5_ASM -c -o asm/des_enc-sparc.o asm/des_enc-sparc.S asm/des_enc-sparc.S:2007: unterminated character constant asm/des_enc-sparc.S:2008: unterminated character constant
1974 lines
41 KiB
Text
1974 lines
41 KiB
Text
! des_enc.m4
|
|
! des_enc.S (generated from des_enc.m4)
|
|
!
|
|
! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
|
|
!
|
|
! Version 1.0. 32-bit version.
|
|
!
|
|
! June 8, 2000.
|
|
!
|
|
! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
|
|
! by Andy Polyakov.
|
|
!
|
|
! January 1, 2003.
|
|
!
|
|
! Assembler version: Copyright Svend Olaf Mikkelsen.
|
|
!
|
|
! Original C code: Copyright Eric A. Young.
|
|
!
|
|
! This code can be freely used by LibDES/SSLeay/OpenSSL users.
|
|
!
|
|
! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
|
|
!
|
|
! This version can be redistributed.
|
|
!
|
|
! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
|
|
!
|
|
! Global registers 1 to 5 are used. This is the same as done by the
|
|
! cc compiler. The UltraSPARC load/store little endian feature is used.
|
|
!
|
|
! Instruction grouping often refers to one CPU cycle.
|
|
!
|
|
! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
|
|
!
|
|
! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
|
|
!
|
|
! Performance improvement according to './apps/openssl speed des'
|
|
!
|
|
! 32-bit build:
|
|
! 23% faster than cc-5.2 -xarch=v8plus -xO5
|
|
! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
|
|
! 64-bit build:
|
|
! 50% faster than cc-5.2 -xarch=v9 -xO5
|
|
! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
|
|
!
|
|
|
|
.ident "des_enc.m4 2.0"
|
|
|
|
#if defined(__SUNPRO_C) && defined(__sparcv9)
|
|
# define ABI64 /* They've said -xarch=v9 at command line */
|
|
#elif defined(__GNUC__) && defined(__arch64__)
|
|
# define ABI64 /* They've said -m64 at command line */
|
|
#endif
|
|
|
|
#ifdef ABI64
|
|
.register %g2,#scratch
|
|
.register %g3,#scratch
|
|
# define FRAME -192
|
|
# define BIAS 2047
|
|
# define LDPTR ldx
|
|
# define STPTR stx
|
|
# define ARG0 128
|
|
# define ARGSZ 8
|
|
# ifndef OPENSSL_SYSNAME_ULTRASPARC
|
|
# define OPENSSL_SYSNAME_ULTRASPARC
|
|
# endif
|
|
#else
|
|
# define FRAME -96
|
|
# define BIAS 0
|
|
# define LDPTR ld
|
|
# define STPTR st
|
|
# define ARG0 68
|
|
# define ARGSZ 4
|
|
#endif
|
|
|
|
#define LOOPS 7
|
|
|
|
#define global0 %g0
|
|
#define global1 %g1
|
|
#define global2 %g2
|
|
#define global3 %g3
|
|
#define global4 %g4
|
|
#define global5 %g5
|
|
|
|
#define local0 %l0
|
|
#define local1 %l1
|
|
#define local2 %l2
|
|
#define local3 %l3
|
|
#define local4 %l4
|
|
#define local5 %l5
|
|
#define local7 %l6
|
|
#define local6 %l7
|
|
|
|
#define in0 %i0
|
|
#define in1 %i1
|
|
#define in2 %i2
|
|
#define in3 %i3
|
|
#define in4 %i4
|
|
#define in5 %i5
|
|
#define in6 %i6
|
|
#define in7 %i7
|
|
|
|
#define out0 %o0
|
|
#define out1 %o1
|
|
#define out2 %o2
|
|
#define out3 %o3
|
|
#define out4 %o4
|
|
#define out5 %o5
|
|
#define out6 %o6
|
|
#define out7 %o7
|
|
|
|
#define stub stb
|
|
|
|
changequote({,})
|
|
|
|
|
|
! Macro definitions:
|
|
|
|
|
|
! {ip_macro}
|
|
!
|
|
! The logic used in initial and final permutations is the same as in
|
|
! the C code. The permutations are done with a clever shift, xor, and
|
|
! technique.
|
|
!
|
|
! The macro also loads address sbox 1 to 5 to global 1 to 5, address
|
|
! sbox 6 to local6, and addres sbox 8 to out3.
|
|
!
|
|
! Rotates the halfs 3 left to bring the sbox bits in convenient positions.
|
|
!
|
|
! Loads key first round from address in parameter 5 to out0, out1.
|
|
!
|
|
! After the the original LibDES initial permutation, the resulting left
|
|
! is in the variable initially used for right and vice versa. The macro
|
|
! implements the possibility to keep the halfs in the original registers.
|
|
!
|
|
! parameter 1 left
|
|
! parameter 2 right
|
|
! parameter 3 result left (modify in first round)
|
|
! parameter 4 result right (use in first round)
|
|
! parameter 5 key address
|
|
! parameter 6 1/2 for include encryption/decryption
|
|
! parameter 7 1 for move in1 to in3
|
|
! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
|
|
! parameter 9 1 for load ks3 and ks2 to in4 and in3
|
|
|
|
define(ip_macro, {
|
|
|
|
! {ip_macro}
|
|
! $1 $2 $4 $3 $5 $6 $7 $8 $9
|
|
|
|
ld [out2+256], local1
|
|
srl $2, 4, local4
|
|
|
|
xor local4, $1, local4
|
|
ifelse($7,1,{mov in1, in3},{nop})
|
|
|
|
ld [out2+260], local2
|
|
and local4, local1, local4
|
|
ifelse($8,1,{mov in3, in4},{})
|
|
ifelse($8,2,{mov in4, in3},{})
|
|
|
|
ld [out2+280], out4 ! loop counter
|
|
sll local4, 4, local1
|
|
xor $1, local4, $1
|
|
|
|
ld [out2+264], local3
|
|
srl $1, 16, local4
|
|
xor $2, local1, $2
|
|
|
|
ifelse($9,1,{LDPTR KS3, in4},{})
|
|
xor local4, $2, local4
|
|
nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
|
|
|
|
ifelse($9,1,{LDPTR KS2, in3},{})
|
|
and local4, local2, local4
|
|
nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
|
|
|
|
sll local4, 16, local1
|
|
xor $2, local4, $2
|
|
|
|
srl $2, 2, local4
|
|
xor $1, local1, $1
|
|
|
|
sethi %hi(16711680), local5
|
|
xor local4, $1, local4
|
|
|
|
and local4, local3, local4
|
|
or local5, 255, local5
|
|
|
|
sll local4, 2, local2
|
|
xor $1, local4, $1
|
|
|
|
srl $1, 8, local4
|
|
xor $2, local2, $2
|
|
|
|
xor local4, $2, local4
|
|
add global1, 768, global4
|
|
|
|
and local4, local5, local4
|
|
add global1, 1024, global5
|
|
|
|
ld [out2+272], local7
|
|
sll local4, 8, local1
|
|
xor $2, local4, $2
|
|
|
|
srl $2, 1, local4
|
|
xor $1, local1, $1
|
|
|
|
ld [$5], out0 ! key 7531
|
|
xor local4, $1, local4
|
|
add global1, 256, global2
|
|
|
|
ld [$5+4], out1 ! key 8642
|
|
and local4, local7, local4
|
|
add global1, 512, global3
|
|
|
|
sll local4, 1, local1
|
|
xor $1, local4, $1
|
|
|
|
sll $1, 3, local3
|
|
xor $2, local1, $2
|
|
|
|
sll $2, 3, local2
|
|
add global1, 1280, local6 ! address sbox 8
|
|
|
|
srl $1, 29, local4
|
|
add global1, 1792, out3 ! address sbox 8
|
|
|
|
srl $2, 29, local1
|
|
or local4, local3, $4
|
|
|
|
or local2, local1, $3
|
|
|
|
ifelse($6, 1, {
|
|
|
|
ld [out2+284], local5 ! 0x0000FC00 used in the rounds
|
|
or local2, local1, $3
|
|
xor $4, out0, local1
|
|
|
|
call .des_enc.1
|
|
and local1, 252, local1
|
|
|
|
},{})
|
|
|
|
ifelse($6, 2, {
|
|
|
|
ld [out2+284], local5 ! 0x0000FC00 used in the rounds
|
|
or local2, local1, $3
|
|
xor $4, out0, local1
|
|
|
|
call .des_dec.1
|
|
and local1, 252, local1
|
|
|
|
},{})
|
|
})
|
|
|
|
|
|
! {rounds_macro}
|
|
!
|
|
! The logic used in the DES rounds is the same as in the C code,
|
|
! except that calculations for sbox 1 and sbox 5 begin before
|
|
! the previous round is finished.
|
|
!
|
|
! In each round one half (work) is modified based on key and the
|
|
! other half (use).
|
|
!
|
|
! In this version we do two rounds in a loop repeated 7 times
|
|
! and two rounds seperately.
|
|
!
|
|
! One half has the bits for the sboxes in the following positions:
|
|
!
|
|
! 777777xx555555xx333333xx111111xx
|
|
!
|
|
! 88xx666666xx444444xx222222xx8888
|
|
!
|
|
! The bits for each sbox are xor-ed with the key bits for that box.
|
|
! The above xx bits are cleared, and the result used for lookup in
|
|
! the sbox table. Each sbox entry contains the 4 output bits permuted
|
|
! into 32 bits according to the P permutation.
|
|
!
|
|
! In the description of DES, left and right are switched after
|
|
! each round, except after last round. In this code the original
|
|
! left and right are kept in the same register in all rounds, meaning
|
|
! that after the 16 rounds the result for right is in the register
|
|
! originally used for left.
|
|
!
|
|
! parameter 1 first work (left in first round)
|
|
! parameter 2 first use (right in first round)
|
|
! parameter 3 enc/dec 1/-1
|
|
! parameter 4 loop label
|
|
! parameter 5 key address register
|
|
! parameter 6 optional address for key next encryption/decryption
|
|
! parameter 7 not empty for include retl
|
|
!
|
|
! also compares in2 to 8
|
|
|
|
define(rounds_macro, {
|
|
|
|
! {rounds_macro}
|
|
! $1 $2 $3 $4 $5 $6 $7 $8 $9
|
|
|
|
xor $2, out0, local1
|
|
|
|
ld [out2+284], local5 ! 0x0000FC00
|
|
ba $4
|
|
and local1, 252, local1
|
|
|
|
.align 32
|
|
|
|
$4:
|
|
! local6 is address sbox 6
|
|
! out3 is address sbox 8
|
|
! out4 is loop counter
|
|
|
|
ld [global1+local1], local1
|
|
xor $2, out1, out1 ! 8642
|
|
xor $2, out0, out0 ! 7531
|
|
fmovs %f0, %f0 ! fxor used for alignment
|
|
|
|
srl out1, 4, local0 ! rotate 4 right
|
|
and out0, local5, local3 ! 3
|
|
fmovs %f0, %f0
|
|
|
|
ld [$5+$3*8], local7 ! key 7531 next round
|
|
srl local3, 8, local3 ! 3
|
|
and local0, 252, local2 ! 2
|
|
fmovs %f0, %f0
|
|
|
|
ld [global3+local3],local3 ! 3
|
|
sll out1, 28, out1 ! rotate
|
|
xor $1, local1, $1 ! 1 finished, local1 now sbox 7
|
|
|
|
ld [global2+local2], local2 ! 2
|
|
srl out0, 24, local1 ! 7
|
|
or out1, local0, out1 ! rotate
|
|
|
|
ldub [out2+local1], local1 ! 7 (and 0xFC)
|
|
srl out1, 24, local0 ! 8
|
|
and out1, local5, local4 ! 4
|
|
|
|
ldub [out2+local0], local0 ! 8 (and 0xFC)
|
|
srl local4, 8, local4 ! 4
|
|
xor $1, local2, $1 ! 2 finished local2 now sbox 6
|
|
|
|
ld [global4+local4],local4 ! 4
|
|
srl out1, 16, local2 ! 6
|
|
xor $1, local3, $1 ! 3 finished local3 now sbox 5
|
|
|
|
ld [out3+local0],local0 ! 8
|
|
and local2, 252, local2 ! 6
|
|
add global1, 1536, local5 ! address sbox 7
|
|
|
|
ld [local6+local2], local2 ! 6
|
|
srl out0, 16, local3 ! 5
|
|
xor $1, local4, $1 ! 4 finished
|
|
|
|
ld [local5+local1],local1 ! 7
|
|
and local3, 252, local3 ! 5
|
|
xor $1, local0, $1 ! 8 finished
|
|
|
|
ld [global5+local3],local3 ! 5
|
|
xor $1, local2, $1 ! 6 finished
|
|
subcc out4, 1, out4
|
|
|
|
ld [$5+$3*8+4], out0 ! key 8642 next round
|
|
xor $1, local7, local2 ! sbox 5 next round
|
|
xor $1, local1, $1 ! 7 finished
|
|
|
|
srl local2, 16, local2 ! sbox 5 next round
|
|
xor $1, local3, $1 ! 5 finished
|
|
|
|
ld [$5+$3*16+4], out1 ! key 8642 next round again
|
|
and local2, 252, local2 ! sbox5 next round
|
|
! next round
|
|
xor $1, local7, local7 ! 7531
|
|
|
|
ld [global5+local2], local2 ! 5
|
|
srl local7, 24, local3 ! 7
|
|
xor $1, out0, out0 ! 8642
|
|
|
|
ldub [out2+local3], local3 ! 7 (and 0xFC)
|
|
srl out0, 4, local0 ! rotate 4 right
|
|
and local7, 252, local1 ! 1
|
|
|
|
sll out0, 28, out0 ! rotate
|
|
xor $2, local2, $2 ! 5 finished local2 used
|
|
|
|
srl local0, 8, local4 ! 4
|
|
and local0, 252, local2 ! 2
|
|
ld [local5+local3], local3 ! 7
|
|
|
|
srl local0, 16, local5 ! 6
|
|
or out0, local0, out0 ! rotate
|
|
ld [global2+local2], local2 ! 2
|
|
|
|
srl out0, 24, local0
|
|
ld [$5+$3*16], out0 ! key 7531 next round
|
|
and local4, 252, local4 ! 4
|
|
|
|
and local5, 252, local5 ! 6
|
|
ld [global4+local4], local4 ! 4
|
|
xor $2, local3, $2 ! 7 finished local3 used
|
|
|
|
and local0, 252, local0 ! 8
|
|
ld [local6+local5], local5 ! 6
|
|
xor $2, local2, $2 ! 2 finished local2 now sbox 3
|
|
|
|
srl local7, 8, local2 ! 3 start
|
|
ld [out3+local0], local0 ! 8
|
|
xor $2, local4, $2 ! 4 finished
|
|
|
|
and local2, 252, local2 ! 3
|
|
ld [global1+local1], local1 ! 1
|
|
xor $2, local5, $2 ! 6 finished local5 used
|
|
|
|
ld [global3+local2], local2 ! 3
|
|
xor $2, local0, $2 ! 8 finished
|
|
add $5, $3*16, $5 ! enc add 8, dec add -8 to key pointer
|
|
|
|
ld [out2+284], local5 ! 0x0000FC00
|
|
xor $2, out0, local4 ! sbox 1 next round
|
|
xor $2, local1, $2 ! 1 finished
|
|
|
|
xor $2, local2, $2 ! 3 finished
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
bne,pt %icc, $4
|
|
#else
|
|
bne $4
|
|
#endif
|
|
and local4, 252, local1 ! sbox 1 next round
|
|
|
|
! two rounds more:
|
|
|
|
ld [global1+local1], local1
|
|
xor $2, out1, out1
|
|
xor $2, out0, out0
|
|
|
|
srl out1, 4, local0 ! rotate
|
|
and out0, local5, local3
|
|
|
|
ld [$5+$3*8], local7 ! key 7531
|
|
srl local3, 8, local3
|
|
and local0, 252, local2
|
|
|
|
ld [global3+local3],local3
|
|
sll out1, 28, out1 ! rotate
|
|
xor $1, local1, $1 ! 1 finished, local1 now sbox 7
|
|
|
|
ld [global2+local2], local2
|
|
srl out0, 24, local1
|
|
or out1, local0, out1 ! rotate
|
|
|
|
ldub [out2+local1], local1
|
|
srl out1, 24, local0
|
|
and out1, local5, local4
|
|
|
|
ldub [out2+local0], local0
|
|
srl local4, 8, local4
|
|
xor $1, local2, $1 ! 2 finished local2 now sbox 6
|
|
|
|
ld [global4+local4],local4
|
|
srl out1, 16, local2
|
|
xor $1, local3, $1 ! 3 finished local3 now sbox 5
|
|
|
|
ld [out3+local0],local0
|
|
and local2, 252, local2
|
|
add global1, 1536, local5 ! address sbox 7
|
|
|
|
ld [local6+local2], local2
|
|
srl out0, 16, local3
|
|
xor $1, local4, $1 ! 4 finished
|
|
|
|
ld [local5+local1],local1
|
|
and local3, 252, local3
|
|
xor $1, local0, $1
|
|
|
|
ld [global5+local3],local3
|
|
xor $1, local2, $1 ! 6 finished
|
|
cmp in2, 8
|
|
|
|
ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
|
|
xor $1, local7, local2 ! sbox 5 next round
|
|
xor $1, local1, $1 ! 7 finished
|
|
|
|
ld [$5+$3*8+4], out0
|
|
srl local2, 16, local2 ! sbox 5 next round
|
|
xor $1, local3, $1 ! 5 finished
|
|
|
|
and local2, 252, local2
|
|
! next round (two rounds more)
|
|
xor $1, local7, local7 ! 7531
|
|
|
|
ld [global5+local2], local2
|
|
srl local7, 24, local3
|
|
xor $1, out0, out0 ! 8642
|
|
|
|
ldub [out2+local3], local3
|
|
srl out0, 4, local0 ! rotate
|
|
and local7, 252, local1
|
|
|
|
sll out0, 28, out0 ! rotate
|
|
xor $2, local2, $2 ! 5 finished local2 used
|
|
|
|
srl local0, 8, local4
|
|
and local0, 252, local2
|
|
ld [local5+local3], local3
|
|
|
|
srl local0, 16, local5
|
|
or out0, local0, out0 ! rotate
|
|
ld [global2+local2], local2
|
|
|
|
srl out0, 24, local0
|
|
ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
|
|
and local4, 252, local4
|
|
|
|
and local5, 252, local5
|
|
ld [global4+local4], local4
|
|
xor $2, local3, $2 ! 7 finished local3 used
|
|
|
|
and local0, 252, local0
|
|
ld [local6+local5], local5
|
|
xor $2, local2, $2 ! 2 finished local2 now sbox 3
|
|
|
|
srl local7, 8, local2 ! 3 start
|
|
ld [out3+local0], local0
|
|
xor $2, local4, $2
|
|
|
|
and local2, 252, local2
|
|
ld [global1+local1], local1
|
|
xor $2, local5, $2 ! 6 finished local5 used
|
|
|
|
ld [global3+local2], local2
|
|
srl $1, 3, local3
|
|
xor $2, local0, $2
|
|
|
|
ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
|
|
sll $1, 29, local4
|
|
xor $2, local1, $2
|
|
|
|
ifelse($7,{}, {}, {retl})
|
|
xor $2, local2, $2
|
|
})
|
|
|
|
|
|
! {fp_macro}
|
|
!
|
|
! parameter 1 right (original left)
|
|
! parameter 2 left (original right)
|
|
! parameter 3 1 for optional store to [in0]
|
|
! parameter 4 1 for load input/output address to local5/7
|
|
!
|
|
! The final permutation logic switches the halfes, meaning that
|
|
! left and right ends up the the registers originally used.
|
|
|
|
define(fp_macro, {
|
|
|
|
! {fp_macro}
|
|
! $1 $2 $3 $4 $5 $6 $7 $8 $9
|
|
|
|
! initially undo the rotate 3 left done after initial permutation
|
|
! original left is received shifted 3 right and 29 left in local3/4
|
|
|
|
sll $2, 29, local1
|
|
or local3, local4, $1
|
|
|
|
srl $2, 3, $2
|
|
sethi %hi(0x55555555), local2
|
|
|
|
or $2, local1, $2
|
|
or local2, %lo(0x55555555), local2
|
|
|
|
srl $2, 1, local3
|
|
sethi %hi(0x00ff00ff), local1
|
|
xor local3, $1, local3
|
|
or local1, %lo(0x00ff00ff), local1
|
|
and local3, local2, local3
|
|
sethi %hi(0x33333333), local4
|
|
sll local3, 1, local2
|
|
|
|
xor $1, local3, $1
|
|
|
|
srl $1, 8, local3
|
|
xor $2, local2, $2
|
|
xor local3, $2, local3
|
|
or local4, %lo(0x33333333), local4
|
|
and local3, local1, local3
|
|
sethi %hi(0x0000ffff), local1
|
|
sll local3, 8, local2
|
|
|
|
xor $2, local3, $2
|
|
|
|
srl $2, 2, local3
|
|
xor $1, local2, $1
|
|
xor local3, $1, local3
|
|
or local1, %lo(0x0000ffff), local1
|
|
and local3, local4, local3
|
|
sethi %hi(0x0f0f0f0f), local4
|
|
sll local3, 2, local2
|
|
|
|
ifelse($4,1, {LDPTR INPUT, local5})
|
|
xor $1, local3, $1
|
|
|
|
ifelse($4,1, {LDPTR OUTPUT, local7})
|
|
srl $1, 16, local3
|
|
xor $2, local2, $2
|
|
xor local3, $2, local3
|
|
or local4, %lo(0x0f0f0f0f), local4
|
|
and local3, local1, local3
|
|
sll local3, 16, local2
|
|
|
|
xor $2, local3, local1
|
|
|
|
srl local1, 4, local3
|
|
xor $1, local2, $1
|
|
xor local3, $1, local3
|
|
and local3, local4, local3
|
|
sll local3, 4, local2
|
|
|
|
xor $1, local3, $1
|
|
|
|
! optional store:
|
|
|
|
ifelse($3,1, {st $1, [in0]})
|
|
|
|
xor local1, local2, $2
|
|
|
|
ifelse($3,1, {st $2, [in0+4]})
|
|
|
|
})
|
|
|
|
|
|
! {fp_ip_macro}
|
|
!
|
|
! Does initial permutation for next block mixed with
|
|
! final permutation for current block.
|
|
!
|
|
! parameter 1 original left
|
|
! parameter 2 original right
|
|
! parameter 3 left ip
|
|
! parameter 4 right ip
|
|
! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
|
|
! 2: mov in4 to in3
|
|
!
|
|
! also adds -8 to length in2 and loads loop counter to out4
|
|
|
|
define(fp_ip_macro, {
|
|
|
|
! {fp_ip_macro}
|
|
! $1 $2 $3 $4 $5 $6 $7 $8 $9
|
|
|
|
define({temp1},{out4})
|
|
define({temp2},{local3})
|
|
|
|
define({ip1},{local1})
|
|
define({ip2},{local2})
|
|
define({ip4},{local4})
|
|
define({ip5},{local5})
|
|
|
|
! $1 in local3, local4
|
|
|
|
ld [out2+256], ip1
|
|
sll out5, 29, temp1
|
|
or local3, local4, $1
|
|
|
|
srl out5, 3, $2
|
|
ifelse($5,2,{mov in4, in3})
|
|
|
|
ld [out2+272], ip5
|
|
srl $4, 4, local0
|
|
or $2, temp1, $2
|
|
|
|
srl $2, 1, temp1
|
|
xor temp1, $1, temp1
|
|
|
|
and temp1, ip5, temp1
|
|
xor local0, $3, local0
|
|
|
|
sll temp1, 1, temp2
|
|
xor $1, temp1, $1
|
|
|
|
and local0, ip1, local0
|
|
add in2, -8, in2
|
|
|
|
sll local0, 4, local7
|
|
xor $3, local0, $3
|
|
|
|
ld [out2+268], ip4
|
|
srl $1, 8, temp1
|
|
xor $2, temp2, $2
|
|
ld [out2+260], ip2
|
|
srl $3, 16, local0
|
|
xor $4, local7, $4
|
|
xor temp1, $2, temp1
|
|
xor local0, $4, local0
|
|
and temp1, ip4, temp1
|
|
and local0, ip2, local0
|
|
sll temp1, 8, temp2
|
|
xor $2, temp1, $2
|
|
sll local0, 16, local7
|
|
xor $4, local0, $4
|
|
|
|
srl $2, 2, temp1
|
|
xor $1, temp2, $1
|
|
|
|
ld [out2+264], temp2 ! ip3
|
|
srl $4, 2, local0
|
|
xor $3, local7, $3
|
|
xor temp1, $1, temp1
|
|
xor local0, $3, local0
|
|
and temp1, temp2, temp1
|
|
and local0, temp2, local0
|
|
sll temp1, 2, temp2
|
|
xor $1, temp1, $1
|
|
sll local0, 2, local7
|
|
xor $3, local0, $3
|
|
|
|
srl $1, 16, temp1
|
|
xor $2, temp2, $2
|
|
srl $3, 8, local0
|
|
xor $4, local7, $4
|
|
xor temp1, $2, temp1
|
|
xor local0, $4, local0
|
|
and temp1, ip2, temp1
|
|
and local0, ip4, local0
|
|
sll temp1, 16, temp2
|
|
xor $2, temp1, local4
|
|
sll local0, 8, local7
|
|
xor $4, local0, $4
|
|
|
|
srl $4, 1, local0
|
|
xor $3, local7, $3
|
|
|
|
srl local4, 4, temp1
|
|
xor local0, $3, local0
|
|
|
|
xor $1, temp2, $1
|
|
and local0, ip5, local0
|
|
|
|
sll local0, 1, local7
|
|
xor temp1, $1, temp1
|
|
|
|
xor $3, local0, $3
|
|
xor $4, local7, $4
|
|
|
|
sll $3, 3, local5
|
|
and temp1, ip1, temp1
|
|
|
|
sll temp1, 4, temp2
|
|
xor $1, temp1, $1
|
|
|
|
ifelse($5,1,{LDPTR KS2, in4})
|
|
sll $4, 3, local2
|
|
xor local4, temp2, $2
|
|
|
|
! reload since used as temporar:
|
|
|
|
ld [out2+280], out4 ! loop counter
|
|
|
|
srl $3, 29, local0
|
|
ifelse($5,1,{add in4, 120, in4})
|
|
|
|
ifelse($5,1,{LDPTR KS1, in3})
|
|
srl $4, 29, local7
|
|
|
|
or local0, local5, $4
|
|
or local2, local7, $3
|
|
|
|
})
|
|
|
|
|
|
|
|
! {load_little_endian}
|
|
!
|
|
! parameter 1 address
|
|
! parameter 2 destination left
|
|
! parameter 3 destination right
|
|
! parameter 4 temporar
|
|
! parameter 5 label
|
|
|
|
define(load_little_endian, {
|
|
|
|
! {load_little_endian}
|
|
! $1 $2 $3 $4 $5 $6 $7 $8 $9
|
|
|
|
! first in memory to rightmost in register
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
andcc $1, 3, global0
|
|
bne,pn %icc, $5
|
|
nop
|
|
|
|
lda [$1] 0x88, $2
|
|
add $1, 4, $4
|
|
|
|
ba,pt %icc, $5a
|
|
lda [$4] 0x88, $3
|
|
#endif
|
|
|
|
$5:
|
|
ldub [$1+3], $2
|
|
|
|
ldub [$1+2], $4
|
|
sll $2, 8, $2
|
|
or $2, $4, $2
|
|
|
|
ldub [$1+1], $4
|
|
sll $2, 8, $2
|
|
or $2, $4, $2
|
|
|
|
ldub [$1+0], $4
|
|
sll $2, 8, $2
|
|
or $2, $4, $2
|
|
|
|
|
|
ldub [$1+3+4], $3
|
|
|
|
ldub [$1+2+4], $4
|
|
sll $3, 8, $3
|
|
or $3, $4, $3
|
|
|
|
ldub [$1+1+4], $4
|
|
sll $3, 8, $3
|
|
or $3, $4, $3
|
|
|
|
ldub [$1+0+4], $4
|
|
sll $3, 8, $3
|
|
or $3, $4, $3
|
|
$5a:
|
|
|
|
})
|
|
|
|
|
|
! {load_little_endian_inc}
|
|
!
|
|
! parameter 1 address
|
|
! parameter 2 destination left
|
|
! parameter 3 destination right
|
|
! parameter 4 temporar
|
|
! parameter 4 label
|
|
!
|
|
! adds 8 to address
|
|
|
|
define(load_little_endian_inc, {
|
|
|
|
! {load_little_endian_inc}
|
|
! $1 $2 $3 $4 $5 $6 $7 $8 $9
|
|
|
|
! first in memory to rightmost in register
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
andcc $1, 3, global0
|
|
bne,pn %icc, $5
|
|
nop
|
|
|
|
lda [$1] 0x88, $2
|
|
add $1, 4, $1
|
|
|
|
lda [$1] 0x88, $3
|
|
ba,pt %icc, $5a
|
|
add $1, 4, $1
|
|
#endif
|
|
|
|
$5:
|
|
ldub [$1+3], $2
|
|
|
|
ldub [$1+2], $4
|
|
sll $2, 8, $2
|
|
or $2, $4, $2
|
|
|
|
ldub [$1+1], $4
|
|
sll $2, 8, $2
|
|
or $2, $4, $2
|
|
|
|
ldub [$1+0], $4
|
|
sll $2, 8, $2
|
|
or $2, $4, $2
|
|
|
|
ldub [$1+3+4], $3
|
|
add $1, 8, $1
|
|
|
|
ldub [$1+2+4-8], $4
|
|
sll $3, 8, $3
|
|
or $3, $4, $3
|
|
|
|
ldub [$1+1+4-8], $4
|
|
sll $3, 8, $3
|
|
or $3, $4, $3
|
|
|
|
ldub [$1+0+4-8], $4
|
|
sll $3, 8, $3
|
|
or $3, $4, $3
|
|
$5a:
|
|
|
|
})
|
|
|
|
|
|
! {load_n_bytes}
|
|
!
|
|
! Loads 1 to 7 bytes little endian
|
|
! Remaining bytes are zeroed.
|
|
!
|
|
! parameter 1 address
|
|
! parameter 2 length
|
|
! parameter 3 destination register left
|
|
! parameter 4 destination register right
|
|
! parameter 5 temp
|
|
! parameter 6 temp2
|
|
! parameter 7 label
|
|
! parameter 8 return label
|
|
|
|
define(load_n_bytes, {
|
|
|
|
! {load_n_bytes}
|
|
! $1 $2 $5 $6 $7 $8 $7 $8 $9
|
|
|
|
$7.0: call .+8
|
|
sll $2, 2, $6
|
|
|
|
add %o7,$7.jmp.table-$7.0,$5
|
|
|
|
add $5, $6, $5
|
|
mov 0, $4
|
|
|
|
ld [$5], $5
|
|
|
|
jmp %o7+$5
|
|
mov 0, $3
|
|
|
|
$7.7:
|
|
ldub [$1+6], $5
|
|
sll $5, 16, $5
|
|
or $3, $5, $3
|
|
$7.6:
|
|
ldub [$1+5], $5
|
|
sll $5, 8, $5
|
|
or $3, $5, $3
|
|
$7.5:
|
|
ldub [$1+4], $5
|
|
or $3, $5, $3
|
|
$7.4:
|
|
ldub [$1+3], $5
|
|
sll $5, 24, $5
|
|
or $4, $5, $4
|
|
$7.3:
|
|
ldub [$1+2], $5
|
|
sll $5, 16, $5
|
|
or $4, $5, $4
|
|
$7.2:
|
|
ldub [$1+1], $5
|
|
sll $5, 8, $5
|
|
or $4, $5, $4
|
|
$7.1:
|
|
ldub [$1+0], $5
|
|
ba $8
|
|
or $4, $5, $4
|
|
|
|
.align 4
|
|
|
|
$7.jmp.table:
|
|
.word 0
|
|
.word $7.1-$7.0
|
|
.word $7.2-$7.0
|
|
.word $7.3-$7.0
|
|
.word $7.4-$7.0
|
|
.word $7.5-$7.0
|
|
.word $7.6-$7.0
|
|
.word $7.7-$7.0
|
|
})
|
|
|
|
|
|
! {store_little_endian}
|
|
!
|
|
! parameter 1 address
|
|
! parameter 2 source left
|
|
! parameter 3 source right
|
|
! parameter 4 temporar
|
|
|
|
define(store_little_endian, {
|
|
|
|
! {store_little_endian}
|
|
! $1 $2 $3 $4 $5 $6 $7 $8 $9
|
|
|
|
! rightmost in register to first in memory
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
andcc $1, 3, global0
|
|
bne,pn %icc, $5
|
|
nop
|
|
|
|
sta $2, [$1] 0x88
|
|
add $1, 4, $4
|
|
|
|
ba,pt %icc, $5a
|
|
sta $3, [$4] 0x88
|
|
#endif
|
|
|
|
$5:
|
|
and $2, 255, $4
|
|
stub $4, [$1+0]
|
|
|
|
srl $2, 8, $4
|
|
and $4, 255, $4
|
|
stub $4, [$1+1]
|
|
|
|
srl $2, 16, $4
|
|
and $4, 255, $4
|
|
stub $4, [$1+2]
|
|
|
|
srl $2, 24, $4
|
|
stub $4, [$1+3]
|
|
|
|
|
|
and $3, 255, $4
|
|
stub $4, [$1+0+4]
|
|
|
|
srl $3, 8, $4
|
|
and $4, 255, $4
|
|
stub $4, [$1+1+4]
|
|
|
|
srl $3, 16, $4
|
|
and $4, 255, $4
|
|
stub $4, [$1+2+4]
|
|
|
|
srl $3, 24, $4
|
|
stub $4, [$1+3+4]
|
|
|
|
$5a:
|
|
|
|
})
|
|
|
|
|
|
! {store_n_bytes}
|
|
!
|
|
! Stores 1 to 7 bytes little endian
|
|
!
|
|
! parameter 1 address
|
|
! parameter 2 length
|
|
! parameter 3 source register left
|
|
! parameter 4 source register right
|
|
! parameter 5 temp
|
|
! parameter 6 temp2
|
|
! parameter 7 label
|
|
! parameter 8 return label
|
|
|
|
define(store_n_bytes, {
|
|
|
|
! {store_n_bytes}
|
|
! $1 $2 $5 $6 $7 $8 $7 $8 $9
|
|
|
|
$7.0: call .+8
|
|
sll $2, 2, $6
|
|
|
|
add %o7,$7.jmp.table-$7.0,$5
|
|
|
|
add $5, $6, $5
|
|
|
|
ld [$5], $5
|
|
|
|
jmp %o7+$5
|
|
nop
|
|
|
|
$7.7:
|
|
srl $3, 16, $5
|
|
and $5, 0xff, $5
|
|
stub $5, [$1+6]
|
|
$7.6:
|
|
srl $3, 8, $5
|
|
and $5, 0xff, $5
|
|
stub $5, [$1+5]
|
|
$7.5:
|
|
and $3, 0xff, $5
|
|
stub $5, [$1+4]
|
|
$7.4:
|
|
srl $4, 24, $5
|
|
stub $5, [$1+3]
|
|
$7.3:
|
|
srl $4, 16, $5
|
|
and $5, 0xff, $5
|
|
stub $5, [$1+2]
|
|
$7.2:
|
|
srl $4, 8, $5
|
|
and $5, 0xff, $5
|
|
stub $5, [$1+1]
|
|
$7.1:
|
|
and $4, 0xff, $5
|
|
|
|
|
|
ba $8
|
|
stub $5, [$1]
|
|
|
|
.align 4
|
|
|
|
$7.jmp.table:
|
|
|
|
.word 0
|
|
.word $7.1-$7.0
|
|
.word $7.2-$7.0
|
|
.word $7.3-$7.0
|
|
.word $7.4-$7.0
|
|
.word $7.5-$7.0
|
|
.word $7.6-$7.0
|
|
.word $7.7-$7.0
|
|
})
|
|
|
|
|
|
define(testvalue,{1})
|
|
|
|
define(register_init, {
|
|
|
|
! For test purposes:
|
|
|
|
sethi %hi(testvalue), local0
|
|
or local0, %lo(testvalue), local0
|
|
|
|
ifelse($1,{},{}, {mov local0, $1})
|
|
ifelse($2,{},{}, {mov local0, $2})
|
|
ifelse($3,{},{}, {mov local0, $3})
|
|
ifelse($4,{},{}, {mov local0, $4})
|
|
ifelse($5,{},{}, {mov local0, $5})
|
|
ifelse($6,{},{}, {mov local0, $6})
|
|
ifelse($7,{},{}, {mov local0, $7})
|
|
ifelse($8,{},{}, {mov local0, $8})
|
|
|
|
mov local0, local1
|
|
mov local0, local2
|
|
mov local0, local3
|
|
mov local0, local4
|
|
mov local0, local5
|
|
mov local0, local7
|
|
mov local0, local6
|
|
mov local0, out0
|
|
mov local0, out1
|
|
mov local0, out2
|
|
mov local0, out3
|
|
mov local0, out4
|
|
mov local0, out5
|
|
mov local0, global1
|
|
mov local0, global2
|
|
mov local0, global3
|
|
mov local0, global4
|
|
mov local0, global5
|
|
|
|
})
|
|
|
|
.section ".text"
|
|
|
|
.align 32
|
|
|
|
.des_enc:
|
|
|
|
! key address in3
|
|
! loads key next encryption/decryption first round from [in4]
|
|
|
|
rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
|
|
|
|
|
|
.align 32
|
|
|
|
.des_dec:
|
|
|
|
! implemented with out5 as first parameter to avoid
|
|
! register exchange in ede modes
|
|
|
|
! key address in4
|
|
! loads key next encryption/decryption first round from [in3]
|
|
|
|
rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
|
|
|
|
|
|
|
|
! void DES_encrypt1(data, ks, enc)
|
|
! *******************************
|
|
|
|
.align 32
|
|
.global DES_encrypt1
|
|
.type DES_encrypt1,#function
|
|
|
|
DES_encrypt1:
|
|
|
|
save %sp, FRAME, %sp
|
|
|
|
call .PIC.me.up
|
|
mov .PIC.me.up-(.-4),out0
|
|
|
|
ld [in0], in5 ! left
|
|
cmp in2, 0 ! enc
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
be,pn %icc, .encrypt.dec ! enc/dec
|
|
#else
|
|
be .encrypt.dec
|
|
#endif
|
|
ld [in0+4], out5 ! right
|
|
|
|
! parameter 6 1/2 for include encryption/decryption
|
|
! parameter 7 1 for move in1 to in3
|
|
! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
|
|
|
|
ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
|
|
|
|
rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
|
|
|
|
fp_macro(in5, out5, 1) ! 1 for store to [in0]
|
|
|
|
ret
|
|
restore
|
|
|
|
.encrypt.dec:
|
|
|
|
add in1, 120, in3 ! use last subkey for first round
|
|
|
|
! parameter 6 1/2 for include encryption/decryption
|
|
! parameter 7 1 for move in1 to in3
|
|
! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
|
|
|
|
ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
|
|
|
|
fp_macro(out5, in5, 1) ! 1 for store to [in0]
|
|
|
|
ret
|
|
restore
|
|
|
|
.DES_encrypt1.end:
|
|
.size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
|
|
|
|
|
|
! void DES_encrypt2(data, ks, enc)
|
|
!*********************************
|
|
|
|
! encrypts/decrypts without initial/final permutation
|
|
|
|
.align 32
|
|
.global DES_encrypt2
|
|
.type DES_encrypt2,#function
|
|
|
|
DES_encrypt2:
|
|
|
|
save %sp, FRAME, %sp
|
|
|
|
call .PIC.me.up
|
|
mov .PIC.me.up-(.-4),out0
|
|
|
|
! Set sbox address 1 to 6 and rotate halfs 3 left
|
|
! Errors caught by destest? Yes. Still? *NO*
|
|
|
|
!sethi %hi(DES_SPtrans), global1 ! address sbox 1
|
|
|
|
!or global1, %lo(DES_SPtrans), global1 ! sbox 1
|
|
|
|
add global1, 256, global2 ! sbox 2
|
|
add global1, 512, global3 ! sbox 3
|
|
|
|
ld [in0], out5 ! right
|
|
add global1, 768, global4 ! sbox 4
|
|
add global1, 1024, global5 ! sbox 5
|
|
|
|
ld [in0+4], in5 ! left
|
|
add global1, 1280, local6 ! sbox 6
|
|
add global1, 1792, out3 ! sbox 8
|
|
|
|
! rotate
|
|
|
|
sll in5, 3, local5
|
|
mov in1, in3 ! key address to in3
|
|
|
|
sll out5, 3, local7
|
|
srl in5, 29, in5
|
|
|
|
srl out5, 29, out5
|
|
add in5, local5, in5
|
|
|
|
add out5, local7, out5
|
|
cmp in2, 0
|
|
|
|
! we use our own stackframe
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
be,pn %icc, .encrypt2.dec ! decryption
|
|
#else
|
|
be .encrypt2.dec
|
|
#endif
|
|
STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ]
|
|
|
|
ld [in3], out0 ! key 7531 first round
|
|
mov LOOPS, out4 ! loop counter
|
|
|
|
ld [in3+4], out1 ! key 8642 first round
|
|
sethi %hi(0x0000FC00), local5
|
|
|
|
call .des_enc
|
|
mov in3, in4
|
|
|
|
! rotate
|
|
sll in5, 29, in0
|
|
srl in5, 3, in5
|
|
sll out5, 29, in1
|
|
add in5, in0, in5
|
|
srl out5, 3, out5
|
|
LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
|
|
add out5, in1, out5
|
|
st in5, [in0]
|
|
st out5, [in0+4]
|
|
|
|
ret
|
|
restore
|
|
|
|
|
|
.encrypt2.dec:
|
|
|
|
add in3, 120, in4
|
|
|
|
ld [in4], out0 ! key 7531 first round
|
|
mov LOOPS, out4 ! loop counter
|
|
|
|
ld [in4+4], out1 ! key 8642 first round
|
|
sethi %hi(0x0000FC00), local5
|
|
|
|
mov in5, local1 ! left expected in out5
|
|
mov out5, in5
|
|
|
|
call .des_dec
|
|
mov local1, out5
|
|
|
|
.encrypt2.finish:
|
|
|
|
! rotate
|
|
sll in5, 29, in0
|
|
srl in5, 3, in5
|
|
sll out5, 29, in1
|
|
add in5, in0, in5
|
|
srl out5, 3, out5
|
|
LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
|
|
add out5, in1, out5
|
|
st out5, [in0]
|
|
st in5, [in0+4]
|
|
|
|
ret
|
|
restore
|
|
|
|
.DES_encrypt2.end:
|
|
.size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
|
|
|
|
|
|
! void DES_encrypt3(data, ks1, ks2, ks3)
|
|
! **************************************
|
|
|
|
.align 32
|
|
.global DES_encrypt3
|
|
.type DES_encrypt3,#function
|
|
|
|
DES_encrypt3:
|
|
|
|
save %sp, FRAME, %sp
|
|
|
|
call .PIC.me.up
|
|
mov .PIC.me.up-(.-4),out0
|
|
|
|
ld [in0], in5 ! left
|
|
add in2, 120, in4 ! ks2
|
|
|
|
ld [in0+4], out5 ! right
|
|
mov in3, in2 ! save ks3
|
|
|
|
! parameter 6 1/2 for include encryption/decryption
|
|
! parameter 7 1 for mov in1 to in3
|
|
! parameter 8 1 for mov in3 to in4
|
|
! parameter 9 1 for load ks3 and ks2 to in4 and in3
|
|
|
|
ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
|
|
|
|
call .des_dec
|
|
mov in2, in3 ! preload ks3
|
|
|
|
call .des_enc
|
|
nop
|
|
|
|
fp_macro(in5, out5, 1)
|
|
|
|
ret
|
|
restore
|
|
|
|
.DES_encrypt3.end:
|
|
.size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
|
|
|
|
|
|
! void DES_decrypt3(data, ks1, ks2, ks3)
|
|
! **************************************
|
|
|
|
.align 32
|
|
.global DES_decrypt3
|
|
.type DES_decrypt3,#function
|
|
|
|
DES_decrypt3:
|
|
|
|
save %sp, FRAME, %sp
|
|
|
|
call .PIC.me.up
|
|
mov .PIC.me.up-(.-4),out0
|
|
|
|
ld [in0], in5 ! left
|
|
add in3, 120, in4 ! ks3
|
|
|
|
ld [in0+4], out5 ! right
|
|
mov in2, in3 ! ks2
|
|
|
|
! parameter 6 1/2 for include encryption/decryption
|
|
! parameter 7 1 for mov in1 to in3
|
|
! parameter 8 1 for mov in3 to in4
|
|
! parameter 9 1 for load ks3 and ks2 to in4 and in3
|
|
|
|
ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
|
|
|
|
call .des_enc
|
|
add in1, 120, in4 ! preload ks1
|
|
|
|
call .des_dec
|
|
nop
|
|
|
|
fp_macro(out5, in5, 1)
|
|
|
|
ret
|
|
restore
|
|
|
|
.DES_decrypt3.end:
|
|
.size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
|
|
|
|
! input: out0 offset between .PIC.me.up and caller
|
|
! output: out0 pointer to .PIC.me.up
|
|
! out2 pointer to .des_and
|
|
! global1 pointer to DES_SPtrans
|
|
.align 32
|
|
.PIC.me.up:
|
|
add out0,%o7,out0 ! pointer to .PIC.me.up
|
|
|
|
#ifdef OPENSSL_PIC
|
|
! In case anybody wonders why this code is same for both ABI.
|
|
! To start with it is not. Do note LDPTR below. But of course
|
|
! you must be wondering why the rest of it does not contain
|
|
! things like %hh, %hm and %lm. Well, those are needed only
|
|
! if OpenSSL library *itself* will become larger than 4GB,
|
|
! which is not going to happen any time soon.
|
|
sethi %hi(DES_SPtrans),global1
|
|
or global1,%lo(DES_SPtrans),global1
|
|
sethi %hi(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
|
|
add global1,out0,global1
|
|
add out2,%lo(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
|
|
LDPTR [out2+global1],global1
|
|
#elif 0
|
|
setn DES_SPtrans,out2,global1 ! synthetic instruction !
|
|
#elif defined(ABI64)
|
|
sethi %hh(DES_SPtrans),out2
|
|
or out2,%hm(DES_SPtrans),out2
|
|
sethi %lm(DES_SPtrans),global1
|
|
or global1,%lo(DES_SPtrans),global1
|
|
sllx out2,32,out2
|
|
or out2,global1,global1
|
|
#else
|
|
sethi %hi(DES_SPtrans),global1
|
|
or global1,%lo(DES_SPtrans),global1
|
|
#endif
|
|
|
|
retl
|
|
add out0,.des_and-.PIC.me.up,out2
|
|
|
|
.align 256
|
|
.type .des_and,#object
|
|
.size .des_and,284
|
|
|
|
.des_and:
|
|
|
|
! This table is used for AND 0xFC when it is known that register
|
|
! bits 8-31 are zero. Makes it possible to do three arithmetic
|
|
! operations in one cycle.
|
|
|
|
.byte 0, 0, 0, 0, 4, 4, 4, 4
|
|
.byte 8, 8, 8, 8, 12, 12, 12, 12
|
|
.byte 16, 16, 16, 16, 20, 20, 20, 20
|
|
.byte 24, 24, 24, 24, 28, 28, 28, 28
|
|
.byte 32, 32, 32, 32, 36, 36, 36, 36
|
|
.byte 40, 40, 40, 40, 44, 44, 44, 44
|
|
.byte 48, 48, 48, 48, 52, 52, 52, 52
|
|
.byte 56, 56, 56, 56, 60, 60, 60, 60
|
|
.byte 64, 64, 64, 64, 68, 68, 68, 68
|
|
.byte 72, 72, 72, 72, 76, 76, 76, 76
|
|
.byte 80, 80, 80, 80, 84, 84, 84, 84
|
|
.byte 88, 88, 88, 88, 92, 92, 92, 92
|
|
.byte 96, 96, 96, 96, 100, 100, 100, 100
|
|
.byte 104, 104, 104, 104, 108, 108, 108, 108
|
|
.byte 112, 112, 112, 112, 116, 116, 116, 116
|
|
.byte 120, 120, 120, 120, 124, 124, 124, 124
|
|
.byte 128, 128, 128, 128, 132, 132, 132, 132
|
|
.byte 136, 136, 136, 136, 140, 140, 140, 140
|
|
.byte 144, 144, 144, 144, 148, 148, 148, 148
|
|
.byte 152, 152, 152, 152, 156, 156, 156, 156
|
|
.byte 160, 160, 160, 160, 164, 164, 164, 164
|
|
.byte 168, 168, 168, 168, 172, 172, 172, 172
|
|
.byte 176, 176, 176, 176, 180, 180, 180, 180
|
|
.byte 184, 184, 184, 184, 188, 188, 188, 188
|
|
.byte 192, 192, 192, 192, 196, 196, 196, 196
|
|
.byte 200, 200, 200, 200, 204, 204, 204, 204
|
|
.byte 208, 208, 208, 208, 212, 212, 212, 212
|
|
.byte 216, 216, 216, 216, 220, 220, 220, 220
|
|
.byte 224, 224, 224, 224, 228, 228, 228, 228
|
|
.byte 232, 232, 232, 232, 236, 236, 236, 236
|
|
.byte 240, 240, 240, 240, 244, 244, 244, 244
|
|
.byte 248, 248, 248, 248, 252, 252, 252, 252
|
|
|
|
! 5 numbers for initil/final permutation
|
|
|
|
.word 0x0f0f0f0f ! offset 256
|
|
.word 0x0000ffff ! 260
|
|
.word 0x33333333 ! 264
|
|
.word 0x00ff00ff ! 268
|
|
.word 0x55555555 ! 272
|
|
|
|
.word 0 ! 276
|
|
.word LOOPS ! 280
|
|
.word 0x0000FC00 ! 284
|
|
|
|
! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
|
|
! *****************************************************************
|
|
|
|
|
|
.align 32
|
|
.global DES_ncbc_encrypt
|
|
.type DES_ncbc_encrypt,#function
|
|
|
|
DES_ncbc_encrypt:
|
|
|
|
save %sp, FRAME, %sp
|
|
|
|
define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
|
|
define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
|
|
define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
|
|
|
|
call .PIC.me.up
|
|
mov .PIC.me.up-(.-4),out0
|
|
|
|
cmp in5, 0 ! enc
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
be,pn %icc, .ncbc.dec
|
|
#else
|
|
be .ncbc.dec
|
|
#endif
|
|
STPTR in4, IVEC
|
|
|
|
! addr left right temp label
|
|
load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
|
|
|
|
addcc in2, -8, in2 ! bytes missing when first block done
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
bl,pn %icc, .ncbc.enc.seven.or.less
|
|
#else
|
|
bl .ncbc.enc.seven.or.less
|
|
#endif
|
|
mov in3, in4 ! schedule
|
|
|
|
.ncbc.enc.next.block:
|
|
|
|
load_little_endian(in0, out4, global4, local3, .LLE2) ! block
|
|
|
|
.ncbc.enc.next.block_1:
|
|
|
|
xor in5, out4, in5 ! iv xor
|
|
xor out5, global4, out5 ! iv xor
|
|
|
|
! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
|
|
ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
|
|
|
|
.ncbc.enc.next.block_2:
|
|
|
|
!// call .des_enc ! compares in2 to 8
|
|
! rounds inlined for alignment purposes
|
|
|
|
add global1, 768, global4 ! address sbox 4 since register used below
|
|
|
|
rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
bl,pn %icc, .ncbc.enc.next.block_fp
|
|
#else
|
|
bl .ncbc.enc.next.block_fp
|
|
#endif
|
|
add in0, 8, in0 ! input address
|
|
|
|
! If 8 or more bytes are to be encrypted after this block,
|
|
! we combine final permutation for this block with initial
|
|
! permutation for next block. Load next block:
|
|
|
|
load_little_endian(in0, global3, global4, local5, .LLE12)
|
|
|
|
! parameter 1 original left
|
|
! parameter 2 original right
|
|
! parameter 3 left ip
|
|
! parameter 4 right ip
|
|
! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
|
|
! 2: mov in4 to in3
|
|
!
|
|
! also adds -8 to length in2 and loads loop counter to out4
|
|
|
|
fp_ip_macro(out0, out1, global3, global4, 2)
|
|
|
|
store_little_endian(in1, out0, out1, local3, .SLE10) ! block
|
|
|
|
ld [in3], out0 ! key 7531 first round next block
|
|
mov in5, local1
|
|
xor global3, out5, in5 ! iv xor next block
|
|
|
|
ld [in3+4], out1 ! key 8642
|
|
add global1, 512, global3 ! address sbox 3 since register used
|
|
xor global4, local1, out5 ! iv xor next block
|
|
|
|
ba .ncbc.enc.next.block_2
|
|
add in1, 8, in1 ! output adress
|
|
|
|
.ncbc.enc.next.block_fp:
|
|
|
|
fp_macro(in5, out5)
|
|
|
|
store_little_endian(in1, in5, out5, local3, .SLE1) ! block
|
|
|
|
addcc in2, -8, in2 ! bytes missing when next block done
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0
|
|
#else
|
|
bpos .ncbc.enc.next.block
|
|
#endif
|
|
add in1, 8, in1
|
|
|
|
.ncbc.enc.seven.or.less:
|
|
|
|
cmp in2, -8
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
ble,pt %icc, .ncbc.enc.finish
|
|
#else
|
|
ble .ncbc.enc.finish
|
|
#endif
|
|
nop
|
|
|
|
add in2, 8, local1 ! bytes to load
|
|
|
|
! addr, length, dest left, dest right, temp, temp2, label, ret label
|
|
load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
|
|
|
|
! Loads 1 to 7 bytes little endian to global4, out4
|
|
|
|
|
|
.ncbc.enc.finish:
|
|
|
|
LDPTR IVEC, local4
|
|
store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
|
|
|
|
ret
|
|
restore
|
|
|
|
|
|
.ncbc.dec:
|
|
|
|
STPTR in0, INPUT
|
|
cmp in2, 0 ! length
|
|
add in3, 120, in3
|
|
|
|
LDPTR IVEC, local7 ! ivec
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
ble,pn %icc, .ncbc.dec.finish
|
|
#else
|
|
ble .ncbc.dec.finish
|
|
#endif
|
|
mov in3, in4 ! schedule
|
|
|
|
STPTR in1, OUTPUT
|
|
mov in0, local5 ! input
|
|
|
|
load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
|
|
|
|
.ncbc.dec.next.block:
|
|
|
|
load_little_endian(local5, in5, out5, local3, .LLE4) ! block
|
|
|
|
! parameter 6 1/2 for include encryption/decryption
|
|
! parameter 7 1 for mov in1 to in3
|
|
! parameter 8 1 for mov in3 to in4
|
|
|
|
ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryprion ks in4
|
|
|
|
fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
|
|
|
|
! in2 is bytes left to be stored
|
|
! in2 is compared to 8 in the rounds
|
|
|
|
xor out5, in0, out4 ! iv xor
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
bl,pn %icc, .ncbc.dec.seven.or.less
|
|
#else
|
|
bl .ncbc.dec.seven.or.less
|
|
#endif
|
|
xor in5, in1, global4 ! iv xor
|
|
|
|
! Load ivec next block now, since input and output address might be the same.
|
|
|
|
load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
|
|
|
|
store_little_endian(local7, out4, global4, local3, .SLE3)
|
|
|
|
STPTR local5, INPUT
|
|
add local7, 8, local7
|
|
addcc in2, -8, in2
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
bg,pt %icc, .ncbc.dec.next.block
|
|
#else
|
|
bg .ncbc.dec.next.block
|
|
#endif
|
|
STPTR local7, OUTPUT
|
|
|
|
|
|
.ncbc.dec.store.iv:
|
|
|
|
LDPTR IVEC, local4 ! ivec
|
|
store_little_endian(local4, in0, in1, local5, .SLE4)
|
|
|
|
.ncbc.dec.finish:
|
|
|
|
ret
|
|
restore
|
|
|
|
.ncbc.dec.seven.or.less:
|
|
|
|
load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
|
|
|
|
store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
|
|
|
|
|
|
.DES_ncbc_encrypt.end:
|
|
.size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
|
|
|
|
|
|
! void DES_ede3_cbc_encrypt(input, output, lenght, ks1, ks2, ks3, ivec, enc)
|
|
! **************************************************************************
|
|
|
|
|
|
.align 32
|
|
.global DES_ede3_cbc_encrypt
|
|
.type DES_ede3_cbc_encrypt,#function
|
|
|
|
DES_ede3_cbc_encrypt:
|
|
|
|
save %sp, FRAME, %sp
|
|
|
|
define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
|
|
define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
|
|
define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
|
|
|
|
call .PIC.me.up
|
|
mov .PIC.me.up-(.-4),out0
|
|
|
|
LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
|
|
LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
|
|
cmp local3, 0 ! enc
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
be,pn %icc, .ede3.dec
|
|
#else
|
|
be .ede3.dec
|
|
#endif
|
|
STPTR in4, KS2
|
|
|
|
STPTR in5, KS3
|
|
|
|
load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
|
|
|
|
addcc in2, -8, in2 ! bytes missing after next block
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
bl,pn %icc, .ede3.enc.seven.or.less
|
|
#else
|
|
bl .ede3.enc.seven.or.less
|
|
#endif
|
|
STPTR in3, KS1
|
|
|
|
.ede3.enc.next.block:
|
|
|
|
load_little_endian(in0, out4, global4, local3, .LLE7)
|
|
|
|
.ede3.enc.next.block_1:
|
|
|
|
LDPTR KS2, in4
|
|
xor in5, out4, in5 ! iv xor
|
|
xor out5, global4, out5 ! iv xor
|
|
|
|
LDPTR KS1, in3
|
|
add in4, 120, in4 ! for decryption we use last subkey first
|
|
nop
|
|
|
|
ip_macro(in5, out5, in5, out5, in3)
|
|
|
|
.ede3.enc.next.block_2:
|
|
|
|
call .des_enc ! ks1 in3
|
|
nop
|
|
|
|
call .des_dec ! ks2 in4
|
|
LDPTR KS3, in3
|
|
|
|
call .des_enc ! ks3 in3 compares in2 to 8
|
|
nop
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
bl,pn %icc, .ede3.enc.next.block_fp
|
|
#else
|
|
bl .ede3.enc.next.block_fp
|
|
#endif
|
|
add in0, 8, in0
|
|
|
|
! If 8 or more bytes are to be encrypted after this block,
|
|
! we combine final permutation for this block with initial
|
|
! permutation for next block. Load next block:
|
|
|
|
load_little_endian(in0, global3, global4, local5, .LLE11)
|
|
|
|
! parameter 1 original left
|
|
! parameter 2 original right
|
|
! parameter 3 left ip
|
|
! parameter 4 right ip
|
|
! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
|
|
! 2: mov in4 to in3
|
|
!
|
|
! also adds -8 to length in2 and loads loop counter to out4
|
|
|
|
fp_ip_macro(out0, out1, global3, global4, 1)
|
|
|
|
store_little_endian(in1, out0, out1, local3, .SLE9) ! block
|
|
|
|
mov in5, local1
|
|
xor global3, out5, in5 ! iv xor next block
|
|
|
|
ld [in3], out0 ! key 7531
|
|
add global1, 512, global3 ! address sbox 3
|
|
xor global4, local1, out5 ! iv xor next block
|
|
|
|
ld [in3+4], out1 ! key 8642
|
|
add global1, 768, global4 ! address sbox 4
|
|
ba .ede3.enc.next.block_2
|
|
add in1, 8, in1
|
|
|
|
.ede3.enc.next.block_fp:
|
|
|
|
fp_macro(in5, out5)
|
|
|
|
store_little_endian(in1, in5, out5, local3, .SLE5) ! block
|
|
|
|
addcc in2, -8, in2 ! bytes missing when next block done
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
bpos,pt %icc, .ede3.enc.next.block
|
|
#else
|
|
bpos .ede3.enc.next.block
|
|
#endif
|
|
add in1, 8, in1
|
|
|
|
.ede3.enc.seven.or.less:
|
|
|
|
cmp in2, -8
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
ble,pt %icc, .ede3.enc.finish
|
|
#else
|
|
ble .ede3.enc.finish
|
|
#endif
|
|
nop
|
|
|
|
add in2, 8, local1 ! bytes to load
|
|
|
|
! addr, length, dest left, dest right, temp, temp2, label, ret label
|
|
load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
|
|
|
|
.ede3.enc.finish:
|
|
|
|
LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
|
|
store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
|
|
|
|
ret
|
|
restore
|
|
|
|
.ede3.dec:
|
|
|
|
STPTR in0, INPUT
|
|
add in5, 120, in5
|
|
|
|
STPTR in1, OUTPUT
|
|
mov in0, local5
|
|
add in3, 120, in3
|
|
|
|
STPTR in3, KS1
|
|
cmp in2, 0
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
ble %icc, .ede3.dec.finish
|
|
#else
|
|
ble .ede3.dec.finish
|
|
#endif
|
|
STPTR in5, KS3
|
|
|
|
LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
|
|
load_little_endian(local7, in0, in1, local3, .LLE8)
|
|
|
|
.ede3.dec.next.block:
|
|
|
|
load_little_endian(local5, in5, out5, local3, .LLE9)
|
|
|
|
! parameter 6 1/2 for include encryption/decryption
|
|
! parameter 7 1 for mov in1 to in3
|
|
! parameter 8 1 for mov in3 to in4
|
|
! parameter 9 1 for load ks3 and ks2 to in4 and in3
|
|
|
|
ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
|
|
|
|
call .des_enc ! ks2 in3
|
|
LDPTR KS1, in4
|
|
|
|
call .des_dec ! ks1 in4
|
|
nop
|
|
|
|
fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
|
|
|
|
! in2 is bytes left to be stored
|
|
! in2 is compared to 8 in the rounds
|
|
|
|
xor out5, in0, out4
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
bl,pn %icc, .ede3.dec.seven.or.less
|
|
#else
|
|
bl .ede3.dec.seven.or.less
|
|
#endif
|
|
xor in5, in1, global4
|
|
|
|
load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
|
|
|
|
store_little_endian(local7, out4, global4, local3, .SLE7) ! block
|
|
|
|
STPTR local5, INPUT
|
|
addcc in2, -8, in2
|
|
add local7, 8, local7
|
|
|
|
#ifdef OPENSSL_SYSNAME_ULTRASPARC
|
|
bg,pt %icc, .ede3.dec.next.block
|
|
#else
|
|
bg .ede3.dec.next.block
|
|
#endif
|
|
STPTR local7, OUTPUT
|
|
|
|
.ede3.dec.store.iv:
|
|
|
|
LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
|
|
store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
|
|
|
|
.ede3.dec.finish:
|
|
|
|
ret
|
|
restore
|
|
|
|
.ede3.dec.seven.or.less:
|
|
|
|
load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
|
|
|
|
store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
|
|
|
|
|
|
.DES_ede3_cbc_encrypt.end:
|
|
.size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
|