openssl/crypto/bn/asm/alpha.s.works
1998-12-21 11:00:56 +00:00

533 lines
9.9 KiB
Text

# DEC Alpha assember
# The bn_div64 is actually gcc output but the other parts are hand done.
# Thanks to tzeruch@ceddec.com for sending me the gcc output for
# bn_div64.
# I've gone back and re-done most of routines.
# The key thing to remeber for the 164 CPU is that while a
# multiply operation takes 8 cycles, another one can only be issued
# after 4 cycles have elapsed. I've done modification to help
# improve this. Also, normally, a ld instruction will not be available
# for about 3 cycles.
.file 1 "bn_asm.c"
.set noat
gcc2_compiled.:
__gnu_compiled_c:
.text
.align 3
.globl bn_mul_add_words
.ent bn_mul_add_words
bn_mul_add_words:
bn_mul_add_words..ng:
.frame $30,0,$26,0
.prologue 0
.align 5
subq $18,4,$18
bis $31,$31,$0
blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
ldq $20,0($17) # 1 1
ldq $1,0($16) # 1 1
.align 3
$42:
mulq $20,$19,$5 # 1 2 1 ######
ldq $21,8($17) # 2 1
ldq $2,8($16) # 2 1
umulh $20,$19,$20 # 1 2 ######
ldq $27,16($17) # 3 1
ldq $3,16($16) # 3 1
mulq $21,$19,$6 # 2 2 1 ######
ldq $28,24($17) # 4 1
addq $1,$5,$1 # 1 2 2
ldq $4,24($16) # 4 1
umulh $21,$19,$21 # 2 2 ######
cmpult $1,$5,$22 # 1 2 3 1
addq $20,$22,$20 # 1 3 1
addq $1,$0,$1 # 1 2 3 1
mulq $27,$19,$7 # 3 2 1 ######
cmpult $1,$0,$0 # 1 2 3 2
addq $2,$6,$2 # 2 2 2
addq $20,$0,$0 # 1 3 2
cmpult $2,$6,$23 # 2 2 3 1
addq $21,$23,$21 # 2 3 1
umulh $27,$19,$27 # 3 2 ######
addq $2,$0,$2 # 2 2 3 1
cmpult $2,$0,$0 # 2 2 3 2
subq $18,4,$18
mulq $28,$19,$8 # 4 2 1 ######
addq $21,$0,$0 # 2 3 2
addq $3,$7,$3 # 3 2 2
addq $16,32,$16
cmpult $3,$7,$24 # 3 2 3 1
stq $1,-32($16) # 1 2 4
umulh $28,$19,$28 # 4 2 ######
addq $27,$24,$27 # 3 3 1
addq $3,$0,$3 # 3 2 3 1
stq $2,-24($16) # 2 2 4
cmpult $3,$0,$0 # 3 2 3 2
stq $3,-16($16) # 3 2 4
addq $4,$8,$4 # 4 2 2
addq $27,$0,$0 # 3 3 2
cmpult $4,$8,$25 # 4 2 3 1
addq $17,32,$17
addq $28,$25,$28 # 4 3 1
addq $4,$0,$4 # 4 2 3 1
cmpult $4,$0,$0 # 4 2 3 2
stq $4,-8($16) # 4 2 4
addq $28,$0,$0 # 4 3 2
blt $18,$43
ldq $20,0($17) # 1 1
ldq $1,0($16) # 1 1
br $42
.align 4
$45:
ldq $20,0($17) # 4 1
ldq $1,0($16) # 4 1
mulq $20,$19,$5 # 4 2 1
subq $18,1,$18
addq $16,8,$16
addq $17,8,$17
umulh $20,$19,$20 # 4 2
addq $1,$5,$1 # 4 2 2
cmpult $1,$5,$22 # 4 2 3 1
addq $20,$22,$20 # 4 3 1
addq $1,$0,$1 # 4 2 3 1
cmpult $1,$0,$0 # 4 2 3 2
addq $20,$0,$0 # 4 3 2
stq $1,-8($16) # 4 2 4
bgt $18,$45
ret $31,($26),1 # else exit
.align 4
$43:
addq $18,4,$18
bgt $18,$45 # goto tail code
ret $31,($26),1 # else exit
.end bn_mul_add_words
.align 3
.globl bn_mul_words
.ent bn_mul_words
bn_mul_words:
bn_mul_words..ng:
.frame $30,0,$26,0
.prologue 0
.align 5
subq $18,4,$18
bis $31,$31,$0
blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
ldq $20,0($17) # 1 1
.align 3
$142:
mulq $20,$19,$5 # 1 2 1 #####
ldq $21,8($17) # 2 1
ldq $27,16($17) # 3 1
umulh $20,$19,$20 # 1 2 #####
ldq $28,24($17) # 4 1
mulq $21,$19,$6 # 2 2 1 #####
addq $5,$0,$5 # 1 2 3 1
subq $18,4,$18
cmpult $5,$0,$0 # 1 2 3 2
umulh $21,$19,$21 # 2 2 #####
addq $20,$0,$0 # 1 3 2
addq $17,32,$17
addq $6,$0,$6 # 2 2 3 1
mulq $27,$19,$7 # 3 2 1 #####
cmpult $6,$0,$0 # 2 2 3 2
addq $21,$0,$0 # 2 3 2
addq $16,32,$16
umulh $27,$19,$27 # 3 2 #####
stq $5,-32($16) # 1 2 4
mulq $28,$19,$8 # 4 2 1 #####
addq $7,$0,$7 # 3 2 3 1
stq $6,-24($16) # 2 2 4
cmpult $7,$0,$0 # 3 2 3 2
umulh $28,$19,$28 # 4 2 #####
addq $27,$0,$0 # 3 3 2
stq $7,-16($16) # 3 2 4
addq $8,$0,$8 # 4 2 3 1
cmpult $8,$0,$0 # 4 2 3 2
addq $28,$0,$0 # 4 3 2
stq $8,-8($16) # 4 2 4
blt $18,$143
ldq $20,0($17) # 1 1
br $142
.align 4
$145:
ldq $20,0($17) # 4 1
mulq $20,$19,$5 # 4 2 1
subq $18,1,$18
umulh $20,$19,$20 # 4 2
addq $5,$0,$5 # 4 2 3 1
addq $16,8,$16
cmpult $5,$0,$0 # 4 2 3 2
addq $17,8,$17
addq $20,$0,$0 # 4 3 2
stq $5,-8($16) # 4 2 4
bgt $18,$145
ret $31,($26),1 # else exit
.align 4
$143:
addq $18,4,$18
bgt $18,$145 # goto tail code
ret $31,($26),1 # else exit
.end bn_mul_words
.align 3
.globl bn_sqr_words
.ent bn_sqr_words
bn_sqr_words:
bn_sqr_words..ng:
.frame $30,0,$26,0
.prologue 0
subq $18,4,$18
blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
ldq $20,0($17) # 1 1
.align 3
$542:
mulq $20,$20,$5 ######
ldq $21,8($17) # 1 1
subq $18,4
umulh $20,$20,$1 ######
ldq $27,16($17) # 1 1
mulq $21,$21,$6 ######
ldq $28,24($17) # 1 1
stq $5,0($16) # r[0]
umulh $21,$21,$2 ######
stq $1,8($16) # r[1]
mulq $27,$27,$7 ######
stq $6,16($16) # r[0]
umulh $27,$27,$3 ######
stq $2,24($16) # r[1]
mulq $28,$28,$8 ######
stq $7,32($16) # r[0]
umulh $28,$28,$4 ######
stq $3,40($16) # r[1]
addq $16,64,$16
addq $17,32,$17
stq $8,-16($16) # r[0]
stq $4,-8($16) # r[1]
blt $18,$543
ldq $20,0($17) # 1 1
br $542
$442:
ldq $20,0($17) # a[0]
mulq $20,$20,$5 # a[0]*w low part r2
addq $16,16,$16
addq $17,8,$17
subq $18,1,$18
umulh $20,$20,$1 # a[0]*w high part r3
stq $5,-16($16) # r[0]
stq $1,-8($16) # r[1]
bgt $18,$442
ret $31,($26),1 # else exit
.align 4
$543:
addq $18,4,$18
bgt $18,$442 # goto tail code
ret $31,($26),1 # else exit
.end bn_sqr_words
.align 3
.globl bn_add_words
.ent bn_add_words
bn_add_words:
bn_add_words..ng:
.frame $30,0,$26,0
.prologue 0
subq $19,4,$19
bis $31,$31,$0 # carry = 0
blt $19,$900
ldq $5,0($17) # a[0]
ldq $1,0($18) # b[1]
.align 3
$901:
addq $1,$5,$1 # r=a+b;
ldq $6,8($17) # a[1]
cmpult $1,$5,$22 # did we overflow?
ldq $2,8($18) # b[1]
addq $1,$0,$1 # c+= overflow
ldq $7,16($17) # a[2]
cmpult $1,$0,$0 # overflow?
ldq $3,16($18) # b[2]
addq $0,$22,$0
ldq $8,24($17) # a[3]
addq $2,$6,$2 # r=a+b;
ldq $4,24($18) # b[3]
cmpult $2,$6,$23 # did we overflow?
addq $3,$7,$3 # r=a+b;
addq $2,$0,$2 # c+= overflow
cmpult $3,$7,$24 # did we overflow?
cmpult $2,$0,$0 # overflow?
addq $4,$8,$4 # r=a+b;
addq $0,$23,$0
cmpult $4,$8,$25 # did we overflow?
addq $3,$0,$3 # c+= overflow
stq $1,0($16) # r[0]=c
cmpult $3,$0,$0 # overflow?
stq $2,8($16) # r[1]=c
addq $0,$24,$0
stq $3,16($16) # r[2]=c
addq $4,$0,$4 # c+= overflow
subq $19,4,$19 # loop--
cmpult $4,$0,$0 # overflow?
addq $17,32,$17 # a++
addq $0,$25,$0
stq $4,24($16) # r[3]=c
addq $18,32,$18 # b++
addq $16,32,$16 # r++
blt $19,$900
ldq $5,0($17) # a[0]
ldq $1,0($18) # b[1]
br $901
.align 4
$945:
ldq $5,0($17) # a[0]
ldq $1,0($18) # b[1]
addq $1,$5,$1 # r=a+b;
subq $19,1,$19 # loop--
addq $1,$0,$1 # c+= overflow
addq $17,8,$17 # a++
cmpult $1,$5,$22 # did we overflow?
cmpult $1,$0,$0 # overflow?
addq $18,8,$18 # b++
stq $1,0($16) # r[0]=c
addq $0,$22,$0
addq $16,8,$16 # r++
bgt $19,$945
ret $31,($26),1 # else exit
$900:
addq $19,4,$19
bgt $19,$945 # goto tail code
ret $31,($26),1 # else exit
.end bn_add_words
#
# What follows was taken directly from the C compiler with a few
# hacks to redo the lables.
#
.text
.align 3
.globl bn_div64
.ent bn_div64
bn_div64:
ldgp $29,0($27)
bn_div64..ng:
lda $30,-48($30)
.frame $30,48,$26,0
stq $26,0($30)
stq $9,8($30)
stq $10,16($30)
stq $11,24($30)
stq $12,32($30)
stq $13,40($30)
.mask 0x4003e00,-48
.prologue 1
bis $16,$16,$9
bis $17,$17,$10
bis $18,$18,$11
bis $31,$31,$13
bis $31,2,$12
bne $11,$119
lda $0,-1
br $31,$136
.align 4
$119:
bis $11,$11,$16
jsr $26,BN_num_bits_word
ldgp $29,0($26)
subq $0,64,$1
beq $1,$120
bis $31,1,$1
sll $1,$0,$1
cmpule $9,$1,$1
bne $1,$120
# lda $16,_IO_stderr_
# lda $17,$C32
# bis $0,$0,$18
# jsr $26,fprintf
# ldgp $29,0($26)
jsr $26,abort
ldgp $29,0($26)
.align 4
$120:
bis $31,64,$3
cmpult $9,$11,$2
subq $3,$0,$1
addl $1,$31,$0
subq $9,$11,$1
cmoveq $2,$1,$9
beq $0,$122
zapnot $0,15,$2
subq $3,$0,$1
sll $11,$2,$11
sll $9,$2,$3
srl $10,$1,$1
sll $10,$2,$10
bis $3,$1,$9
$122:
srl $11,32,$5
zapnot $11,15,$6
lda $7,-1
.align 5
$123:
srl $9,32,$1
subq $1,$5,$1
bne $1,$126
zapnot $7,15,$27
br $31,$127
.align 4
$126:
bis $9,$9,$24
bis $5,$5,$25
divqu $24,$25,$27
$127:
srl $10,32,$4
.align 5
$128:
mulq $27,$5,$1
subq $9,$1,$3
zapnot $3,240,$1
bne $1,$129
mulq $6,$27,$2
sll $3,32,$1
addq $1,$4,$1
cmpule $2,$1,$2
bne $2,$129
subq $27,1,$27
br $31,$128
.align 4
$129:
mulq $27,$6,$1
mulq $27,$5,$4
srl $1,32,$3
sll $1,32,$1
addq $4,$3,$4
cmpult $10,$1,$2
subq $10,$1,$10
addq $2,$4,$2
cmpult $9,$2,$1
bis $2,$2,$4
beq $1,$134
addq $9,$11,$9
subq $27,1,$27
$134:
subl $12,1,$12
subq $9,$4,$9
beq $12,$124
sll $27,32,$13
sll $9,32,$2
srl $10,32,$1
sll $10,32,$10
bis $2,$1,$9
br $31,$123
.align 4
$124:
bis $13,$27,$0
$136:
ldq $26,0($30)
ldq $9,8($30)
ldq $10,16($30)
ldq $11,24($30)
ldq $12,32($30)
ldq $13,40($30)
addq $30,48,$30
ret $31,($26),1
.end bn_div64
.set noat
.text
.align 3
.globl bn_sub_words
.ent bn_sub_words
bn_sub_words:
bn_sub_words..ng:
.frame $30,0,$26,0
.prologue 0
subq $19, 4, $19
bis $31, $31, $0
blt $19, $100
ldq $1, 0($17)
ldq $2, 0($18)
$101:
ldq $3, 8($17)
cmpult $1, $2, $4
ldq $5, 8($18)
subq $1, $2, $1
ldq $6, 16($17)
cmpult $1, $0, $2
ldq $7, 16($18)
subq $1, $0, $23
ldq $8, 24($17)
addq $2, $4, $0
cmpult $3, $5, $24
subq $3, $5, $3
ldq $22, 24($18)
cmpult $3, $0, $5
subq $3, $0, $25
addq $5, $24, $0
cmpult $6, $7, $27
subq $6, $7, $6
stq $23, 0($16)
cmpult $6, $0, $7
subq $6, $0, $28
addq $7, $27, $0
cmpult $8, $22, $21
subq $8, $22, $8
stq $25, 8($16)
cmpult $8, $0, $22
subq $8, $0, $20
addq $22, $21, $0
stq $28, 16($16)
subq $19, 4, $19
stq $20, 24($16)
addq $17, 32, $17
addq $18, 32, $18
addq $16, 32, $16
blt $19, $100
ldq $1, 0($17)
ldq $2, 0($18)
br $101
$102:
ldq $1, 0($17)
ldq $2, 0($18)
cmpult $1, $2, $27
subq $1, $2, $1
cmpult $1, $0, $2
subq $1, $0, $1
stq $1, 0($16)
addq $2, $27, $0
addq $17, 8, $17
addq $18, 8, $18
addq $16, 8, $16
subq $19, 1, $19
bgt $19, $102
ret $31,($26),1
$100:
addq $19, 4, $19
bgt $19, $102
$103:
ret $31,($26),1
.end bn_sub_words