This update gets endianness-neutrality right and adds the second required

entry point, md5_block_asm_data_order.
This commit is contained in:
Andy Polyakov 2005-07-19 22:33:03 +00:00
parent 7e4d335943
commit 0f04379d9c

View file

@ -86,6 +86,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define pPad2 p12
#define pPad3 p13
#define pSkip p8
// The two predicates below must remain constant throughout the whole routine
#define pDataOrder p14
#define pHostOrder p15
#define A_ out24
#define B_ out25
@ -159,6 +162,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define _NOUTPUT 0
#define _NROTATE 24 /* this must be <= _NINPUTS */
#if defined(_HPUX_SOURCE) && !defined(_LP64)
#define ADDP addp4
#else
#define ADDP add
#endif
// Macros for getting the left and right portions of little-endian words
@ -225,78 +233,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define LCSave r21
#define PFSSave r20
#define PRSave r22
#define pAgain p14
#define pOff p14
.rodata
// Values are specified as bytes to ensure they are
// in little-endian byte-order.
.align 4
md5_round_constants:
data1 0x78, 0xa4, 0x6a, 0xd7 // 0
data1 0x56, 0xb7, 0xc7, 0xe8 // 1
data1 0xdb, 0x70, 0x20, 0x24 // 2
data1 0xee, 0xce, 0xbd, 0xc1 // 3
data1 0xaf, 0x0f, 0x7c, 0xf5 // 4
data1 0x2a, 0xc6, 0x87, 0x47 // 5
data1 0x13, 0x46, 0x30, 0xa8 // 6
data1 0x01, 0x95, 0x46, 0xfd // 7
data1 0xd8, 0x98, 0x80, 0x69 // 8
data1 0xaf, 0xf7, 0x44, 0x8b // 9
data1 0xb1, 0x5b, 0xff, 0xff // 10
data1 0xbe, 0xd7, 0x5c, 0x89 // 11
data1 0x22, 0x11, 0x90, 0x6b // 12
data1 0x93, 0x71, 0x98, 0xfd // 13
data1 0x8e, 0x43, 0x79, 0xa6 // 14
data1 0x21, 0x08, 0xb4, 0x49 // 15
data1 0x62, 0x25, 0x1e, 0xf6 // 16
data1 0x40, 0xb3, 0x40, 0xc0 // 17
data1 0x51, 0x5a, 0x5e, 0x26 // 18
data1 0xaa, 0xc7, 0xb6, 0xe9 // 19
data1 0x5d, 0x10, 0x2f, 0xd6 // 20
data1 0x53, 0x14, 0x44, 0x02 // 21
data1 0x81, 0xe6, 0xa1, 0xd8 // 22
data1 0xc8, 0xfb, 0xd3, 0xe7 // 23
data1 0xe6, 0xcd, 0xe1, 0x21 // 24
data1 0xd6, 0x07, 0x37, 0xc3 // 25
data1 0x87, 0x0d, 0xd5, 0xf4 // 26
data1 0xed, 0x14, 0x5a, 0x45 // 27
data1 0x05, 0xe9, 0xe3, 0xa9 // 28
data1 0xf8, 0xa3, 0xef, 0xfc // 29
data1 0xd9, 0x02, 0x6f, 0x67 // 30
data1 0x8a, 0x4c, 0x2a, 0x8d // 31
data1 0x42, 0x39, 0xfa, 0xff // 32
data1 0x81, 0xf6, 0x71, 0x87 // 33
data1 0x22, 0x61, 0x9d, 0x6d // 34
data1 0x0c, 0x38, 0xe5, 0xfd // 35
data1 0x44, 0xea, 0xbe, 0xa4 // 36
data1 0xa9, 0xcf, 0xde, 0x4b // 37
data1 0x60, 0x4b, 0xbb, 0xf6 // 38
data1 0x70, 0xbc, 0xbf, 0xbe // 39
data1 0xc6, 0x7e, 0x9b, 0x28 // 40
data1 0xfa, 0x27, 0xa1, 0xea // 41
data1 0x85, 0x30, 0xef, 0xd4 // 42
data1 0x05, 0x1d, 0x88, 0x04 // 43
data1 0x39, 0xd0, 0xd4, 0xd9 // 44
data1 0xe5, 0x99, 0xdb, 0xe6 // 45
data1 0xf8, 0x7c, 0xa2, 0x1f // 46
data1 0x65, 0x56, 0xac, 0xc4 // 47
data1 0x44, 0x22, 0x29, 0xf4 // 48
data1 0x97, 0xff, 0x2a, 0x43 // 49
data1 0xa7, 0x23, 0x94, 0xab // 50
data1 0x39, 0xa0, 0x93, 0xfc // 51
data1 0xc3, 0x59, 0x5b, 0x65 // 52
data1 0x92, 0xcc, 0x0c, 0x8f // 53
data1 0x7d, 0xf4, 0xef, 0xff // 54
data1 0xd1, 0x5d, 0x84, 0x85 // 55
data1 0x4f, 0x7e, 0xa8, 0x6f // 56
data1 0xe0, 0xe6, 0x2c, 0xfe // 57
data1 0x14, 0x43, 0x01, 0xa3 // 58
data1 0xa1, 0x11, 0x08, 0x4e // 59
data1 0x82, 0x7e, 0x53, 0xf7 // 60
data1 0x35, 0xf2, 0x3a, 0xbd // 61
data1 0xbb, 0xd2, 0xd7, 0x2a // 62
data1 0x91, 0xd3, 0x86, 0xeb // 63
#define pAgain p63
#define pOff p63
.text
@ -320,52 +258,47 @@ md5_round_constants:
*/
// md5_block_asm_data_order: second public entry point.  It does no work
// of its own: it records "data order" in the two order predicates and
// then joins the shared body at .md5_block, which lives inside
// md5_block_asm_host_order.  Downstream, pDataOrder selects the
// byte-coded constant table (.md5_tbl_data_order) and, on builds with
// _HPUX_SOURCE or B_ENDIAN defined, guards the psr.be switch to
// little-endian loads.
.type md5_block_asm_data_order, @function
.global md5_block_asm_data_order
.align 32
.proc md5_block_asm_data_order
md5_block_asm_data_order:
{ .mib
// r0 always reads as zero, so r0 == r0 is unconditionally true:
// pDataOrder := 1, pHostOrder := 0 (the pair stays mutually exclusive).
cmp.eq pDataOrder,pHostOrder = r0,r0
// Continue in the common code; the predicates set above must not be
// overwritten for the rest of the routine.
br.sptk.many .md5_block
};;
.endp md5_block_asm_data_order
.type md5_block_asm_host_order, @function
.global md5_block_asm_host_order
.align 32
.proc md5_block_asm_host_order
md5_block_asm_host_order:
.prologue
#ifndef __LP64__
{ .mib
cmp.eq pHostOrder,pDataOrder = r0,r0
};;
.md5_block:
{ .mmi
.save ar.pfs, PFSSave
.save ar.pfs, PFSSave
alloc PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
addp4 DPtrIn = 0, DPtrIn
addp4 CtxPtr0 = 0, CtxPtr0
ADDP CtxPtr1 = 8, CtxPtr0
mov CTable = ip
}
{ .mmi
ADDP DPtrIn = 0, DPtrIn
ADDP CtxPtr0 = 0, CtxPtr0
.save ar.lc, LCSave
mov LCSave = ar.lc
}
;;
.pred.rel "mutex",pDataOrder,pHostOrder
{ .mmi
nop 0x0
(pDataOrder) add CTable = .md5_tbl_data_order#-.md5_block#, CTable
(pHostOrder) add CTable = .md5_tbl_host_order#-.md5_block#, CTable
and InAlign = 0x3, DPtrIn
.save ar.lc, LCSave
mov LCSave = ar.lc
}
#else
{ .mmi
.save ar.pfs, PFSSave
alloc PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
and InAlign = 0x3, DPtrIn
.save ar.lc, LCSave
mov LCSave = ar.lc
}
#endif
{ .mmi
addl CTable = @ltoffx(md5_round_constants), gp
;;
ld8.mov CTable = [CTable], md5_round_constants // native byte-order
add CtxPtr1 = 8, CtxPtr0
}
#ifdef B_ENDIAN
{
.mmi
rum psr.be // switch to little-endian mode
nop.m 0x0
nop.i 0x0
}
#endif
;;
{ .mmi
ld4 AccumA = [CtxPtr0], 4
ld4 AccumC = [CtxPtr1], 4
@ -379,15 +312,12 @@ md5_block_asm_host_order:
ld4 AccumD = [CtxPtr1]
dep DPtr_ = 0, DPtrIn, 0, 2
} ;;
{ .mmi
#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
(pDataOrder) rum psr.be;; // switch to little-endian
#endif
{ .mmb
ld4 CTable0 = [CTable], 4
cmp.ne pOff, p0 = 0, InAlign
} ;;
{ .mib
nop.m 0x0
nop.i 0x0
(pOff) br.cond.spnt.many .md5_unaligned
} ;;
@ -431,9 +361,9 @@ md5_block_asm_host_order:
} ;;
.md5_exit:
// Note that we switch back to the entry endianness AFTER storing so
// that the memory image of the hash is preserved.
#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
(pDataOrder) sum psr.be;; // switch back to big-endian mode
#endif
{ .mmi
st4 [CtxPtr0] = AccumB, -4
st4 [CtxPtr1] = AccumD, -4
@ -445,9 +375,6 @@ md5_block_asm_host_order:
mov ar.lc = LCSave
} ;;
{ .mib
#ifdef B_ENDIAN
sum psr.be // switch back to big-endian mode
#endif
mov ar.pfs = PFSSave
br.ret.sptk.few rp
} ;;
@ -1001,9 +928,99 @@ md5_digest_block##offset: \
nop 0x0 ; \
nop 0x0 ; \
br.cond.sptk.many md5_digest_GHI ; \
} ; \
} ;; \
.endp md5digestBlock ## offset
MD5FBLOCK(1)
MD5FBLOCK(2)
MD5FBLOCK(3)
// md5_constants: the 64 MD5 additive round constants (the T[1..64]
// table of RFC 1321), stored twice back to back so that the routine
// can pick the copy whose in-memory byte order matches the mode it
// loads in.  The copy is selected at .md5_block by adding either
// .md5_tbl_data_order-.md5_block or .md5_tbl_host_order-.md5_block to
// the ip-derived CTable pointer, under pDataOrder / pHostOrder
// respectively.
.align 64
.type md5_constants, @object
md5_constants:
.md5_tbl_data_order: // Used when pDataOrder is set.  Each 32-bit
// constant is spelled out as four bytes, least
// significant first, so the memory image is
// little-endian regardless of assembler settings.
data1 0x78, 0xa4, 0x6a, 0xd7 // 0
data1 0x56, 0xb7, 0xc7, 0xe8 // 1
data1 0xdb, 0x70, 0x20, 0x24 // 2
data1 0xee, 0xce, 0xbd, 0xc1 // 3
data1 0xaf, 0x0f, 0x7c, 0xf5 // 4
data1 0x2a, 0xc6, 0x87, 0x47 // 5
data1 0x13, 0x46, 0x30, 0xa8 // 6
data1 0x01, 0x95, 0x46, 0xfd // 7
data1 0xd8, 0x98, 0x80, 0x69 // 8
data1 0xaf, 0xf7, 0x44, 0x8b // 9
data1 0xb1, 0x5b, 0xff, 0xff // 10
data1 0xbe, 0xd7, 0x5c, 0x89 // 11
data1 0x22, 0x11, 0x90, 0x6b // 12
data1 0x93, 0x71, 0x98, 0xfd // 13
data1 0x8e, 0x43, 0x79, 0xa6 // 14
data1 0x21, 0x08, 0xb4, 0x49 // 15
data1 0x62, 0x25, 0x1e, 0xf6 // 16
data1 0x40, 0xb3, 0x40, 0xc0 // 17
data1 0x51, 0x5a, 0x5e, 0x26 // 18
data1 0xaa, 0xc7, 0xb6, 0xe9 // 19
data1 0x5d, 0x10, 0x2f, 0xd6 // 20
data1 0x53, 0x14, 0x44, 0x02 // 21
data1 0x81, 0xe6, 0xa1, 0xd8 // 22
data1 0xc8, 0xfb, 0xd3, 0xe7 // 23
data1 0xe6, 0xcd, 0xe1, 0x21 // 24
data1 0xd6, 0x07, 0x37, 0xc3 // 25
data1 0x87, 0x0d, 0xd5, 0xf4 // 26
data1 0xed, 0x14, 0x5a, 0x45 // 27
data1 0x05, 0xe9, 0xe3, 0xa9 // 28
data1 0xf8, 0xa3, 0xef, 0xfc // 29
data1 0xd9, 0x02, 0x6f, 0x67 // 30
data1 0x8a, 0x4c, 0x2a, 0x8d // 31
data1 0x42, 0x39, 0xfa, 0xff // 32
data1 0x81, 0xf6, 0x71, 0x87 // 33
data1 0x22, 0x61, 0x9d, 0x6d // 34
data1 0x0c, 0x38, 0xe5, 0xfd // 35
data1 0x44, 0xea, 0xbe, 0xa4 // 36
data1 0xa9, 0xcf, 0xde, 0x4b // 37
data1 0x60, 0x4b, 0xbb, 0xf6 // 38
data1 0x70, 0xbc, 0xbf, 0xbe // 39
data1 0xc6, 0x7e, 0x9b, 0x28 // 40
data1 0xfa, 0x27, 0xa1, 0xea // 41
data1 0x85, 0x30, 0xef, 0xd4 // 42
data1 0x05, 0x1d, 0x88, 0x04 // 43
data1 0x39, 0xd0, 0xd4, 0xd9 // 44
data1 0xe5, 0x99, 0xdb, 0xe6 // 45
data1 0xf8, 0x7c, 0xa2, 0x1f // 46
data1 0x65, 0x56, 0xac, 0xc4 // 47
data1 0x44, 0x22, 0x29, 0xf4 // 48
data1 0x97, 0xff, 0x2a, 0x43 // 49
data1 0xa7, 0x23, 0x94, 0xab // 50
data1 0x39, 0xa0, 0x93, 0xfc // 51
data1 0xc3, 0x59, 0x5b, 0x65 // 52
data1 0x92, 0xcc, 0x0c, 0x8f // 53
data1 0x7d, 0xf4, 0xef, 0xff // 54
data1 0xd1, 0x5d, 0x84, 0x85 // 55
data1 0x4f, 0x7e, 0xa8, 0x6f // 56
data1 0xe0, 0xe6, 0x2c, 0xfe // 57
data1 0x14, 0x43, 0x01, 0xa3 // 58
data1 0xa1, 0x11, 0x08, 0x4e // 59
data1 0x82, 0x7e, 0x53, 0xf7 // 60
data1 0x35, 0xf2, 0x3a, 0xbd // 61
data1 0xbb, 0xd2, 0xd7, 0x2a // 62
data1 0x91, 0xd3, 0x86, 0xeb // 63
.md5_tbl_host_order: // Used when pHostOrder is set.  Same 64 values,
// emitted as whole 32-bit words so they land in
// the OS data order (which may itself happen to
// be little-endian).  Four constants per line;
// the trailing number is the index of the first.
data4 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee // 0
data4 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 // 4
data4 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be // 8
data4 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 // 12
data4 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa // 16
data4 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 // 20
data4 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed // 24
data4 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a // 28
data4 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c // 32
data4 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 // 36
data4 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 // 40
data4 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 // 44
data4 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 // 48
data4 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 // 52
data4 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 // 56
data4 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 // 60
// Object size: 64 constants * 4 bytes each * 2 tables = 512 bytes.
.size md5_constants#,64*4*2