This update gets endianness-neutrality right and adds the second required
entry point, md5_block_asm_data_order.
This commit is contained in:
parent
7e4d335943
commit
0f04379d9c
1 changed files with 135 additions and 118 deletions
|
@ -86,6 +86,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|||
#define pPad2 p12
|
||||
#define pPad3 p13
|
||||
#define pSkip p8
|
||||
// These two below shall remain constant throughout the whole routine
|
||||
#define pDataOrder p14
|
||||
#define pHostOrder p15
|
||||
|
||||
#define A_ out24
|
||||
#define B_ out25
|
||||
|
@ -159,6 +162,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|||
#define _NOUTPUT 0
|
||||
#define _NROTATE 24 /* this must be <= _NINPUTS */
|
||||
|
||||
#if defined(_HPUX_SOURCE) && !defined(_LP64)
|
||||
#define ADDP addp4
|
||||
#else
|
||||
#define ADDP add
|
||||
#endif
|
||||
|
||||
// Macros for getting the left and right portions of little-endian words
|
||||
|
||||
|
@ -225,78 +233,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|||
#define LCSave r21
|
||||
#define PFSSave r20
|
||||
#define PRSave r22
|
||||
#define pAgain p14
|
||||
#define pOff p14
|
||||
|
||||
.rodata
|
||||
// Values are specified as bytes to ensure they are
|
||||
// in little-endian byte-order.
|
||||
.align 4
|
||||
md5_round_constants:
|
||||
data1 0x78, 0xa4, 0x6a, 0xd7 // 0
|
||||
data1 0x56, 0xb7, 0xc7, 0xe8 // 1
|
||||
data1 0xdb, 0x70, 0x20, 0x24 // 2
|
||||
data1 0xee, 0xce, 0xbd, 0xc1 // 3
|
||||
data1 0xaf, 0x0f, 0x7c, 0xf5 // 4
|
||||
data1 0x2a, 0xc6, 0x87, 0x47 // 5
|
||||
data1 0x13, 0x46, 0x30, 0xa8 // 6
|
||||
data1 0x01, 0x95, 0x46, 0xfd // 7
|
||||
data1 0xd8, 0x98, 0x80, 0x69 // 8
|
||||
data1 0xaf, 0xf7, 0x44, 0x8b // 9
|
||||
data1 0xb1, 0x5b, 0xff, 0xff // 10
|
||||
data1 0xbe, 0xd7, 0x5c, 0x89 // 11
|
||||
data1 0x22, 0x11, 0x90, 0x6b // 12
|
||||
data1 0x93, 0x71, 0x98, 0xfd // 13
|
||||
data1 0x8e, 0x43, 0x79, 0xa6 // 14
|
||||
data1 0x21, 0x08, 0xb4, 0x49 // 15
|
||||
data1 0x62, 0x25, 0x1e, 0xf6 // 16
|
||||
data1 0x40, 0xb3, 0x40, 0xc0 // 17
|
||||
data1 0x51, 0x5a, 0x5e, 0x26 // 18
|
||||
data1 0xaa, 0xc7, 0xb6, 0xe9 // 19
|
||||
data1 0x5d, 0x10, 0x2f, 0xd6 // 20
|
||||
data1 0x53, 0x14, 0x44, 0x02 // 21
|
||||
data1 0x81, 0xe6, 0xa1, 0xd8 // 22
|
||||
data1 0xc8, 0xfb, 0xd3, 0xe7 // 23
|
||||
data1 0xe6, 0xcd, 0xe1, 0x21 // 24
|
||||
data1 0xd6, 0x07, 0x37, 0xc3 // 25
|
||||
data1 0x87, 0x0d, 0xd5, 0xf4 // 26
|
||||
data1 0xed, 0x14, 0x5a, 0x45 // 27
|
||||
data1 0x05, 0xe9, 0xe3, 0xa9 // 28
|
||||
data1 0xf8, 0xa3, 0xef, 0xfc // 29
|
||||
data1 0xd9, 0x02, 0x6f, 0x67 // 30
|
||||
data1 0x8a, 0x4c, 0x2a, 0x8d // 31
|
||||
data1 0x42, 0x39, 0xfa, 0xff // 32
|
||||
data1 0x81, 0xf6, 0x71, 0x87 // 33
|
||||
data1 0x22, 0x61, 0x9d, 0x6d // 34
|
||||
data1 0x0c, 0x38, 0xe5, 0xfd // 35
|
||||
data1 0x44, 0xea, 0xbe, 0xa4 // 36
|
||||
data1 0xa9, 0xcf, 0xde, 0x4b // 37
|
||||
data1 0x60, 0x4b, 0xbb, 0xf6 // 38
|
||||
data1 0x70, 0xbc, 0xbf, 0xbe // 39
|
||||
data1 0xc6, 0x7e, 0x9b, 0x28 // 40
|
||||
data1 0xfa, 0x27, 0xa1, 0xea // 41
|
||||
data1 0x85, 0x30, 0xef, 0xd4 // 42
|
||||
data1 0x05, 0x1d, 0x88, 0x04 // 43
|
||||
data1 0x39, 0xd0, 0xd4, 0xd9 // 44
|
||||
data1 0xe5, 0x99, 0xdb, 0xe6 // 45
|
||||
data1 0xf8, 0x7c, 0xa2, 0x1f // 46
|
||||
data1 0x65, 0x56, 0xac, 0xc4 // 47
|
||||
data1 0x44, 0x22, 0x29, 0xf4 // 48
|
||||
data1 0x97, 0xff, 0x2a, 0x43 // 49
|
||||
data1 0xa7, 0x23, 0x94, 0xab // 50
|
||||
data1 0x39, 0xa0, 0x93, 0xfc // 51
|
||||
data1 0xc3, 0x59, 0x5b, 0x65 // 52
|
||||
data1 0x92, 0xcc, 0x0c, 0x8f // 53
|
||||
data1 0x7d, 0xf4, 0xef, 0xff // 54
|
||||
data1 0xd1, 0x5d, 0x84, 0x85 // 55
|
||||
data1 0x4f, 0x7e, 0xa8, 0x6f // 56
|
||||
data1 0xe0, 0xe6, 0x2c, 0xfe // 57
|
||||
data1 0x14, 0x43, 0x01, 0xa3 // 58
|
||||
data1 0xa1, 0x11, 0x08, 0x4e // 59
|
||||
data1 0x82, 0x7e, 0x53, 0xf7 // 60
|
||||
data1 0x35, 0xf2, 0x3a, 0xbd // 61
|
||||
data1 0xbb, 0xd2, 0xd7, 0x2a // 62
|
||||
data1 0x91, 0xd3, 0x86, 0xeb // 63
|
||||
#define pAgain p63
|
||||
#define pOff p63
|
||||
|
||||
.text
|
||||
|
||||
|
@ -320,52 +258,47 @@ md5_round_constants:
|
|||
|
||||
*/
|
||||
|
||||
// md5_block_asm_data_order: entry stub for the "data order" variant.
// It only selects the variant via the pDataOrder/pHostOrder predicate
// pair and then joins the shared body at .md5_block; no other register
// is read or written here.
.type md5_block_asm_data_order, @function
|
||||
.global md5_block_asm_data_order
|
||||
.align 32
|
||||
.proc md5_block_asm_data_order
|
||||
md5_block_asm_data_order:
|
||||
{ .mib
|
||||
// r0 compared with itself is always equal, so this sets pDataOrder and
// clears pHostOrder (NOTE(review): relies on the usual IA-64 two-target
// cmp.eq semantics -- confirm against the ISA manual).
cmp.eq pDataOrder,pHostOrder = r0,r0
|
||||
// Continue in the shared code, which tests pDataOrder to pick
// .md5_tbl_data_order and, under _HPUX_SOURCE/B_ENDIAN, to toggle psr.be.
br.sptk.many .md5_block
|
||||
};;
|
||||
.endp md5_block_asm_data_order
|
||||
|
||||
.type md5_block_asm_host_order, @function
|
||||
.global md5_block_asm_host_order
|
||||
|
||||
.align 32
|
||||
.proc md5_block_asm_host_order
|
||||
md5_block_asm_host_order:
|
||||
.prologue
|
||||
#ifndef __LP64__
|
||||
{ .mib
|
||||
cmp.eq pHostOrder,pDataOrder = r0,r0
|
||||
};;
|
||||
.md5_block:
|
||||
{ .mmi
|
||||
.save ar.pfs, PFSSave
|
||||
.save ar.pfs, PFSSave
|
||||
alloc PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
|
||||
addp4 DPtrIn = 0, DPtrIn
|
||||
addp4 CtxPtr0 = 0, CtxPtr0
|
||||
ADDP CtxPtr1 = 8, CtxPtr0
|
||||
mov CTable = ip
|
||||
}
|
||||
{ .mmi
|
||||
ADDP DPtrIn = 0, DPtrIn
|
||||
ADDP CtxPtr0 = 0, CtxPtr0
|
||||
.save ar.lc, LCSave
|
||||
mov LCSave = ar.lc
|
||||
}
|
||||
;;
|
||||
.pred.rel "mutex",pDataOrder,pHostOrder
|
||||
{ .mmi
|
||||
nop 0x0
|
||||
(pDataOrder) add CTable = .md5_tbl_data_order#-.md5_block#, CTable
|
||||
(pHostOrder) add CTable = .md5_tbl_host_order#-.md5_block#, CTable
|
||||
and InAlign = 0x3, DPtrIn
|
||||
.save ar.lc, LCSave
|
||||
mov LCSave = ar.lc
|
||||
}
|
||||
#else
|
||||
{ .mmi
|
||||
.save ar.pfs, PFSSave
|
||||
alloc PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
|
||||
and InAlign = 0x3, DPtrIn
|
||||
.save ar.lc, LCSave
|
||||
mov LCSave = ar.lc
|
||||
}
|
||||
#endif
|
||||
|
||||
{ .mmi
|
||||
addl CTable = @ltoffx(md5_round_constants), gp
|
||||
;;
|
||||
ld8.mov CTable = [CTable], md5_round_constants // native byte-order
|
||||
add CtxPtr1 = 8, CtxPtr0
|
||||
}
|
||||
#ifdef B_ENDIAN
|
||||
{
|
||||
.mmi
|
||||
rum psr.be // switch to little-endian mode
|
||||
nop.m 0x0
|
||||
nop.i 0x0
|
||||
}
|
||||
#endif
|
||||
;;
|
||||
{ .mmi
|
||||
ld4 AccumA = [CtxPtr0], 4
|
||||
ld4 AccumC = [CtxPtr1], 4
|
||||
|
@ -379,15 +312,12 @@ md5_block_asm_host_order:
|
|||
ld4 AccumD = [CtxPtr1]
|
||||
dep DPtr_ = 0, DPtrIn, 0, 2
|
||||
} ;;
|
||||
|
||||
{ .mmi
|
||||
#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
|
||||
(pDataOrder) rum psr.be;; // switch to little-endian
|
||||
#endif
|
||||
{ .mmb
|
||||
ld4 CTable0 = [CTable], 4
|
||||
cmp.ne pOff, p0 = 0, InAlign
|
||||
} ;;
|
||||
|
||||
{ .mib
|
||||
nop.m 0x0
|
||||
nop.i 0x0
|
||||
(pOff) br.cond.spnt.many .md5_unaligned
|
||||
} ;;
|
||||
|
||||
|
@ -431,9 +361,9 @@ md5_block_asm_host_order:
|
|||
} ;;
|
||||
|
||||
.md5_exit:
|
||||
// Note that we switch back to the entry endianness AFTER storing so
|
||||
// that the memory image of the hash is preserved.
|
||||
|
||||
#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
|
||||
(pDataOrder) sum psr.be;; // switch back to big-endian mode
|
||||
#endif
|
||||
{ .mmi
|
||||
st4 [CtxPtr0] = AccumB, -4
|
||||
st4 [CtxPtr1] = AccumD, -4
|
||||
|
@ -445,9 +375,6 @@ md5_block_asm_host_order:
|
|||
mov ar.lc = LCSave
|
||||
} ;;
|
||||
{ .mib
|
||||
#ifdef B_ENDIAN
|
||||
sum psr.be // switch back to big-endian mode
|
||||
#endif
|
||||
mov ar.pfs = PFSSave
|
||||
br.ret.sptk.few rp
|
||||
} ;;
|
||||
|
@ -1001,9 +928,99 @@ md5_digest_block##offset: \
|
|||
nop 0x0 ; \
|
||||
nop 0x0 ; \
|
||||
br.cond.sptk.many md5_digest_GHI ; \
|
||||
} ; \
|
||||
} ;; \
|
||||
.endp md5digestBlock ## offset
|
||||
|
||||
MD5FBLOCK(1)
|
||||
MD5FBLOCK(2)
|
||||
MD5FBLOCK(3)
|
||||
|
||||
.align 64
|
||||
.type md5_constants, @object
|
||||
md5_constants:
|
||||
.md5_tbl_data_order: // To ensure little-endian data
|
||||
// order, code as bytes.
|
||||
data1 0x78, 0xa4, 0x6a, 0xd7 // 0
|
||||
data1 0x56, 0xb7, 0xc7, 0xe8 // 1
|
||||
data1 0xdb, 0x70, 0x20, 0x24 // 2
|
||||
data1 0xee, 0xce, 0xbd, 0xc1 // 3
|
||||
data1 0xaf, 0x0f, 0x7c, 0xf5 // 4
|
||||
data1 0x2a, 0xc6, 0x87, 0x47 // 5
|
||||
data1 0x13, 0x46, 0x30, 0xa8 // 6
|
||||
data1 0x01, 0x95, 0x46, 0xfd // 7
|
||||
data1 0xd8, 0x98, 0x80, 0x69 // 8
|
||||
data1 0xaf, 0xf7, 0x44, 0x8b // 9
|
||||
data1 0xb1, 0x5b, 0xff, 0xff // 10
|
||||
data1 0xbe, 0xd7, 0x5c, 0x89 // 11
|
||||
data1 0x22, 0x11, 0x90, 0x6b // 12
|
||||
data1 0x93, 0x71, 0x98, 0xfd // 13
|
||||
data1 0x8e, 0x43, 0x79, 0xa6 // 14
|
||||
data1 0x21, 0x08, 0xb4, 0x49 // 15
|
||||
data1 0x62, 0x25, 0x1e, 0xf6 // 16
|
||||
data1 0x40, 0xb3, 0x40, 0xc0 // 17
|
||||
data1 0x51, 0x5a, 0x5e, 0x26 // 18
|
||||
data1 0xaa, 0xc7, 0xb6, 0xe9 // 19
|
||||
data1 0x5d, 0x10, 0x2f, 0xd6 // 20
|
||||
data1 0x53, 0x14, 0x44, 0x02 // 21
|
||||
data1 0x81, 0xe6, 0xa1, 0xd8 // 22
|
||||
data1 0xc8, 0xfb, 0xd3, 0xe7 // 23
|
||||
data1 0xe6, 0xcd, 0xe1, 0x21 // 24
|
||||
data1 0xd6, 0x07, 0x37, 0xc3 // 25
|
||||
data1 0x87, 0x0d, 0xd5, 0xf4 // 26
|
||||
data1 0xed, 0x14, 0x5a, 0x45 // 27
|
||||
data1 0x05, 0xe9, 0xe3, 0xa9 // 28
|
||||
data1 0xf8, 0xa3, 0xef, 0xfc // 29
|
||||
data1 0xd9, 0x02, 0x6f, 0x67 // 30
|
||||
data1 0x8a, 0x4c, 0x2a, 0x8d // 31
|
||||
data1 0x42, 0x39, 0xfa, 0xff // 32
|
||||
data1 0x81, 0xf6, 0x71, 0x87 // 33
|
||||
data1 0x22, 0x61, 0x9d, 0x6d // 34
|
||||
data1 0x0c, 0x38, 0xe5, 0xfd // 35
|
||||
data1 0x44, 0xea, 0xbe, 0xa4 // 36
|
||||
data1 0xa9, 0xcf, 0xde, 0x4b // 37
|
||||
data1 0x60, 0x4b, 0xbb, 0xf6 // 38
|
||||
data1 0x70, 0xbc, 0xbf, 0xbe // 39
|
||||
data1 0xc6, 0x7e, 0x9b, 0x28 // 40
|
||||
data1 0xfa, 0x27, 0xa1, 0xea // 41
|
||||
data1 0x85, 0x30, 0xef, 0xd4 // 42
|
||||
data1 0x05, 0x1d, 0x88, 0x04 // 43
|
||||
data1 0x39, 0xd0, 0xd4, 0xd9 // 44
|
||||
data1 0xe5, 0x99, 0xdb, 0xe6 // 45
|
||||
data1 0xf8, 0x7c, 0xa2, 0x1f // 46
|
||||
data1 0x65, 0x56, 0xac, 0xc4 // 47
|
||||
data1 0x44, 0x22, 0x29, 0xf4 // 48
|
||||
data1 0x97, 0xff, 0x2a, 0x43 // 49
|
||||
data1 0xa7, 0x23, 0x94, 0xab // 50
|
||||
data1 0x39, 0xa0, 0x93, 0xfc // 51
|
||||
data1 0xc3, 0x59, 0x5b, 0x65 // 52
|
||||
data1 0x92, 0xcc, 0x0c, 0x8f // 53
|
||||
data1 0x7d, 0xf4, 0xef, 0xff // 54
|
||||
data1 0xd1, 0x5d, 0x84, 0x85 // 55
|
||||
data1 0x4f, 0x7e, 0xa8, 0x6f // 56
|
||||
data1 0xe0, 0xe6, 0x2c, 0xfe // 57
|
||||
data1 0x14, 0x43, 0x01, 0xa3 // 58
|
||||
data1 0xa1, 0x11, 0x08, 0x4e // 59
|
||||
data1 0x82, 0x7e, 0x53, 0xf7 // 60
|
||||
data1 0x35, 0xf2, 0x3a, 0xbd // 61
|
||||
data1 0xbb, 0xd2, 0xd7, 0x2a // 62
|
||||
data1 0x91, 0xd3, 0x86, 0xeb // 63
|
||||
|
||||
.md5_tbl_host_order: // OS data order, might as well
|
||||
// be little-endian.
|
||||
data4 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee // 0
|
||||
data4 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 // 4
|
||||
data4 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be // 8
|
||||
data4 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 // 12
|
||||
data4 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa // 16
|
||||
data4 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 // 20
|
||||
data4 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed // 24
|
||||
data4 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a // 28
|
||||
data4 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c // 32
|
||||
data4 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 // 36
|
||||
data4 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 // 40
|
||||
data4 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 // 44
|
||||
data4 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 // 48
|
||||
data4 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 // 52
|
||||
data4 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 // 56
|
||||
data4 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 // 60
|
||||
.size md5_constants#,64*4*2
|
||||
|
|
Loading…
Reference in a new issue