Reorganize and speed up MD5.
Submitted by: Andy Polyakov <appro@fy.chalmers.se>
This commit is contained in:
parent
7d7d2cbcb0
commit
bd3576d2dd
10 changed files with 1872 additions and 345 deletions
3
CHANGES
3
CHANGES
|
@ -5,6 +5,9 @@
|
|||
|
||||
Changes between 0.9.2b and 0.9.3
|
||||
|
||||
*) Reorganize and speed up MD5.
|
||||
[Andy Polyakov <appro@fy.chalmers.se>]
|
||||
|
||||
*) VMS support.
|
||||
[Richard Levitte <richard@levitte.org>]
|
||||
|
||||
|
|
|
@ -106,7 +106,7 @@ my %table=(
|
|||
# Solaris setups
|
||||
"solaris-x86-gcc","gcc:-O3 -fomit-frame-pointer -m486 -Wall -DL_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG $x86_gcc_des $x86_gcc_opts:$x86_sol_asm",
|
||||
"solaris-sparc-gcc","gcc:-O3 -fomit-frame-pointer -mv8 -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8.o::",
|
||||
"solaris-usparc-gcc","gcc:-O3 -fomit-frame-pointer -mcpu=ultrasparc -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8plus-gcc.o::",
|
||||
"solaris-usparc-gcc","gcc:-O3 -fomit-frame-pointer -mcpu=ultrasparc -Wall -DB_ENDIAN -DULTRASPARC:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8plus-gcc.o:::asm/md5-sparcv8plus.o:",
|
||||
"debug-solaris-sparc-gcc","gcc:-O3 -g -mv8 -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:::",
|
||||
"debug-solaris-usparc-gcc","gcc:-O3 -g -mcpu=ultrasparc -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8plus-gcc.o::",
|
||||
|
||||
|
@ -115,12 +115,11 @@ my %table=(
|
|||
# SC4 is ok, better than gcc even on bn as long as you tell it -xarch=v8
|
||||
# -fast slows things like DES down quite a lot
|
||||
# Don't use -xtarget=ultra with SC4.2. It is broken, and will break exptest.
|
||||
# SC5.0 with the compiler common patch works.
|
||||
"solaris-sparc-sc4","cc:-xarch=v8 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8.o::",
|
||||
"solaris-usparc-sc4","cc:-xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o::",
|
||||
# SC5.0 note: Compiler common patch 107357-01 or later is required!
|
||||
"solaris-usparc-sc5","cc:-xtarget=ultra -xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o::",
|
||||
"solaris64-usparc-sc5","cc:-xtarget=ultra -xarch=v9 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:SIXTY_FOUR_BIT_LONG RC4_CHAR DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:::",
|
||||
"solaris-usparc-sc5","cc:-xtarget=ultra -xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DULTRASPARC -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o:::asm/md5-sparcv8plus.o:",
|
||||
"solaris64-usparc-sc5","cc:-xtarget=ultra -xarch=v9 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DULTRASPARC:-D_REENTRANT:-lsocket -lnsl:SIXTY_FOUR_BIT_LONG RC4_CHAR DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::::asm/md5-sparcv9.o:",
|
||||
|
||||
# Sunos configs, assuming sparc for the gcc one.
|
||||
##"sunos-cc", "cc:-O4 -DNOPROTO -DNOCONST:(unknown)::DES_UNROLL:::",
|
||||
|
|
592
crypto/md32_common.h
Normal file
592
crypto/md32_common.h
Normal file
|
@ -0,0 +1,592 @@
|
|||
/* crypto/md32_common.h */
|
||||
/* ====================================================================
|
||||
* Copyright (c) 1999 The OpenSSL Project. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
|
||||
*
|
||||
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
||||
* endorse or promote products derived from this software without
|
||||
* prior written permission. For written permission, please contact
|
||||
* licensing@OpenSSL.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "OpenSSL"
|
||||
* nor may "OpenSSL" appear in their names without prior written
|
||||
* permission of the OpenSSL Project.
|
||||
*
|
||||
* 6. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* This product includes cryptographic software written by Eric Young
|
||||
* (eay@cryptsoft.com). This product includes software written by Tim
|
||||
* Hudson (tjh@cryptsoft.com).
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This is a generic 32 bit "collector" for message digest algorithms.
|
||||
* Whenever needed it collects input character stream into chunks of
|
||||
* 32 bit values and invokes a block function that performs actual hash
|
||||
* calculations.
|
||||
*
|
||||
* Porting guide.
|
||||
*
|
||||
* Obligatory macros:
|
||||
*
|
||||
* DATA_ORDER_IS_BIG_ENDIAN or DATA_ORDER_IS_LITTLE_ENDIAN
|
||||
* this macro defines byte order of input stream.
|
||||
* HASH_CBLOCK
|
||||
* size of a unit chunk HASH_BLOCK operates on.
|
||||
* HASH_LONG
|
||||
* has to be at least 32 bit wide, if it's wider, then
|
||||
* HASH_LONG_LOG2 *has to* be defined along
|
||||
* HASH_CTX
|
||||
* context structure that at least contains following
|
||||
* members:
|
||||
* typedef struct {
|
||||
* ...
|
||||
* HASH_LONG Nl,Nh;
|
||||
* HASH_LONG data[HASH_LBLOCK];
|
||||
* int num;
|
||||
* ...
|
||||
* } HASH_CTX;
|
||||
* HASH_UPDATE
|
||||
* name of "Update" function, implemented here.
|
||||
* HASH_TRANSFORM
|
||||
* name of "Transform" function, implemented here.
|
||||
* HASH_FINAL
|
||||
* name of "Final" function, implemented here.
|
||||
* HASH_BLOCK_HOST_ORDER
|
||||
* name of "block" function treating *aligned* input message
|
||||
* in host byte order, implemented externally.
|
||||
* HASH_BLOCK_DATA_ORDER
|
||||
* name of "block" function treating *unaligned* input message
|
||||
* in original (data) byte order, implemented externally (it
|
||||
* actually is optional if data and host are of the same
|
||||
* "endianness").
|
||||
*
|
||||
* Optional macros:
|
||||
*
|
||||
* B_ENDIAN or L_ENDIAN
|
||||
* defines host byte-order.
|
||||
* HASH_LONG_LOG2
|
||||
* defaults to 2 if not stated otherwise.
|
||||
* HASH_LBLOCK
|
||||
* assumed to be HASH_CBLOCK/4 if not stated otherwise.
|
||||
* HASH_BLOCK_DATA_ORDER_ALIGNED
|
||||
* alternative "block" function capable of treating
|
||||
* aligned input message in original (data) order,
|
||||
* implemented externally.
|
||||
*
|
||||
* MD5 example:
|
||||
*
|
||||
* #define DATA_ORDER_IS_LITTLE_ENDIAN
|
||||
*
|
||||
* #define HASH_LONG MD5_LONG
|
||||
* #define HASH_LONG_LOG2 MD5_LONG_LOG2
|
||||
* #define HASH_CTX MD5_CTX
|
||||
* #define HASH_CBLOCK MD5_CBLOCK
|
||||
* #define HASH_LBLOCK MD5_LBLOCK
|
||||
* #define HASH_UPDATE MD5_Update
|
||||
* #define HASH_TRANSFORM MD5_Transform
|
||||
* #define HASH_FINAL MD5_Final
|
||||
* #define HASH_BLOCK_HOST_ORDER md5_block_host_order
|
||||
* #define HASH_BLOCK_DATA_ORDER md5_block_data_order
|
||||
*
|
||||
* <appro@fy.chalmers.se>
|
||||
*/
|
||||
|
||||
#if !defined(DATA_ORDER_IS_BIG_ENDIAN) && !defined(DATA_ORDER_IS_LITTLE_ENDIAN)
|
||||
#error "DATA_ORDER must be defined!"
|
||||
#endif
|
||||
|
||||
#ifndef HASH_CBLOCK
|
||||
#error "HASH_CBLOCK must be defined!"
|
||||
#endif
|
||||
#ifndef HASH_LONG
|
||||
#error "HASH_LONG must be defined!"
|
||||
#endif
|
||||
#ifndef HASH_CTX
|
||||
#error "HASH_CTX must be defined!"
|
||||
#endif
|
||||
|
||||
#ifndef HASH_UPDATE
|
||||
#error "HASH_UPDATE must be defined!"
|
||||
#endif
|
||||
#ifndef HASH_TRANSFORM
|
||||
#error "HASH_TRANSFORM must be defined!"
|
||||
#endif
|
||||
#ifndef HASH_FINAL
|
||||
#error "HASH_FINAL must be defined!"
|
||||
#endif
|
||||
|
||||
#ifndef HASH_BLOCK_HOST_ORDER
|
||||
#error "HASH_BLOCK_HOST_ORDER must be defined!"
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Moved below as it's required only if HASH_BLOCK_DATA_ORDER_ALIGNED
|
||||
* isn't defined.
|
||||
*/
|
||||
#ifndef HASH_BLOCK_DATA_ORDER
|
||||
#error "HASH_BLOCK_DATA_ORDER must be defined!"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef HASH_LBLOCK
|
||||
#define HASH_LBLOCK (HASH_CBLOCK/4)
|
||||
#endif
|
||||
|
||||
#ifndef HASH_LONG_LOG2
|
||||
#define HASH_LONG_LOG2 2
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Engage compiler specific rotate intrinsic function if available.
|
||||
*/
|
||||
#undef ROTATE
|
||||
#ifndef PEDANTIC
|
||||
# if defined(_MSC_VER)
|
||||
# define ROTATE(a,n) _lrotl(a,n)
|
||||
# elif defined(__GNUC__) && __GNUC__>=2
|
||||
/*
|
||||
* Some GNU C inline assembler templates. Note that these are
|
||||
* rotates by *constant* number of bits! But that's exactly
|
||||
* what we need here...
|
||||
*
|
||||
* <appro@fy.chalmers.se>
|
||||
*/
|
||||
# if defined(__i386)
|
||||
# define ROTATE(a,n) ({ register unsigned int ret; \
|
||||
asm volatile ( \
|
||||
"roll %1,%0" \
|
||||
: "=r"(ret) \
|
||||
: "I"(n), "0"(a) \
|
||||
: "cc"); \
|
||||
ret; \
|
||||
})
|
||||
# elif defined(__powerpc)
|
||||
# define ROTATE(a,n) ({ register unsigned int ret; \
|
||||
asm volatile ( \
|
||||
"rlwinm %0,%1,%2,0,31" \
|
||||
: "=r"(ret) \
|
||||
: "r"(a), "I"(n)); \
|
||||
ret; \
|
||||
})
|
||||
# endif
|
||||
# endif
|
||||
|
||||
/*
|
||||
* Engage compiler specific "fetch in reverse byte order"
|
||||
* intrinsic function if available.
|
||||
*/
|
||||
# if defined(__GNUC__) && __GNUC__>=2
|
||||
/* some GNU C inline assembler templates by <appro@fy.chalmers.se> */
|
||||
# if defined(__i386) && !defined(I386_ONLY)
|
||||
# define BE_FETCH32(a) ({ register unsigned int l=(a);\
|
||||
asm volatile ( \
|
||||
"bswapl %0" \
|
||||
: "=r"(l) : "0"(l)); \
|
||||
l; \
|
||||
})
|
||||
# elif defined(__powerpc)
|
||||
# define LE_FETCH32(a) ({ register unsigned int l; \
|
||||
asm volatile ( \
|
||||
"lwbrx %0,0,%1" \
|
||||
: "=r"(l) \
|
||||
: "r"(a)); \
|
||||
l; \
|
||||
})
|
||||
|
||||
# elif defined(__sparc) && defined(ULTRASPARC)
|
||||
# define LE_FETCH32(a) ({ register unsigned int l; \
|
||||
asm volatile ( \
|
||||
"lda [%1]#ASI_PRIMARY_LITTLE,%0"\
|
||||
: "=r"(l) \
|
||||
: "r"(a)); \
|
||||
l; \
|
||||
})
|
||||
# endif
|
||||
# endif
|
||||
#endif /* PEDANTIC */
|
||||
|
||||
#if HASH_LONG_LOG2==2 /* Engage only if sizeof(HASH_LONG)== 4 */
|
||||
/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
|
||||
#ifdef ROTATE
|
||||
/* 5 instructions with rotate instruction, else 9 */
|
||||
#define REVERSE_FETCH32(a,l) ( \
|
||||
l=*(const HASH_LONG *)(a), \
|
||||
((ROTATE(l,8)&0x00FF00FF)|(ROTATE((l&0x00FF00FF),24))) \
|
||||
)
|
||||
#else
|
||||
/* 6 instructions with rotate instruction, else 8 */
|
||||
#define REVERSE_FETCH32(a,l) ( \
|
||||
l=*(const HASH_LONG *)(a), \
|
||||
l=(((l>>8)&0x00FF00FF)|((l&0x00FF00FF)<<8)), \
|
||||
ROTATE(l,16) \
|
||||
)
|
||||
/*
|
||||
* Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
|
||||
* It's rewritten as above for two reasons:
|
||||
* - RISCs aren't good at long constants and have to explicitly
|
||||
* compose 'em with several (well, usually 2) instructions in a
|
||||
* register before performing the actual operation and (as you
|
||||
* already realized:-) having same constant should inspire the
|
||||
* compiler to permanently allocate the only register for it;
|
||||
* - most modern CPUs have two ALUs, but usually only one has
|
||||
* circuitry for shifts:-( this minor tweak inspires compiler
|
||||
* to schedule shift instructions in a better way...
|
||||
*
|
||||
* <appro@fy.chalmers.se>
|
||||
*/
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ROTATE
|
||||
#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Make some obvious choices. E.g., HASH_BLOCK_DATA_ORDER_ALIGNED
|
||||
* and HASH_BLOCK_HOST_ORDER ought to be the same if input data
|
||||
* and host are of the same "endianness". It's possible to mask
|
||||
* this with blank #define HASH_BLOCK_DATA_ORDER though...
|
||||
*
|
||||
* <appro@fy.chalmers.se>
|
||||
*/
|
||||
#if defined(B_ENDIAN)
|
||||
# if defined(DATA_ORDER_IS_BIG_ENDIAN)
|
||||
# if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
|
||||
# define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
|
||||
# endif
|
||||
# elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
|
||||
# ifndef HOST_FETCH32
|
||||
# ifdef LE_FETCH32
|
||||
# define HOST_FETCH32(p,l) LE_FETCH32(p)
|
||||
# elif defined(REVERSE_FETCH32)
|
||||
# define HOST_FETCH32(p,l) REVERSE_FETCH32(p,l)
|
||||
# endif
|
||||
# endif
|
||||
# endif
|
||||
#elif defined(L_ENDIAN)
|
||||
# if defined(DATA_ORDER_IS_LITTLE_ENDIAN)
|
||||
# if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
|
||||
# define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
|
||||
# endif
|
||||
# elif defined(DATA_ORDER_IS_BIG_ENDIAN)
|
||||
# ifndef HOST_FETCH32
|
||||
# ifdef BE_FETCH32
|
||||
# define HOST_FETCH32(p,l) BE_FETCH32(p)
|
||||
# elif defined(REVERSE_FETCH32)
|
||||
# define HOST_FETCH32(p,l) REVERSE_FETCH32(p,l)
|
||||
# endif
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_BLOCK_DATA_ORDER_ALIGNED!=1
|
||||
#ifndef HASH_BLOCK_DATA_ORDER
|
||||
#error "HASH_BLOCK_DATA_ORDER must be defined!"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(DATA_ORDER_IS_BIG_ENDIAN)
|
||||
|
||||
#define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++)))<<24), \
|
||||
l|=(((unsigned long)(*((c)++)))<<16), \
|
||||
l|=(((unsigned long)(*((c)++)))<< 8), \
|
||||
l|=(((unsigned long)(*((c)++))) ), \
|
||||
l)
|
||||
#define HOST_p_c2l(c,l,n) { \
|
||||
switch (n) { \
|
||||
case 0: l =((unsigned long)(*((c)++)))<<24; \
|
||||
case 1: l|=((unsigned long)(*((c)++)))<<16; \
|
||||
case 2: l|=((unsigned long)(*((c)++)))<< 8; \
|
||||
case 3: l|=((unsigned long)(*((c)++))); \
|
||||
} }
|
||||
#define HOST_p_c2l_p(c,l,sc,len) { \
|
||||
switch (sc) { \
|
||||
case 0: l =((unsigned long)(*((c)++)))<<24; \
|
||||
if (--len == 0) break; \
|
||||
case 1: l|=((unsigned long)(*((c)++)))<<16; \
|
||||
if (--len == 0) break; \
|
||||
case 2: l|=((unsigned long)(*((c)++)))<< 8; \
|
||||
} }
|
||||
/* NOTE the pointer is not incremented at the end of this */
|
||||
#define HOST_c2l_p(c,l,n) { \
|
||||
l=0; (c)+=n; \
|
||||
switch (n) { \
|
||||
case 3: l =((unsigned long)(*(--(c))))<< 8; \
|
||||
case 2: l|=((unsigned long)(*(--(c))))<<16; \
|
||||
case 1: l|=((unsigned long)(*(--(c))))<<24; \
|
||||
} }
|
||||
#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \
|
||||
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
|
||||
*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
|
||||
*((c)++)=(unsigned char)(((l) )&0xff), \
|
||||
l)
|
||||
|
||||
#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
|
||||
|
||||
#define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++))) ), \
|
||||
l|=(((unsigned long)(*((c)++)))<< 8), \
|
||||
l|=(((unsigned long)(*((c)++)))<<16), \
|
||||
l|=(((unsigned long)(*((c)++)))<<24), \
|
||||
l)
|
||||
#define HOST_p_c2l(c,l,n) { \
|
||||
switch (n) { \
|
||||
case 0: l =((unsigned long)(*((c)++))); \
|
||||
case 1: l|=((unsigned long)(*((c)++)))<< 8; \
|
||||
case 2: l|=((unsigned long)(*((c)++)))<<16; \
|
||||
case 3: l|=((unsigned long)(*((c)++)))<<24; \
|
||||
} }
|
||||
#define HOST_p_c2l_p(c,l,sc,len) { \
|
||||
switch (sc) { \
|
||||
case 0: l =((unsigned long)(*((c)++))); \
|
||||
if (--len == 0) break; \
|
||||
case 1: l|=((unsigned long)(*((c)++)))<< 8; \
|
||||
if (--len == 0) break; \
|
||||
case 2: l|=((unsigned long)(*((c)++)))<<16; \
|
||||
} }
|
||||
/* NOTE the pointer is not incremented at the end of this */
|
||||
#define HOST_c2l_p(c,l,n) { \
|
||||
l=0; (c)+=n; \
|
||||
switch (n) { \
|
||||
case 3: l =((unsigned long)(*(--(c))))<<16; \
|
||||
case 2: l|=((unsigned long)(*(--(c))))<< 8; \
|
||||
case 1: l|=((unsigned long)(*(--(c)))); \
|
||||
} }
|
||||
#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
|
||||
*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
|
||||
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
|
||||
*((c)++)=(unsigned char)(((l)>>24)&0xff), \
|
||||
l)
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Time for some action:-)
|
||||
*/
|
||||
|
||||
/*
 * HASH_UPDATE: absorb len bytes from data into the hash context c.
 *
 * Adds len*8 to the running bit counter (c->Nl holds the low 32 bits,
 * c->Nh receives the carry and the upper bits), tops up a partially
 * filled c->data block left over from an earlier call, passes every
 * complete HASH_CBLOCK-byte block to a block function, and stores the
 * remaining tail (fewer than HASH_CBLOCK bytes) in c->data, recording
 * its length in bytes in c->num.  A zero len is a no-op.
 */
void HASH_UPDATE (HASH_CTX *c, const unsigned char *data, unsigned long len)
|
||||
{
|
||||
register HASH_LONG * p;
|
||||
register unsigned long l;
|
||||
int sw,sc,ew,ec;
|
||||

||||
if (len==0) return;
|
||||

||||
/* new low word of the bit count; a wrap-around is detected just below */
l=(c->Nl+(len<<3))&0xffffffffL;
|
||||
/* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
|
||||
* Wei Dai <weidai@eskimo.com> for pointing it out. */
|
||||
if (l < c->Nl) /* overflow */
|
||||
c->Nh++;
|
||||
/* bits of len*8 that do not fit in the low 32-bit word */
c->Nh+=(len>>29);
|
||||
c->Nl=l;
|
||||

||||
/*
 * Bytes are already buffered from a previous call: sw is the index of
 * the word currently being filled, sc the byte offset inside it.
 */
if (c->num != 0)
|
||||
{
|
||||
p=c->data;
|
||||
sw=c->num>>2;
|
||||
sc=c->num&0x03;
|
||||

||||
/*
 * Enough input to complete the buffered block: finish the partial
 * word, fill the remaining words and hash the block from c->data.
 */
if ((c->num+len) >= HASH_CBLOCK)
|
||||
{
|
||||
l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
|
||||
for (; sw<HASH_LBLOCK; sw++)
|
||||
{
|
||||
HOST_c2l(data,l); p[sw]=l;
|
||||
}
|
||||
HASH_BLOCK_HOST_ORDER (c,p,1);
|
||||
len-=(HASH_CBLOCK-c->num);
|
||||
c->num=0;
|
||||
/* drop through and do the rest */
|
||||
}
|
||||
/* Still short of a full block: append to the buffer and return. */
else
|
||||
{
|
||||
c->num+=len;
|
||||
if ((sc+len) < 4) /* ugly, add char's to a word */
|
||||
{
|
||||
/* note: HOST_p_c2l_p decrements len as it consumes bytes */
l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* ew/ec: word index and byte offset where the buffer will end */
ew=(c->num>>2);
|
||||
ec=(c->num&0x03);
|
||||
l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
|
||||
for (; sw < ew; sw++)
|
||||
{
|
||||
HOST_c2l(data,l); p[sw]=l;
|
||||
}
|
||||
if (ec)
|
||||
{
|
||||
HOST_c2l_p(data,l,ec); p[sw]=l;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||

||||
/* sw = number of complete blocks available in the remaining input */
sw=len/HASH_CBLOCK;
|
||||
if (sw > 0)
|
||||
{
|
||||
#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_BLOCK_DATA_ORDER_ALIGNED!=1
|
||||
/*
|
||||
* Note that HASH_BLOCK_DATA_ORDER_ALIGNED gets defined
|
||||
* only if sizeof(HASH_LONG)==4.
|
||||
*/
|
||||
/* input on a 4-byte boundary is handed to the block function in place */
if ((((unsigned long)data)%4) == 0)
|
||||
{
|
||||
HASH_BLOCK_DATA_ORDER_ALIGNED (c,(HASH_LONG *)data,sw);
|
||||
/* sw is reused here as the number of bytes just consumed */
sw*=HASH_CBLOCK;
|
||||
data+=sw;
|
||||
len-=sw;
|
||||
}
|
||||
else
|
||||
#if !defined(HASH_BLOCK_DATA_ORDER)
|
||||
/*
 * No block function for unaligned input is available: copy each
 * block into c->data and run the aligned variant on the copy.
 */
while (sw--)
|
||||
{
|
||||
memcpy (p=c->data,data,HASH_CBLOCK);
|
||||
HASH_BLOCK_DATA_ORDER_ALIGNED(c,p,1);
|
||||
data+=HASH_CBLOCK;
|
||||
len-=HASH_CBLOCK;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
/*
 * When the aligned variant above is compiled in, this block is the body
 * of its "else"; otherwise it runs for every call with sw > 0.
 */
#if defined(HASH_BLOCK_DATA_ORDER)
|
||||
{
|
||||
HASH_BLOCK_DATA_ORDER (c,(HASH_LONG *)data,sw);
|
||||
sw*=HASH_CBLOCK;
|
||||
data+=sw;
|
||||
len-=sw;
|
||||
}
|
||||
#endif
|
||||
}
|
||||

||||
/*
 * Buffer the tail: whole words first, then the 0-3 leftover bytes packed
 * into one more word.  HOST_c2l_p zeroes l before packing, so that word
 * is written (as 0) even when ec is 0.
 */
if (len!=0)
|
||||
{
|
||||
p = c->data;
|
||||
c->num = len;
|
||||
ew=len>>2; /* words to copy */
|
||||
ec=len&0x03;
|
||||
for (; ew; ew--,p++)
|
||||
{
|
||||
HOST_c2l(data,l); *p=l;
|
||||
}
|
||||
HOST_c2l_p(data,l,ec);
|
||||
*p=l;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * HASH_TRANSFORM: run the block function once over the single
 * HASH_CBLOCK-byte block at data, updating context c.
 *
 * Which block function is called is decided at compile time and by the
 * alignment of data:
 *  - aligned variant available and data on a 4-byte boundary: the
 *    aligned variant reads data in place;
 *  - aligned variant available, data unaligned, and no
 *    HASH_BLOCK_DATA_ORDER: the block is copied into c->data first
 *    (overwriting whatever c->data held) and the aligned variant runs
 *    on the copy;
 *  - otherwise HASH_BLOCK_DATA_ORDER reads data in place.
 */
void HASH_TRANSFORM (HASH_CTX *c, unsigned char *data)
|
||||
{
|
||||
#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_BLOCK_DATA_ORDER_ALIGNED!=1
|
||||
if ((((unsigned long)data)%4) == 0)
|
||||
HASH_BLOCK_DATA_ORDER_ALIGNED (c,(HASH_LONG *)data,1);
|
||||
else
|
||||
#if !defined(HASH_BLOCK_DATA_ORDER)
|
||||
{
|
||||
memcpy (c->data,data,HASH_CBLOCK);
|
||||
HASH_BLOCK_DATA_ORDER_ALIGNED (c,c->data,1);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
/*
 * With the aligned variant compiled in, this statement is the body of
 * the "else" above; without it, it is the whole function body.
 */
#if defined(HASH_BLOCK_DATA_ORDER)
|
||||
HASH_BLOCK_DATA_ORDER (c,(HASH_LONG *)data,1);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*
 * HASH_FINAL: finish the hash held in c and write the digest to md.
 *
 * Appends the 0x80 padding byte after the buffered data, zero-fills up
 * to the last two words of the block, stores the bit count (c->Nl,
 * c->Nh) there and runs the block function; an extra block is processed
 * first when the length words do not fit.  The state words c->A, c->B,
 * c->C and c->D are then written to md through HOST_l2c, four bytes
 * each.  c->num is reset to 0; the rest of the context is NOT cleared
 * (the memset at the end is commented out).
 */
void HASH_FINAL (unsigned char *md, HASH_CTX *c)
|
||||
{
|
||||
register HASH_LONG *p;
|
||||
register unsigned long l;
|
||||
register int i,j;
|
||||
/* padding source: a single 0x80 byte followed by zero bytes */
static const unsigned char end[4]={0x80,0x00,0x00,0x00};
|
||||
const unsigned char *cp=end;
|
||||

||||
/* c->num should definitely have room for at least one more byte. */
|
||||
p=c->data;
|
||||
/* i: index of the word being filled, j: valid bytes already in it */
i=c->num>>2;
|
||||
j=c->num&0x03;
|
||||

||||
#if 0
|
||||
/* purify often complains about the following line as an
|
||||
* Uninitialized Memory Read. While this can be true, the
|
||||
* following p_c2l macro will reset l when that case is true.
|
||||
* This is because j&0x03 contains the number of 'valid' bytes
|
||||
* already in p[i]. If and only if j&0x03 == 0, the UMR will
|
||||
* occur but this is also the only time p_c2l will do
|
||||
* l= *(cp++) instead of l|= *(cp++)
|
||||
* Many thanks to Alex Tang <altitude@cic.net> for picking up this
|
||||
* 'potential bug' */
|
||||
#ifdef PURIFY
|
||||
if (j==0) p[i]=0; /* Yeah, but that's not the way to fix it:-) */
|
||||
#endif
|
||||
l=p[i];
|
||||
#else
|
||||
/* start from the partial word, or from 0 when it holds no valid bytes */
l = (j==0) ? 0 : p[i];
|
||||
#endif
|
||||
/* fills the remaining 4-j bytes of the word from end[]: 0x80, then zeros */
HOST_p_c2l(cp,l,j); p[i++]=l; /* i is the next 'undefined word' */
|
||||

||||
/*
 * The two length words do not fit in this block: zero the last word if
 * it is still unused, process the block and continue in a fresh one.
 */
if (i>(HASH_LBLOCK-2)) /* save room for Nl and Nh */
|
||||
{
|
||||
if (i<HASH_LBLOCK) p[i]=0;
|
||||
HASH_BLOCK_HOST_ORDER (c,p,1);
|
||||
i=0;
|
||||
}
|
||||
for (; i<(HASH_LBLOCK-2); i++)
|
||||
p[i]=0;
|
||||

||||
/*
 * The bit count occupies the last two words; which half comes first
 * follows the byte order of the data stream.
 */
#if defined(DATA_ORDER_IS_BIG_ENDIAN)
|
||||
p[HASH_LBLOCK-2]=c->Nh;
|
||||
p[HASH_LBLOCK-1]=c->Nl;
|
||||
#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
|
||||
p[HASH_LBLOCK-2]=c->Nl;
|
||||
p[HASH_LBLOCK-1]=c->Nh;
|
||||
#endif
|
||||
HASH_BLOCK_HOST_ORDER (c,p,1);
|
||||

||||
/* serialize the four state words into md; HOST_l2c advances md */
l=c->A; HOST_l2c(l,md);
|
||||
l=c->B; HOST_l2c(l,md);
|
||||
l=c->C; HOST_l2c(l,md);
|
||||
l=c->D; HOST_l2c(l,md);
|
||||

||||
c->num=0;
|
||||
/* clear stuff, HASH_BLOCK may be leaving some stuff on the stack
|
||||
* but I'm not worried :-)
|
||||
memset((void *)c,0,sizeof(HASH_CTX));
|
||||
*/
|
||||
}
|
|
@ -66,6 +66,14 @@ asm/mx86bsdi.o: asm/mx86unix.cpp
|
|||
asm/mx86unix.cpp: asm/md5-586.pl
|
||||
(cd asm; $(PERL) md5-586.pl cpp >mx86unix.cpp)
|
||||
|
||||
# works for both SC and gcc
|
||||
asm/md5-sparcv8plus.o: asm/md5-sparcv9.S
|
||||
$(CPP) -DULTRASPARC -DMD5_BLOCK_DATA_ORDER asm/md5-sparcv9.S | as -xarch=v8plus /dev/fd/0 -o asm/md5-sparcv8plus.o
|
||||
|
||||
asm/md5-sparcv9.o: asm/md5-sparcv9.S
|
||||
$(CC) -xarch=v9 -DULTRASPARC -DMD5_BLOCK_DATA_ORDER -c asm/md5-sparcv9.S -o asm/md5-sparcv9.o
|
||||
|
||||
|
||||
files:
|
||||
$(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO
|
||||
|
||||
|
@ -103,5 +111,5 @@ clean:
|
|||
# DO NOT DELETE THIS LINE -- make depend depends on it.
|
||||
|
||||
md5_dgst.o: ../../include/openssl/md5.h ../../include/openssl/opensslv.h
|
||||
md5_dgst.o: md5_locl.h
|
||||
md5_dgst.o: ../md32_common.h md5_locl.h
|
||||
md5_one.o: ../../include/openssl/md5.h md5_locl.h
|
||||
|
|
|
@ -29,7 +29,7 @@ $X="esi";
|
|||
0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3
|
||||
);
|
||||
|
||||
&md5_block("md5_block_x86");
|
||||
&md5_block("md5_block_asm_host_order");
|
||||
&asm_finish();
|
||||
|
||||
sub Np
|
||||
|
@ -183,6 +183,7 @@ sub md5_block
|
|||
&mov($X, &wparam(1)); # esi
|
||||
&mov($C, &wparam(2));
|
||||
&push("ebp");
|
||||
&shl($C, 6);
|
||||
&push("ebx");
|
||||
&add($C, $X); # offset we end at
|
||||
&sub($C, 64);
|
||||
|
|
1035
crypto/md5/asm/md5-sparcv9.S
Normal file
1035
crypto/md5/asm/md5-sparcv9.S
Normal file
File diff suppressed because it is too large
Load diff
|
@ -67,23 +67,43 @@ extern "C" {
|
|||
#error MD5 is disabled.
|
||||
#endif
|
||||
|
||||
/*
|
||||
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
* ! MD5_LONG has to be at least 32 bits wide. If it's wider, then !
|
||||
* ! MD5_LONG_LOG2 has to be defined along. !
|
||||
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
*/
|
||||
|
||||
#if defined(WIN16) || defined(__LP32__)
|
||||
#define MD5_LONG unsigned long
|
||||
#elif defined(_CRAY) || defined(__ILP64__)
|
||||
#define MD5_LONG unsigned long
|
||||
#define MD5_LONG_LOG2 3
|
||||
/*
|
||||
* _CRAY note. I could declare short, but I have no idea what impact
|
||||
* it has on performance on non-T3E machines. I could declare
|
||||
* int, but at least on C90 sizeof(int) can be chosen at compile time.
|
||||
* So I've chosen long...
|
||||
* <appro@fy.chalmers.se>
|
||||
*/
|
||||
#else
|
||||
#define MD5_LONG unsigned int
|
||||
#endif
|
||||
|
||||
#define MD5_CBLOCK 64
|
||||
#define MD5_LBLOCK 16
|
||||
#define MD5_BLOCK 16
|
||||
#define MD5_LAST_BLOCK 56
|
||||
#define MD5_LENGTH_BLOCK 8
|
||||
#define MD5_LBLOCK (MD5_CBLOCK/4)
|
||||
#define MD5_DIGEST_LENGTH 16
|
||||
|
||||
typedef struct MD5state_st
|
||||
{
|
||||
unsigned long A,B,C,D;
|
||||
unsigned long Nl,Nh;
|
||||
unsigned long data[MD5_LBLOCK];
|
||||
MD5_LONG A,B,C,D;
|
||||
MD5_LONG Nl,Nh;
|
||||
MD5_LONG data[MD5_LBLOCK];
|
||||
int num;
|
||||
} MD5_CTX;
|
||||
|
||||
void MD5_Init(MD5_CTX *c);
|
||||
void MD5_Update(MD5_CTX *c, const void *data, unsigned long len);
|
||||
void MD5_Update(MD5_CTX *c, const unsigned char *data, unsigned long len);
|
||||
void MD5_Final(unsigned char *md, MD5_CTX *c);
|
||||
unsigned char *MD5(unsigned char *d, unsigned long n, unsigned char *md);
|
||||
void MD5_Transform(MD5_CTX *c, unsigned char *b);
|
||||
|
|
|
@ -70,12 +70,6 @@ char *MD5_version="MD5" OPENSSL_VERSION_PTEXT;
|
|||
#define INIT_DATA_C (unsigned long)0x98badcfeL
|
||||
#define INIT_DATA_D (unsigned long)0x10325476L
|
||||
|
||||
# ifdef MD5_ASM
|
||||
void md5_block_x86(MD5_CTX *c, unsigned long *p,int num);
|
||||
# define md5_block md5_block_x86
|
||||
# else
|
||||
static void md5_block(MD5_CTX *c, unsigned long *p,int num);
|
||||
# endif
|
||||
void MD5_Init(MD5_CTX *c)
|
||||
{
|
||||
c->A=INIT_DATA_A;
|
||||
|
@ -87,183 +81,31 @@ void MD5_Init(MD5_CTX *c)
|
|||
c->num=0;
|
||||
}
|
||||
|
||||
void MD5_Update(MD5_CTX *c, const void *_data, unsigned long len)
|
||||
#ifndef md5_block_host_order
|
||||
void md5_block_host_order (MD5_CTX *c, const MD5_LONG *X, int num)
|
||||
{
|
||||
register const unsigned char *data=_data;
|
||||
register ULONG *p;
|
||||
int sw,sc;
|
||||
ULONG l;
|
||||
|
||||
if (len == 0) return;
|
||||
|
||||
l=(c->Nl+(len<<3))&0xffffffffL;
|
||||
/* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
|
||||
* Wei Dai <weidai@eskimo.com> for pointing it out. */
|
||||
if (l < c->Nl) /* overflow */
|
||||
c->Nh++;
|
||||
c->Nh+=(len>>29);
|
||||
c->Nl=l;
|
||||
|
||||
if (c->num != 0)
|
||||
{
|
||||
p=c->data;
|
||||
sw=c->num>>2;
|
||||
sc=c->num&0x03;
|
||||
|
||||
if ((c->num+len) >= MD5_CBLOCK)
|
||||
{
|
||||
l= p[sw];
|
||||
p_c2l(data,l,sc);
|
||||
p[sw++]=l;
|
||||
for (; sw<MD5_LBLOCK; sw++)
|
||||
{
|
||||
c2l(data,l);
|
||||
p[sw]=l;
|
||||
}
|
||||
len-=(MD5_CBLOCK-c->num);
|
||||
|
||||
md5_block(c,p,64);
|
||||
c->num=0;
|
||||
/* drop through and do the rest */
|
||||
}
|
||||
else
|
||||
{
|
||||
int ew,ec;
|
||||
|
||||
c->num+=(int)len;
|
||||
if ((sc+len) < 4) /* ugly, add char's to a word */
|
||||
{
|
||||
l= p[sw];
|
||||
p_c2l_p(data,l,sc,len);
|
||||
p[sw]=l;
|
||||
}
|
||||
else
|
||||
{
|
||||
ew=(c->num>>2);
|
||||
ec=(c->num&0x03);
|
||||
l= p[sw];
|
||||
p_c2l(data,l,sc);
|
||||
p[sw++]=l;
|
||||
for (; sw < ew; sw++)
|
||||
{ c2l(data,l); p[sw]=l; }
|
||||
if (ec)
|
||||
{
|
||||
c2l_p(data,l,ec);
|
||||
p[sw]=l;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* we now can process the input data in blocks of MD5_CBLOCK
|
||||
* chars and save the leftovers to c->data. */
|
||||
#ifdef L_ENDIAN
|
||||
if ((((unsigned long)data)%sizeof(ULONG)) == 0)
|
||||
{
|
||||
sw=(int)len/MD5_CBLOCK;
|
||||
if (sw > 0)
|
||||
{
|
||||
sw*=MD5_CBLOCK;
|
||||
md5_block(c,(ULONG *)data,sw);
|
||||
data+=sw;
|
||||
len-=sw;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
p=c->data;
|
||||
while (len >= MD5_CBLOCK)
|
||||
{
|
||||
#if defined(L_ENDIAN) || defined(B_ENDIAN)
|
||||
if (p != (unsigned long *)data)
|
||||
memcpy(p,data,MD5_CBLOCK);
|
||||
data+=MD5_CBLOCK;
|
||||
#ifdef B_ENDIAN
|
||||
for (sw=(MD5_LBLOCK/4); sw; sw--)
|
||||
{
|
||||
Endian_Reverse32(p[0]);
|
||||
Endian_Reverse32(p[1]);
|
||||
Endian_Reverse32(p[2]);
|
||||
Endian_Reverse32(p[3]);
|
||||
p+=4;
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
for (sw=(MD5_LBLOCK/4); sw; sw--)
|
||||
{
|
||||
c2l(data,l); *(p++)=l;
|
||||
c2l(data,l); *(p++)=l;
|
||||
c2l(data,l); *(p++)=l;
|
||||
c2l(data,l); *(p++)=l;
|
||||
}
|
||||
#endif
|
||||
p=c->data;
|
||||
md5_block(c,p,64);
|
||||
len-=MD5_CBLOCK;
|
||||
}
|
||||
sc=(int)len;
|
||||
c->num=sc;
|
||||
if (sc)
|
||||
{
|
||||
sw=sc>>2; /* words to copy */
|
||||
#ifdef L_ENDIAN
|
||||
p[sw]=0;
|
||||
memcpy(p,data,sc);
|
||||
#else
|
||||
sc&=0x03;
|
||||
for ( ; sw; sw--)
|
||||
{ c2l(data,l); *(p++)=l; }
|
||||
c2l_p(data,l,sc);
|
||||
*p=l;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void MD5_Transform(MD5_CTX *c, unsigned char *b)
|
||||
{
|
||||
ULONG p[16];
|
||||
#if !defined(L_ENDIAN)
|
||||
ULONG *q;
|
||||
int i;
|
||||
#endif
|
||||
|
||||
#if defined(B_ENDIAN) || defined(L_ENDIAN)
|
||||
memcpy(p,b,64);
|
||||
#ifdef B_ENDIAN
|
||||
q=p;
|
||||
for (i=(MD5_LBLOCK/4); i; i--)
|
||||
{
|
||||
Endian_Reverse32(q[0]);
|
||||
Endian_Reverse32(q[1]);
|
||||
Endian_Reverse32(q[2]);
|
||||
Endian_Reverse32(q[3]);
|
||||
q+=4;
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
q=p;
|
||||
for (i=(MD5_LBLOCK/4); i; i--)
|
||||
{
|
||||
ULONG l;
|
||||
c2l(b,l); *(q++)=l;
|
||||
c2l(b,l); *(q++)=l;
|
||||
c2l(b,l); *(q++)=l;
|
||||
c2l(b,l); *(q++)=l;
|
||||
}
|
||||
#endif
|
||||
md5_block(c,p,64);
|
||||
}
|
||||
|
||||
#ifndef MD5_ASM
|
||||
|
||||
static void md5_block(MD5_CTX *c, register ULONG *X, int num)
|
||||
{
|
||||
register ULONG A,B,C,D;
|
||||
register unsigned long A,B,C,D;
|
||||
/*
|
||||
* In case you wonder why A-D are declared as long and not
|
||||
* as MD5_LONG. Doing so results in slight performance
|
||||
* boost on LP64 architectures. The catch is we don't
|
||||
* really care if 32 MSBs of a 64-bit register get polluted
|
||||
* with eventual overflows as we *save* only 32 LSBs in
|
||||
* *either* case. Now declaring 'em long excuses the compiler
|
||||
* from keeping 32 MSBs zeroed resulting in 13% performance
|
||||
* improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
|
||||
* Well, to be honest it should say that this *prevents*
|
||||
* performance degradation.
|
||||
*
|
||||
* <appro@fy.chalmers.se>
|
||||
*/
|
||||
|
||||
A=c->A;
|
||||
B=c->B;
|
||||
C=c->C;
|
||||
D=c->D;
|
||||
for (;;)
|
||||
|
||||
for (;num--;X+=HASH_LBLOCK)
|
||||
{
|
||||
/* Round 0 */
|
||||
R0(A,B,C,D,X[ 0], 7,0xd76aa478L);
|
||||
|
@ -334,74 +176,127 @@ static void md5_block(MD5_CTX *c, register ULONG *X, int num)
|
|||
R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
|
||||
R3(B,C,D,A,X[ 9],21,0xeb86d391L);
|
||||
|
||||
A+=c->A&0xffffffffL;
|
||||
B+=c->B&0xffffffffL;
|
||||
c->A=A;
|
||||
c->B=B;
|
||||
C+=c->C&0xffffffffL;
|
||||
D+=c->D&0xffffffffL;
|
||||
c->C=C;
|
||||
c->D=D;
|
||||
X+=16;
|
||||
num-=64;
|
||||
if (num <= 0) break;
|
||||
A = c->A += A;
|
||||
B = c->B += B;
|
||||
C = c->C += C;
|
||||
D = c->D += D;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void MD5_Final(unsigned char *md, MD5_CTX *c)
|
||||
#ifndef md5_block_data_order
|
||||
void md5_block_data_order (MD5_CTX *c, const unsigned char *data, int num)
|
||||
{
|
||||
register int i,j;
|
||||
register ULONG l;
|
||||
register ULONG *p;
|
||||
static unsigned char end[4]={0x80,0x00,0x00,0x00};
|
||||
unsigned char *cp=end;
|
||||
register unsigned long A,B,C,D,l;
|
||||
/*
|
||||
* In case you wonder why A-D are declared as long and not
|
||||
* as MD5_LONG. Doing so results in slight performance
|
||||
* boost on LP64 architectures. The catch is we don't
|
||||
* really care if 32 MSBs of a 64-bit register get polluted
|
||||
* with eventual overflows as we *save* only 32 LSBs in
|
||||
* *either* case. Now declaring 'em long excuses the compiler
|
||||
* from keeping 32 MSBs zeroed resulting in 13% performance
|
||||
* improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
|
||||
* Well, to be honest it should say that this *prevents*
|
||||
* performance degradation.
|
||||
*
|
||||
* <appro@fy.chalmers.se>
|
||||
*/
|
||||
MD5_LONG X[MD5_LBLOCK];
|
||||
/*
|
||||
* In case you wonder why don't I use c->data for this.
|
||||
* RISCs usually have a handful of registers and if X is
|
||||
* declared as automatic array good optimizing compiler
|
||||
* shall accommodate at least part of it in register bank
|
||||
* instead of memory.
|
||||
*
|
||||
* <appro@fy.chalmers.se>
|
||||
*/
|
||||
|
||||
/* c->num should definitely have room for at least one more byte. */
|
||||
p=c->data;
|
||||
j=c->num;
|
||||
i=j>>2;
|
||||
A=c->A;
|
||||
B=c->B;
|
||||
C=c->C;
|
||||
D=c->D;
|
||||
|
||||
/* purify often complains about the following line as an
|
||||
* Uninitialized Memory Read. While this can be true, the
|
||||
* following p_c2l macro will reset l when that case is true.
|
||||
* This is because j&0x03 contains the number of 'valid' bytes
|
||||
* already in p[i]. If and only if j&0x03 == 0, the UMR will
|
||||
* occur but this is also the only time p_c2l will do
|
||||
* l= *(cp++) instead of l|= *(cp++)
|
||||
* Many thanks to Alex Tang <altitude@cic.net> for picking up this
|
||||
* 'potential bug' */
|
||||
#ifdef PURIFY
|
||||
if ((j&0x03) == 0) p[i]=0;
|
||||
#endif
|
||||
l=p[i];
|
||||
p_c2l(cp,l,j&0x03);
|
||||
p[i]=l;
|
||||
i++;
|
||||
/* i is the next 'undefined word' */
|
||||
if (c->num >= MD5_LAST_BLOCK)
|
||||
for (;num--;)
|
||||
{
|
||||
for (; i<MD5_LBLOCK; i++)
|
||||
p[i]=0;
|
||||
md5_block(c,p,64);
|
||||
i=0;
|
||||
}
|
||||
for (; i<(MD5_LBLOCK-2); i++)
|
||||
p[i]=0;
|
||||
p[MD5_LBLOCK-2]=c->Nl;
|
||||
p[MD5_LBLOCK-1]=c->Nh;
|
||||
md5_block(c,p,64);
|
||||
cp=md;
|
||||
l=c->A; l2c(l,cp);
|
||||
l=c->B; l2c(l,cp);
|
||||
l=c->C; l2c(l,cp);
|
||||
l=c->D; l2c(l,cp);
|
||||
HOST_c2l(data,l); X[ 0]=l; HOST_c2l(data,l); X[ 1]=l;
|
||||
/* Round 0 */
|
||||
R0(A,B,C,D,X[ 0], 7,0xd76aa478L); HOST_c2l(data,l); X[ 2]=l;
|
||||
R0(D,A,B,C,X[ 1],12,0xe8c7b756L); HOST_c2l(data,l); X[ 3]=l;
|
||||
R0(C,D,A,B,X[ 2],17,0x242070dbL); HOST_c2l(data,l); X[ 4]=l;
|
||||
R0(B,C,D,A,X[ 3],22,0xc1bdceeeL); HOST_c2l(data,l); X[ 5]=l;
|
||||
R0(A,B,C,D,X[ 4], 7,0xf57c0fafL); HOST_c2l(data,l); X[ 6]=l;
|
||||
R0(D,A,B,C,X[ 5],12,0x4787c62aL); HOST_c2l(data,l); X[ 7]=l;
|
||||
R0(C,D,A,B,X[ 6],17,0xa8304613L); HOST_c2l(data,l); X[ 8]=l;
|
||||
R0(B,C,D,A,X[ 7],22,0xfd469501L); HOST_c2l(data,l); X[ 9]=l;
|
||||
R0(A,B,C,D,X[ 8], 7,0x698098d8L); HOST_c2l(data,l); X[10]=l;
|
||||
R0(D,A,B,C,X[ 9],12,0x8b44f7afL); HOST_c2l(data,l); X[11]=l;
|
||||
R0(C,D,A,B,X[10],17,0xffff5bb1L); HOST_c2l(data,l); X[12]=l;
|
||||
R0(B,C,D,A,X[11],22,0x895cd7beL); HOST_c2l(data,l); X[13]=l;
|
||||
R0(A,B,C,D,X[12], 7,0x6b901122L); HOST_c2l(data,l); X[14]=l;
|
||||
R0(D,A,B,C,X[13],12,0xfd987193L); HOST_c2l(data,l); X[15]=l;
|
||||
R0(C,D,A,B,X[14],17,0xa679438eL);
|
||||
R0(B,C,D,A,X[15],22,0x49b40821L);
|
||||
/* Round 1 */
|
||||
R1(A,B,C,D,X[ 1], 5,0xf61e2562L);
|
||||
R1(D,A,B,C,X[ 6], 9,0xc040b340L);
|
||||
R1(C,D,A,B,X[11],14,0x265e5a51L);
|
||||
R1(B,C,D,A,X[ 0],20,0xe9b6c7aaL);
|
||||
R1(A,B,C,D,X[ 5], 5,0xd62f105dL);
|
||||
R1(D,A,B,C,X[10], 9,0x02441453L);
|
||||
R1(C,D,A,B,X[15],14,0xd8a1e681L);
|
||||
R1(B,C,D,A,X[ 4],20,0xe7d3fbc8L);
|
||||
R1(A,B,C,D,X[ 9], 5,0x21e1cde6L);
|
||||
R1(D,A,B,C,X[14], 9,0xc33707d6L);
|
||||
R1(C,D,A,B,X[ 3],14,0xf4d50d87L);
|
||||
R1(B,C,D,A,X[ 8],20,0x455a14edL);
|
||||
R1(A,B,C,D,X[13], 5,0xa9e3e905L);
|
||||
R1(D,A,B,C,X[ 2], 9,0xfcefa3f8L);
|
||||
R1(C,D,A,B,X[ 7],14,0x676f02d9L);
|
||||
R1(B,C,D,A,X[12],20,0x8d2a4c8aL);
|
||||
/* Round 2 */
|
||||
R2(A,B,C,D,X[ 5], 4,0xfffa3942L);
|
||||
R2(D,A,B,C,X[ 8],11,0x8771f681L);
|
||||
R2(C,D,A,B,X[11],16,0x6d9d6122L);
|
||||
R2(B,C,D,A,X[14],23,0xfde5380cL);
|
||||
R2(A,B,C,D,X[ 1], 4,0xa4beea44L);
|
||||
R2(D,A,B,C,X[ 4],11,0x4bdecfa9L);
|
||||
R2(C,D,A,B,X[ 7],16,0xf6bb4b60L);
|
||||
R2(B,C,D,A,X[10],23,0xbebfbc70L);
|
||||
R2(A,B,C,D,X[13], 4,0x289b7ec6L);
|
||||
R2(D,A,B,C,X[ 0],11,0xeaa127faL);
|
||||
R2(C,D,A,B,X[ 3],16,0xd4ef3085L);
|
||||
R2(B,C,D,A,X[ 6],23,0x04881d05L);
|
||||
R2(A,B,C,D,X[ 9], 4,0xd9d4d039L);
|
||||
R2(D,A,B,C,X[12],11,0xe6db99e5L);
|
||||
R2(C,D,A,B,X[15],16,0x1fa27cf8L);
|
||||
R2(B,C,D,A,X[ 2],23,0xc4ac5665L);
|
||||
/* Round 3 */
|
||||
R3(A,B,C,D,X[ 0], 6,0xf4292244L);
|
||||
R3(D,A,B,C,X[ 7],10,0x432aff97L);
|
||||
R3(C,D,A,B,X[14],15,0xab9423a7L);
|
||||
R3(B,C,D,A,X[ 5],21,0xfc93a039L);
|
||||
R3(A,B,C,D,X[12], 6,0x655b59c3L);
|
||||
R3(D,A,B,C,X[ 3],10,0x8f0ccc92L);
|
||||
R3(C,D,A,B,X[10],15,0xffeff47dL);
|
||||
R3(B,C,D,A,X[ 1],21,0x85845dd1L);
|
||||
R3(A,B,C,D,X[ 8], 6,0x6fa87e4fL);
|
||||
R3(D,A,B,C,X[15],10,0xfe2ce6e0L);
|
||||
R3(C,D,A,B,X[ 6],15,0xa3014314L);
|
||||
R3(B,C,D,A,X[13],21,0x4e0811a1L);
|
||||
R3(A,B,C,D,X[ 4], 6,0xf7537e82L);
|
||||
R3(D,A,B,C,X[11],10,0xbd3af235L);
|
||||
R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
|
||||
R3(B,C,D,A,X[ 9],21,0xeb86d391L);
|
||||
|
||||
/* clear stuff, md5_block may be leaving some stuff on the stack
|
||||
* but I'm not worried :-) */
|
||||
c->num=0;
|
||||
/* memset((char *)&c,0,sizeof(c));*/
|
||||
A = c->A += A;
|
||||
B = c->B += B;
|
||||
C = c->C += C;
|
||||
D = c->D += D;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef undef
|
||||
int printit(unsigned long *l)
|
||||
|
|
|
@ -56,98 +56,79 @@
|
|||
* [including the GNU Public Licence.]
|
||||
*/
|
||||
|
||||
/* On sparc, this actually slows things down :-( */
|
||||
#if defined(sun)
|
||||
#undef B_ENDIAN
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <openssl/md5.h>
|
||||
|
||||
#define ULONG unsigned long
|
||||
#define UCHAR unsigned char
|
||||
#define UINT unsigned int
|
||||
|
||||
#undef c2l
|
||||
#define c2l(c,l) (l = ((unsigned long)(*((c)++))) , \
|
||||
l|=(((unsigned long)(*((c)++)))<< 8), \
|
||||
l|=(((unsigned long)(*((c)++)))<<16), \
|
||||
l|=(((unsigned long)(*((c)++)))<<24))
|
||||
|
||||
/*
 * p_c2l(c,l,n) - complete a partially filled little-endian word.
 * n is the switch selector: entering at case n, every following case
 * falls through (there are no breaks), so bytes n..3 of l are read from c,
 * which is advanced once per byte read. With n == 0 the first byte is
 * assigned rather than OR-ed, so any previous contents of l are discarded.
 */
#undef p_c2l
|
||||
#define p_c2l(c,l,n) { \
|
||||
switch (n) { \
|
||||
case 0: l =((unsigned long)(*((c)++))); \
|
||||
case 1: l|=((unsigned long)(*((c)++)))<< 8; \
|
||||
case 2: l|=((unsigned long)(*((c)++)))<<16; \
|
||||
case 3: l|=((unsigned long)(*((c)++)))<<24; \
|
||||
} \
|
||||
}
|
||||
|
||||
/*
 * c2l_p(c,l,n) - load the n trailing bytes of a message (cases exist for
 * n = 1, 2 and 3) from c into l in little-endian order. l is cleared
 * first; c is moved forward by n and then stepped back one byte per case,
 * with the cases falling through.
 * NOTE the pointer is not incremented at the end of this: after the
 * switch c is back at its starting position.
 */
#undef c2l_p
|
||||
#define c2l_p(c,l,n) { \
|
||||
l=0; \
|
||||
(c)+=n; \
|
||||
switch (n) { \
|
||||
case 3: l =((unsigned long)(*(--(c))))<<16; \
|
||||
case 2: l|=((unsigned long)(*(--(c))))<< 8; \
|
||||
case 1: l|=((unsigned long)(*(--(c)))) ; \
|
||||
} \
|
||||
}
|
||||
|
||||
#undef p_c2l_p
|
||||
#define p_c2l_p(c,l,sc,len) { \
|
||||
switch (sc) \
|
||||
{ \
|
||||
case 0: l =((unsigned long)(*((c)++))); \
|
||||
if (--len == 0) break; \
|
||||
case 1: l|=((unsigned long)(*((c)++)))<< 8; \
|
||||
if (--len == 0) break; \
|
||||
case 2: l|=((unsigned long)(*((c)++)))<<16; \
|
||||
} \
|
||||
}
|
||||
|
||||
#undef l2c
|
||||
#define l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
|
||||
*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
|
||||
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
|
||||
*((c)++)=(unsigned char)(((l)>>24)&0xff))
|
||||
|
||||
/* NOTE - c is not incremented as per l2c */
|
||||
#undef l2cn
|
||||
#define l2cn(l1,l2,c,n) { \
|
||||
c+=n; \
|
||||
switch (n) { \
|
||||
case 8: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \
|
||||
case 7: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \
|
||||
case 6: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \
|
||||
case 5: *(--(c))=(unsigned char)(((l2) )&0xff); \
|
||||
case 4: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \
|
||||
case 3: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \
|
||||
case 2: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \
|
||||
case 1: *(--(c))=(unsigned char)(((l1) )&0xff); \
|
||||
} \
|
||||
}
|
||||
|
||||
/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
|
||||
#if defined(WIN32)
|
||||
/* 5 instructions with rotate instruction, else 9 */
|
||||
#define Endian_Reverse32(a) \
|
||||
{ \
|
||||
unsigned long l=(a); \
|
||||
(a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \
|
||||
}
|
||||
#else
|
||||
/* 6 instructions with rotate instruction, else 8 */
|
||||
#define Endian_Reverse32(a) \
|
||||
{ \
|
||||
unsigned long l=(a); \
|
||||
l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \
|
||||
(a)=ROTATE(l,16L); \
|
||||
}
|
||||
#ifndef MD5_LONG_LOG2
|
||||
#define MD5_LONG_LOG2 2 /* default to 32 bits */
|
||||
#endif
|
||||
|
||||
#ifdef MD5_ASM
|
||||
# if defined(__i386) || defined(WIN32)
|
||||
# define md5_block_host_order md5_block_asm_host_order
|
||||
# elif defined(__sparc) && defined(ULTRASPARC)
|
||||
void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,int num);
|
||||
# define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned
|
||||
# endif
|
||||
#endif
|
||||
|
||||
void md5_block_host_order (MD5_CTX *c, const MD5_LONG *p,int num);
|
||||
void md5_block_data_order (MD5_CTX *c, const unsigned char *p,int num);
|
||||
|
||||
#if defined(__i386)
|
||||
/*
|
||||
* *_block_host_order is expected to handle aligned data while
|
||||
* *_block_data_order - unaligned. As algorithm and host (x86)
|
||||
* are in this case of the same "endianness" these two are
|
||||
* otherwise indistinguishable. But normally you don't want to
|
||||
* call the same function because unaligned access in places
|
||||
* where alignment is expected is usually a "Bad Thing". Indeed,
|
||||
* on RISCs you get punished with BUS ERROR signal or *severe*
|
||||
* performance degradation. Intel CPUs are in turn perfectly
|
||||
* capable of loading unaligned data without such drastic side
|
||||
* effect. Yes, they say it's slower than aligned load, but no
|
||||
* exception is generated and therefore performance degradation
|
||||
* is *incomparable* with RISCs. What we should weigh here is
|
||||
* costs of unaligned access against costs of aligning data.
|
||||
* According to my measurements allowing unaligned access results
|
||||
* in ~9% performance improvement on Pentium II operating at
|
||||
* 266MHz. I won't be surprised if the difference will be higher
|
||||
* on faster systems:-)
|
||||
*
|
||||
* <appro@fy.chalmers.se>
|
||||
*/
|
||||
#define md5_block_data_order md5_block_host_order
|
||||
#endif
|
||||
|
||||
#define DATA_ORDER_IS_LITTLE_ENDIAN
|
||||
|
||||
#define HASH_LONG MD5_LONG
|
||||
#define HASH_LONG_LOG2 MD5_LONG_LOG2
|
||||
#define HASH_CTX MD5_CTX
|
||||
#define HASH_CBLOCK MD5_CBLOCK
|
||||
#define HASH_LBLOCK MD5_LBLOCK
|
||||
#define HASH_UPDATE MD5_Update
|
||||
#define HASH_TRANSFORM MD5_Transform
|
||||
#define HASH_FINAL MD5_Final
|
||||
#define HASH_BLOCK_HOST_ORDER md5_block_host_order
|
||||
#if defined(B_ENDIAN) || defined(md5_block_data_order)
|
||||
#define HASH_BLOCK_DATA_ORDER md5_block_data_order
|
||||
/*
|
||||
* Little-endians (Intel and Alpha) feel better without this.
|
||||
* It looks like memcpy does better job than generic
|
||||
* md5_block_data_order on copying-n-aligning input data.
|
||||
* But frankly speaking I didn't expect such result on Alpha.
|
||||
* On the other hand I've got this with egcs-1.0.2 and if
|
||||
* program is compiled with another (better?) compiler it
|
||||
* might turn out other way around.
|
||||
*
|
||||
* <appro@fy.chalmers.se>
|
||||
*/
|
||||
#endif
|
||||
|
||||
#include "../md32_common.h"
|
||||
|
||||
/*
|
||||
#define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
|
||||
#define G(x,y,z) (((x) & (z)) | ((y) & (~(z))))
|
||||
|
@ -162,14 +143,6 @@
|
|||
#define H(b,c,d) ((b) ^ (c) ^ (d))
|
||||
#define I(b,c,d) (((~(d)) | (b)) ^ (c))
|
||||
|
||||
#undef ROTATE
|
||||
#if defined(WIN32)
|
||||
#define ROTATE(a,n) _lrotl(a,n)
|
||||
#else
|
||||
#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
|
||||
#endif
|
||||
|
||||
|
||||
#define R0(a,b,c,d,k,s,t) { \
|
||||
a+=((k)+(t)+F((b),(c),(d))); \
|
||||
a=ROTATE(a,s); \
|
||||
|
|
|
@ -57,7 +57,8 @@
|
|||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "md5_locl.h"
|
||||
#include <string.h>
|
||||
#include <openssl/md5.h>
|
||||
|
||||
unsigned char *MD5(unsigned char *d, unsigned long n, unsigned char *md)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue