Reorganize and speed up MD5.

Submitted by: Andy Polyakov <appro@fy.chalmers.se>
This commit is contained in:
Ulf Möller 1999-05-13 13:16:42 +00:00
parent 7d7d2cbcb0
commit bd3576d2dd
10 changed files with 1872 additions and 345 deletions

View file

@ -5,6 +5,9 @@
Changes between 0.9.2b and 0.9.3
*) Reorganize and speed up MD5.
[Andy Polyakov <appro@fy.chalmers.se>]
*) VMS support.
[Richard Levitte <richard@levitte.org>]

View file

@ -106,7 +106,7 @@ my %table=(
# Solaris setups
"solaris-x86-gcc","gcc:-O3 -fomit-frame-pointer -m486 -Wall -DL_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG $x86_gcc_des $x86_gcc_opts:$x86_sol_asm",
"solaris-sparc-gcc","gcc:-O3 -fomit-frame-pointer -mv8 -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8.o::",
"solaris-usparc-gcc","gcc:-O3 -fomit-frame-pointer -mcpu=ultrasparc -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8plus-gcc.o::",
"solaris-usparc-gcc","gcc:-O3 -fomit-frame-pointer -mcpu=ultrasparc -Wall -DB_ENDIAN -DULTRASPARC:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8plus-gcc.o:::asm/md5-sparcv8plus.o:",
"debug-solaris-sparc-gcc","gcc:-O3 -g -mv8 -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:::",
"debug-solaris-usparc-gcc","gcc:-O3 -g -mcpu=ultrasparc -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8plus-gcc.o::",
@ -115,12 +115,11 @@ my %table=(
# SC4 is ok, better than gcc even on bn as long as you tell it -xarch=v8
# -fast slows things like DES down quite a lot
# Don't use -xtarget=ultra with SC4.2. It is broken, and will break exptest.
# SC5.0 with the compiler common patch works.
"solaris-sparc-sc4","cc:-xarch=v8 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8.o::",
"solaris-usparc-sc4","cc:-xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o::",
# SC5.0 note: Compiler common patch 107357-01 or later is required!
"solaris-usparc-sc5","cc:-xtarget=ultra -xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o::",
"solaris64-usparc-sc5","cc:-xtarget=ultra -xarch=v9 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:SIXTY_FOUR_BIT_LONG RC4_CHAR DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:::",
"solaris-usparc-sc5","cc:-xtarget=ultra -xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DULTRASPARC -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o:::asm/md5-sparcv8plus.o:",
"solaris64-usparc-sc5","cc:-xtarget=ultra -xarch=v9 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DULTRASPARC:-D_REENTRANT:-lsocket -lnsl:SIXTY_FOUR_BIT_LONG RC4_CHAR DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::::asm/md5-sparcv9.o:",
# Sunos configs, assuming sparc for the gcc one.
##"sunos-cc", "cc:-O4 -DNOPROTO -DNOCONST:(unknown)::DES_UNROLL:::",

592
crypto/md32_common.h Normal file
View file

@ -0,0 +1,592 @@
/* crypto/md32_common.h */
/* ====================================================================
* Copyright (c) 1999 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* licensing@OpenSSL.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com).
*
*/
/*
* This is a generic 32 bit "collector" for message digest algorithms.
* Whenever needed it collects input character stream into chunks of
* 32 bit values and invokes a block function that performs actual hash
* calculations.
*
* Porting guide.
*
* Obligatory macros:
*
* DATA_ORDER_IS_BIG_ENDIAN or DATA_ORDER_IS_LITTLE_ENDIAN
* this macro defines byte order of input stream.
* HASH_CBLOCK
* size of a unit chunk HASH_BLOCK operates on.
* HASH_LONG
* has to be at least 32 bits wide; if it's wider, then
* HASH_LONG_LOG2 *has to* be defined as well
* HASH_CTX
* context structure that at least contains following
* members:
* typedef struct {
* ...
* HASH_LONG Nl,Nh;
* HASH_LONG data[HASH_LBLOCK];
* int num;
* ...
* } HASH_CTX;
* HASH_UPDATE
* name of "Update" function, implemented here.
* HASH_TRANSFORM
* name of "Transform" function, implemented here.
* HASH_FINAL
* name of "Final" function, implemented here.
* HASH_BLOCK_HOST_ORDER
* name of "block" function treating *aligned* input message
* in host byte order, implemented externally.
* HASH_BLOCK_DATA_ORDER
* name of "block" function treating *unaligned* input message
* in original (data) byte order, implemented externally (it
* actually is optional if data and host are of the same
* "endianness").
*
* Optional macros:
*
* B_ENDIAN or L_ENDIAN
* defines host byte-order.
* HASH_LONG_LOG2
* defaults to 2 if not stated otherwise.
* HASH_LBLOCK
* assumed to be HASH_CBLOCK/4 if not stated otherwise.
* HASH_BLOCK_DATA_ORDER_ALIGNED
* alternative "block" function capable of treating
* aligned input message in original (data) order,
* implemented externally.
*
* MD5 example:
*
* #define DATA_ORDER_IS_LITTLE_ENDIAN
*
* #define HASH_LONG MD5_LONG
* #define HASH_LONG_LOG2 MD5_LONG_LOG2
* #define HASH_CTX MD5_CTX
* #define HASH_CBLOCK MD5_CBLOCK
* #define HASH_LBLOCK MD5_LBLOCK
* #define HASH_UPDATE MD5_Update
* #define HASH_TRANSFORM MD5_Transform
* #define HASH_FINAL MD5_Final
* #define HASH_BLOCK_HOST_ORDER md5_block_host_order
* #define HASH_BLOCK_DATA_ORDER md5_block_data_order
*
* <appro@fy.chalmers.se>
*/
/*
 * Compile-time validation of the macros the including file must
 * provide (see the porting guide above). Each missing macro stops the
 * build with an #error naming it. The optional macros HASH_LBLOCK and
 * HASH_LONG_LOG2 receive their defaults at the end of this section.
 */
#if !defined(DATA_ORDER_IS_BIG_ENDIAN) && !defined(DATA_ORDER_IS_LITTLE_ENDIAN)
#error "DATA_ORDER must be defined!"
#endif
#ifndef HASH_CBLOCK
#error "HASH_CBLOCK must be defined!"
#endif
#ifndef HASH_LONG
#error "HASH_LONG must be defined!"
#endif
#ifndef HASH_CTX
#error "HASH_CTX must be defined!"
#endif
#ifndef HASH_UPDATE
#error "HASH_UPDATE must be defined!"
#endif
#ifndef HASH_TRANSFORM
#error "HASH_TRANSFORM must be defined!"
#endif
#ifndef HASH_FINAL
#error "HASH_FINAL must be defined!"
#endif
#ifndef HASH_BLOCK_HOST_ORDER
#error "HASH_BLOCK_HOST_ORDER must be defined!"
#endif
#if 0
/*
* Moved below as it's required only if HASH_BLOCK_DATA_ORDER_ALIGNED
* isn't defined.
*/
#ifndef HASH_BLOCK_DATA_ORDER
#error "HASH_BLOCK_DATA_ORDER must be defined!"
#endif
#endif
/* Number of 32-bit words in one block: HASH_CBLOCK bytes / 4. */
#ifndef HASH_LBLOCK
#define HASH_LBLOCK (HASH_CBLOCK/4)
#endif
/* log2(sizeof(HASH_LONG)); 2 means HASH_LONG is assumed to be 4 bytes. */
#ifndef HASH_LONG_LOG2
#define HASH_LONG_LOG2 2
#endif
/*
* Engage compiler specific rotate intrinsic function if available.
*/
#undef ROTATE
#ifndef PEDANTIC
# if defined(_MSC_VER)
# define ROTATE(a,n) _lrotl(a,n)
# elif defined(__GNUC__) && __GNUC__>=2
/*
* Some GNU C inline assembler templates. Note that these are
* rotates by *constant* number of bits! But that's exactly
* what we need here...
*
* <appro@fy.chalmers.se>
*/
# if defined(__i386)
# define ROTATE(a,n) ({ register unsigned int ret; \
asm volatile ( \
"roll %1,%0" \
: "=r"(ret) \
: "I"(n), "0"(a) \
: "cc"); \
ret; \
})
# elif defined(__powerpc)
# define ROTATE(a,n) ({ register unsigned int ret; \
asm volatile ( \
"rlwinm %0,%1,%2,0,31" \
: "=r"(ret) \
: "r"(a), "I"(n)); \
ret; \
})
# endif
# endif
/*
* Engage compiler specific "fetch in reverse byte order"
* intrinsic function if available.
*/
# if defined(__GNUC__) && __GNUC__>=2
/* some GNU C inline assembler templates by <appro@fy.chalmers.se> */
# if defined(__i386) && !defined(I386_ONLY)
# define BE_FETCH32(a) ({ register unsigned int l=(a);\
asm volatile ( \
"bswapl %0" \
: "=r"(l) : "0"(l)); \
l; \
})
# elif defined(__powerpc)
# define LE_FETCH32(a) ({ register unsigned int l; \
asm volatile ( \
"lwbrx %0,0,%1" \
: "=r"(l) \
: "r"(a)); \
l; \
})
# elif defined(__sparc) && defined(ULTRASPARC)
# define LE_FETCH32(a) ({ register unsigned int l; \
asm volatile ( \
"lda [%1]#ASI_PRIMARY_LITTLE,%0"\
: "=r"(l) \
: "r"(a)); \
l; \
})
# endif
# endif
#endif /* PEDANTIC */
/*
 * REVERSE_FETCH32(a,l): load one HASH_LONG from address a into the
 * scratch variable l and yield it with its four bytes reversed.
 * The load dereferences a as a HASH_LONG pointer, so this is only
 * provided when HASH_LONG is exactly 32 bits wide.
 */
#if HASH_LONG_LOG2==2 /* Engage only if sizeof(HASH_LONG)== 4 */
/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
#ifdef ROTATE
/* 5 instructions with rotate instruction, else 9 */
#define REVERSE_FETCH32(a,l) ( \
l=*(const HASH_LONG *)(a), \
((ROTATE(l,8)&0x00FF00FF)|(ROTATE((l&0x00FF00FF),24))) \
)
#else
/* 6 instructions with rotate instruction, else 8 */
/*
 * ROTATE is not defined yet at this point; that is fine because the
 * macro body is only expanded where REVERSE_FETCH32 is used, i.e.
 * after the portable ROTATE fallback below has been defined.
 */
#define REVERSE_FETCH32(a,l) ( \
l=*(const HASH_LONG *)(a), \
l=(((l>>8)&0x00FF00FF)|((l&0x00FF00FF)<<8)), \
ROTATE(l,16) \
)
/*
* Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
* It's rewritten as above for two reasons:
* - RISCs aren't good at long constants and have to explicitly
* compose 'em with several (well, usually 2) instructions in a
* register before performing the actual operation and (as you
* already realized:-) having same constant should inspire the
* compiler to permanently allocate the only register for it;
* - most modern CPUs have two ALUs, but usually only one has
* circuitry for shifts:-( this minor tweak inspires compiler
* to schedule shift instructions in a better way...
*
* <appro@fy.chalmers.se>
*/
#endif
#endif
/*
 * Portable rotate-left for 32-bit quantities, used when no intrinsic
 * was selected above. The operand is masked to 32 bits before the
 * right shift so that high bits of a wider type do not leak in.
 */
#ifndef ROTATE
#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
#endif
/*
* Make some obvious choices. E.g., HASH_BLOCK_DATA_ORDER_ALIGNED
* and HASH_BLOCK_HOST_ORDER ought to be the same if input data
* and host are of the same "endianness". It's possible to mask
* this with blank #define HASH_BLOCK_DATA_ORDER though...
*
* <appro@fy.chalmers.se>
*/
#if defined(B_ENDIAN)
# if defined(DATA_ORDER_IS_BIG_ENDIAN)
/* Host and data agree: aligned 32-bit input can go to the host-order routine as is. */
# if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
# define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
# endif
# elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
/* Host and data differ: prefer the intrinsic fetch, else the generic byte reversal. */
# ifndef HOST_FETCH32
# ifdef LE_FETCH32
# define HOST_FETCH32(p,l) LE_FETCH32(p)
# elif defined(REVERSE_FETCH32)
# define HOST_FETCH32(p,l) REVERSE_FETCH32(p,l)
# endif
# endif
# endif
#elif defined(L_ENDIAN)
# if defined(DATA_ORDER_IS_LITTLE_ENDIAN)
/* Host and data agree: aligned 32-bit input can go to the host-order routine as is. */
# if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
# define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
# endif
# elif defined(DATA_ORDER_IS_BIG_ENDIAN)
/* Host and data differ: prefer the intrinsic fetch, else the generic byte reversal. */
# ifndef HOST_FETCH32
# ifdef BE_FETCH32
# define HOST_FETCH32(p,l) BE_FETCH32(p)
# elif defined(REVERSE_FETCH32)
# define HOST_FETCH32(p,l) REVERSE_FETCH32(p,l)
# endif
# endif
# endif
#endif
/*
 * HASH_BLOCK_DATA_ORDER is mandatory whenever the aligned data-order
 * routine is unavailable: either HASH_BLOCK_DATA_ORDER_ALIGNED is not
 * defined at all, or it is defined as 1, which the "!=1" tests in
 * HASH_UPDATE and HASH_TRANSFORM treat as "aligned path disabled".
 * In both situations HASH_BLOCK_DATA_ORDER is the only block function
 * left for whole input blocks, so a missing definition must stop the
 * build instead of silently compiling code that processes no blocks.
 */
#if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) || HASH_BLOCK_DATA_ORDER_ALIGNED==1
#ifndef HASH_BLOCK_DATA_ORDER
#error "HASH_BLOCK_DATA_ORDER must be defined!"
#endif
#endif
/*
 * Byte <-> word conversion helpers, one set per data byte order.
 * In every macro c is a byte pointer that the macro itself moves and
 * l is a caller-supplied unsigned long lvalue receiving or supplying
 * the 32-bit word.
 *
 * HOST_c2l(c,l)            read 4 bytes from c into l; c advances by 4
 *                          and the expression yields l.
 * HOST_p_c2l(c,l,n)        fill byte positions n..3 of l from c. For
 *                          n==0 l is assigned, otherwise the bytes are
 *                          OR-ed into the existing l (the switch cases
 *                          fall through on purpose).
 * HOST_p_c2l_p(c,l,sc,len) like HOST_p_c2l starting at position sc,
 *                          but stops as soon as len bytes have been
 *                          consumed; len is decremented in place and
 *                          position 3 is never written.
 * HOST_c2l_p(c,l,n)        build l from the first n (1..3) bytes at c,
 *                          remaining positions zero; c is advanced by
 *                          n and then stepped back byte by byte, so it
 *                          ends where it started.
 * HOST_l2c(l,c)            store the 4 bytes of l at c; c advances by
 *                          4 and the expression yields l.
 */
#if defined(DATA_ORDER_IS_BIG_ENDIAN)
/* Big-endian data: the first byte in memory is the most significant. */
#define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++)))<<24), \
l|=(((unsigned long)(*((c)++)))<<16), \
l|=(((unsigned long)(*((c)++)))<< 8), \
l|=(((unsigned long)(*((c)++))) ), \
l)
#define HOST_p_c2l(c,l,n) { \
switch (n) { \
case 0: l =((unsigned long)(*((c)++)))<<24; \
case 1: l|=((unsigned long)(*((c)++)))<<16; \
case 2: l|=((unsigned long)(*((c)++)))<< 8; \
case 3: l|=((unsigned long)(*((c)++))); \
} }
#define HOST_p_c2l_p(c,l,sc,len) { \
switch (sc) { \
case 0: l =((unsigned long)(*((c)++)))<<24; \
if (--len == 0) break; \
case 1: l|=((unsigned long)(*((c)++)))<<16; \
if (--len == 0) break; \
case 2: l|=((unsigned long)(*((c)++)))<< 8; \
} }
/* NOTE the pointer is not incremented at the end of this */
#define HOST_c2l_p(c,l,n) { \
l=0; (c)+=n; \
switch (n) { \
case 3: l =((unsigned long)(*(--(c))))<< 8; \
case 2: l|=((unsigned long)(*(--(c))))<<16; \
case 1: l|=((unsigned long)(*(--(c))))<<24; \
} }
#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
*((c)++)=(unsigned char)(((l) )&0xff), \
l)
#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
/* Little-endian data: the first byte in memory is the least significant. */
#define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++))) ), \
l|=(((unsigned long)(*((c)++)))<< 8), \
l|=(((unsigned long)(*((c)++)))<<16), \
l|=(((unsigned long)(*((c)++)))<<24), \
l)
#define HOST_p_c2l(c,l,n) { \
switch (n) { \
case 0: l =((unsigned long)(*((c)++))); \
case 1: l|=((unsigned long)(*((c)++)))<< 8; \
case 2: l|=((unsigned long)(*((c)++)))<<16; \
case 3: l|=((unsigned long)(*((c)++)))<<24; \
} }
#define HOST_p_c2l_p(c,l,sc,len) { \
switch (sc) { \
case 0: l =((unsigned long)(*((c)++))); \
if (--len == 0) break; \
case 1: l|=((unsigned long)(*((c)++)))<< 8; \
if (--len == 0) break; \
case 2: l|=((unsigned long)(*((c)++)))<<16; \
} }
/* NOTE the pointer is not incremented at the end of this */
#define HOST_c2l_p(c,l,n) { \
l=0; (c)+=n; \
switch (n) { \
case 3: l =((unsigned long)(*(--(c))))<<16; \
case 2: l|=((unsigned long)(*(--(c))))<< 8; \
case 1: l|=((unsigned long)(*(--(c)))); \
} }
#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
*((c)++)=(unsigned char)(((l)>>24)&0xff), \
l)
#endif
/*
* Time for some action:-)
*/
/*
 * HASH_UPDATE: feed len bytes starting at data into the hash context c.
 *
 * c->Nl/c->Nh accumulate the total message length in bits (low and
 * high 32-bit halves). c->data buffers a partial block as host-order
 * words and c->num is the number of bytes currently buffered.
 *
 * The input is handled in up to three stages:
 *  1. bytes that complete (or merely extend) a previously buffered
 *     partial block;
 *  2. whole blocks, handed straight to a block function without going
 *     through c->data whenever possible;
 *  3. a trailing partial block, stored in c->data for a later call.
 */
void HASH_UPDATE (HASH_CTX *c, const unsigned char *data, unsigned long len)
	{
	register HASH_LONG * p;
	register unsigned long l;
	int sw,sc,ew,ec;

	if (len==0) return;

	/* Add len*8 to the 64-bit bit counter held in Nh:Nl. */
	l=(c->Nl+(len<<3))&0xffffffffL;
	/* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
	 * Wei Dai <weidai@eskimo.com> for pointing it out. */
	if (l < c->Nl) /* overflow */
		c->Nh++;
	c->Nh+=(len>>29);	/* bits of len*8 that do not fit in Nl */
	c->Nl=l;

	if (c->num != 0)
		{
		/* Stage 1: a partial block is pending in c->data. */
		p=c->data;
		sw=c->num>>2;		/* index of the first incomplete word */
		sc=c->num&0x03;		/* bytes already present in that word */

		if ((c->num+len) >= HASH_CBLOCK)
			{
			/* Enough input to complete the block: top up the
			 * partial word, fill the remaining words, hash it. */
			l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
			for (; sw<HASH_LBLOCK; sw++)
				{
				HOST_c2l(data,l); p[sw]=l;
				}
			HASH_BLOCK_HOST_ORDER (c,p,1);
			len-=(HASH_CBLOCK-c->num);
			c->num=0;
			/* drop through and do the rest */
			}
		else
			{
			/* Still short of a full block: append and return. */
			c->num+=len;
			if ((sc+len) < 4) /* ugly, add char's to a word */
				{
				/* Note: HOST_p_c2l_p decrements len itself. */
				l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l;
				}
			else
				{
				ew=(c->num>>2);		/* first word left incomplete */
				ec=(c->num&0x03);	/* bytes going into that word */
				l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
				for (; sw < ew; sw++)
					{
					HOST_c2l(data,l); p[sw]=l;
					}
				if (ec)
					{
					HOST_c2l_p(data,l,ec); p[sw]=l;
					}
				}
			return;
			}
		}

	/* Stage 2: whole blocks. */
	sw=len/HASH_CBLOCK;
	if (sw > 0)
		{
#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_BLOCK_DATA_ORDER_ALIGNED!=1
		/*
		 * Note that HASH_BLOCK_DATA_ORDER_ALIGNED gets defined
		 * only if sizeof(HASH_LONG)==4.
		 */
		if ((((unsigned long)data)%4) == 0)
			{
			HASH_BLOCK_DATA_ORDER_ALIGNED (c,(HASH_LONG *)data,sw);
			sw*=HASH_CBLOCK;
			data+=sw;
			len-=sw;
			}
		else
#if !defined(HASH_BLOCK_DATA_ORDER)
			/* No routine for unaligned input: bounce every
			 * block through the aligned buffer c->data. */
			while (sw--)
				{
				memcpy (p=c->data,data,HASH_CBLOCK);
				HASH_BLOCK_DATA_ORDER_ALIGNED(c,p,1);
				data+=HASH_CBLOCK;
				len-=HASH_CBLOCK;
				}
#endif
#endif
#if defined(HASH_BLOCK_DATA_ORDER)
			{
			/*
			 * HASH_BLOCK_DATA_ORDER consumes the raw, possibly
			 * unaligned byte stream, so the byte pointer is
			 * passed as is. Casting it to HASH_LONG * would
			 * create a misaligned word pointer and would not
			 * match a block function taking a byte pointer.
			 */
			HASH_BLOCK_DATA_ORDER (c,data,sw);
			sw*=HASH_CBLOCK;
			data+=sw;
			len-=sw;
			}
#endif
		}

	/* Stage 3: keep the remaining (< HASH_CBLOCK) bytes for later. */
	if (len!=0)
		{
		p = c->data;
		c->num = len;
		ew=len>>2;	/* words to copy */
		ec=len&0x03;	/* bytes left for the last, partial word */
		for (; ew; ew--,p++)
			{
			HOST_c2l(data,l); *p=l;
			}
		/* With ec==0 this stores a zero word after the copied ones. */
		HOST_c2l_p(data,l,ec);
		*p=l;
		}
	}
/*
 * HASH_TRANSFORM: run the block function over exactly one block of
 * HASH_CBLOCK bytes at data, given in data (stream) byte order.
 * Neither the bit counters c->Nl/c->Nh nor c->num are touched; when
 * only the aligned routine exists and data is not 4-byte aligned, the
 * block is first copied into c->data.
 */
void HASH_TRANSFORM (HASH_CTX *c, unsigned char *data)
	{
#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_BLOCK_DATA_ORDER_ALIGNED!=1
	if ((((unsigned long)data)%4) == 0)
		HASH_BLOCK_DATA_ORDER_ALIGNED (c,(HASH_LONG *)data,1);
	else
#if !defined(HASH_BLOCK_DATA_ORDER)
		{
		/* No routine for unaligned input: go through c->data. */
		memcpy (c->data,data,HASH_CBLOCK);
		HASH_BLOCK_DATA_ORDER_ALIGNED (c,c->data,1);
		}
#endif
#endif
#if defined(HASH_BLOCK_DATA_ORDER)
	/*
	 * HASH_BLOCK_DATA_ORDER takes the raw, possibly unaligned byte
	 * stream, so the byte pointer is passed without a HASH_LONG *
	 * cast (which would yield a misaligned word pointer and would not
	 * match a block function taking a byte pointer).
	 */
	HASH_BLOCK_DATA_ORDER (c,data,1);
#endif
	}
/*
 * HASH_FINAL: finish the digest computation for context c.
 *
 * Appends the 0x80 marker byte to the buffered data, zero-fills up to
 * the last two words of the block, stores the message bit count
 * (c->Nl, c->Nh) in those two words and runs the block function. If
 * fewer than two words remain free after the marker, the current block
 * is hashed first and the length goes into a fresh one. The state
 * words A, B, C and D are then written to md in data byte order
 * (16 bytes) and c->num is reset to 0.
 */
void HASH_FINAL (unsigned char *md, HASH_CTX *c)
	{
	static const unsigned char marker[4]={0x80,0x00,0x00,0x00};
	const unsigned char *mp=marker;
	register HASH_LONG *block=c->data;
	register unsigned long word;
	register int widx=c->num>>2;	/* word holding the next free byte */
	register int boff=c->num&0x03;	/* valid bytes already in that word */

	/* c->num should definitely have room for at least one more byte. */

	/*
	 * With boff==0 the word holds no valid bytes yet, so start from 0
	 * rather than reading block[widx]. Reading it unconditionally used
	 * to make Purify report an Uninitialized Memory Read; it was
	 * harmless because HOST_p_c2l assigns (rather than ORs) in exactly
	 * that case. Many thanks to Alex Tang <altitude@cic.net> for
	 * picking up this 'potential bug'.
	 */
	if (boff==0)
		word=0;
	else
		word=block[widx];
	HOST_p_c2l(mp,word,boff);
	block[widx]=word;
	widx++;		/* widx is now the next 'undefined word' */

	if (widx>=(HASH_LBLOCK-1))	/* no room left for Nl and Nh */
		{
		if (widx<HASH_LBLOCK)
			block[widx]=0;
		HASH_BLOCK_HOST_ORDER (c,block,1);
		widx=0;
		}

	while (widx<(HASH_LBLOCK-2))
		{
		block[widx]=0;
		widx++;
		}

	/* The two length words are ordered like the data stream. */
#if defined(DATA_ORDER_IS_BIG_ENDIAN)
	block[HASH_LBLOCK-2]=c->Nh;
	block[HASH_LBLOCK-1]=c->Nl;
#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
	block[HASH_LBLOCK-2]=c->Nl;
	block[HASH_LBLOCK-1]=c->Nh;
#endif
	HASH_BLOCK_HOST_ORDER (c,block,1);

	/* Serialize the chaining values; HOST_l2c advances md by 4 each time. */
	word=c->A; HOST_l2c(word,md);
	word=c->B; HOST_l2c(word,md);
	word=c->C; HOST_l2c(word,md);
	word=c->D; HOST_l2c(word,md);

	c->num=0;
	/* clear stuff, HASH_BLOCK may be leaving some stuff on the stack
	 * but I'm not worried :-)
	memset((void *)c,0,sizeof(HASH_CTX));
	 */
	}

View file

@ -66,6 +66,14 @@ asm/mx86bsdi.o: asm/mx86unix.cpp
asm/mx86unix.cpp: asm/md5-586.pl
(cd asm; $(PERL) md5-586.pl cpp >mx86unix.cpp)
# works for both SC and gcc
asm/md5-sparcv8plus.o: asm/md5-sparcv9.S
$(CPP) -DULTRASPARC -DMD5_BLOCK_DATA_ORDER asm/md5-sparcv9.S | as -xarch=v8plus /dev/fd/0 -o asm/md5-sparcv8plus.o
asm/md5-sparcv9.o: asm/md5-sparcv9.S
$(CC) -xarch=v9 -DULTRASPARC -DMD5_BLOCK_DATA_ORDER -c asm/md5-sparcv9.S -o asm/md5-sparcv9.o
files:
$(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO
@ -103,5 +111,5 @@ clean:
# DO NOT DELETE THIS LINE -- make depend depends on it.
md5_dgst.o: ../../include/openssl/md5.h ../../include/openssl/opensslv.h
md5_dgst.o: md5_locl.h
md5_dgst.o: ../md32_common.h md5_locl.h
md5_one.o: ../../include/openssl/md5.h md5_locl.h

View file

@ -29,7 +29,7 @@ $X="esi";
0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3
);
&md5_block("md5_block_x86");
&md5_block("md5_block_asm_host_order");
&asm_finish();
sub Np
@ -183,6 +183,7 @@ sub md5_block
&mov($X, &wparam(1)); # esi
&mov($C, &wparam(2));
&push("ebp");
&shl($C, 6);
&push("ebx");
&add($C, $X); # offset we end at
&sub($C, 64);

1035
crypto/md5/asm/md5-sparcv9.S Normal file

File diff suppressed because it is too large Load diff

View file

@ -67,23 +67,43 @@ extern "C" {
#error MD5 is disabled.
#endif
/*
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
* ! MD5_LONG has to be at least 32 bits wide. If it's wider, then !
* ! MD5_LONG_LOG2 has to be defined along. !
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
*/
#if defined(WIN16) || defined(__LP32__)
#define MD5_LONG unsigned long
#elif defined(_CRAY) || defined(__ILP64__)
#define MD5_LONG unsigned long
#define MD5_LONG_LOG2 3
/*
* _CRAY note. I could declare short, but I have no idea what impact
* it has on performance on non-T3E machines. I could declare
* int, but at least on C90 sizeof(int) can be chosen at compile time.
* So I've chosen long...
* <appro@fy.chalmers.se>
*/
#else
#define MD5_LONG unsigned int
#endif
#define MD5_CBLOCK 64
#define MD5_LBLOCK 16
#define MD5_BLOCK 16
#define MD5_LAST_BLOCK 56
#define MD5_LENGTH_BLOCK 8
#define MD5_LBLOCK (MD5_CBLOCK/4)
#define MD5_DIGEST_LENGTH 16
typedef struct MD5state_st
{
unsigned long A,B,C,D;
unsigned long Nl,Nh;
unsigned long data[MD5_LBLOCK];
MD5_LONG A,B,C,D;
MD5_LONG Nl,Nh;
MD5_LONG data[MD5_LBLOCK];
int num;
} MD5_CTX;
void MD5_Init(MD5_CTX *c);
void MD5_Update(MD5_CTX *c, const void *data, unsigned long len);
void MD5_Update(MD5_CTX *c, const unsigned char *data, unsigned long len);
void MD5_Final(unsigned char *md, MD5_CTX *c);
unsigned char *MD5(unsigned char *d, unsigned long n, unsigned char *md);
void MD5_Transform(MD5_CTX *c, unsigned char *b);

View file

@ -70,12 +70,6 @@ char *MD5_version="MD5" OPENSSL_VERSION_PTEXT;
#define INIT_DATA_C (unsigned long)0x98badcfeL
#define INIT_DATA_D (unsigned long)0x10325476L
# ifdef MD5_ASM
void md5_block_x86(MD5_CTX *c, unsigned long *p,int num);
# define md5_block md5_block_x86
# else
static void md5_block(MD5_CTX *c, unsigned long *p,int num);
# endif
void MD5_Init(MD5_CTX *c)
{
c->A=INIT_DATA_A;
@ -87,183 +81,31 @@ void MD5_Init(MD5_CTX *c)
c->num=0;
}
void MD5_Update(MD5_CTX *c, const void *_data, unsigned long len)
#ifndef md5_block_host_order
void md5_block_host_order (MD5_CTX *c, const MD5_LONG *X, int num)
{
register const unsigned char *data=_data;
register ULONG *p;
int sw,sc;
ULONG l;
if (len == 0) return;
l=(c->Nl+(len<<3))&0xffffffffL;
/* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
* Wei Dai <weidai@eskimo.com> for pointing it out. */
if (l < c->Nl) /* overflow */
c->Nh++;
c->Nh+=(len>>29);
c->Nl=l;
if (c->num != 0)
{
p=c->data;
sw=c->num>>2;
sc=c->num&0x03;
if ((c->num+len) >= MD5_CBLOCK)
{
l= p[sw];
p_c2l(data,l,sc);
p[sw++]=l;
for (; sw<MD5_LBLOCK; sw++)
{
c2l(data,l);
p[sw]=l;
}
len-=(MD5_CBLOCK-c->num);
md5_block(c,p,64);
c->num=0;
/* drop through and do the rest */
}
else
{
int ew,ec;
c->num+=(int)len;
if ((sc+len) < 4) /* ugly, add char's to a word */
{
l= p[sw];
p_c2l_p(data,l,sc,len);
p[sw]=l;
}
else
{
ew=(c->num>>2);
ec=(c->num&0x03);
l= p[sw];
p_c2l(data,l,sc);
p[sw++]=l;
for (; sw < ew; sw++)
{ c2l(data,l); p[sw]=l; }
if (ec)
{
c2l_p(data,l,ec);
p[sw]=l;
}
}
return;
}
}
/* we now can process the input data in blocks of MD5_CBLOCK
* chars and save the leftovers to c->data. */
#ifdef L_ENDIAN
if ((((unsigned long)data)%sizeof(ULONG)) == 0)
{
sw=(int)len/MD5_CBLOCK;
if (sw > 0)
{
sw*=MD5_CBLOCK;
md5_block(c,(ULONG *)data,sw);
data+=sw;
len-=sw;
}
}
#endif
p=c->data;
while (len >= MD5_CBLOCK)
{
#if defined(L_ENDIAN) || defined(B_ENDIAN)
if (p != (unsigned long *)data)
memcpy(p,data,MD5_CBLOCK);
data+=MD5_CBLOCK;
#ifdef B_ENDIAN
for (sw=(MD5_LBLOCK/4); sw; sw--)
{
Endian_Reverse32(p[0]);
Endian_Reverse32(p[1]);
Endian_Reverse32(p[2]);
Endian_Reverse32(p[3]);
p+=4;
}
#endif
#else
for (sw=(MD5_LBLOCK/4); sw; sw--)
{
c2l(data,l); *(p++)=l;
c2l(data,l); *(p++)=l;
c2l(data,l); *(p++)=l;
c2l(data,l); *(p++)=l;
}
#endif
p=c->data;
md5_block(c,p,64);
len-=MD5_CBLOCK;
}
sc=(int)len;
c->num=sc;
if (sc)
{
sw=sc>>2; /* words to copy */
#ifdef L_ENDIAN
p[sw]=0;
memcpy(p,data,sc);
#else
sc&=0x03;
for ( ; sw; sw--)
{ c2l(data,l); *(p++)=l; }
c2l_p(data,l,sc);
*p=l;
#endif
}
}
void MD5_Transform(MD5_CTX *c, unsigned char *b)
{
ULONG p[16];
#if !defined(L_ENDIAN)
ULONG *q;
int i;
#endif
#if defined(B_ENDIAN) || defined(L_ENDIAN)
memcpy(p,b,64);
#ifdef B_ENDIAN
q=p;
for (i=(MD5_LBLOCK/4); i; i--)
{
Endian_Reverse32(q[0]);
Endian_Reverse32(q[1]);
Endian_Reverse32(q[2]);
Endian_Reverse32(q[3]);
q+=4;
}
#endif
#else
q=p;
for (i=(MD5_LBLOCK/4); i; i--)
{
ULONG l;
c2l(b,l); *(q++)=l;
c2l(b,l); *(q++)=l;
c2l(b,l); *(q++)=l;
c2l(b,l); *(q++)=l;
}
#endif
md5_block(c,p,64);
}
#ifndef MD5_ASM
static void md5_block(MD5_CTX *c, register ULONG *X, int num)
{
register ULONG A,B,C,D;
register unsigned long A,B,C,D;
/*
* In case you wonder why A-D are declared as long and not
* as MD5_LONG. Doing so results in slight performance
* boost on LP64 architectures. The catch is we don't
* really care if 32 MSBs of a 64-bit register get polluted
* with eventual overflows as we *save* only 32 LSBs in
* *either* case. Now declaring 'em long excuses the compiler
* from keeping 32 MSBs zeroed resulting in 13% performance
* improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
* Well, to be honest it should say that this *prevents*
* performance degradation.
*
* <appro@fy.chalmers.se>
*/
A=c->A;
B=c->B;
C=c->C;
D=c->D;
for (;;)
for (;num--;X+=HASH_LBLOCK)
{
/* Round 0 */
R0(A,B,C,D,X[ 0], 7,0xd76aa478L);
@ -334,74 +176,127 @@ static void md5_block(MD5_CTX *c, register ULONG *X, int num)
R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
R3(B,C,D,A,X[ 9],21,0xeb86d391L);
A+=c->A&0xffffffffL;
B+=c->B&0xffffffffL;
c->A=A;
c->B=B;
C+=c->C&0xffffffffL;
D+=c->D&0xffffffffL;
c->C=C;
c->D=D;
X+=16;
num-=64;
if (num <= 0) break;
A = c->A += A;
B = c->B += B;
C = c->C += C;
D = c->D += D;
}
}
#endif
void MD5_Final(unsigned char *md, MD5_CTX *c)
#ifndef md5_block_data_order
void md5_block_data_order (MD5_CTX *c, const unsigned char *data, int num)
{
register int i,j;
register ULONG l;
register ULONG *p;
static unsigned char end[4]={0x80,0x00,0x00,0x00};
unsigned char *cp=end;
register unsigned long A,B,C,D,l;
/*
* In case you wonder why A-D are declared as long and not
* as MD5_LONG. Doing so results in slight performance
* boost on LP64 architectures. The catch is we don't
* really care if 32 MSBs of a 64-bit register get polluted
* with eventual overflows as we *save* only 32 LSBs in
* *either* case. Now declaring 'em long excuses the compiler
* from keeping 32 MSBs zeroed resulting in 13% performance
* improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
* Well, to be honest it should say that this *prevents*
* performance degradation.
*
* <appro@fy.chalmers.se>
*/
MD5_LONG X[MD5_LBLOCK];
/*
* In case you wonder why I don't use c->data for this:
* RISCs usually have a handful of registers, and if X is
* declared as an automatic array a good optimizing compiler
* will accommodate at least part of it in the register bank
* instead of memory.
*
* <appro@fy.chalmers.se>
*/
/* c->num should definitly have room for at least one more byte. */
p=c->data;
j=c->num;
i=j>>2;
A=c->A;
B=c->B;
C=c->C;
D=c->D;
/* purify often complains about the following line as an
* Uninitialized Memory Read. While this can be true, the
* following p_c2l macro will reset l when that case is true.
* This is because j&0x03 contains the number of 'valid' bytes
* already in p[i]. If and only if j&0x03 == 0, the UMR will
* occur but this is also the only time p_c2l will do
* l= *(cp++) instead of l|= *(cp++)
* Many thanks to Alex Tang <altitude@cic.net> for pickup this
* 'potential bug' */
#ifdef PURIFY
if ((j&0x03) == 0) p[i]=0;
#endif
l=p[i];
p_c2l(cp,l,j&0x03);
p[i]=l;
i++;
/* i is the next 'undefined word' */
if (c->num >= MD5_LAST_BLOCK)
for (;num--;)
{
for (; i<MD5_LBLOCK; i++)
p[i]=0;
md5_block(c,p,64);
i=0;
}
for (; i<(MD5_LBLOCK-2); i++)
p[i]=0;
p[MD5_LBLOCK-2]=c->Nl;
p[MD5_LBLOCK-1]=c->Nh;
md5_block(c,p,64);
cp=md;
l=c->A; l2c(l,cp);
l=c->B; l2c(l,cp);
l=c->C; l2c(l,cp);
l=c->D; l2c(l,cp);
HOST_c2l(data,l); X[ 0]=l; HOST_c2l(data,l); X[ 1]=l;
/* Round 0 */
R0(A,B,C,D,X[ 0], 7,0xd76aa478L); HOST_c2l(data,l); X[ 2]=l;
R0(D,A,B,C,X[ 1],12,0xe8c7b756L); HOST_c2l(data,l); X[ 3]=l;
R0(C,D,A,B,X[ 2],17,0x242070dbL); HOST_c2l(data,l); X[ 4]=l;
R0(B,C,D,A,X[ 3],22,0xc1bdceeeL); HOST_c2l(data,l); X[ 5]=l;
R0(A,B,C,D,X[ 4], 7,0xf57c0fafL); HOST_c2l(data,l); X[ 6]=l;
R0(D,A,B,C,X[ 5],12,0x4787c62aL); HOST_c2l(data,l); X[ 7]=l;
R0(C,D,A,B,X[ 6],17,0xa8304613L); HOST_c2l(data,l); X[ 8]=l;
R0(B,C,D,A,X[ 7],22,0xfd469501L); HOST_c2l(data,l); X[ 9]=l;
R0(A,B,C,D,X[ 8], 7,0x698098d8L); HOST_c2l(data,l); X[10]=l;
R0(D,A,B,C,X[ 9],12,0x8b44f7afL); HOST_c2l(data,l); X[11]=l;
R0(C,D,A,B,X[10],17,0xffff5bb1L); HOST_c2l(data,l); X[12]=l;
R0(B,C,D,A,X[11],22,0x895cd7beL); HOST_c2l(data,l); X[13]=l;
R0(A,B,C,D,X[12], 7,0x6b901122L); HOST_c2l(data,l); X[14]=l;
R0(D,A,B,C,X[13],12,0xfd987193L); HOST_c2l(data,l); X[15]=l;
R0(C,D,A,B,X[14],17,0xa679438eL);
R0(B,C,D,A,X[15],22,0x49b40821L);
/* Round 1 */
R1(A,B,C,D,X[ 1], 5,0xf61e2562L);
R1(D,A,B,C,X[ 6], 9,0xc040b340L);
R1(C,D,A,B,X[11],14,0x265e5a51L);
R1(B,C,D,A,X[ 0],20,0xe9b6c7aaL);
R1(A,B,C,D,X[ 5], 5,0xd62f105dL);
R1(D,A,B,C,X[10], 9,0x02441453L);
R1(C,D,A,B,X[15],14,0xd8a1e681L);
R1(B,C,D,A,X[ 4],20,0xe7d3fbc8L);
R1(A,B,C,D,X[ 9], 5,0x21e1cde6L);
R1(D,A,B,C,X[14], 9,0xc33707d6L);
R1(C,D,A,B,X[ 3],14,0xf4d50d87L);
R1(B,C,D,A,X[ 8],20,0x455a14edL);
R1(A,B,C,D,X[13], 5,0xa9e3e905L);
R1(D,A,B,C,X[ 2], 9,0xfcefa3f8L);
R1(C,D,A,B,X[ 7],14,0x676f02d9L);
R1(B,C,D,A,X[12],20,0x8d2a4c8aL);
/* Round 2 */
R2(A,B,C,D,X[ 5], 4,0xfffa3942L);
R2(D,A,B,C,X[ 8],11,0x8771f681L);
R2(C,D,A,B,X[11],16,0x6d9d6122L);
R2(B,C,D,A,X[14],23,0xfde5380cL);
R2(A,B,C,D,X[ 1], 4,0xa4beea44L);
R2(D,A,B,C,X[ 4],11,0x4bdecfa9L);
R2(C,D,A,B,X[ 7],16,0xf6bb4b60L);
R2(B,C,D,A,X[10],23,0xbebfbc70L);
R2(A,B,C,D,X[13], 4,0x289b7ec6L);
R2(D,A,B,C,X[ 0],11,0xeaa127faL);
R2(C,D,A,B,X[ 3],16,0xd4ef3085L);
R2(B,C,D,A,X[ 6],23,0x04881d05L);
R2(A,B,C,D,X[ 9], 4,0xd9d4d039L);
R2(D,A,B,C,X[12],11,0xe6db99e5L);
R2(C,D,A,B,X[15],16,0x1fa27cf8L);
R2(B,C,D,A,X[ 2],23,0xc4ac5665L);
/* Round 3 */
R3(A,B,C,D,X[ 0], 6,0xf4292244L);
R3(D,A,B,C,X[ 7],10,0x432aff97L);
R3(C,D,A,B,X[14],15,0xab9423a7L);
R3(B,C,D,A,X[ 5],21,0xfc93a039L);
R3(A,B,C,D,X[12], 6,0x655b59c3L);
R3(D,A,B,C,X[ 3],10,0x8f0ccc92L);
R3(C,D,A,B,X[10],15,0xffeff47dL);
R3(B,C,D,A,X[ 1],21,0x85845dd1L);
R3(A,B,C,D,X[ 8], 6,0x6fa87e4fL);
R3(D,A,B,C,X[15],10,0xfe2ce6e0L);
R3(C,D,A,B,X[ 6],15,0xa3014314L);
R3(B,C,D,A,X[13],21,0x4e0811a1L);
R3(A,B,C,D,X[ 4], 6,0xf7537e82L);
R3(D,A,B,C,X[11],10,0xbd3af235L);
R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
R3(B,C,D,A,X[ 9],21,0xeb86d391L);
/* clear stuff, md5_block may be leaving some stuff on the stack
* but I'm not worried :-) */
c->num=0;
/* memset((char *)&c,0,sizeof(c));*/
A = c->A += A;
B = c->B += B;
C = c->C += C;
D = c->D += D;
}
}
#endif
#ifdef undef
int printit(unsigned long *l)

View file

@ -56,98 +56,79 @@
* [including the GNU Public Licence.]
*/
/* On sparc, this actually slows things down :-( */
#if defined(sun)
#undef B_ENDIAN
#endif
#include <stdlib.h>
#include <string.h>
#include <openssl/md5.h>
#define ULONG unsigned long
#define UCHAR unsigned char
#define UINT unsigned int
#undef c2l
#define c2l(c,l) (l = ((unsigned long)(*((c)++))) , \
l|=(((unsigned long)(*((c)++)))<< 8), \
l|=(((unsigned long)(*((c)++)))<<16), \
l|=(((unsigned long)(*((c)++)))<<24))
#undef p_c2l
#define p_c2l(c,l,n) { \
switch (n) { \
case 0: l =((unsigned long)(*((c)++))); \
case 1: l|=((unsigned long)(*((c)++)))<< 8; \
case 2: l|=((unsigned long)(*((c)++)))<<16; \
case 3: l|=((unsigned long)(*((c)++)))<<24; \
} \
}
/* NOTE the pointer is not incremented at the end of this */
/*
 * c2l_p(c,l,n): load the first n bytes (n = 1..3) at c into the low
 * bytes of l, little-endian.  l is cleared first, so the missing high
 * bytes read as zero.
 * c is advanced by n and then stepped back one byte per case (the cases
 * fall through on purpose), so for n in 1..3 it ends where it started.
 * For any other n no case matches: l stays 0 and c is left advanced by n.
 */
#undef c2l_p
#define c2l_p(c,l,n) { \
l=0; \
(c)+=n; \
switch (n) { \
case 3: l =((unsigned long)(*(--(c))))<<16; \
case 2: l|=((unsigned long)(*(--(c))))<< 8; \
case 1: l|=((unsigned long)(*(--(c)))) ; \
} \
}
/*
 * p_c2l_p(c,l,sc,len): continue a partially loaded little-endian word
 * when fewer bytes than a full word may be available.
 * sc is the index of the first missing byte (0..2); the switch enters
 * there and falls through.  After byte 0 and after byte 1, len is
 * pre-decremented and the switch is left as soon as it reaches zero.
 * Case 2 does not decrement len, and the top byte (<<24) is never
 * loaded by this macro.
 * len is modified in place and is expanded without parentheses, so it
 * must be a plain modifiable lvalue.  As in p_c2l, only case 0 assigns
 * to l; the other cases OR into its existing value.
 */
#undef p_c2l_p
#define p_c2l_p(c,l,sc,len) { \
switch (sc) \
{ \
case 0: l =((unsigned long)(*((c)++))); \
if (--len == 0) break; \
case 1: l|=((unsigned long)(*((c)++)))<< 8; \
if (--len == 0) break; \
case 2: l|=((unsigned long)(*((c)++)))<<16; \
} \
}
/*
 * l2c(l,c): store the low 32 bits of l through c as four bytes in
 * little-endian order (least significant byte first).
 * c is post-incremented once per byte and ends up 4 bytes further on;
 * it must be a modifiable pointer lvalue.  l is only read, but it is
 * expanded four times, so it should have no side effects.
 */
#undef l2c
#define l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
*((c)++)=(unsigned char)(((l)>>24)&0xff))
/* NOTE - c is not incremented as per l2c */
/*
 * l2cn(l1,l2,c,n): store the first n bytes (n = 1..8) of the pair
 * l1 (bytes 0..3) and l2 (bytes 4..7) at c, little-endian within each
 * word.
 * c is advanced by n and then stepped back one byte per case (the cases
 * fall through on purpose), so for n in 1..8 it ends where it started.
 * For any other n no case matches and c is left advanced by n.
 * c and n appear unparenthesized in "c+=n", so pass simple expressions.
 */
#undef l2cn
#define l2cn(l1,l2,c,n) { \
c+=n; \
switch (n) { \
case 8: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \
case 7: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \
case 6: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \
case 5: *(--(c))=(unsigned char)(((l2) )&0xff); \
case 4: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \
case 3: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \
case 2: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \
case 1: *(--(c))=(unsigned char)(((l1) )&0xff); \
} \
}
/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
/*
 * Endian_Reverse32(a): reverse the order of the four low bytes of a,
 * in place.  It expands to a brace block (a statement, not an
 * expression) and assigns to a, so a must be a modifiable lvalue.
 * ROTATE must be defined by the time the macro is expanded.
 * The temporary is named l: passing a variable that is itself called l
 * would make the initializer read the new, uninitialized temporary.
 *
 * WIN32 variant:   two rotates of the original value, each masked to
 *                  keep the two bytes that landed in the right place.
 * generic variant: swap the bytes inside each 16-bit half, then rotate
 *                  by 16 to swap the halves.
 */
#if defined(WIN32)
/* 5 instructions with rotate instruction, else 9 */
#define Endian_Reverse32(a) \
{ \
unsigned long l=(a); \
(a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \
}
#else
/* 6 instructions with rotate instruction, else 8 */
#define Endian_Reverse32(a) \
{ \
unsigned long l=(a); \
l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \
(a)=ROTATE(l,16L); \
}
#ifndef MD5_LONG_LOG2
#define MD5_LONG_LOG2 2 /* default to 32 bits */
#endif
/*
 * Assembler hook-up, active only when MD5_ASM is defined:
 *  - __i386 or WIN32: the name md5_block_host_order is redirected to
 *    md5_block_asm_host_order, so the prototype below and every later
 *    use of that name refer to the assembler routine.
 *  - __sparc together with ULTRASPARC: md5_block_asm_data_order_aligned
 *    is declared and published as HASH_BLOCK_DATA_ORDER_ALIGNED.
 * On any other target MD5_ASM changes nothing in this section.
 */
#ifdef MD5_ASM
# if defined(__i386) || defined(WIN32)
# define md5_block_host_order md5_block_asm_host_order
# elif defined(__sparc) && defined(ULTRASPARC)
void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,int num);
# define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned
# endif
#endif
/*
 * Block functions.  The host-order variant takes its input as MD5_LONG
 * words, the data-order variant as raw bytes.
 * NOTE(review): num is presumably a count of input blocks - confirm
 * against the implementations.
 */
void md5_block_host_order (MD5_CTX *c, const MD5_LONG *p,int num);
void md5_block_data_order (MD5_CTX *c, const unsigned char *p,int num);
#if defined(__i386)
/*
* *_block_host_order is expected to handle aligned data while
* *_block_data_order - unaligned. As algorithm and host (x86)
* are in this case of the same "endianness" these two are
* otherwise indistinguishable. But normally you don't want to
* call the same function because unaligned access in places
* where alignment is expected is usually a "Bad Thing". Indeed,
* on RISCs you get punished with BUS ERROR signal or *severe*
* performance degradation. Intel CPUs are in turn perfectly
* capable of loading unaligned data without such drastic side
* effect. Yes, they say it's slower than aligned load, but no
* exception is generated and therefore performance degradation
* is *incomparable* with RISCs. What we should weigh here is
* costs of unaligned access against costs of aligning data.
* According to my measurements allowing unaligned access results
* in ~9% performance improvement on Pentium II operating at
* 266MHz. I won't be surprised if the difference will be higher
* on faster systems:-)
*
* <appro@fy.chalmers.se>
*/
#define md5_block_data_order md5_block_host_order
#endif
/*
 * Generic-name configuration: map the HASH_* names onto their MD5
 * counterparts and declare the input byte order as little-endian.
 * NOTE(review): these are presumably consumed by "../md32_common.h",
 * which is included after this group - confirm which of them that
 * header requires.
 */
#define DATA_ORDER_IS_LITTLE_ENDIAN
#define HASH_LONG MD5_LONG
#define HASH_LONG_LOG2 MD5_LONG_LOG2
#define HASH_CTX MD5_CTX
#define HASH_CBLOCK MD5_CBLOCK
#define HASH_LBLOCK MD5_LBLOCK
#define HASH_UPDATE MD5_Update
#define HASH_TRANSFORM MD5_Transform
#define HASH_FINAL MD5_Final
#define HASH_BLOCK_HOST_ORDER md5_block_host_order
#if defined(B_ENDIAN) || defined(md5_block_data_order)
#define HASH_BLOCK_DATA_ORDER md5_block_data_order
/*
* Little-endians (Intel and Alpha) feel better without this.
* It looks like memcpy does better job than generic
* md5_block_data_order on copying-n-aligning input data.
* But frankly speaking I didn't expect such a result on Alpha.
* On the other hand I've got this with egcs-1.0.2 and if
* program is compiled with another (better?) compiler it
* might turn out other way around.
*
* <appro@fy.chalmers.se>
*/
#endif
#include "../md32_common.h"
/*
#define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x,y,z) (((x) & (z)) | ((y) & (~(z))))
@ -162,14 +143,6 @@
#define H(b,c,d) ((b) ^ (c) ^ (d))
#define I(b,c,d) (((~(d)) | (b)) ^ (c))
/*
 * ROTATE(a,n): rotate a left by n bits, treating it as a 32-bit value.
 * WIN32 builds delegate to _lrotl (presumably the compiler run-time
 * rotate - confirm).
 * The generic form masks a to 32 bits before the right shift, so bits
 * above bit 31 of a wider type never wrap into the low end.  The
 * left-shifted half is not masked, so on a type wider than 32 bits the
 * result can carry bits above bit 31.
 * n is expected to be in 1..31: n == 0 makes the right shift count 32,
 * which is undefined when the operand type is exactly 32 bits wide.
 * a is expanded twice in the generic form; avoid side effects.
 */
#undef ROTATE
#if defined(WIN32)
#define ROTATE(a,n) _lrotl(a,n)
#else
#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
#endif
#define R0(a,b,c,d,k,s,t) { \
a+=((k)+(t)+F((b),(c),(d))); \
a=ROTATE(a,s); \

View file

@ -57,7 +57,8 @@
*/
#include <stdio.h>
#include "md5_locl.h"
#include <string.h>
#include <openssl/md5.h>
unsigned char *MD5(unsigned char *d, unsigned long n, unsigned char *md)
{