Remove some inline assembler and non-standard constructs.

The goal is to minimize maintenance burden by eliminating somewhat
obscure platform-specific tweaks that are not viewed as critical for
contemporary applications. This affects Camellia and digest
implementations that rely on md32_common.h, MD4, MD5, SHA1, SHA256.
SHA256 is the only one that can be viewed as critical, but given
the assembly coverage, the omission is considered appropriate.

Reviewed-by: Rich Salz <rsalz@openssl.org>
Reviewed-by: Richard Levitte <levitte@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6508)
This commit is contained in:
Andy Polyakov 2018-06-18 12:50:53 +02:00
parent a766aab93a
commit 9be083ad36
2 changed files with 9 additions and 167 deletions

View file

@ -44,51 +44,11 @@
#include <string.h>
#include <stdlib.h>
/* 32-bit rotations */
#if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
# if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
# define RightRotate(x, s) _lrotr(x, s)
# define LeftRotate(x, s) _lrotl(x, s)
# if _MSC_VER >= 1400
# define SWAP(x) _byteswap_ulong(x)
# else
# define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00)
# endif
# define GETU32(p) SWAP(*((u32 *)(p)))
# define PUTU32(p,v) (*((u32 *)(p)) = SWAP((v)))
# elif defined(__GNUC__) && __GNUC__>=2
# if defined(__i386) || defined(__x86_64)
# define RightRotate(x,s) ({u32 ret; asm ("rorl %1,%0":"=r"(ret):"I"(s),"0"(x):"cc"); ret; })
# define LeftRotate(x,s) ({u32 ret; asm ("roll %1,%0":"=r"(ret):"I"(s),"0"(x):"cc"); ret; })
# if defined(B_ENDIAN) /* stratus.com does it */
# define GETU32(p) (*(u32 *)(p))
# define PUTU32(p,v) (*(u32 *)(p)=(v))
# else
# define GETU32(p) ({u32 r=*(const u32 *)(p); asm("bswapl %0":"=r"(r):"0"(r)); r; })
# define PUTU32(p,v) ({u32 r=(v); asm("bswapl %0":"=r"(r):"0"(r)); *(u32 *)(p)=r; })
# endif
# elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
# define LeftRotate(x,s) ({u32 ret; asm ("rlwinm %0,%1,%2,0,31":"=r"(ret):"r"(x),"I"(s)); ret; })
# define RightRotate(x,s) LeftRotate(x,(32-s))
# elif defined(__s390x__)
# define LeftRotate(x,s) ({u32 ret; asm ("rll %0,%1,%2":"=r"(ret):"r"(x),"I"(s)); ret; })
# define RightRotate(x,s) LeftRotate(x,(32-s))
# define GETU32(p) (*(u32 *)(p))
# define PUTU32(p,v) (*(u32 *)(p)=(v))
# endif
# endif
#endif
#define RightRotate(x, s) ( ((x) >> (s)) + ((x) << (32 - s)) )
#define LeftRotate(x, s) ( ((x) << (s)) + ((x) >> (32 - s)) )
#if !defined(RightRotate) && !defined(LeftRotate)
# define RightRotate(x, s) ( ((x) >> (s)) + ((x) << (32 - s)) )
# define LeftRotate(x, s) ( ((x) << (s)) + ((x) >> (32 - s)) )
#endif
#if !defined(GETU32) && !defined(PUTU32)
# define GETU32(p) (((u32)(p)[0] << 24) ^ ((u32)(p)[1] << 16) ^ ((u32)(p)[2] << 8) ^ ((u32)(p)[3]))
# define PUTU32(p,v) ((p)[0] = (u8)((v) >> 24), (p)[1] = (u8)((v) >> 16), (p)[2] = (u8)((v) >> 8), (p)[3] = (u8)(v))
#endif
#define GETU32(p) (((u32)(p)[0] << 24) ^ ((u32)(p)[1] << 16) ^ ((u32)(p)[2] << 8) ^ ((u32)(p)[3]))
#define PUTU32(p,v) ((p)[0] = (u8)((v) >> 24), (p)[1] = (u8)((v) >> 16), (p)[2] = (u8)((v) >> 8), (p)[3] = (u8)(v))
/* S-box data */
#define SBOX1_1110 Camellia_SBOX[0]

View file

@ -93,149 +93,31 @@
# error "HASH_BLOCK_DATA_ORDER must be defined!"
#endif
/*
* Engage compiler specific rotate intrinsic function if available.
*/
#undef ROTATE
#ifndef PEDANTIC
# if defined(_MSC_VER)
# define ROTATE(a,n) _lrotl(a,n)
# elif defined(__ICC)
# define ROTATE(a,n) _rotl(a,n)
# elif defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
* Some GNU C inline assembler templates. Note that these are
* rotates by *constant* number of bits! But that's exactly
* what we need here...
*/
# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
# define ROTATE(a,n) ({ register unsigned int ret; \
asm ( \
"roll %1,%0" \
: "=r"(ret) \
: "I"(n), "0"((unsigned int)(a)) \
: "cc"); \
ret; \
})
# elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
# define ROTATE(a,n) ({ register unsigned int ret; \
asm ( \
"rlwinm %0,%1,%2,0,31" \
: "=r"(ret) \
: "r"(a), "I"(n)); \
ret; \
})
# elif defined(__s390x__)
# define ROTATE(a,n) ({ register unsigned int ret; \
asm ("rll %0,%1,%2" \
: "=r"(ret) \
: "r"(a), "I"(n)); \
ret; \
})
# endif
# endif
#endif /* PEDANTIC */
#ifndef ROTATE
# define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
#endif
#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
#if defined(DATA_ORDER_IS_BIG_ENDIAN)
# ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
# if ((defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)) || \
(defined(__x86_64) || defined(__x86_64__))
# if !defined(B_ENDIAN)
/*
* This gives ~30-40% performance improvement in SHA-256 compiled
* with gcc [on P4]. Well, first macro to be frank. We can pull
* this trick on x86* platforms only, because these CPUs can fetch
* unaligned data without raising an exception.
*/
# define HOST_c2l(c,l) ({ unsigned int r=*((const unsigned int *)(c)); \
asm ("bswapl %0":"=r"(r):"0"(r)); \
(c)+=4; (l)=r; })
# define HOST_l2c(l,c) ({ unsigned int r=(l); \
asm ("bswapl %0":"=r"(r):"0"(r)); \
*((unsigned int *)(c))=r; (c)+=4; r; })
# endif
# elif defined(__aarch64__)
# if defined(__BYTE_ORDER__)
# if defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
# define HOST_c2l(c,l) ({ unsigned int r; \
asm ("rev %w0,%w1" \
:"=r"(r) \
:"r"(*((const unsigned int *)(c))));\
(c)+=4; (l)=r; })
# define HOST_l2c(l,c) ({ unsigned int r; \
asm ("rev %w0,%w1" \
:"=r"(r) \
:"r"((unsigned int)(l)));\
*((unsigned int *)(c))=r; (c)+=4; r; })
# elif defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l))
# endif
# endif
# endif
# endif
# if defined(__s390__) || defined(__s390x__)
# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l))
# endif
# endif
# ifndef HOST_c2l
# define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++)))<<24), \
l|=(((unsigned long)(*((c)++)))<<16), \
l|=(((unsigned long)(*((c)++)))<< 8), \
l|=(((unsigned long)(*((c)++))) ) )
# endif
# ifndef HOST_l2c
# define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
*((c)++)=(unsigned char)(((l) )&0xff), \
l)
# endif
#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
# ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
# if defined(__s390x__)
# define HOST_c2l(c,l) ({ asm ("lrv %0,%1" \
:"=d"(l) :"m"(*(const unsigned int *)(c)));\
(c)+=4; (l); })
# define HOST_l2c(l,c) ({ asm ("strv %1,%0" \
:"=m"(*(unsigned int *)(c)) :"d"(l));\
(c)+=4; (l); })
# endif
# endif
# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
# ifndef B_ENDIAN
/* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */
# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, l)
# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, l)
# endif
# endif
# endif
# ifndef HOST_c2l
# define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++))) ), \
l|=(((unsigned long)(*((c)++)))<< 8), \
l|=(((unsigned long)(*((c)++)))<<16), \
l|=(((unsigned long)(*((c)++)))<<24) )
# endif
# ifndef HOST_l2c
# define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
*((c)++)=(unsigned char)(((l)>>24)&0xff), \
l)
# endif
#endif