openssl/crypto/ec/curve448/constant_time.h

363 lines
12 KiB
C
Raw Normal View History

/**
* @file constant_time.h
* @copyright
* Copyright (c) 2014 Cryptography Research, Inc. \n
* Released under the MIT License. See LICENSE.txt for license information.
* @author Mike Hamburg
*
* @brief Constant-time routines.
*/
#ifndef __CONSTANT_TIME_H__
#define __CONSTANT_TIME_H__ 1
#include "word.h"
#include <string.h>
/*
* Constant-time operations on hopefully-compile-time-sized memory
* regions. Needed for flexibility / demagication: not all fields
* have sizes which are multiples of the vector width, necessitating
* a change from the Ed448 versions.
*
* These routines would be much simpler to define at the byte level,
* but if not vectorized they would be a significant fraction of the
* runtime. Eg on NEON-less ARM, constant_time_lookup is like 15% of
* signing time, vs 6% on Haswell with its fancy AVX2 vectors.
*
* If the compiler could do a good job of autovectorizing the code,
* we could just leave it with the byte definition. But that's unlikely
* on most deployed compilers, especially if you consider that pcmpeq[size]
* is much faster than moving a scalar to the vector unit (which is what
* a naive autovectorizer will do with constant_time_lookup on Intel).
*
* Instead, we're putting our trust in the loop unroller and unswitcher.
*/
/**
* Unaligned big (vector?) register.
*/
typedef struct {
big_register_t unaligned;
} __attribute__((packed)) unaligned_br_t;
/**
* Unaligned word register, for architectures where that matters.
*/
typedef struct {
word_t unaligned;
} __attribute__((packed)) unaligned_word_t;
/**
* @brief Constant-time conditional swap.
*
* If doswap, then swap elem_bytes between *a and *b.
*
* *a and *b must not alias. Also, they must be at least as aligned
* as their sizes, if the CPU cares about that sort of thing.
*/
static __inline__ void
__attribute__((unused,always_inline))
constant_time_cond_swap (
void *__restrict__ a_,
void *__restrict__ b_,
word_t elem_bytes,
mask_t doswap
) {
word_t k;
unsigned char *a = (unsigned char *)a_;
unsigned char *b = (unsigned char *)b_;
big_register_t br_mask = br_set_to_mask(doswap);
for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
if (elem_bytes % sizeof(big_register_t)) {
/* unaligned */
big_register_t xor =
((unaligned_br_t*)(&a[k]))->unaligned
^ ((unaligned_br_t*)(&b[k]))->unaligned;
xor &= br_mask;
((unaligned_br_t*)(&a[k]))->unaligned ^= xor;
((unaligned_br_t*)(&b[k]))->unaligned ^= xor;
} else {
/* aligned */
big_register_t xor =
*((big_register_t*)(&a[k]))
^ *((big_register_t*)(&b[k]));
xor &= br_mask;
*((big_register_t*)(&a[k])) ^= xor;
*((big_register_t*)(&b[k])) ^= xor;
}
}
if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
if (elem_bytes % sizeof(word_t)) {
/* unaligned */
word_t xor =
((unaligned_word_t*)(&a[k]))->unaligned
^ ((unaligned_word_t*)(&b[k]))->unaligned;
xor &= doswap;
((unaligned_word_t*)(&a[k]))->unaligned ^= xor;
((unaligned_word_t*)(&b[k]))->unaligned ^= xor;
} else {
/* aligned */
word_t xor =
*((word_t*)(&a[k]))
^ *((word_t*)(&b[k]));
xor &= doswap;
*((word_t*)(&a[k])) ^= xor;
*((word_t*)(&b[k])) ^= xor;
}
}
}
if (elem_bytes % sizeof(word_t)) {
for (; k<elem_bytes; k+=1) {
unsigned char xor = a[k] ^ b[k];
xor &= doswap;
a[k] ^= xor;
b[k] ^= xor;
}
}
}
/**
* @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
*
* The table must be at least as aligned as elem_bytes. The output must be word aligned,
* and if the input size is vector aligned it must also be vector aligned.
*
* The table and output must not alias.
*/
static __inline__ void
__attribute__((unused,always_inline))
constant_time_lookup (
void *__restrict__ out_,
const void *table_,
word_t elem_bytes,
word_t n_table,
word_t idx
) {
big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
/* Can't do pointer arithmetic on void* */
unsigned char *out = (unsigned char *)out_;
const unsigned char *table = (const unsigned char *)table_;
word_t j,k;
memset(out, 0, elem_bytes);
for (j=0; j<n_table; j++, big_i-=big_one) {
big_register_t br_mask = br_is_zero(big_i);
for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
if (elem_bytes % sizeof(big_register_t)) {
/* unaligned */
((unaligned_br_t *)(out+k))->unaligned
|= br_mask & ((const unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned;
} else {
/* aligned */
*(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&table[k+j*elem_bytes]);
}
}
word_t mask = word_is_zero(idx^j);
if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
if (elem_bytes % sizeof(word_t)) {
/* input unaligned, output aligned */
*(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned;
} else {
/* aligned */
*(word_t *)(out+k) |= mask & *(const word_t*)(&table[k+j*elem_bytes]);
}
}
}
if (elem_bytes % sizeof(word_t)) {
for (; k<elem_bytes; k+=1) {
out[k] |= mask & table[k+j*elem_bytes];
}
}
}
}
/**
* @brief Constant-time equivalent of memcpy(table + elem_bytes*idx, in, elem_bytes);
*
* The table must be at least as aligned as elem_bytes. The input must be word aligned,
* and if the output size is vector aligned it must also be vector aligned.
*
* The table and input must not alias.
*/
static __inline__ void
__attribute__((unused,always_inline))
constant_time_insert (
void *__restrict__ table_,
const void *in_,
word_t elem_bytes,
word_t n_table,
word_t idx
) {
big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
/* Can't do pointer arithmetic on void* */
const unsigned char *in = (const unsigned char *)in_;
unsigned char *table = (unsigned char *)table_;
word_t j,k;
for (j=0; j<n_table; j++, big_i-=big_one) {
big_register_t br_mask = br_is_zero(big_i);
for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
if (elem_bytes % sizeof(big_register_t)) {
/* unaligned */
((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned
= ( ((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned & ~br_mask )
| ( ((const unaligned_br_t *)(in+k))->unaligned & br_mask );
} else {
/* aligned */
*(big_register_t*)(&table[k+j*elem_bytes])
= ( *(big_register_t*)(&table[k+j*elem_bytes]) & ~br_mask )
| ( *(const big_register_t *)(in+k) & br_mask );
}
}
word_t mask = word_is_zero(idx^j);
if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
if (elem_bytes % sizeof(word_t)) {
/* output unaligned, input aligned */
((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned
= ( ((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned & ~mask )
| ( *(const word_t *)(in+k) & mask );
} else {
/* aligned */
*(word_t*)(&table[k+j*elem_bytes])
= ( *(word_t*)(&table[k+j*elem_bytes]) & ~mask )
| ( *(const word_t *)(in+k) & mask );
}
}
}
if (elem_bytes % sizeof(word_t)) {
for (; k<elem_bytes; k+=1) {
table[k+j*elem_bytes]
= ( table[k+j*elem_bytes] & ~mask )
| ( in[k] & mask );
}
}
}
}
/**
* @brief Constant-time a = b&mask.
*
* The input and output must be at least as aligned as elem_bytes.
*/
static __inline__ void
__attribute__((unused,always_inline))
constant_time_mask (
void * a_,
const void *b_,
word_t elem_bytes,
mask_t mask
) {
unsigned char *a = (unsigned char *)a_;
const unsigned char *b = (const unsigned char *)b_;
word_t k;
big_register_t br_mask = br_set_to_mask(mask);
for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
if (elem_bytes % sizeof(big_register_t)) {
/* unaligned */
((unaligned_br_t*)(&a[k]))->unaligned = br_mask & ((const unaligned_br_t*)(&b[k]))->unaligned;
} else {
/* aligned */
*(big_register_t *)(a+k) = br_mask & *(const big_register_t*)(&b[k]);
}
}
if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
if (elem_bytes % sizeof(word_t)) {
/* unaligned */
((unaligned_word_t*)(&a[k]))->unaligned = mask & ((const unaligned_word_t*)(&b[k]))->unaligned;
} else {
/* aligned */
*(word_t *)(a+k) = mask & *(const word_t*)(&b[k]);
}
}
}
if (elem_bytes % sizeof(word_t)) {
for (; k<elem_bytes; k+=1) {
a[k] = mask & b[k];
}
}
}
/**
* @brief Constant-time a = mask ? bTrue : bFalse.
*
* The input and output must be at least as aligned as alignment_bytes
* or their size, whichever is smaller.
*
* Note that the output is not __restrict__, but if it overlaps either
* input, it must be equal and not partially overlap.
*/
static __inline__ void
__attribute__((unused,always_inline))
constant_time_select (
void *a_,
const void *bFalse_,
const void *bTrue_,
word_t elem_bytes,
mask_t mask,
size_t alignment_bytes
) {
unsigned char *a = (unsigned char *)a_;
const unsigned char *bTrue = (const unsigned char *)bTrue_;
const unsigned char *bFalse = (const unsigned char *)bFalse_;
alignment_bytes |= elem_bytes;
word_t k;
big_register_t br_mask = br_set_to_mask(mask);
for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
if (alignment_bytes % sizeof(big_register_t)) {
/* unaligned */
((unaligned_br_t*)(&a[k]))->unaligned =
( br_mask & ((const unaligned_br_t*)(&bTrue [k]))->unaligned)
| (~br_mask & ((const unaligned_br_t*)(&bFalse[k]))->unaligned);
} else {
/* aligned */
*(big_register_t *)(a+k) =
( br_mask & *(const big_register_t*)(&bTrue [k]))
| (~br_mask & *(const big_register_t*)(&bFalse[k]));
}
}
if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
if (alignment_bytes % sizeof(word_t)) {
/* unaligned */
((unaligned_word_t*)(&a[k]))->unaligned =
( mask & ((const unaligned_word_t*)(&bTrue [k]))->unaligned)
| (~mask & ((const unaligned_word_t*)(&bFalse[k]))->unaligned);
} else {
/* aligned */
*(word_t *)(a+k) =
( mask & *(const word_t*)(&bTrue [k]))
| (~mask & *(const word_t*)(&bFalse[k]));
}
}
}
if (elem_bytes % sizeof(word_t)) {
for (; k<elem_bytes; k+=1) {
a[k] = ( mask & bTrue[k]) | (~mask & bFalse[k]);
}
}
}
#endif /* __CONSTANT_TIME_H__ */