Manual formatting tweaks to Curve448 code

Following running openssl-format-source there were a lot of manual tweaks
that were required.

Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de>
(Merged from https://github.com/openssl/openssl/pull/5105)
This commit is contained in:
Matt Caswell 2017-12-04 13:30:53 +00:00
parent 205fd63881
commit 8d55f844b0
24 changed files with 1654 additions and 2709 deletions

View file

@ -24,13 +24,11 @@ void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
{
const uint32_t *a = as->limb, *b = bs->limb;
uint32_t *c = cs->limb;
uint64_t accum0 = 0, accum1 = 0, accum2 = 0;
uint32_t mask = (1 << 28) - 1;
uint32_t aa[8], bb[8];
int i, j;
for (i = 0; i < 8; i++) {
aa[i] = a[i] + a[i + 8];
bb[i] = b[i] + b[i + 8];

View file

@ -39,9 +39,8 @@ void gf_bias(gf a, int amt)
unsigned int i;
uint32_t co1 = ((1 << 28) - 1) * amt, co2 = co1 - amt;
for (i = 0; i < sizeof(*a) / sizeof(a->limb[0]); i++) {
for (i = 0; i < sizeof(*a) / sizeof(a->limb[0]); i++)
a->limb[i] += (i == sizeof(*a) / sizeof(a->limb[0]) / 2) ? co2 : co1;
}
}
void gf_weak_reduce(gf a)
@ -51,8 +50,7 @@ void gf_weak_reduce(gf a)
unsigned int i;
a->limb[8] += tmp;
for (i = 15; i > 0; i--) {
for (i = 15; i > 0; i--)
a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 28);
}
a->limb[0] = (a->limb[0] & mask) + tmp;
}

View file

@ -19,6 +19,7 @@ static __inline__ __attribute((always_inline, unused))
uint32_t word_is_zero(uint32_t a)
{
uint32_t ret;
asm("subs %0, %1, #1;\n\tsbc %0, %0, %0": "=r"(ret): "r"(a):"cc");
return ret;
}

View file

@ -19,9 +19,10 @@ static inline void __attribute__ ((gnu_inline, always_inline))
#ifdef __ARMEL__
uint32_t lo = *acc, hi = (*acc) >> 32;
__asm__ __volatile__("smlal %[lo], %[hi], %[a], %[b]":[lo] "+&r"(lo),
[hi] "+&r"(hi)
:[a] "r"(a),[b] "r"(b));
__asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
: [lo]"+&r"(lo), [hi]"+&r"(hi)
: [a]"r"(a), [b]"r"(b));
*acc = lo + (((uint64_t)hi) << 32);
#else
@ -35,9 +36,11 @@ static inline void __attribute__ ((gnu_inline, always_inline))
#ifdef __ARMEL__
uint32_t lo = *acc, hi = (*acc) >> 32;
__asm__ __volatile__("smlal %[lo], %[hi], %[a], %[b]":[lo] "+&r"(lo),
[hi] "+&r"(hi)
:[a] "r"(a),[b] "r"(2 * b));
__asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
: [lo]"+&r"(lo), [hi]"+&r"(hi)
: [a]"r"(a), [b]"r"(2 * b));
*acc = lo + (((uint64_t)hi) << 32);
#else
@ -51,9 +54,9 @@ static inline void __attribute__ ((gnu_inline, always_inline))
#ifdef __ARMEL__
uint32_t lo, hi;
__asm__ __volatile__("smull %[lo], %[hi], %[a], %[b]":[lo] "=&r"(lo),
[hi] "=&r"(hi)
:[a] "r"(a),[b] "r"(b));
__asm__ __volatile__ ("smull %[lo], %[hi], %[a], %[b]"
: [lo]"=&r"(lo), [hi]"=&r"(hi)
: [a]"r"(a), [b]"r"(b));
*acc = lo + (((uint64_t)hi) << 32);
#else
@ -68,8 +71,8 @@ static inline void __attribute__ ((gnu_inline, always_inline))
uint32_t lo, hi;
__asm__ /*__volatile__*/ ("smull %[lo], %[hi], %[a], %[b]"
: [lo] "=&r"(lo),[hi] "=&r"(hi)
: [a] "r"(a),[b] "r"(2 * b));
: [lo]"=&r"(lo), [hi]"=&r"(hi)
: [a]"r"(a), [b]"r"(2*b));
*acc = lo + (((uint64_t)hi) << 32);
#else
@ -729,16 +732,14 @@ void gf_sqr(gf_s * __restrict__ cs, const gf as)
void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b)
{
uint32_t mask = (1ull << 28) - 1;
assert(b <= mask);
const uint32_t *a = as->limb;
uint32_t *c = cs->limb;
uint64_t accum0, accum8;
int i;
uint32_t c0, c8, n0, n8;
assert(b <= mask);
c0 = a[0];
c8 = a[8];
accum0 = widemul(b, c0);

View file

@ -23,10 +23,6 @@ void gf_add_RAW(gf out, const gf a, const gf b)
((uint32xn_t *) out)[i] =
((const uint32xn_t *)a)[i] + ((const uint32xn_t *)b)[i];
}
/*
* for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
* out->limb[i] = a->limb[i] + b->limb[i]; }
*/
}
void gf_sub_RAW(gf out, const gf a, const gf b)
@ -35,10 +31,6 @@ void gf_sub_RAW(gf out, const gf a, const gf b)
((uint32xn_t *) out)[i] =
((const uint32xn_t *)a)[i] - ((const uint32xn_t *)b)[i];
}
/*
* for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
* out->limb[i] = a->limb[i] - b->limb[i]; }
*/
}
void gf_bias(gf a, int amt)
@ -47,6 +39,7 @@ void gf_bias(gf a, int amt)
uint32x4_t lo = { co1, co1, co1, co1 }, hi = {
co2, co1, co1, co1};
uint32x4_t *aa = (uint32x4_t *) a;
aa[0] += lo;
aa[1] += lo;
aa[2] += hi;
@ -57,6 +50,7 @@ void gf_weak_reduce(gf a)
{
uint64_t mask = (1ull << 28) - 1;
uint64_t tmp = a->limb[15] >> 28;
a->limb[8] += tmp;
for (unsigned int i = 15; i > 0; i--) {
a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 28);

File diff suppressed because it is too large Load diff

View file

@ -15,11 +15,13 @@
#define USE_NEON_PERM 1
#define LIMBHI(x) ((x##ull)>>28)
#define LIMBLO(x) ((x##ull)&((1ull<<28)-1))
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
{{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \
LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \
LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \
LIMBLO(d),LIMBLO(h), LIMBHI(d),LIMBHI(h)}}
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) { \
{ \
LIMBLO(a), LIMBLO(e), LIMBHI(a), LIMBHI(e), LIMBLO(b), LIMBLO(f), \
LIMBHI(b), LIMBHI(f), LIMBLO(c), LIMBLO(g), LIMBHI(c), LIMBHI(g), \
LIMBLO(d), LIMBLO(h), LIMBHI(d), LIMBHI(h) \
} \
}
#define LIMB_PLACE_VALUE(i) 28
@ -37,17 +39,13 @@ void gf_sub_RAW(gf out, const gf a, const gf b)
((uint32xn_t *) out)[i] =
((const uint32xn_t *)a)[i] - ((const uint32xn_t *)b)[i];
}
/*
* unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
* out->limb[i] = a->limb[i] - b->limb[i]; }
*/
}
void gf_bias(gf a, int amt)
{
uint32_t co1 = ((1ull << 28) - 1) * amt, co2 = co1 - amt;
uint32x4_t lo = { co1, co2, co1, co1 }, hi = {
co1, co1, co1, co1};
uint32x4_t lo = { co1, co2, co1, co1 };
uint32x4_t hi = { co1, co1, co1, co1 };
uint32x4_t *aa = (uint32x4_t *) a;
aa[0] += lo;
aa[1] += hi;
@ -57,13 +55,11 @@ void gf_bias(gf a, int amt)
void gf_weak_reduce(gf a)
{
uint32x2_t *aa = (uint32x2_t *) a;
uint32x2_t vmask = { (1ull << 28) - 1, (1ull << 28) - 1};
uint32x2_t vm2 = { 0, -1}, tmp = vshr_n_u32(aa[7], 28);
uint32x2_t *aa = (uint32x2_t *) a, vmask = {
(1ull << 28) - 1, (1ull << 28) - 1}, vm2 = {
0, -1}, tmp = vshr_n_u32(aa[7], 28);
for (unsigned int i = 7; i >= 1; i--) {
for (unsigned int i = 7; i >= 1; i--)
aa[i] = vsra_n_u32(aa[i] & vmask, aa[i - 1], 28);
}
aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp & vm2);
}

View file

@ -15,13 +15,11 @@ void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
{
const uint64_t *a = as->limb, *b = bs->limb;
uint64_t *c = cs->limb;
__uint128_t accum0 = 0, accum1 = 0, accum2;
uint64_t mask = (1ull << 56) - 1;
uint64_t aa[4], bb[4], bbb[4];
unsigned int i;
for (i = 0; i < 4; i++) {
aa[i] = a[i] + a[i + 4];
bb[i] = b[i] + b[i + 4];
@ -177,11 +175,10 @@ void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b)
{
const uint64_t *a = as->limb;
uint64_t *c = cs->limb;
__uint128_t accum0 = 0, accum4 = 0;
uint64_t mask = (1ull << 56) - 1;
int i;
for (i = 0; i < 4; i++) {
accum0 += widemul(b, a[i]);
accum4 += widemul(b, a[i + 4]);
@ -204,10 +201,8 @@ void gf_sqr(gf_s * __restrict__ cs, const gf as)
{
const uint64_t *a = as->limb;
uint64_t *c = cs->limb;
__uint128_t accum0 = 0, accum1 = 0, accum2;
uint64_t mask = (1ull << 56) - 1;
uint64_t aa[4];
/* For some reason clang doesn't vectorize this without prompting? */

View file

@ -17,18 +17,17 @@
void gf_add_RAW(gf out, const gf a, const gf b)
{
for (unsigned int i = 0; i < 8; i++) {
for (unsigned int i = 0; i < 8; i++)
out->limb[i] = a->limb[i] + b->limb[i];
}
gf_weak_reduce(out);
}
void gf_sub_RAW(gf out, const gf a, const gf b)
{
uint64_t co1 = ((1ull << 56) - 1) * 2, co2 = co1 - 2;
for (unsigned int i = 0; i < 8; i++) {
for (unsigned int i = 0; i < 8; i++)
out->limb[i] = a->limb[i] - b->limb[i] + ((i == 4) ? co2 : co1);
}
gf_weak_reduce(out);
}
@ -42,9 +41,9 @@ void gf_weak_reduce(gf a)
{
uint64_t mask = (1ull << 56) - 1;
uint64_t tmp = a->limb[7] >> 56;
a->limb[4] += tmp;
for (unsigned int i = 7; i > 0; i--) {
for (unsigned int i = 7; i > 0; i--)
a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 56);
}
a->limb[0] = (a->limb[0] & mask) + tmp;
}

View file

@ -10,286 +10,323 @@
* Originally written by Mike Hamburg
*/
#ifndef __ARCH_X86_64_ARCH_INTRINSICS_H__
# define __ARCH_X86_64_ARCH_INTRINSICS_H__
#define __ARCH_X86_64_ARCH_INTRINSICS_H__
# define ARCH_WORD_BITS 64
#define ARCH_WORD_BITS 64
# include <openssl/e_os2.h>
#include <openssl/e_os2.h>
/* FUTURE: autogenerate */
static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b)
{
uint64_t c, d;
# ifndef __BMI2__
#ifndef __BMI2__
__asm__ volatile
("movq %[a], %%rax;" "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
:[b] "m"(*b),[a] "m"(*a)
:"cc");
# else
("movq %[a], %%rax;"
"mulq %[b];"
: [c]"=&a"(c), [d]"=d"(d)
: [b]"m"(*b), [a]"m"(*a)
: "cc");
#else
__asm__ volatile
("movq %[a], %%rdx;" "mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
:[b] "m"(*b),[a] "m"(*a)
:"rdx");
# endif
return (((__uint128_t) (d)) << 64) | c;
("movq %[a], %%rdx;"
"mulx %[b], %[c], %[d];"
: [c]"=r"(c), [d]"=r"(d)
: [b]"m"(*b), [a]"m"(*a)
: "rdx");
#endif
return (((__uint128_t)(d)) << 64) | c;
}
static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b)
{
uint64_t c, d;
# ifndef __BMI2__
#ifndef __BMI2__
__asm__ volatile
("movq %[a], %%rax;" "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
:[b] "m"(*b),[a] "r"(a)
:"cc");
# else
("movq %[a], %%rax;"
"mulq %[b];"
: [c]"=&a"(c), [d]"=d"(d)
: [b]"m"(*b), [a]"r"(a)
: "cc");
#else
__asm__ volatile
("mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
:[b] "m"(*b),[a] "d"(a));
# endif
return (((__uint128_t) (d)) << 64) | c;
("mulx %[b], %[c], %[d];"
: [c]"=r"(c), [d]"=r"(d)
: [b]"m"(*b), [a]"d"(a));
#endif
return (((__uint128_t)(d)) << 64) | c;
}
static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b)
{
uint64_t c, d;
# ifndef __BMI2__
#ifndef __BMI2__
__asm__ volatile
("mulq %[b];":[c] "=a"(c),[d] "=d"(d)
:[b] "r"(b), "a"(a)
:"cc");
# else
("mulq %[b];"
: [c]"=a"(c), [d]"=d"(d)
: [b]"r"(b), "a"(a)
: "cc");
#else
__asm__ volatile
("mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
:[b] "r"(b),[a] "d"(a));
# endif
return (((__uint128_t) (d)) << 64) | c;
("mulx %[b], %[c], %[d];"
: [c]"=r"(c), [d]"=r"(d)
: [b]"r"(b), [a]"d"(a));
#endif
return (((__uint128_t)(d)) << 64) | c;
}
static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b)
{
uint64_t c, d;
# ifndef __BMI2__
#ifndef __BMI2__
__asm__ volatile
("movq %[a], %%rax; "
"addq %%rax, %%rax; " "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
:[b] "m"(*b),[a] "m"(*a)
:"cc");
# else
"addq %%rax, %%rax; "
"mulq %[b];"
: [c]"=&a"(c), [d]"=d"(d)
: [b]"m"(*b), [a]"m"(*a)
: "cc");
#else
__asm__ volatile
("movq %[a], %%rdx;"
"leaq (,%%rdx,2), %%rdx;" "mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
:[b] "m"(*b),[a] "m"(*a)
:"rdx");
# endif
return (((__uint128_t) (d)) << 64) | c;
"leaq (,%%rdx,2), %%rdx;"
"mulx %[b], %[c], %[d];"
: [c]"=r"(c), [d]"=r"(d)
: [b]"m"(*b), [a]"m"(*a)
: "rdx");
#endif
return (((__uint128_t)(d)) << 64) | c;
}
static __inline__ void mac(__uint128_t * acc, const uint64_t *a,
static __inline__ void mac(__uint128_t *acc, const uint64_t *a,
const uint64_t *b)
{
uint64_t lo = *acc, hi = *acc >> 64;
# ifdef __BMI2__
uint64_t c, d;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rdx; "
"mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; ":[c] "=&r"(c),[d] "=&r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
:[b] "m"(*b),[a] "m"(*a)
:"rdx", "cc");
# else
"adcq %[d], %[hi]; "
: [c]"=&r"(c), [d]"=&r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
__asm__ volatile
("movq %[a], %%rax; "
"mulq %[b]; "
"addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
:[b] "m"(*b),[a] "m"(*a)
:"rax", "rdx", "cc");
# endif
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t) (hi)) << 64) | lo;
*acc = (((__uint128_t)(hi)) << 64) | lo;
}
static __inline__ void macac(__uint128_t * acc, __uint128_t * acc2,
static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2,
const uint64_t *a, const uint64_t *b)
{
uint64_t lo = *acc, hi = *acc >> 64;
uint64_t lo2 = *acc2, hi2 = *acc2 >> 64;
# ifdef __BMI2__
uint64_t c, d;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rdx; "
"mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; "
"addq %[c], %[lo2]; "
"adcq %[d], %[hi2]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi),
[lo2] "+r"(lo2),[hi2] "+r"(hi2)
:[b] "m"(*b),[a] "m"(*a)
:"rdx", "cc");
# else
"adcq %[d], %[hi2]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
__asm__ volatile
("movq %[a], %%rax; "
"mulq %[b]; "
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
"addq %%rax, %[lo2]; "
"adcq %%rdx, %[hi2]; ":[lo] "+r"(lo),[hi] "+r"(hi),[lo2] "+r"(lo2),
[hi2] "+r"(hi2)
:[b] "m"(*b),[a] "m"(*a)
:"rax", "rdx", "cc");
# endif
"adcq %%rdx, %[hi2]; "
: [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t) (hi)) << 64) | lo;
*acc2 = (((__uint128_t) (hi2)) << 64) | lo2;
*acc = (((__uint128_t)(hi)) << 64) | lo;
*acc2 = (((__uint128_t)(hi2)) << 64) | lo2;
}
static __inline__ void mac_rm(__uint128_t * acc, uint64_t a, const uint64_t *b)
static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b)
{
uint64_t lo = *acc, hi = *acc >> 64;
# ifdef __BMI2__
uint64_t c, d;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
:[b] "m"(*b),[a] "d"(a)
:"cc");
# else
"adcq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"d"(a)
: "cc");
#else
__asm__ volatile
("movq %[a], %%rax; "
"mulq %[b]; "
"addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
:[b] "m"(*b),[a] "r"(a)
:"rax", "rdx", "cc");
# endif
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"r"(a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t) (hi)) << 64) | lo;
*acc = (((__uint128_t)(hi)) << 64) | lo;
}
static __inline__ void mac_rr(__uint128_t * acc, uint64_t a, const uint64_t b)
static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b)
{
uint64_t lo = *acc, hi = *acc >> 64;
# ifdef __BMI2__
uint64_t c, d;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
:[b] "r"(b),[a] "d"(a)
:"cc");
# else
"adcq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"r"(b), [a]"d"(a)
: "cc");
#else
__asm__ volatile
("mulq %[b]; "
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi), "+a"(a)
:[b] "r"(b)
:"rdx", "cc");
# endif
"adcq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi), "+a"(a)
: [b]"r"(b)
: "rdx", "cc");
#endif
*acc = (((__uint128_t) (hi)) << 64) | lo;
*acc = (((__uint128_t)(hi)) << 64) | lo;
}
static __inline__ void mac2(__uint128_t * acc, const uint64_t *a,
static __inline__ void mac2(__uint128_t *acc, const uint64_t *a,
const uint64_t *b)
{
uint64_t lo = *acc, hi = *acc >> 64;
# ifdef __BMI2__
uint64_t c, d;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rdx; "
"addq %%rdx, %%rdx; "
"mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
:[b] "m"(*b),[a] "m"(*a)
:"rdx", "cc");
# else
"adcq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
__asm__ volatile
("movq %[a], %%rax; "
"addq %%rax, %%rax; "
"mulq %[b]; "
"addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
:[b] "m"(*b),[a] "m"(*a)
:"rax", "rdx", "cc");
# endif
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t) (hi)) << 64) | lo;
*acc = (((__uint128_t)(hi)) << 64) | lo;
}
static __inline__ void msb(__uint128_t * acc, const uint64_t *a,
static __inline__ void msb(__uint128_t *acc, const uint64_t *a,
const uint64_t *b)
{
uint64_t lo = *acc, hi = *acc >> 64;
# ifdef __BMI2__
uint64_t c, d;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rdx; "
"mulx %[b], %[c], %[d]; "
"subq %[c], %[lo]; "
"sbbq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
:[b] "m"(*b),[a] "m"(*a)
:"rdx", "cc");
# else
"sbbq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
__asm__ volatile
("movq %[a], %%rax; "
"mulq %[b]; "
"subq %%rax, %[lo]; " "sbbq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
:[b] "m"(*b),[a] "m"(*a)
:"rax", "rdx", "cc");
# endif
*acc = (((__uint128_t) (hi)) << 64) | lo;
"subq %%rax, %[lo]; "
"sbbq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t)(hi)) << 64) | lo;
}
static __inline__ void msb2(__uint128_t * acc, const uint64_t *a,
static __inline__ void msb2(__uint128_t *acc, const uint64_t *a,
const uint64_t *b)
{
uint64_t lo = *acc, hi = *acc >> 64;
# ifdef __BMI2__
uint64_t c, d;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rdx; "
"addq %%rdx, %%rdx; "
"mulx %[b], %[c], %[d]; "
"subq %[c], %[lo]; "
"sbbq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
:[b] "m"(*b),[a] "m"(*a)
:"rdx", "cc");
# else
"sbbq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
__asm__ volatile
("movq %[a], %%rax; "
"addq %%rax, %%rax; "
"mulq %[b]; "
"subq %%rax, %[lo]; " "sbbq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
:[b] "m"(*b),[a] "m"(*a)
:"rax", "rdx", "cc");
# endif
*acc = (((__uint128_t) (hi)) << 64) | lo;
"subq %%rax, %[lo]; "
"sbbq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t)(hi))<<64) | lo;
}
static __inline__ void mrs(__uint128_t * acc, const uint64_t *a,
static __inline__ void mrs(__uint128_t *acc, const uint64_t *a,
const uint64_t *b)
{
uint64_t c, d, lo = *acc, hi = *acc >> 64;
uint64_t c,d, lo = *acc, hi = *acc >> 64;
__asm__ volatile
("movq %[a], %%rdx; "
"mulx %[b], %[c], %[d]; "
"subq %[lo], %[c]; "
"sbbq %[hi], %[d]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
:[b] "m"(*b),[a] "m"(*a)
:"rdx", "cc");
*acc = (((__uint128_t) (d)) << 64) | c;
"sbbq %[hi], %[d]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
*acc = (((__uint128_t)(d)) << 64) | c;
}
static __inline__ uint64_t word_is_zero(uint64_t x)
{
__asm__ volatile ("neg %0; sbb %0, %0;":"+r" (x));
__asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x));
return ~x;
}

View file

@ -16,10 +16,8 @@ void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
{
const uint64_t *a = as->limb, *b = bs->limb;
uint64_t *c = cs->limb;
__uint128_t accum0 = 0, accum1 = 0, accum2;
uint64_t mask = (1ull << 56) - 1;
uint64_t aa[4] VECTOR_ALIGNED, bb[4] VECTOR_ALIGNED, bbb[4] VECTOR_ALIGNED;
/* For some reason clang doesn't vectorize this without prompting? */
@ -202,10 +200,8 @@ void gf_sqr(gf_s * __restrict__ cs, const gf as)
{
const uint64_t *a = as->limb;
uint64_t *c = cs->limb;
__uint128_t accum0 = 0, accum1 = 0, accum2;
uint64_t mask = (1ull << 56) - 1;
uint64_t aa[4] VECTOR_ALIGNED;
/* For some reason clang doesn't vectorize this without prompting? */

View file

@ -20,10 +20,6 @@ void gf_add_RAW(gf out, const gf a, const gf b)
((uint64xn_t *) out)[i] =
((const uint64xn_t *)a)[i] + ((const uint64xn_t *)b)[i];
}
/*
* unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
* out->limb[i] = a->limb[i] + b->limb[i]; }
*/
}
void gf_sub_RAW(gf out, const gf a, const gf b)
@ -32,10 +28,6 @@ void gf_sub_RAW(gf out, const gf a, const gf b)
((uint64xn_t *) out)[i] =
((const uint64xn_t *)a)[i] - ((const uint64xn_t *)b)[i];
}
/*
* unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
* out->limb[i] = a->limb[i] - b->limb[i]; }
*/
}
void gf_bias(gf a, int amt)
@ -68,6 +60,7 @@ void gf_weak_reduce(gf a)
/* PERF: use pshufb/palignr if anyone cares about speed of this */
uint64_t mask = (1ull << 56) - 1;
uint64_t tmp = a->limb[7] >> 56;
a->limb[4] += tmp;
for (unsigned int i = 7; i > 0; i--) {
a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 56);

View file

@ -36,32 +36,32 @@
* Instead, we're putting our trust in the loop unroller and unswitcher.
*/
/**
/*
* Unaligned big (vector?) register.
*/
typedef struct {
big_register_t unaligned;
} __attribute__ ((packed)) unaligned_br_t;
/**
/*
* Unaligned word register, for architectures where that matters.
*/
typedef struct {
word_t unaligned;
} __attribute__ ((packed)) unaligned_word_t;
/**
* @brief Constant-time conditional swap.
/*
* Constant-time conditional swap.
*
* If doswap, then swap elem_bytes between *a and *b.
*
* *a and *b must not alias. Also, they must be at least as aligned
* as their sizes, if the CPU cares about that sort of thing.
*/
static __inline__ void
__attribute__ ((unused, always_inline))
constant_time_cond_swap(void *__restrict__ a_,
void *__restrict__ b_, word_t elem_bytes, mask_t doswap)
static ossl_inline void constant_time_cond_swap(void *__restrict__ a_,
void *__restrict__ b_,
word_t elem_bytes,
mask_t doswap)
{
word_t k;
unsigned char *a = (unsigned char *)a_;
@ -72,19 +72,19 @@ static __inline__ void
k += sizeof(big_register_t)) {
if (elem_bytes % sizeof(big_register_t)) {
/* unaligned */
big_register_t xor =
((unaligned_br_t *) (&a[k]))->unaligned
big_register_t xor = ((unaligned_br_t *) (&a[k]))->unaligned
^ ((unaligned_br_t *) (&b[k]))->unaligned;
xor &= br_mask;
((unaligned_br_t *) (&a[k]))->unaligned ^= xor;
((unaligned_br_t *) (&b[k]))->unaligned ^= xor;
((unaligned_br_t *)(&a[k]))->unaligned ^= xor;
((unaligned_br_t *)(&b[k]))->unaligned ^= xor;
} else {
/* aligned */
big_register_t xor = *((big_register_t *) (&a[k]))
^ *((big_register_t *) (&b[k]));
xor &= br_mask;
*((big_register_t *) (&a[k])) ^= xor;
*((big_register_t *) (&b[k])) ^= xor;
*((big_register_t *)(&a[k])) ^= xor;
*((big_register_t *)(&b[k])) ^= xor;
}
}
@ -92,19 +92,18 @@ static __inline__ void
for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
if (elem_bytes % sizeof(word_t)) {
/* unaligned */
word_t xor =
((unaligned_word_t *) (&a[k]))->unaligned
^ ((unaligned_word_t *) (&b[k]))->unaligned;
word_t xor = ((unaligned_word_t *)(&a[k]))->unaligned
^ ((unaligned_word_t *)(&b[k]))->unaligned;
xor &= doswap;
((unaligned_word_t *) (&a[k]))->unaligned ^= xor;
((unaligned_word_t *) (&b[k]))->unaligned ^= xor;
((unaligned_word_t *)(&a[k]))->unaligned ^= xor;
((unaligned_word_t *)(&b[k]))->unaligned ^= xor;
} else {
/* aligned */
word_t xor = *((word_t *) (&a[k]))
^ *((word_t *) (&b[k]));
word_t xor = *((word_t *) (&a[k])) ^ *((word_t *) (&b[k]));
xor &= doswap;
*((word_t *) (&a[k])) ^= xor;
*((word_t *) (&b[k])) ^= xor;
*((word_t *)(&a[k])) ^= xor;
*((word_t *)(&b[k])) ^= xor;
}
}
}
@ -112,6 +111,7 @@ static __inline__ void
if (elem_bytes % sizeof(word_t)) {
for (; k < elem_bytes; k += 1) {
unsigned char xor = a[k] ^ b[k];
xor &= doswap;
a[k] ^= xor;
b[k] ^= xor;
@ -119,23 +119,23 @@ static __inline__ void
}
}
/**
* @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
/*
* Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
*
* The table must be at least as aligned as elem_bytes. The output must be word aligned,
* and if the input size is vector aligned it must also be vector aligned.
*
* The table and output must not alias.
*/
static __inline__ void
__attribute__ ((unused, always_inline))
constant_time_lookup(void *__restrict__ out_,
static ossl_inline void constant_time_lookup(void *__restrict__ out_,
const void *table_,
word_t elem_bytes, word_t n_table, word_t idx)
word_t elem_bytes,
word_t n_table,
word_t idx)
{
big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
/* Can't do pointer arithmetic on void* */
/* Can't do pointer arithmetic on void * */
unsigned char *out = (unsigned char *)out_;
const unsigned char *table = (const unsigned char *)table_;
word_t j, k;
@ -149,16 +149,15 @@ static __inline__ void
k += sizeof(big_register_t)) {
if (elem_bytes % sizeof(big_register_t)) {
/* unaligned */
((unaligned_br_t *) (out + k))->unaligned
|=
br_mask &
((const unaligned_br_t
*)(&table[k + j * elem_bytes]))->unaligned;
((unaligned_br_t *)(out + k))->unaligned |=
br_mask
& ((const unaligned_br_t *)
(&table[k + j * elem_bytes]))->unaligned;
} else {
/* aligned */
*(big_register_t *) (out + k) |=
br_mask & *(const big_register_t
*)(&table[k + j * elem_bytes]);
*(big_register_t *)(out + k) |=
br_mask
& *(const big_register_t *)(&table[k + j * elem_bytes]);
}
}
@ -167,14 +166,15 @@ static __inline__ void
for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
if (elem_bytes % sizeof(word_t)) {
/* input unaligned, output aligned */
*(word_t *) (out + k) |=
mask &
((const unaligned_word_t
*)(&table[k + j * elem_bytes]))->unaligned;
*(word_t *)(out + k) |=
mask
& ((const unaligned_word_t *)
(&table[k + j * elem_bytes]))->unaligned;
} else {
/* aligned */
*(word_t *) (out + k) |=
mask & *(const word_t *)(&table[k + j * elem_bytes]);
*(word_t *)(out + k) |=
mask
& *(const word_t *)(&table[k + j * elem_bytes]);
}
}
}
@ -187,8 +187,8 @@ static __inline__ void
}
}
/**
* @brief Constant-time a = mask ? bTrue : bFalse.
/*
* Constant-time a = mask ? bTrue : bFalse.
*
* The input and output must be at least as aligned as alignment_bytes
* or their size, whichever is smaller.
@ -196,12 +196,12 @@ static __inline__ void
* Note that the output is not __restrict__, but if it overlaps either
* input, it must be equal and not partially overlap.
*/
static __inline__ void
__attribute__ ((unused, always_inline))
constant_time_select(void *a_,
static ossl_inline void constant_time_select(void *a_,
const void *bFalse_,
const void *bTrue_,
word_t elem_bytes, mask_t mask, size_t alignment_bytes)
word_t elem_bytes,
mask_t mask,
size_t alignment_bytes)
{
unsigned char *a = (unsigned char *)a_;
const unsigned char *bTrue = (const unsigned char *)bTrue_;
@ -215,10 +215,10 @@ static __inline__ void
k += sizeof(big_register_t)) {
if (alignment_bytes % sizeof(big_register_t)) {
/* unaligned */
((unaligned_br_t *) (&a[k]))->unaligned =
((unaligned_br_t *)(&a[k]))->unaligned =
(br_mask & ((const unaligned_br_t *)(&bTrue[k]))->unaligned)
| (~br_mask &
((const unaligned_br_t *)(&bFalse[k]))->unaligned);
| (~br_mask
& ((const unaligned_br_t *)(&bFalse[k]))->unaligned);
} else {
/* aligned */
*(big_register_t *) (a + k) =

View file

@ -28,16 +28,13 @@
#define DECAF_WNAF_VAR_TABLE_BITS 3
static const int EDWARDS_D = -39081;
static const curve448_scalar_t precomputed_scalarmul_adjustment = { {{
SC_LIMB
(0xc873d6d54a7bb0cf),
SC_LIMB
(0xe933d8d723a70aad),
SC_LIMB
(0xbb124b65129c96fd),
SC_LIMB
(0x00000008335dc163)
}}
static const curve448_scalar_t precomputed_scalarmul_adjustment = {
{
{
SC_LIMB(0xc873d6d54a7bb0cf), SC_LIMB(0xe933d8d723a70aad),
SC_LIMB(0xbb124b65129c96fd), SC_LIMB(0x00000008335dc163)
}
}
};
const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES] = { 0x05 };
@ -69,7 +66,7 @@ extern const gf curve448_precomputed_base_as_fe[];
const curve448_precomputed_s *curve448_precomputed_base =
(const curve448_precomputed_s *)&curve448_precomputed_base_as_fe;
/** Inverse. */
/* Inverse. */
static void gf_invert(gf y, const gf x, int assert_nonzero)
{
mask_t ret;
@ -89,11 +86,11 @@ static void gf_invert(gf y, const gf x, int assert_nonzero)
const curve448_point_t curve448_point_identity =
{ {{{{0}}}, {{{1}}}, {{{1}}}, {{{0}}}} };
static void
point_double_internal(curve448_point_t p,
const curve448_point_t q, int before_double)
static void point_double_internal(curve448_point_t p, const curve448_point_t q,
int before_double)
{
gf a, b, c, d;
gf_sqr(c, q->x);
gf_sqr(a, q->y);
gf_add_nr(d, c, a); /* 2+e */
@ -136,6 +133,7 @@ static void pt_to_pniels(pniels_t b, const curve448_point_t a)
static void pniels_to_pt(curve448_point_t e, const pniels_t d)
{
gf eu;
gf_add(eu, d->n->b, d->n->a);
gf_sub(e->y, d->n->b, d->n->a);
gf_mul(e->t, e->y, eu);
@ -152,10 +150,11 @@ static void niels_to_pt(curve448_point_t e, const niels_t n)
gf_copy(e->z, ONE);
}
static void
add_niels_to_pt(curve448_point_t d, const niels_t e, int before_double)
static void add_niels_to_pt(curve448_point_t d, const niels_t e,
int before_double)
{
gf a, b, c;
gf_sub_nr(b, d->y, d->x); /* 3+e */
gf_mul(a, e->a, b);
gf_add_nr(b, d->x, d->y); /* 2+e */
@ -172,8 +171,8 @@ add_niels_to_pt(curve448_point_t d, const niels_t e, int before_double)
gf_mul(d->t, b, c);
}
static void
sub_niels_from_pt(curve448_point_t d, const niels_t e, int before_double)
static void sub_niels_from_pt(curve448_point_t d, const niels_t e,
int before_double)
{
gf a, b, c;
gf_sub_nr(b, d->y, d->x); /* 3+e */
@ -192,19 +191,21 @@ sub_niels_from_pt(curve448_point_t d, const niels_t e, int before_double)
gf_mul(d->t, b, c);
}
static void
add_pniels_to_pt(curve448_point_t p, const pniels_t pn, int before_double)
static void add_pniels_to_pt(curve448_point_t p, const pniels_t pn,
int before_double)
{
gf L0;
gf_mul(L0, p->z, pn->z);
gf_copy(p->z, L0);
add_niels_to_pt(p, pn->n, before_double);
}
static void
sub_pniels_from_pt(curve448_point_t p, const pniels_t pn, int before_double)
static void sub_pniels_from_pt(curve448_point_t p, const pniels_t pn,
int before_double)
{
gf L0;
gf_mul(L0, p->z, pn->z);
gf_copy(p->z, L0);
sub_niels_from_pt(p, pn->n, before_double);
@ -244,9 +245,9 @@ decaf_bool_t curve448_point_valid(const curve448_point_t p)
return mask_to_bool(out);
}
static ossl_inline void
constant_time_lookup_niels(niels_s * __restrict__ ni,
const niels_t * table, int nelts, int idx)
static ossl_inline void constant_time_lookup_niels(niels_s * __restrict__ ni,
const niels_t * table,
int nelts, int idx)
{
constant_time_lookup(ni, table, sizeof(niels_s), nelts, idx);
}
@ -300,9 +301,8 @@ void curve448_precomputed_scalarmul(curve448_point_t out,
OPENSSL_cleanse(scalar1x, sizeof(scalar1x));
}
void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
enc
[DECAF_EDDSA_448_PUBLIC_BYTES],
void curve448_point_mul_by_ratio_and_encode_like_eddsa(
uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES],
const curve448_point_t p)
{
@ -314,6 +314,7 @@ void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
{
/* 4-isogeny: 2xy/(y^2+x^2), (y^2-x^2)/(2z^2-y^2+x^2) */
gf u;
gf_sqr(x, q->x);
gf_sqr(t, q->y);
gf_add(u, x, t);
@ -347,12 +348,9 @@ void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
curve448_point_destroy(q);
}
decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(curve448_point_t
p,
const uint8_t
enc
[DECAF_EDDSA_448_PUBLIC_BYTES]
)
decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(
curve448_point_t p,
const uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES])
{
uint8_t enc2[DECAF_EDDSA_448_PUBLIC_BYTES];
mask_t low;
@ -411,8 +409,7 @@ decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(curve448_point_t
decaf_error_t decaf_x448(uint8_t out[X_PUBLIC_BYTES],
const uint8_t base[X_PUBLIC_BYTES],
const uint8_t scalar[X_PRIVATE_BYTES]
)
const uint8_t scalar[X_PRIVATE_BYTES])
{
gf x1, x2, z2, x3, z3, t1, t2;
int t;
@ -487,8 +484,7 @@ decaf_error_t decaf_x448(uint8_t out[X_PUBLIC_BYTES],
/* Thanks Johan Pascal */
void decaf_ed448_convert_public_key_to_x448(uint8_t x[DECAF_X448_PUBLIC_BYTES],
const uint8_t
ed[DECAF_EDDSA_448_PUBLIC_BYTES]
)
ed[DECAF_EDDSA_448_PUBLIC_BYTES])
{
gf y;
const uint8_t mask = (uint8_t)(0xFE << (7));
@ -527,8 +523,7 @@ void curve448_point_mul_by_ratio_and_encode_like_x448(uint8_t
}
void decaf_x448_derive_public_key(uint8_t out[X_PUBLIC_BYTES],
const uint8_t scalar[X_PRIVATE_BYTES]
)
const uint8_t scalar[X_PRIVATE_BYTES])
{
/* Scalar conditioning */
uint8_t scalar2[X_PRIVATE_BYTES];
@ -553,17 +548,15 @@ void decaf_x448_derive_public_key(uint8_t out[X_PUBLIC_BYTES],
curve448_point_destroy(p);
}
/**
* @cond internal
* Control for variable-time scalar multiply algorithms.
*/
/* Control for variable-time scalar multiply algorithms. */
struct smvt_control {
    /*
     * NOTE(review): presumably the bit position (power of two) at which
     * the addend applies - confirm against recode_wnaf().
     */
    int power;
    /*
     * Signed table selector. Callers add the table entry at index
     * (addend >> 1) when addend is positive, and otherwise subtract the
     * entry at index ((-addend) >> 1).
     */
    int addend;
};
static int recode_wnaf(struct smvt_control *control, /* [nbits/(table_bits+1) +
* 3] */
const curve448_scalar_t scalar, unsigned int table_bits)
static int recode_wnaf(struct smvt_control *control,
/* [nbits/(table_bits + 1) + 3] */
const curve448_scalar_t scalar,
unsigned int table_bits)
{
unsigned int table_size = DECAF_448_SCALAR_BITS / (table_bits + 1) + 3;
int position = table_size - 1; /* at the end */
@ -587,16 +580,13 @@ static int recode_wnaf(struct smvt_control *control, /* [nbits/(table_bits+1) +
for (w = 1; w < (DECAF_448_SCALAR_BITS - 1) / 16 + 3; w++) {
if (w < (DECAF_448_SCALAR_BITS - 1) / 16 + 1) {
/* Refill the 16 high bits of current */
current +=
(uint32_t)((scalar->limb[w / B_OVER_16] >> (16 *
(w %
B_OVER_16))) <<
16);
current += (uint32_t)((scalar->limb[w / B_OVER_16]
>> (16 * (w % B_OVER_16))) << 16);
}
while (current & 0xFFFF) {
uint32_t pos = __builtin_ctz((uint32_t)current), odd =
(uint32_t)current >> pos;
uint32_t pos = __builtin_ctz((uint32_t)current);
uint32_t odd = (uint32_t)current >> pos;
int32_t delta = odd & mask;
assert(position >= 0);
@ -619,9 +609,9 @@ static int recode_wnaf(struct smvt_control *control, /* [nbits/(table_bits+1) +
return n - 1;
}
static void
prepare_wnaf_table(pniels_t * output,
const curve448_point_t working, unsigned int tbits)
static void prepare_wnaf_table(pniels_t * output,
const curve448_point_t working,
unsigned int tbits)
{
curve448_point_t tmp;
int i;
@ -698,12 +688,12 @@ void curve448_base_double_scalarmul_non_secret(curve448_point_t combo,
if (control_var[contv].addend > 0) {
add_pniels_to_pt(combo,
precmp_var[control_var[contv].addend >> 1], i
&& !cp);
precmp_var[control_var[contv].addend >> 1],
i && !cp);
} else {
sub_pniels_from_pt(combo,
precmp_var[(-control_var[contv].addend) >>
1], i && !cp);
precmp_var[(-control_var[contv].addend)
>> 1], i && !cp);
}
contv++;
}
@ -713,8 +703,8 @@ void curve448_base_double_scalarmul_non_secret(curve448_point_t combo,
if (control_pre[contp].addend > 0) {
add_niels_to_pt(combo,
curve448_wnaf_base[control_pre[contp].addend >>
1], i);
curve448_wnaf_base[control_pre[contp].addend
>> 1], i);
} else {
sub_niels_from_pt(combo,
curve448_wnaf_base[(-control_pre

File diff suppressed because it is too large Load diff

View file

@ -27,48 +27,59 @@ extern "C" {
* with arch_arm32.
*/
# ifndef DECAF_WORD_BITS
# if (defined(__ILP64__) || defined(__amd64__) || defined(__x86_64__) || (((__UINT_FAST32_MAX__)>>30)>>30))
# define DECAF_WORD_BITS 64 /**< The number of bits in a word */
# if (defined(__ILP64__) || defined(__amd64__) || defined(__x86_64__) \
|| (((__UINT_FAST32_MAX__)>>30)>>30))
# define DECAF_WORD_BITS 64 /* The number of bits in a word */
# else
# define DECAF_WORD_BITS 32 /**< The number of bits in a word */
# define DECAF_WORD_BITS 32 /* The number of bits in a word */
# endif
# endif
# if DECAF_WORD_BITS == 64
typedef uint64_t decaf_word_t; /**< Word size for internal computations */
typedef int64_t decaf_sword_t; /**< Signed word size for internal computations */
typedef uint64_t decaf_bool_t; /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
typedef __uint128_t decaf_dword_t; /**< Double-word size for internal computations */
typedef __int128_t decaf_dsword_t; /**< Signed double-word size for internal computations */
# elif DECAF_WORD_BITS == 32 /**< The number of bits in a word */
typedef uint32_t decaf_word_t; /**< Word size for internal computations */
typedef int32_t decaf_sword_t; /**< Signed word size for internal computations */
typedef uint32_t decaf_bool_t; /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
typedef uint64_t decaf_dword_t; /**< Double-word size for internal computations */
typedef int64_t decaf_dsword_t; /**< Signed double-word size for internal computations */
/* Word size for internal computations */
typedef uint64_t decaf_word_t;
/* Signed word size for internal computations */
typedef int64_t decaf_sword_t;
/* "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
typedef uint64_t decaf_bool_t;
/* Double-word size for internal computations */
typedef __uint128_t decaf_dword_t;
/* Signed double-word size for internal computations */
typedef __int128_t decaf_dsword_t;
# elif DECAF_WORD_BITS == 32
/* Word size for internal computations */
typedef uint32_t decaf_word_t;
/* Signed word size for internal computations */
typedef int32_t decaf_sword_t;
/* "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
typedef uint32_t decaf_bool_t;
/* Double-word size for internal computations */
typedef uint64_t decaf_dword_t;
/* Signed double-word size for internal computations */
typedef int64_t decaf_dsword_t;
# else
# error "Only supporting DECAF_WORD_BITS = 32 or 64 for now"
# endif
/** DECAF_TRUE = -1 so that DECAF_TRUE & x = x */
/* DECAF_TRUE = -1 so that DECAF_TRUE & x = x */
static const decaf_bool_t DECAF_TRUE = -(decaf_bool_t) 1;
/** DECAF_FALSE = 0 so that DECAF_FALSE & x = 0 */
/* DECAF_FALSE = 0 so that DECAF_FALSE & x = 0 */
static const decaf_bool_t DECAF_FALSE = 0;
/** Another boolean type used to indicate success or failure. */
/* Another boolean type used to indicate success or failure. */
typedef enum {
    /* The operation failed. */
    DECAF_FAILURE = 0,
    /* The operation succeeded. */
    DECAF_SUCCESS = -1
} decaf_error_t;
/** Return success if x is true */
/* Return success if x is true */
static ossl_inline decaf_error_t decaf_succeed_if(decaf_bool_t x)
{
    /*
     * Plain value conversion, no branching: a zero mask (DECAF_FALSE)
     * becomes DECAF_FAILURE (0). An all-ones mask (DECAF_TRUE) is intended
     * to become DECAF_SUCCESS (-1).
     */
    const decaf_error_t status = (decaf_error_t) x;

    return status;
}
/** Return DECAF_TRUE iff x == DECAF_SUCCESS */
/* Return DECAF_TRUE iff x == DECAF_SUCCESS */
static ossl_inline decaf_bool_t decaf_successful(decaf_error_t e)
{
decaf_dword_t w = ((decaf_word_t) e) ^ ((decaf_word_t) DECAF_SUCCESS);

View file

@ -19,111 +19,106 @@
extern "C" {
#endif
/** Number of bytes in an EdDSA public key. */
/* Number of bytes in an EdDSA public key. */
# define DECAF_EDDSA_448_PUBLIC_BYTES 57
/** Number of bytes in an EdDSA private key. */
/* Number of bytes in an EdDSA private key. */
# define DECAF_EDDSA_448_PRIVATE_BYTES DECAF_EDDSA_448_PUBLIC_BYTES
/** Number of bytes in an EdDSA private key. */
# define DECAF_EDDSA_448_SIGNATURE_BYTES (DECAF_EDDSA_448_PUBLIC_BYTES + DECAF_EDDSA_448_PRIVATE_BYTES)
/* Number of bytes in an EdDSA signature. */
# define DECAF_EDDSA_448_SIGNATURE_BYTES (DECAF_EDDSA_448_PUBLIC_BYTES + \
DECAF_EDDSA_448_PRIVATE_BYTES)
/** Does EdDSA support non-contextual signatures? */
/* Does EdDSA support non-contextual signatures? */
# define DECAF_EDDSA_448_SUPPORTS_CONTEXTLESS_SIGS 0
/** EdDSA encoding ratio. */
/* EdDSA encoding ratio. */
# define DECAF_448_EDDSA_ENCODE_RATIO 4
/** EdDSA decoding ratio. */
/* EdDSA decoding ratio. */
# define DECAF_448_EDDSA_DECODE_RATIO (4 / 4)
/**
* @brief EdDSA key generation. This function uses a different (non-Decaf)
* encoding.
/*
* EdDSA key generation. This function uses a different (non-Decaf) encoding.
*
* @param [out] pubkey The public key.
* @param [in] privkey The private key.
* pubkey (out): The public key.
* privkey (in): The private key.
*/
decaf_error_t decaf_ed448_derive_public_key(uint8_t
pubkey
[DECAF_EDDSA_448_PUBLIC_BYTES],
const uint8_t
privkey
[DECAF_EDDSA_448_PRIVATE_BYTES]
);
decaf_error_t decaf_ed448_derive_public_key(
uint8_t pubkey [DECAF_EDDSA_448_PUBLIC_BYTES],
const uint8_t privkey [DECAF_EDDSA_448_PRIVATE_BYTES]);
/**
* @brief EdDSA signing.
/*
* EdDSA signing.
*
* @param [out] signature The signature.
* @param [in] privkey The private key.
* @param [in] pubkey The public key.
* @param [in] message The message to sign.
* @param [in] message_len The length of the message.
* @param [in] prehashed Nonzero if the message is actually the hash of something you want to sign.
* @param [in] context A "context" for this signature of up to 255 bytes.
* @param [in] context_len Length of the context.
* signature (out): The signature.
* privkey (in): The private key.
* pubkey (in): The public key.
* message (in): The message to sign.
* message_len (in): The length of the message.
* prehashed (in): Nonzero if the message is actually the hash of something
* you want to sign.
* context (in): A "context" for this signature of up to 255 bytes.
* context_len (in): Length of the context.
*
* @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
* messages, at least without some very careful protocol-level disambiguation. For Ed448 it is
* safe. The C++ wrapper is designed to make it harder to screw this up, but this C code gives
* you no seat belt.
* For Ed25519, it is unsafe to use the same key for both prehashed and
* non-prehashed messages, at least without some very careful protocol-level
* disambiguation. For Ed448 it is safe. The C++ wrapper is designed to make
* it harder to screw this up, but this C code gives you no seat belt.
*/
decaf_error_t decaf_ed448_sign(uint8_t
signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
const uint8_t
privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
const uint8_t
pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
decaf_error_t decaf_ed448_sign(
uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
const uint8_t *message, size_t message_len,
uint8_t prehashed, const uint8_t *context,
size_t context_len)
__attribute__ ((nonnull(1, 2, 3)));
/**
* @brief EdDSA signing with prehash.
/*
* EdDSA signing with prehash.
*
* @param [out] signature The signature.
* @param [in] privkey The private key.
* @param [in] pubkey The public key.
* @param [in] hash The hash of the message. This object will not be modified by the call.
* @param [in] context A "context" for this signature of up to 255 bytes. Must be the same as what was used for the prehash.
* @param [in] context_len Length of the context.
* signature (out): The signature.
* privkey (in): The private key.
* pubkey (in): The public key.
* hash (in): The hash of the message. This object will not be modified by the
* call.
* context (in): A "context" for this signature of up to 255 bytes. Must be the
* same as what was used for the prehash.
* context_len (in): Length of the context.
*
* @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
* messages, at least without some very careful protocol-level disambiguation. For Ed448 it is
* safe. The C++ wrapper is designed to make it harder to screw this up, but this C code gives
* you no seat belt.
* For Ed25519, it is unsafe to use the same key for both prehashed and
* non-prehashed messages, at least without some very careful protocol-level
* disambiguation. For Ed448 it is safe. The C++ wrapper is designed to make
* it harder to screw this up, but this C code gives you no seat belt.
*/
decaf_error_t decaf_ed448_sign_prehash(uint8_t
signature
[DECAF_EDDSA_448_SIGNATURE_BYTES],
const uint8_t
privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
const uint8_t
pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
decaf_error_t decaf_ed448_sign_prehash(
uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
const uint8_t hash[64],
const uint8_t *context,
size_t context_len)
__attribute__ ((nonnull(1, 2, 3, 4)));
/**
* @brief EdDSA signature verification.
/*
* EdDSA signature verification.
*
* Uses the standard (i.e. less-strict) verification formula.
*
* @param [in] signature The signature.
* @param [in] pubkey The public key.
* @param [in] message The message to verify.
* @param [in] message_len The length of the message.
* @param [in] prehashed Nonzero if the message is actually the hash of something you want to verify.
* @param [in] context A "context" for this signature of up to 255 bytes.
* @param [in] context_len Length of the context.
* signature (in): The signature.
* pubkey (in): The public key.
* message (in): The message to verify.
* message_len (in): The length of the message.
* prehashed (in): Nonzero if the message is actually the hash of something you
* want to verify.
* context (in): A "context" for this signature of up to 255 bytes.
* context_len (in): Length of the context.
*
* @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
* messages, at least without some very careful protocol-level disambiguation. For Ed448 it is
* safe. The C++ wrapper is designed to make it harder to screw this up, but this C code gives
* you no seat belt.
* For Ed25519, it is unsafe to use the same key for both prehashed and
* non-prehashed messages, at least without some very careful protocol-level
* disambiguation. For Ed448 it is safe. The C++ wrapper is designed to make
* it harder to screw this up, but this C code gives you no seat belt.
*/
decaf_error_t decaf_ed448_verify(const uint8_t
signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
@ -134,34 +129,34 @@ decaf_error_t decaf_ed448_verify(const uint8_t
uint8_t context_len)
__attribute__ ((nonnull(1, 2)));
/**
* @brief EdDSA signature verification.
/*
* EdDSA signature verification.
*
* Uses the standard (i.e. less-strict) verification formula.
*
* @param [in] signature The signature.
* @param [in] pubkey The public key.
* @param [in] hash The hash of the message. This object will not be modified by the call.
* @param [in] context A "context" for this signature of up to 255 bytes. Must be the same as what was used for the prehash.
* @param [in] context_len Length of the context.
* signature (in): The signature.
* pubkey (in): The public key.
* hash (in): The hash of the message. This object will not be modified by the
* call.
* context (in): A "context" for this signature of up to 255 bytes. Must be the
* same as what was used for the prehash.
* context_len (in): Length of the context.
*
* @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
* messages, at least without some very careful protocol-level disambiguation. For Ed448 it is
* safe. The C++ wrapper is designed to make it harder to screw this up, but this C code gives
* you no seat belt.
* For Ed25519, it is unsafe to use the same key for both prehashed and
* non-prehashed messages, at least without some very careful protocol-level
* disambiguation. For Ed448 it is safe. The C++ wrapper is designed to make
* it harder to screw this up, but this C code gives you no seat belt.
*/
decaf_error_t decaf_ed448_verify_prehash(const uint8_t
signature
[DECAF_EDDSA_448_SIGNATURE_BYTES],
const uint8_t
pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
decaf_error_t decaf_ed448_verify_prehash(
const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
const uint8_t hash[64],
const uint8_t *context,
uint8_t context_len)
__attribute__ ((nonnull(1, 2)));
/**
* @brief EdDSA point encoding. Used internally, exposed externally.
/*
* EdDSA point encoding. Used internally, exposed externally.
* Multiplies by DECAF_448_EDDSA_ENCODE_RATIO first.
*
* The multiplication is required because the EdDSA encoding represents
@ -181,62 +176,52 @@ decaf_error_t decaf_ed448_verify_prehash(const uint8_t
* this function, you will get DECAF_448_EDDSA_ENCODE_RATIO times the
* EdDSA base point.
*
* @param [out] enc The encoded point.
* @param [in] p The point.
* enc (out): The encoded point.
* p (in): The point.
*/
void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
enc
[DECAF_EDDSA_448_PUBLIC_BYTES],
const curve448_point_t
p);
void curve448_point_mul_by_ratio_and_encode_like_eddsa(
uint8_t enc [DECAF_EDDSA_448_PUBLIC_BYTES],
const curve448_point_t p);
/**
* @brief EdDSA point decoding. Multiplies by DECAF_448_EDDSA_DECODE_RATIO,
* and ignores cofactor information.
/*
* EdDSA point decoding. Multiplies by DECAF_448_EDDSA_DECODE_RATIO, and
* ignores cofactor information.
*
* See notes on curve448_point_mul_by_ratio_and_encode_like_eddsa
*
* @param [out] enc The encoded point.
* @param [in] p The point.
* enc (in): The encoded point.
* p (out): The point.
*/
decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(curve448_point_t
p,
const uint8_t
enc
[DECAF_EDDSA_448_PUBLIC_BYTES]
);
decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(
curve448_point_t p,
const uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES]);
/**
* @brief EdDSA to ECDH public key conversion
/*
* EdDSA to ECDH public key conversion
* Deserialize the point to get y on Edwards curve,
* Convert it to u coordinate on Montgomery curve.
*
* @warning This function does not check that the public key being converted
* is a valid EdDSA public key (FUTURE?)
* This function does not check that the public key being converted is a valid
* EdDSA public key (FUTURE?)
*
* @param[out] x The ECDH public key as in RFC7748(point on Montgomery curve)
* @param[in] ed The EdDSA public key(point on Edwards curve)
* x (out): The ECDH public key as in RFC 7748 (point on Montgomery curve)
* ed (in): The EdDSA public key (point on Edwards curve)
*/
void decaf_ed448_convert_public_key_to_x448(uint8_t x[DECAF_X448_PUBLIC_BYTES],
const uint8_t
ed[DECAF_EDDSA_448_PUBLIC_BYTES]
);
void decaf_ed448_convert_public_key_to_x448(
uint8_t x[DECAF_X448_PUBLIC_BYTES],
const uint8_t ed[DECAF_EDDSA_448_PUBLIC_BYTES]);
/**
* @brief EdDSA to ECDH private key conversion
/*
* EdDSA to ECDH private key conversion
* Using the appropriate hash function, hash the EdDSA private key
* and keep only the lower bytes to get the ECDH private key
*
* @param[out] x The ECDH private key as in RFC7748
* @param[in] ed The EdDSA private key
* x (out): The ECDH private key as in RFC7748
* ed (in): The EdDSA private key
*/
decaf_error_t decaf_ed448_convert_private_key_to_x448(uint8_t
x
[DECAF_X448_PRIVATE_BYTES],
const uint8_t
ed
[DECAF_EDDSA_448_PRIVATE_BYTES]
);
decaf_error_t decaf_ed448_convert_private_key_to_x448(
uint8_t x[DECAF_X448_PRIVATE_BYTES],
const uint8_t ed[DECAF_EDDSA_448_PRIVATE_BYTES]);
#ifdef __cplusplus
} /* extern "C" */

View file

@ -55,8 +55,7 @@ static decaf_error_t oneshot_hash(uint8_t *out, size_t outlen,
return DECAF_SUCCESS;
}
static void clamp(uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES]
)
static void clamp(uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES])
{
uint8_t hibit = (1 << 0) >> 1;
@ -106,13 +105,9 @@ static decaf_error_t hash_init_with_dom(EVP_MD_CTX *hashctx,
}
/* In this file because it uses the hash */
decaf_error_t decaf_ed448_convert_private_key_to_x448(uint8_t
x
[DECAF_X448_PRIVATE_BYTES],
const uint8_t
ed
[DECAF_EDDSA_448_PRIVATE_BYTES]
)
decaf_error_t decaf_ed448_convert_private_key_to_x448(
uint8_t x[DECAF_X448_PRIVATE_BYTES],
const uint8_t ed [DECAF_EDDSA_448_PRIVATE_BYTES])
{
/* pass the private key through oneshot_hash function */
/* and keep the first DECAF_X448_PRIVATE_BYTES bytes */
@ -121,13 +116,9 @@ decaf_error_t decaf_ed448_convert_private_key_to_x448(uint8_t
ed, DECAF_EDDSA_448_PRIVATE_BYTES);
}
decaf_error_t decaf_ed448_derive_public_key(uint8_t
pubkey
[DECAF_EDDSA_448_PUBLIC_BYTES],
const uint8_t
privkey
[DECAF_EDDSA_448_PRIVATE_BYTES]
)
decaf_error_t decaf_ed448_derive_public_key(
uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES])
{
/* only this much used for keygen */
uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES];
@ -136,9 +127,9 @@ decaf_error_t decaf_ed448_derive_public_key(uint8_t
curve448_point_t p;
if (!oneshot_hash(secret_scalar_ser, sizeof(secret_scalar_ser), privkey,
DECAF_EDDSA_448_PRIVATE_BYTES)) {
DECAF_EDDSA_448_PRIVATE_BYTES))
return DECAF_FAILURE;
}
clamp(secret_scalar_ser);
curve448_scalar_decode_long(secret_scalar, secret_scalar_ser,
@ -152,9 +143,8 @@ decaf_error_t decaf_ed448_derive_public_key(uint8_t
* converted it effectively picks up a factor of 2 from the isogenies. So
* we might start at 2 instead of 1.
*/
for (c = 1; c < DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1) {
for (c = 1; c < DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1)
curve448_scalar_halve(secret_scalar, secret_scalar);
}
curve448_precomputed_scalarmul(p, curve448_precomputed_base, secret_scalar);
@ -168,12 +158,10 @@ decaf_error_t decaf_ed448_derive_public_key(uint8_t
return DECAF_SUCCESS;
}
decaf_error_t decaf_ed448_sign(uint8_t
signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
const uint8_t
privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
const uint8_t
pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
decaf_error_t decaf_ed448_sign(
uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
const uint8_t *message, size_t message_len,
uint8_t prehashed, const uint8_t *context,
size_t context_len)
@ -287,9 +275,6 @@ decaf_error_t decaf_ed448_sign_prehash(uint8_t
{
return decaf_ed448_sign(signature, privkey, pubkey, hash, 64, 1, context,
context_len);
/*
* OPENSSL_cleanse(hash,sizeof(hash));
*/
}
decaf_error_t decaf_ed448_verify(const uint8_t
@ -307,15 +292,13 @@ decaf_error_t decaf_ed448_verify(const uint8_t
curve448_scalar_t response_scalar;
unsigned int c;
if (DECAF_SUCCESS != error) {
if (DECAF_SUCCESS != error)
return error;
}
error =
curve448_point_decode_like_eddsa_and_mul_by_ratio(r_point, signature);
if (DECAF_SUCCESS != error) {
if (DECAF_SUCCESS != error)
return error;
}
{
/* Compute the challenge */
@ -345,9 +328,8 @@ decaf_error_t decaf_ed448_verify(const uint8_t
&signature[DECAF_EDDSA_448_PUBLIC_BYTES],
DECAF_EDDSA_448_PRIVATE_BYTES);
for (c = 1; c < DECAF_448_EDDSA_DECODE_RATIO; c <<= 1) {
for (c = 1; c < DECAF_448_EDDSA_DECODE_RATIO; c <<= 1)
curve448_scalar_add(response_scalar, response_scalar, response_scalar);
}
/* pk_point = -c(x(P)) + (cx + k)G = kG */
curve448_base_double_scalarmul_non_secret(pk_point,
@ -356,19 +338,15 @@ decaf_error_t decaf_ed448_verify(const uint8_t
return decaf_succeed_if(curve448_point_eq(pk_point, r_point));
}
decaf_error_t decaf_ed448_verify_prehash(const uint8_t
signature
[DECAF_EDDSA_448_SIGNATURE_BYTES],
const uint8_t
pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
const uint8_t hash[64],
const uint8_t *context,
decaf_error_t decaf_ed448_verify_prehash(
const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
const uint8_t hash[64], const uint8_t *context,
uint8_t context_len)
{
decaf_error_t ret;
ret =
decaf_ed448_verify(signature, pubkey, hash, 64, 1, context,
ret = decaf_ed448_verify(signature, pubkey, hash, 64, 1, context,
context_len);
return ret;

View file

@ -11,8 +11,8 @@
*/
#include "field.h"
static const gf MODULUS =
{ FIELD_LITERAL(0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff,
static const gf MODULUS = {
FIELD_LITERAL(0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff,
0xffffffffffffff, 0xfffffffffffffe, 0xffffffffffffff,
0xffffffffffffff, 0xffffffffffffff)
};
@ -27,9 +27,8 @@ void gf_serialize(uint8_t serial[SER_BYTES], const gf x, int with_hibit)
gf_copy(red, x);
gf_strong_reduce(red);
if (!with_hibit) {
if (!with_hibit)
assert(gf_hibit(red) == 0);
}
UNROLL for (i = 0; i < (with_hibit ? X_SER_BYTES : SER_BYTES); i++) {
if (fill < 8 && j < NLIMBS) {
@ -43,7 +42,7 @@ void gf_serialize(uint8_t serial[SER_BYTES], const gf x, int with_hibit)
}
}
/** Return high bit of x = low bit of 2x mod p */
/* Return high bit of x = low bit of 2x mod p */
mask_t gf_hibit(const gf x)
{
gf y;
@ -52,7 +51,7 @@ mask_t gf_hibit(const gf x)
return -(y->limb[0] & 1);
}
/** Return high bit of x = low bit of 2x mod p */
/* Return low bit of x */
mask_t gf_lobit(const gf x)
{
gf y;
@ -61,7 +60,7 @@ mask_t gf_lobit(const gf x)
return -(y->limb[0] & 1);
}
/** Deserialize from wire format; return -1 on success and 0 on failure. */
/* Deserialize from wire format; return -1 on success and 0 on failure. */
mask_t gf_deserialize(gf x, const uint8_t serial[SER_BYTES], int with_hibit,
uint8_t hi_nmask)
{
@ -93,7 +92,7 @@ mask_t gf_deserialize(gf x, const uint8_t serial[SER_BYTES], int with_hibit,
return succ & word_is_zero(buffer) & ~word_is_zero(scarry);
}
/** Reduce to canonical form. */
/* Reduce to canonical form. */
void gf_strong_reduce(gf a)
{
dsword_t scarry;
@ -135,7 +134,7 @@ void gf_strong_reduce(gf a)
assert(word_is_zero(carry + scarry_0));
}
/** Subtract two gf elements d=a-b */
/* Subtract two gf elements d=a-b */
void gf_sub(gf d, const gf a, const gf b)
{
gf_sub_RAW(d, a, b);
@ -143,14 +142,14 @@ void gf_sub(gf d, const gf a, const gf b)
gf_weak_reduce(d);
}
/** Add two field elements d = a+b */
/* Add two field elements d = a+b */
void gf_add(gf d, const gf a, const gf b)
{
gf_add_RAW(d, a, b);
gf_weak_reduce(d);
}
/** Compare a==b */
/* Compare a==b */
mask_t gf_eq(const gf a, const gf b)
{
gf c;

View file

@ -17,7 +17,7 @@
# include "f_field.h"
# include <string.h>
/** Square x, n times. */
/* Square x, n times. */
static ossl_inline void gf_sqrn(gf_s * __restrict__ y, const gf x, int n)
{
gf tmp;
@ -38,7 +38,7 @@ static ossl_inline void gf_sqrn(gf_s * __restrict__ y, const gf x, int n)
# define gf_add_nr gf_add_RAW
/** Subtract mod p. Bias by 2 and don't reduce */
/* Subtract mod p. Bias by 2 and don't reduce */
static ossl_inline void gf_sub_nr(gf c, const gf a, const gf b)
{
gf_sub_RAW(c, a, b);
@ -47,7 +47,7 @@ static ossl_inline void gf_sub_nr(gf c, const gf a, const gf b)
gf_weak_reduce(c);
}
/** Subtract mod p. Bias by amt but don't reduce. */
/* Subtract mod p. Bias by amt but don't reduce. */
static ossl_inline void gf_subx_nr(gf c, const gf a, const gf b, int amt)
{
gf_sub_RAW(c, a, b);
@ -56,7 +56,7 @@ static ossl_inline void gf_subx_nr(gf c, const gf a, const gf b, int amt)
gf_weak_reduce(c);
}
/** Mul by signed int. Not constant-time WRT the sign of that int. */
/* Mul by signed int. Not constant-time WRT the sign of that int. */
static ossl_inline void gf_mulw(gf c, const gf a, int32_t w)
{
if (w > 0) {
@ -67,13 +67,13 @@ static ossl_inline void gf_mulw(gf c, const gf a, int32_t w)
}
}
/** Constant time, x = is_z ? z : y */
/* Constant time, x = is_z ? z : y */
static ossl_inline void gf_cond_sel(gf x, const gf y, const gf z, mask_t is_z)
{
constant_time_select(x, y, z, sizeof(gf), is_z, 0);
}
/** Constant time, if (neg) x=-x; */
/* Constant time, if (neg) x=-x; */
static ossl_inline void gf_cond_neg(gf x, mask_t neg)
{
gf y;
@ -81,7 +81,7 @@ static ossl_inline void gf_cond_neg(gf x, mask_t neg)
gf_cond_sel(x, x, y, neg);
}
/** Constant time, if (swap) (x,y) = (y,x); */
/* Constant time, if (swap) (x,y) = (y,x); */
static ossl_inline void gf_cond_swap(gf x, gf_s * __restrict__ y, mask_t swap)
{
constant_time_cond_swap(x, y, sizeof(gf_s), swap);

View file

@ -20,148 +20,146 @@
extern "C" {
#endif
/** @cond internal */
# define DECAF_448_SCALAR_LIMBS ((446-1)/DECAF_WORD_BITS+1)
/** @endcond */
/** The number of bits in a scalar */
/* The number of bits in a scalar */
# define DECAF_448_SCALAR_BITS 446
/** Number of bytes in a serialized point. */
/* Number of bytes in a serialized point. */
# define DECAF_448_SER_BYTES 56
/** Number of bytes in an elligated point. For now set the same as SER_BYTES
/*
* Number of bytes in an elligated point. For now set the same as SER_BYTES
* but could be different for other curves.
*/
# define DECAF_448_HASH_BYTES 56
/** Number of bytes in a serialized scalar. */
/* Number of bytes in a serialized scalar. */
# define DECAF_448_SCALAR_BYTES 56
/** Number of bits in the "which" field of an elligator inverse */
/* Number of bits in the "which" field of an elligator inverse */
# define DECAF_448_INVERT_ELLIGATOR_WHICH_BITS 3
/** The cofactor the curve would have, if we hadn't removed it */
/* The cofactor the curve would have, if we hadn't removed it */
# define DECAF_448_REMOVED_COFACTOR 4
/** X448 encoding ratio. */
/* X448 encoding ratio. */
# define DECAF_X448_ENCODE_RATIO 2
/** Number of bytes in an x448 public key */
/* Number of bytes in an x448 public key */
# define DECAF_X448_PUBLIC_BYTES 56
/** Number of bytes in an x448 private key */
/* Number of bytes in an x448 private key */
# define DECAF_X448_PRIVATE_BYTES 56
/** Twisted Edwards extended homogeneous coordinates */
/* Twisted Edwards extended homogeneous coordinates */
typedef struct curve448_point_s {
/** @cond internal */
gf_448_t x, y, z, t;
/** @endcond */
} curve448_point_t[1];
/** Precomputed table based on a point. Can be trivial implementation. */
/* Precomputed table based on a point. Can be trivial implementation. */
struct curve448_precomputed_s;
/** Precomputed table based on a point. Can be trivial implementation. */
/* Precomputed table based on a point. Can be trivial implementation. */
typedef struct curve448_precomputed_s curve448_precomputed_s;
/** Scalar is stored packed, because we don't need the speed. */
/* Scalar is stored packed, because we don't need the speed. */
typedef struct curve448_scalar_s {
/** @cond internal */
decaf_word_t limb[DECAF_448_SCALAR_LIMBS];
/** @endcond */
} curve448_scalar_t[1];
/** A scalar equal to 1. */
/* A scalar equal to 1. */
extern const curve448_scalar_t curve448_scalar_one;
/** A scalar equal to 0. */
/* A scalar equal to 0. */
extern const curve448_scalar_t curve448_scalar_zero;
/** The identity point on the curve. */
/* The identity point on the curve. */
extern const curve448_point_t curve448_point_identity;
/** An arbitrarily chosen base point on the curve. */
/* An arbitrarily chosen base point on the curve. */
extern const curve448_point_t curve448_point_base;
/** Precomputed table for the base point on the curve. */
/* Precomputed table for the base point on the curve. */
extern const struct curve448_precomputed_s *curve448_precomputed_base;
/**
* @brief Read a scalar from wire format or from bytes.
/*
* Read a scalar from wire format or from bytes.
*
* @param [in] ser Serialized form of a scalar.
* @param [out] out Deserialized form.
* ser (in): Serialized form of a scalar.
* out (out): Deserialized form.
*
* @retval DECAF_SUCCESS The scalar was correctly encoded.
* @retval DECAF_FAILURE The scalar was greater than the modulus,
* and has been reduced modulo that modulus.
* Returns:
* DECAF_SUCCESS: The scalar was correctly encoded.
* DECAF_FAILURE: The scalar was greater than the modulus, and has been reduced
* modulo that modulus.
*/
__owur decaf_error_t curve448_scalar_decode(curve448_scalar_t out,
const unsigned char
ser[DECAF_448_SCALAR_BYTES]
);
__owur decaf_error_t curve448_scalar_decode(
curve448_scalar_t out,
const unsigned char ser[DECAF_448_SCALAR_BYTES]);
/**
* @brief Read a scalar from wire format or from bytes. Reduces mod
* scalar prime.
/*
* Read a scalar from wire format or from bytes. Reduces mod scalar prime.
*
* @param [in] ser Serialized form of a scalar.
* @param [in] ser_len Length of serialized form.
* @param [out] out Deserialized form.
* ser (in): Serialized form of a scalar.
* ser_len (in): Length of serialized form.
* out (out): Deserialized form.
*/
void curve448_scalar_decode_long(curve448_scalar_t out,
const unsigned char *ser, size_t ser_len);
/**
* @brief Serialize a scalar to wire format.
/*
* Serialize a scalar to wire format.
*
* @param [out] ser Serialized form of a scalar.
* @param [in] s Deserialized scalar.
* ser (out): Serialized form of a scalar.
* s (in): Deserialized scalar.
*/
void curve448_scalar_encode(unsigned char ser[DECAF_448_SCALAR_BYTES],
const curve448_scalar_t s);
/**
* @brief Add two scalars. The scalars may use the same memory.
* @param [in] a One scalar.
* @param [in] b Another scalar.
* @param [out] out a+b.
/*
* Add two scalars. The scalars may use the same memory.
*
* a (in): One scalar.
* b (in): Another scalar.
* out (out): a+b.
*/
void curve448_scalar_add(curve448_scalar_t out,
const curve448_scalar_t a, const curve448_scalar_t b);
/**
* @brief Subtract two scalars. The scalars may use the same memory.
* @param [in] a One scalar.
* @param [in] b Another scalar.
* @param [out] out a-b.
/*
* Subtract two scalars. The scalars may use the same memory.
*
* a (in): One scalar.
* b (in): Another scalar.
* out (out): a-b.
*/
void curve448_scalar_sub(curve448_scalar_t out,
const curve448_scalar_t a, const curve448_scalar_t b);
/**
* @brief Multiply two scalars. The scalars may use the same memory.
* @param [in] a One scalar.
* @param [in] b Another scalar.
* @param [out] out a*b.
/*
* Multiply two scalars. The scalars may use the same memory.
*
* a (in): One scalar.
* b (in): Another scalar.
* out (out): a*b.
*/
void curve448_scalar_mul(curve448_scalar_t out,
const curve448_scalar_t a, const curve448_scalar_t b);
/**
* @brief Halve a scalar. The scalars may use the same memory.
* @param [in] a A scalar.
* @param [out] out a/2.
/*
* Halve a scalar. The scalars may use the same memory.
*
* a (in): A scalar.
* out (out): a/2.
*/
void curve448_scalar_halve(curve448_scalar_t out, const curve448_scalar_t a);
/**
* @brief Copy a scalar. The scalars may use the same memory, in which
* case this function does nothing.
* @param [in] a A scalar.
* @param [out] out Will become a copy of a.
/*
* Copy a scalar. The scalars may use the same memory, in which case this
* function does nothing.
*
* a (in): A scalar.
* out (out): Will become a copy of a.
*/
static ossl_inline void curve448_scalar_copy(curve448_scalar_t out,
const curve448_scalar_t a)
@ -169,12 +167,12 @@ static ossl_inline void curve448_scalar_copy(curve448_scalar_t out,
*out = *a;
}
/**
* @brief Copy a point. The input and output may alias,
* in which case this function does nothing.
/*
* Copy a point. The input and output may alias, in which case this function
* does nothing.
*
* @param [out] a A copy of the point.
* @param [in] b Any point.
* a (out): A copy of the point.
* b (in): Any point.
*/
static ossl_inline void curve448_point_copy(curve448_point_t a,
const curve448_point_t b)
@ -182,47 +180,48 @@ static ossl_inline void curve448_point_copy(curve448_point_t a,
*a = *b;
}
/**
* @brief Test whether two points are equal. If yes, return
* DECAF_TRUE, else return DECAF_FALSE.
/*
* Test whether two points are equal. If yes, return DECAF_TRUE, else return
* DECAF_FALSE.
*
* @param [in] a A point.
* @param [in] b Another point.
* @retval DECAF_TRUE The points are equal.
* @retval DECAF_FALSE The points are not equal.
* a (in): A point.
* b (in): Another point.
*
* Returns:
* DECAF_TRUE: The points are equal.
* DECAF_FALSE: The points are not equal.
*/
__owur decaf_bool_t curve448_point_eq(const curve448_point_t a,
const curve448_point_t b);
/**
* @brief Double a point. Equivalent to
* curve448_point_add(two_a,a,a), but potentially faster.
/*
* Double a point. Equivalent to curve448_point_add(two_a,a,a), but potentially
* faster.
*
* @param [out] two_a The sum a+a.
* @param [in] a A point.
* two_a (out): The sum a+a.
* a (in): A point.
*/
void curve448_point_double(curve448_point_t two_a, const curve448_point_t a);
/**
* @brief RFC 7748 Diffie-Hellman scalarmul. This function uses a different
/*
* RFC 7748 Diffie-Hellman scalarmul. This function uses a different
* (non-Decaf) encoding.
*
* @param [out] scaled The scaled point base*scalar
* @param [in] base The point to be scaled.
* @param [in] scalar The scalar to multiply by.
* out (out): The scaled point base*scalar
* base (in): The point to be scaled.
* scalar (in): The scalar to multiply by.
*
* @retval DECAF_SUCCESS The scalarmul succeeded.
* @retval DECAF_FAILURE The scalarmul didn't succeed, because the base
* point is in a small subgroup.
* Returns:
* DECAF_SUCCESS: The scalarmul succeeded.
* DECAF_FAILURE: The scalarmul didn't succeed, because the base point is in a
* small subgroup.
*/
__owur decaf_error_t decaf_x448(uint8_t out[DECAF_X448_PUBLIC_BYTES],
const uint8_t base[DECAF_X448_PUBLIC_BYTES],
const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
);
const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]);
/**
* @brief Multiply a point by DECAF_X448_ENCODE_RATIO,
* then encode it like RFC 7748.
/*
* Multiply a point by DECAF_X448_ENCODE_RATIO, then encode it like RFC 7748.
*
* This function is mainly used internally, but is exported in case
* it will be useful.
@ -237,83 +236,73 @@ __owur decaf_error_t decaf_x448(uint8_t out[DECAF_X448_PUBLIC_BYTES],
* will be DECAF_X448_ENCODE_RATIO times the X448
* base point.
*
* @param [out] out The scaled and encoded point.
* @param [in] p The point to be scaled and encoded.
* out (out): The scaled and encoded point.
* p (in): The point to be scaled and encoded.
*/
void curve448_point_mul_by_ratio_and_encode_like_x448(uint8_t
out
[DECAF_X448_PUBLIC_BYTES],
void curve448_point_mul_by_ratio_and_encode_like_x448(
uint8_t out[DECAF_X448_PUBLIC_BYTES],
const curve448_point_t p);
/** The base point for X448 Diffie-Hellman */
/* The base point for X448 Diffie-Hellman */
extern const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES];
/**
* @brief RFC 7748 Diffie-Hellman base point scalarmul. This function uses
* a different (non-Decaf) encoding.
/*
* RFC 7748 Diffie-Hellman base point scalarmul. This function uses a different
* (non-Decaf) encoding.
*
* Does exactly the same thing as decaf_x448_generate_key,
* but has a better name.
*
* @param [out] scaled The scaled point base*scalar
* @param [in] scalar The scalar to multiply by.
* out (out): The scaled point base*scalar
* scalar (in): The scalar to multiply by.
*/
void decaf_x448_derive_public_key(uint8_t out[DECAF_X448_PUBLIC_BYTES],
const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
);
void decaf_x448_derive_public_key(
uint8_t out[DECAF_X448_PUBLIC_BYTES],
const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]);
/**
* @brief Multiply a precomputed base point by a scalar:
* scaled = scalar*base.
* Some implementations do not include precomputed points; for
* those implementations, this function is the same as
* curve448_point_scalarmul
/*
* Multiply a precomputed base point by a scalar: scaled = scalar*base.
*
* @param [out] scaled The scaled point base*scalar
* @param [in] base The point to be scaled.
* @param [in] scalar The scalar to multiply by.
* scaled (out): The scaled point base*scalar
* base (in): The point to be scaled.
* scalar (in): The scalar to multiply by.
*/
void curve448_precomputed_scalarmul(curve448_point_t scaled,
const curve448_precomputed_s * base,
const curve448_scalar_t scalar);
/**
* @brief Multiply two base points by two scalars:
* scaled = scalar1*curve448_point_base + scalar2*base2.
/*
* Multiply two base points by two scalars:
* combo = scalar1*curve448_point_base + scalar2*base2.
*
* Otherwise equivalent to curve448_point_double_scalarmul, but may be
* faster at the expense of being variable time.
*
* @param [out] combo The linear combination scalar1*base + scalar2*base2.
* @param [in] scalar1 A first scalar to multiply by.
* @param [in] base2 A second point to be scaled.
* @param [in] scalar2 A second scalar to multiply by.
* combo (out): The linear combination scalar1*base + scalar2*base2.
* scalar1 (in): A first scalar to multiply by.
* base2 (in): A second point to be scaled.
* scalar2 (in): A second scalar to multiply by.
*
* @warning: This function takes variable time, and may leak the scalars
* used. It is designed for signature verification.
* Warning: This function takes variable time, and may leak the scalars used.
* It is designed for signature verification.
*/
void curve448_base_double_scalarmul_non_secret(curve448_point_t combo,
const curve448_scalar_t scalar1,
const curve448_point_t base2,
const curve448_scalar_t scalar2);
/**
* @brief Test that a point is valid, for debugging purposes.
/*
* Test that a point is valid, for debugging purposes.
*
* @param [in] to_test The point to test.
* @retval DECAF_TRUE The point is valid.
* @retval DECAF_FALSE The point is invalid.
* to_test (in): The point to test.
*
* Returns:
* DECAF_TRUE: The point is valid.
* DECAF_FALSE: The point is invalid.
*/
__owur decaf_bool_t curve448_point_valid(const curve448_point_t to_test);
/**
* @brief Overwrite scalar with zeros.
*/
/* Overwrite scalar with zeros. */
void curve448_scalar_destroy(curve448_scalar_t scalar);
/**
* @brief Overwrite point with zeros.
*/
/* Overwrite point with zeros. */
void curve448_point_destroy(curve448_point_t point);
#ifdef __cplusplus

View file

@ -16,31 +16,36 @@
#include "point_448.h"
static const decaf_word_t MONTGOMERY_FACTOR = (decaf_word_t) 0x3bd440fae918bc5;
static const curve448_scalar_t sc_p = { {{
SC_LIMB(0x2378c292ab5844f3),
SC_LIMB(0x216cc2728dc58f55),
SC_LIMB(0xc44edb49aed63690),
SC_LIMB(0xffffffff7cca23e9),
SC_LIMB(0xffffffffffffffff),
SC_LIMB(0xffffffffffffffff),
static const curve448_scalar_t sc_p = {
{
{
SC_LIMB(0x2378c292ab5844f3), SC_LIMB(0x216cc2728dc58f55),
SC_LIMB(0xc44edb49aed63690), SC_LIMB(0xffffffff7cca23e9),
SC_LIMB(0xffffffffffffffff), SC_LIMB(0xffffffffffffffff),
SC_LIMB(0x3fffffffffffffff)
}}
}, sc_r2 = { { {
}
}
}, sc_r2 = {
{
{
SC_LIMB(0xe3539257049b9b60), SC_LIMB(0x7af32c4bc1b195d9),
SC_LIMB(0x0d66de2388ea1859), SC_LIMB(0xae17cf725ee4d838),
SC_LIMB(0x1a9cc14ba3c47c44), SC_LIMB(0x2052bcb7e4d070af),
SC_LIMB(0x3402a939f823b729)
}}};
}
}
};
/* End of template stuff */
#define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */
const curve448_scalar_t curve448_scalar_one = { {{1}} }, curve448_scalar_zero = { { {
0}}};
const curve448_scalar_t curve448_scalar_one = {{{1}}};
const curve448_scalar_t curve448_scalar_zero = {{{0}}};
/** {extra,accum} - sub +? p
/*
* {extra,accum} - sub +? p
* Must have extra <= 1
*/
static void sc_subx(curve448_scalar_t out,
@ -67,8 +72,8 @@ static void sc_subx(curve448_scalar_t out,
}
}
static void sc_montmul(curve448_scalar_t out,
const curve448_scalar_t a, const curve448_scalar_t b)
static void sc_montmul(curve448_scalar_t out, const curve448_scalar_t a,
const curve448_scalar_t b)
{
unsigned int i, j;
decaf_word_t accum[DECAF_448_SCALAR_LIMBS + 1] = { 0 };
@ -104,24 +109,25 @@ static void sc_montmul(curve448_scalar_t out,
sc_subx(out, accum, sc_p, sc_p, hi_carry);
}
void curve448_scalar_mul(curve448_scalar_t out,
const curve448_scalar_t a, const curve448_scalar_t b)
void curve448_scalar_mul(curve448_scalar_t out, const curve448_scalar_t a,
const curve448_scalar_t b)
{
sc_montmul(out, a, b);
sc_montmul(out, out, sc_r2);
}
void curve448_scalar_sub(curve448_scalar_t out,
const curve448_scalar_t a, const curve448_scalar_t b)
void curve448_scalar_sub(curve448_scalar_t out, const curve448_scalar_t a,
const curve448_scalar_t b)
{
sc_subx(out, a->limb, b, sc_p, 0);
}
void curve448_scalar_add(curve448_scalar_t out,
const curve448_scalar_t a, const curve448_scalar_t b)
void curve448_scalar_add(curve448_scalar_t out, const curve448_scalar_t a,
const curve448_scalar_t b)
{
decaf_dword_t chain = 0;
unsigned int i;
for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
chain = (chain + a->limb[i]) + b->limb[i];
out->limb[i] = chain;
@ -135,27 +141,26 @@ static ossl_inline void scalar_decode_short(curve448_scalar_t s,
unsigned int nbytes)
{
unsigned int i, j, k = 0;
for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
decaf_word_t out = 0;
for (j = 0; j < sizeof(decaf_word_t) && k < nbytes; j++, k++) {
for (j = 0; j < sizeof(decaf_word_t) && k < nbytes; j++, k++)
out |= ((decaf_word_t) ser[k]) << (8 * j);
}
s->limb[i] = out;
}
}
decaf_error_t curve448_scalar_decode(curve448_scalar_t s,
const unsigned char
ser[DECAF_448_SCALAR_BYTES]
)
decaf_error_t curve448_scalar_decode(
curve448_scalar_t s,
const unsigned char ser[DECAF_448_SCALAR_BYTES])
{
unsigned int i;
decaf_dsword_t accum = 0;
scalar_decode_short(s, ser, DECAF_448_SCALAR_BYTES);
for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++)
accum = (accum + s->limb[i] - sc_p->limb[i]) >> WBITS;
}
/* Here accum == 0 or -1 */
curve448_scalar_mul(s, s, curve448_scalar_one); /* ham-handed reduce */
@ -209,11 +214,11 @@ void curve448_scalar_encode(unsigned char ser[DECAF_448_SCALAR_BYTES],
const curve448_scalar_t s)
{
unsigned int i, j, k = 0;
for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
for (j = 0; j < sizeof(decaf_word_t); j++, k++) {
for (j = 0; j < sizeof(decaf_word_t); j++, k++)
ser[k] = s->limb[i] >> (8 * j);
}
}
}
void curve448_scalar_halve(curve448_scalar_t out, const curve448_scalar_t a)
@ -226,8 +231,7 @@ void curve448_scalar_halve(curve448_scalar_t out, const curve448_scalar_t a)
out->limb[i] = chain;
chain >>= DECAF_WORD_BITS;
}
for (i = 0; i < DECAF_448_SCALAR_LIMBS - 1; i++) {
for (i = 0; i < DECAF_448_SCALAR_LIMBS - 1; i++)
out->limb[i] = out->limb[i] >> 1 | out->limb[i + 1] << (WBITS - 1);
}
out->limb[i] = out->limb[i] >> 1 | chain << (WBITS - 1);
}

View file

@ -162,7 +162,6 @@ static ossl_inline big_register_t br_is_zero(big_register_t x)
static ossl_inline big_register_t br_is_zero(big_register_t x)
{
return (big_register_t) _mm_cmpeq_epi32((__m128i) x, _mm_setzero_si128());
// return (big_register_t)(x == br_set_to_mask(0));
}
# elif defined(__ARM_NEON__)
static ossl_inline big_register_t br_is_zero(big_register_t x)
@ -196,7 +195,7 @@ static ossl_inline big_register_t br_is_zero(big_register_t x)
*/
static ossl_inline decaf_bool_t mask_to_bool(mask_t m)
{
return (decaf_sword_t) (sword_t) m;
return (decaf_sword_t)(sword_t)m;
}
static ossl_inline mask_t bool_to_mask(decaf_bool_t m)
@ -204,13 +203,13 @@ static ossl_inline mask_t bool_to_mask(decaf_bool_t m)
/* On most arches this will be optimized to a simple cast. */
mask_t ret = 0;
unsigned int i;
unsigned int limit = sizeof(decaf_bool_t) / sizeof(mask_t);
if (limit < 1)
limit = 1;
for (i = 0; i < limit; i++) {
for (i = 0; i < limit; i++)
ret |= ~word_is_zero(m >> (i * 8 * sizeof(word_t)));
}
return ret;
}