Manual formatting tweaks to Curve448 code

After running openssl-format-source, a lot of manual tweaks were still required. Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de> (Merged from https://github.com/openssl/openssl/pull/5105)
2017-12-04 13:30:53 +00:00 · 2017-12-04 13:30:53 +00:00 · 8d55f844b0
commit 8d55f844b0
parent 205fd63881
24 changed files with 1654 additions and 2709 deletions
--- a/crypto/ec/curve448/arch_32/f_impl.c
+++ b/crypto/ec/curve448/arch_32/f_impl.c
@ -24,13 +24,11 @@ void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
 {
    const uint32_t *a = as->limb, *b = bs->limb;
    uint32_t *c = cs->limb;
-
    uint64_t accum0 = 0, accum1 = 0, accum2 = 0;
    uint32_t mask = (1 << 28) - 1;
-
    uint32_t aa[8], bb[8];
-
    int i, j;
+
    for (i = 0; i < 8; i++) {
        aa[i] = a[i] + a[i + 8];
        bb[i] = b[i] + b[i + 8];
--- a/crypto/ec/curve448/arch_32/f_impl.h
+++ b/crypto/ec/curve448/arch_32/f_impl.h
@ -39,9 +39,8 @@ void gf_bias(gf a, int amt)
    unsigned int i;
    uint32_t co1 = ((1 << 28) - 1) * amt, co2 = co1 - amt;

-    for (i = 0; i < sizeof(*a) / sizeof(a->limb[0]); i++) {
+    for (i = 0; i < sizeof(*a) / sizeof(a->limb[0]); i++)
        a->limb[i] += (i == sizeof(*a) / sizeof(a->limb[0]) / 2) ? co2 : co1;
-    }
 }

 void gf_weak_reduce(gf a)
@ -51,8 +50,7 @@ void gf_weak_reduce(gf a)
    unsigned int i;

    a->limb[8] += tmp;
-    for (i = 15; i > 0; i--) {
+    for (i = 15; i > 0; i--)
        a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 28);
-    }
    a->limb[0] = (a->limb[0] & mask) + tmp;
 }
--- a/crypto/ec/curve448/arch_arm_32/arch_intrinsics.h
+++ b/crypto/ec/curve448/arch_arm_32/arch_intrinsics.h
@ -19,7 +19,8 @@ static __inline__ __attribute((always_inline, unused))
 uint32_t word_is_zero(uint32_t a)
 {
    uint32_t ret;
- asm("subs %0, %1, #1;\n\tsbc %0, %0, %0": "=r"(ret): "r"(a):"cc");
+
+    asm("subs %0, %1, #1;\n\tsbc %0, %0, %0": "=r"(ret): "r"(a):"cc");
    return ret;
 }

--- a/crypto/ec/curve448/arch_arm_32/f_impl.c
+++ b/crypto/ec/curve448/arch_arm_32/f_impl.c
@ -19,9 +19,10 @@ static inline void __attribute__ ((gnu_inline, always_inline))
 #ifdef  __ARMEL__
    uint32_t lo = *acc, hi = (*acc) >> 32;

-    __asm__ __volatile__("smlal %[lo], %[hi], %[a], %[b]":[lo] "+&r"(lo),
-                         [hi] "+&r"(hi)
-                         :[a] "r"(a),[b] "r"(b));
+    __asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
+                          : [lo]"+&r"(lo), [hi]"+&r"(hi)
+                          : [a]"r"(a), [b]"r"(b));
+

    *acc = lo + (((uint64_t)hi) << 32);
 #else
@ -35,9 +36,11 @@ static inline void __attribute__ ((gnu_inline, always_inline))
 #ifdef __ARMEL__
    uint32_t lo = *acc, hi = (*acc) >> 32;

-    __asm__ __volatile__("smlal %[lo], %[hi], %[a], %[b]":[lo] "+&r"(lo),
-                         [hi] "+&r"(hi)
-                         :[a] "r"(a),[b] "r"(2 * b));
+    __asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
+                          : [lo]"+&r"(lo), [hi]"+&r"(hi)
+                          : [a]"r"(a), [b]"r"(2 * b));
+
+

    *acc = lo + (((uint64_t)hi) << 32);
 #else
@ -51,9 +54,9 @@ static inline void __attribute__ ((gnu_inline, always_inline))
 #ifdef __ARMEL__
    uint32_t lo, hi;

-    __asm__ __volatile__("smull %[lo], %[hi], %[a], %[b]":[lo] "=&r"(lo),
-                         [hi] "=&r"(hi)
-                         :[a] "r"(a),[b] "r"(b));
+    __asm__ __volatile__ ("smull %[lo], %[hi], %[a], %[b]"
+                          : [lo]"=&r"(lo), [hi]"=&r"(hi)
+                          : [a]"r"(a), [b]"r"(b));

    *acc = lo + (((uint64_t)hi) << 32);
 #else
@ -68,8 +71,8 @@ static inline void __attribute__ ((gnu_inline, always_inline))
    uint32_t lo, hi;

    __asm__ /*__volatile__*/ ("smull %[lo], %[hi], %[a], %[b]"
- :                           [lo] "=&r"(lo),[hi] "=&r"(hi)
- :                           [a] "r"(a),[b] "r"(2 * b));
+                              : [lo]"=&r"(lo), [hi]"=&r"(hi)
+                              : [a]"r"(a), [b]"r"(2*b));

    *acc = lo + (((uint64_t)hi) << 32);
 #else
@ -729,16 +732,14 @@ void gf_sqr(gf_s * __restrict__ cs, const gf as)
 void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b)
 {
    uint32_t mask = (1ull << 28) - 1;
-    assert(b <= mask);
-
    const uint32_t *a = as->limb;
    uint32_t *c = cs->limb;
-
    uint64_t accum0, accum8;
-
    int i;
-
    uint32_t c0, c8, n0, n8;
+
+    assert(b <= mask);
+
    c0 = a[0];
    c8 = a[8];
    accum0 = widemul(b, c0);
--- a/crypto/ec/curve448/arch_arm_32/f_impl.h
+++ b/crypto/ec/curve448/arch_arm_32/f_impl.h
@ -23,10 +23,6 @@ void gf_add_RAW(gf out, const gf a, const gf b)
        ((uint32xn_t *) out)[i] =
            ((const uint32xn_t *)a)[i] + ((const uint32xn_t *)b)[i];
    }
-    /*
-     * for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-     * out->limb[i] = a->limb[i] + b->limb[i]; }
-     */
 }

 void gf_sub_RAW(gf out, const gf a, const gf b)
@ -35,10 +31,6 @@ void gf_sub_RAW(gf out, const gf a, const gf b)
        ((uint32xn_t *) out)[i] =
            ((const uint32xn_t *)a)[i] - ((const uint32xn_t *)b)[i];
    }
-    /*
-     * for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-     * out->limb[i] = a->limb[i] - b->limb[i]; }
-     */
 }

 void gf_bias(gf a, int amt)
@ -47,6 +39,7 @@ void gf_bias(gf a, int amt)
    uint32x4_t lo = { co1, co1, co1, co1 }, hi = {
    co2, co1, co1, co1};
    uint32x4_t *aa = (uint32x4_t *) a;
+
    aa[0] += lo;
    aa[1] += lo;
    aa[2] += hi;
@ -57,6 +50,7 @@ void gf_weak_reduce(gf a)
 {
    uint64_t mask = (1ull << 28) - 1;
    uint64_t tmp = a->limb[15] >> 28;
+
    a->limb[8] += tmp;
    for (unsigned int i = 15; i > 0; i--) {
        a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 28);
--- a/crypto/ec/curve448/arch_neon/arch_intrinsics.h
+++ b/crypto/ec/curve448/arch_neon/arch_intrinsics.h
@ -19,7 +19,7 @@ static __inline__ __attribute((always_inline, unused))
 uint32_t word_is_zero(uint32_t a)
 {
    uint32_t ret;
- __asm__("subs %0, %1, #1;\n\tsbc %0, %0, %0": "=r"(ret): "r"(a):"cc");
+    __asm__("subs %0, %1, #1;\n\tsbc %0, %0, %0": "=r"(ret): "r"(a):"cc");
    return ret;
 }

--- a/crypto/ec/curve448/arch_neon/f_impl.c
+++ b/crypto/ec/curve448/arch_neon/f_impl.c
--- a/crypto/ec/curve448/arch_neon/f_impl.h
+++ b/crypto/ec/curve448/arch_neon/f_impl.h
@ -15,11 +15,13 @@
 #define USE_NEON_PERM 1
 #define LIMBHI(x) ((x##ull)>>28)
 #define LIMBLO(x) ((x##ull)&((1ull<<28)-1))
-#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
-    {{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \
-      LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \
-      LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \
-      LIMBLO(d),LIMBLO(h), LIMBHI(d),LIMBHI(h)}}
+#define FIELD_LITERAL(a,b,c,d,e,f,g,h) { \
+    { \
+        LIMBLO(a), LIMBLO(e), LIMBHI(a), LIMBHI(e), LIMBLO(b), LIMBLO(f), \
+        LIMBHI(b), LIMBHI(f), LIMBLO(c), LIMBLO(g), LIMBHI(c), LIMBHI(g), \
+        LIMBLO(d), LIMBLO(h), LIMBHI(d), LIMBHI(h) \
+    } \
+}

 #define LIMB_PLACE_VALUE(i) 28

@ -37,17 +39,13 @@ void gf_sub_RAW(gf out, const gf a, const gf b)
        ((uint32xn_t *) out)[i] =
            ((const uint32xn_t *)a)[i] - ((const uint32xn_t *)b)[i];
    }
-    /*
-     * unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-     * out->limb[i] = a->limb[i] - b->limb[i]; }
-     */
 }

 void gf_bias(gf a, int amt)
 {
    uint32_t co1 = ((1ull << 28) - 1) * amt, co2 = co1 - amt;
-    uint32x4_t lo = { co1, co2, co1, co1 }, hi = {
-    co1, co1, co1, co1};
+    uint32x4_t lo = { co1, co2, co1, co1 };
+    uint32x4_t hi = { co1, co1, co1, co1 };
    uint32x4_t *aa = (uint32x4_t *) a;
    aa[0] += lo;
    aa[1] += hi;
@ -57,13 +55,11 @@ void gf_bias(gf a, int amt)

 void gf_weak_reduce(gf a)
 {
+    uint32x2_t *aa = (uint32x2_t *) a;
+    uint32x2_t vmask = { (1ull << 28) - 1, (1ull << 28) - 1};
+    uint32x2_t vm2 = { 0, -1}, tmp = vshr_n_u32(aa[7], 28);

-    uint32x2_t *aa = (uint32x2_t *) a, vmask = {
-    (1ull << 28) - 1, (1ull << 28) - 1}, vm2 = {
-    0, -1}, tmp = vshr_n_u32(aa[7], 28);
-
-    for (unsigned int i = 7; i >= 1; i--) {
+    for (unsigned int i = 7; i >= 1; i--)
        aa[i] = vsra_n_u32(aa[i] & vmask, aa[i - 1], 28);
-    }
    aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp & vm2);
 }
--- a/crypto/ec/curve448/arch_ref64/f_impl.c
+++ b/crypto/ec/curve448/arch_ref64/f_impl.c
@ -15,13 +15,11 @@ void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
 {
    const uint64_t *a = as->limb, *b = bs->limb;
    uint64_t *c = cs->limb;
-
    __uint128_t accum0 = 0, accum1 = 0, accum2;
    uint64_t mask = (1ull << 56) - 1;
-
    uint64_t aa[4], bb[4], bbb[4];
-
    unsigned int i;
+
    for (i = 0; i < 4; i++) {
        aa[i] = a[i] + a[i + 4];
        bb[i] = b[i] + b[i + 4];
@ -177,11 +175,10 @@ void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b)
 {
    const uint64_t *a = as->limb;
    uint64_t *c = cs->limb;
-
    __uint128_t accum0 = 0, accum4 = 0;
    uint64_t mask = (1ull << 56) - 1;
-
    int i;
+
    for (i = 0; i < 4; i++) {
        accum0 += widemul(b, a[i]);
        accum4 += widemul(b, a[i + 4]);
@ -204,10 +201,8 @@ void gf_sqr(gf_s * __restrict__ cs, const gf as)
 {
    const uint64_t *a = as->limb;
    uint64_t *c = cs->limb;
-
    __uint128_t accum0 = 0, accum1 = 0, accum2;
    uint64_t mask = (1ull << 56) - 1;
-
    uint64_t aa[4];

    /* For some reason clang doesn't vectorize this without prompting? */
--- a/crypto/ec/curve448/arch_ref64/f_impl.h
+++ b/crypto/ec/curve448/arch_ref64/f_impl.h
@ -17,18 +17,17 @@

 void gf_add_RAW(gf out, const gf a, const gf b)
 {
-    for (unsigned int i = 0; i < 8; i++) {
+    for (unsigned int i = 0; i < 8; i++)
        out->limb[i] = a->limb[i] + b->limb[i];
-    }
    gf_weak_reduce(out);
 }

 void gf_sub_RAW(gf out, const gf a, const gf b)
 {
    uint64_t co1 = ((1ull << 56) - 1) * 2, co2 = co1 - 2;
-    for (unsigned int i = 0; i < 8; i++) {
+
+    for (unsigned int i = 0; i < 8; i++)
        out->limb[i] = a->limb[i] - b->limb[i] + ((i == 4) ? co2 : co1);
-    }
    gf_weak_reduce(out);
 }

@ -42,9 +41,9 @@ void gf_weak_reduce(gf a)
 {
    uint64_t mask = (1ull << 56) - 1;
    uint64_t tmp = a->limb[7] >> 56;
+
    a->limb[4] += tmp;
-    for (unsigned int i = 7; i > 0; i--) {
+    for (unsigned int i = 7; i > 0; i--)
        a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 56);
-    }
    a->limb[0] = (a->limb[0] & mask) + tmp;
 }
--- a/crypto/ec/curve448/arch_x86_64/arch_intrinsics.h
+++ b/crypto/ec/curve448/arch_x86_64/arch_intrinsics.h
@ -10,287 +10,324 @@
 * Originally written by Mike Hamburg
 */
 #ifndef __ARCH_X86_64_ARCH_INTRINSICS_H__
-# define __ARCH_X86_64_ARCH_INTRINSICS_H__
+#define __ARCH_X86_64_ARCH_INTRINSICS_H__

-# define ARCH_WORD_BITS 64
+#define ARCH_WORD_BITS 64

-# include <openssl/e_os2.h>
+#include <openssl/e_os2.h>

 /* FUTURE: autogenerate */
 static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b)
 {
-    uint64_t c, d;
-# ifndef __BMI2__
-    __asm__ volatile
-     ("movq %[a], %%rax;" "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"cc");
-# else
-    __asm__ volatile
-     ("movq %[a], %%rdx;" "mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rdx");
-# endif
-    return (((__uint128_t) (d)) << 64) | c;
+  uint64_t c, d;
+
+  #ifndef __BMI2__
+      __asm__ volatile
+          ("movq %[a], %%rax;"
+           "mulq %[b];"
+           : [c]"=&a"(c), [d]"=d"(d)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rdx;"
+           "mulx %[b], %[c], %[d];"
+           : [c]"=r"(c), [d]"=r"(d)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx");
+  #endif
+  return (((__uint128_t)(d)) << 64) | c;
 }

 static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b)
 {
-    uint64_t c, d;
-# ifndef __BMI2__
-    __asm__ volatile
-     ("movq %[a], %%rax;" "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
-      :[b] "m"(*b),[a] "r"(a)
-      :"cc");
-# else
-    __asm__ volatile
-     ("mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
-      :[b] "m"(*b),[a] "d"(a));
-# endif
-    return (((__uint128_t) (d)) << 64) | c;
+  uint64_t c, d;
+
+  #ifndef __BMI2__
+      __asm__ volatile
+          ("movq %[a], %%rax;"
+           "mulq %[b];"
+           : [c]"=&a"(c), [d]"=d"(d)
+           : [b]"m"(*b), [a]"r"(a)
+           : "cc");
+  #else
+      __asm__ volatile
+          ("mulx %[b], %[c], %[d];"
+           : [c]"=r"(c), [d]"=r"(d)
+           : [b]"m"(*b), [a]"d"(a));
+  #endif
+  return (((__uint128_t)(d)) << 64) | c;
 }

 static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b)
 {
-    uint64_t c, d;
-# ifndef __BMI2__
-    __asm__ volatile
-     ("mulq %[b];":[c] "=a"(c),[d] "=d"(d)
-      :[b] "r"(b), "a"(a)
-      :"cc");
-# else
-    __asm__ volatile
-     ("mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
-      :[b] "r"(b),[a] "d"(a));
-# endif
-    return (((__uint128_t) (d)) << 64) | c;
+  uint64_t c, d;
+
+  #ifndef __BMI2__
+      __asm__ volatile
+          ("mulq %[b];"
+           : [c]"=a"(c), [d]"=d"(d)
+           : [b]"r"(b), "a"(a)
+           : "cc");
+  #else
+      __asm__ volatile
+          ("mulx %[b], %[c], %[d];"
+           : [c]"=r"(c), [d]"=r"(d)
+           : [b]"r"(b), [a]"d"(a));
+  #endif
+  return (((__uint128_t)(d)) << 64) | c;
 }

 static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b)
 {
-    uint64_t c, d;
-# ifndef __BMI2__
-    __asm__ volatile
-     ("movq %[a], %%rax; "
-      "addq %%rax, %%rax; " "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"cc");
-# else
-    __asm__ volatile
-     ("movq %[a], %%rdx;"
-      "leaq (,%%rdx,2), %%rdx;" "mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rdx");
-# endif
-    return (((__uint128_t) (d)) << 64) | c;
+  uint64_t c, d;
+
+  #ifndef __BMI2__
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "addq %%rax, %%rax; "
+           "mulq %[b];"
+           : [c]"=&a"(c), [d]"=d"(d)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rdx;"
+           "leaq (,%%rdx,2), %%rdx;"
+           "mulx %[b], %[c], %[d];"
+           : [c]"=r"(c), [d]"=r"(d)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx");
+  #endif
+  return (((__uint128_t)(d)) << 64) | c;
 }

-static __inline__ void mac(__uint128_t * acc, const uint64_t *a,
+static __inline__ void mac(__uint128_t *acc, const uint64_t *a,
                           const uint64_t *b)
 {
-    uint64_t lo = *acc, hi = *acc >> 64;
-
-# ifdef __BMI2__
-    uint64_t c, d;
-    __asm__ volatile
-     ("movq %[a], %%rdx; "
-      "mulx %[b], %[c], %[d]; "
-      "addq %[c], %[lo]; "
-      "adcq %[d], %[hi]; ":[c] "=&r"(c),[d] "=&r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rdx", "cc");
-# else
-    __asm__ volatile
-     ("movq %[a], %%rax; "
-      "mulq %[b]; "
-      "addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rax", "rdx", "cc");
-# endif
-
-    *acc = (((__uint128_t) (hi)) << 64) | lo;
+  uint64_t lo = *acc, hi = *acc >> 64;
+  
+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("movq %[a], %%rdx; "
+           "mulx %[b], %[c], %[d]; "
+           "addq %[c], %[lo]; "
+           "adcq %[d], %[hi]; "
+           : [c]"=&r"(c), [d]"=&r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx", "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "mulq %[b]; "
+           "addq %%rax, %[lo]; "
+           "adcq %%rdx, %[hi]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rax", "rdx", "cc");
+  #endif
+  
+  *acc = (((__uint128_t)(hi)) << 64) | lo;
 }

-static __inline__ void macac(__uint128_t * acc, __uint128_t * acc2,
+static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2,
                             const uint64_t *a, const uint64_t *b)
 {
-    uint64_t lo = *acc, hi = *acc >> 64;
-    uint64_t lo2 = *acc2, hi2 = *acc2 >> 64;
-
-# ifdef __BMI2__
-    uint64_t c, d;
-    __asm__ volatile
-     ("movq %[a], %%rdx; "
-      "mulx %[b], %[c], %[d]; "
-      "addq %[c], %[lo]; "
-      "adcq %[d], %[hi]; "
-      "addq %[c], %[lo2]; "
-      "adcq %[d], %[hi2]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi),
-      [lo2] "+r"(lo2),[hi2] "+r"(hi2)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rdx", "cc");
-# else
-    __asm__ volatile
-     ("movq %[a], %%rax; "
-      "mulq %[b]; "
-      "addq %%rax, %[lo]; "
-      "adcq %%rdx, %[hi]; "
-      "addq %%rax, %[lo2]; "
-      "adcq %%rdx, %[hi2]; ":[lo] "+r"(lo),[hi] "+r"(hi),[lo2] "+r"(lo2),
-      [hi2] "+r"(hi2)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rax", "rdx", "cc");
-# endif
-
-    *acc = (((__uint128_t) (hi)) << 64) | lo;
-    *acc2 = (((__uint128_t) (hi2)) << 64) | lo2;
+  uint64_t lo = *acc, hi = *acc >> 64;
+  uint64_t lo2 = *acc2, hi2 = *acc2 >> 64;
+  
+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("movq %[a], %%rdx; "
+           "mulx %[b], %[c], %[d]; "
+           "addq %[c], %[lo]; "
+           "adcq %[d], %[hi]; "
+           "addq %[c], %[lo2]; "
+           "adcq %[d], %[hi2]; "
+           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx", "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "mulq %[b]; "
+           "addq %%rax, %[lo]; "
+           "adcq %%rdx, %[hi]; "
+           "addq %%rax, %[lo2]; "
+           "adcq %%rdx, %[hi2]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rax", "rdx", "cc");
+  #endif
+  
+  *acc = (((__uint128_t)(hi)) << 64) | lo;
+  *acc2 = (((__uint128_t)(hi2)) << 64) | lo2;
 }

-static __inline__ void mac_rm(__uint128_t * acc, uint64_t a, const uint64_t *b)
+static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b)
 {
-    uint64_t lo = *acc, hi = *acc >> 64;
-
-# ifdef __BMI2__
-    uint64_t c, d;
-    __asm__ volatile
-     ("mulx %[b], %[c], %[d]; "
-      "addq %[c], %[lo]; "
-      "adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-      :[b] "m"(*b),[a] "d"(a)
-      :"cc");
-# else
-    __asm__ volatile
-     ("movq %[a], %%rax; "
-      "mulq %[b]; "
-      "addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
-      :[b] "m"(*b),[a] "r"(a)
-      :"rax", "rdx", "cc");
-# endif
-
-    *acc = (((__uint128_t) (hi)) << 64) | lo;
+  uint64_t lo = *acc, hi = *acc >> 64;
+  
+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("mulx %[b], %[c], %[d]; "
+           "addq %[c], %[lo]; "
+           "adcq %[d], %[hi]; "
+           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"d"(a)
+           : "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "mulq %[b]; "
+           "addq %%rax, %[lo]; "
+           "adcq %%rdx, %[hi]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"r"(a)
+           : "rax", "rdx", "cc");
+  #endif
+  
+  *acc = (((__uint128_t)(hi)) << 64) | lo;
 }

-static __inline__ void mac_rr(__uint128_t * acc, uint64_t a, const uint64_t b)
+static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b)
 {
-    uint64_t lo = *acc, hi = *acc >> 64;
-
-# ifdef __BMI2__
-    uint64_t c, d;
-    __asm__ volatile
-     ("mulx %[b], %[c], %[d]; "
-      "addq %[c], %[lo]; "
-      "adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-      :[b] "r"(b),[a] "d"(a)
-      :"cc");
-# else
-    __asm__ volatile
-     ("mulq %[b]; "
-      "addq %%rax, %[lo]; "
-      "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi), "+a"(a)
-      :[b] "r"(b)
-      :"rdx", "cc");
-# endif
-
-    *acc = (((__uint128_t) (hi)) << 64) | lo;
+  uint64_t lo = *acc, hi = *acc >> 64;
+  
+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("mulx %[b], %[c], %[d]; "
+           "addq %[c], %[lo]; "
+           "adcq %[d], %[hi]; "
+           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"r"(b), [a]"d"(a)
+           : "cc");
+  #else
+      __asm__ volatile
+          ("mulq %[b]; "
+           "addq %%rax, %[lo]; "
+           "adcq %%rdx, %[hi]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi), "+a"(a)
+           : [b]"r"(b)
+           : "rdx", "cc");
+  #endif
+  
+  *acc = (((__uint128_t)(hi)) << 64) | lo;
 }

-static __inline__ void mac2(__uint128_t * acc, const uint64_t *a,
+static __inline__ void mac2(__uint128_t *acc, const uint64_t *a,
                            const uint64_t *b)
 {
-    uint64_t lo = *acc, hi = *acc >> 64;
-
-# ifdef __BMI2__
-    uint64_t c, d;
-    __asm__ volatile
-     ("movq %[a], %%rdx; "
-      "addq %%rdx, %%rdx; "
-      "mulx %[b], %[c], %[d]; "
-      "addq %[c], %[lo]; "
-      "adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rdx", "cc");
-# else
-    __asm__ volatile
-     ("movq %[a], %%rax; "
-      "addq %%rax, %%rax; "
-      "mulq %[b]; "
-      "addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rax", "rdx", "cc");
-# endif
-
-    *acc = (((__uint128_t) (hi)) << 64) | lo;
+  uint64_t lo = *acc, hi = *acc >> 64;
+  
+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("movq %[a], %%rdx; "
+           "addq %%rdx, %%rdx; "
+           "mulx %[b], %[c], %[d]; "
+           "addq %[c], %[lo]; "
+           "adcq %[d], %[hi]; "
+           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx", "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "addq %%rax, %%rax; "
+           "mulq %[b]; "
+           "addq %%rax, %[lo]; "
+           "adcq %%rdx, %[hi]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rax", "rdx", "cc");
+  #endif
+  
+  *acc = (((__uint128_t)(hi)) << 64) | lo;
 }

-static __inline__ void msb(__uint128_t * acc, const uint64_t *a,
+static __inline__ void msb(__uint128_t *acc, const uint64_t *a,
                           const uint64_t *b)
 {
-    uint64_t lo = *acc, hi = *acc >> 64;
-# ifdef __BMI2__
-    uint64_t c, d;
-    __asm__ volatile
-     ("movq %[a], %%rdx; "
-      "mulx %[b], %[c], %[d]; "
-      "subq %[c], %[lo]; "
-      "sbbq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rdx", "cc");
-# else
-    __asm__ volatile
-     ("movq %[a], %%rax; "
-      "mulq %[b]; "
-      "subq %%rax, %[lo]; " "sbbq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rax", "rdx", "cc");
-# endif
-    *acc = (((__uint128_t) (hi)) << 64) | lo;
+  uint64_t lo = *acc, hi = *acc >> 64;
+
+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("movq %[a], %%rdx; "
+           "mulx %[b], %[c], %[d]; "
+           "subq %[c], %[lo]; "
+           "sbbq %[d], %[hi]; "
+           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx", "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "mulq %[b]; "
+           "subq %%rax, %[lo]; "
+           "sbbq %%rdx, %[hi]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rax", "rdx", "cc");
+  #endif
+  *acc = (((__uint128_t)(hi)) << 64) | lo;
 }

-static __inline__ void msb2(__uint128_t * acc, const uint64_t *a,
+static __inline__ void msb2(__uint128_t *acc, const uint64_t *a,
                            const uint64_t *b)
 {
-    uint64_t lo = *acc, hi = *acc >> 64;
-# ifdef __BMI2__
-    uint64_t c, d;
-    __asm__ volatile
-     ("movq %[a], %%rdx; "
-      "addq %%rdx, %%rdx; "
-      "mulx %[b], %[c], %[d]; "
-      "subq %[c], %[lo]; "
-      "sbbq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rdx", "cc");
-# else
-    __asm__ volatile
-     ("movq %[a], %%rax; "
-      "addq %%rax, %%rax; "
-      "mulq %[b]; "
-      "subq %%rax, %[lo]; " "sbbq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rax", "rdx", "cc");
-# endif
-    *acc = (((__uint128_t) (hi)) << 64) | lo;
+  uint64_t lo = *acc, hi = *acc >> 64;

+  #ifdef __BMI2__
+      uint64_t c,d;
+      __asm__ volatile
+          ("movq %[a], %%rdx; "
+           "addq %%rdx, %%rdx; "
+           "mulx %[b], %[c], %[d]; "
+           "subq %[c], %[lo]; "
+           "sbbq %[d], %[hi]; "
+           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rdx", "cc");
+  #else
+      __asm__ volatile
+          ("movq %[a], %%rax; "
+           "addq %%rax, %%rax; "
+           "mulq %[b]; "
+           "subq %%rax, %[lo]; "
+           "sbbq %%rdx, %[hi]; "
+           : [lo]"+r"(lo), [hi]"+r"(hi)
+           : [b]"m"(*b), [a]"m"(*a)
+           : "rax", "rdx", "cc");
+  #endif
+  *acc = (((__uint128_t)(hi))<<64) | lo;
+  
 }

-static __inline__ void mrs(__uint128_t * acc, const uint64_t *a,
+static __inline__ void mrs(__uint128_t *acc, const uint64_t *a,
                           const uint64_t *b)
 {
-    uint64_t c, d, lo = *acc, hi = *acc >> 64;
-    __asm__ volatile
-     ("movq %[a], %%rdx; "
-      "mulx %[b], %[c], %[d]; "
-      "subq %[lo], %[c]; "
-      "sbbq %[hi], %[d]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-      :[b] "m"(*b),[a] "m"(*a)
-      :"rdx", "cc");
-    *acc = (((__uint128_t) (d)) << 64) | c;
+  uint64_t c,d, lo = *acc, hi = *acc >> 64;
+  __asm__ volatile
+      ("movq %[a], %%rdx; "
+       "mulx %[b], %[c], %[d]; "
+       "subq %[lo], %[c]; "
+       "sbbq %[hi], %[d]; "
+       : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+       : [b]"m"(*b), [a]"m"(*a)
+       : "rdx", "cc");
+  *acc = (((__uint128_t)(d)) << 64) | c;
 }

 static __inline__ uint64_t word_is_zero(uint64_t x)
 {
-    __asm__ volatile ("neg %0; sbb %0, %0;":"+r" (x));
-    return ~x;
+  __asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x));
+  return ~x;
 }

 static inline uint64_t shrld(__uint128_t x, int n)
@ -298,4 +335,4 @@ static inline uint64_t shrld(__uint128_t x, int n)
    return x >> n;
 }

-#endif                          /* __ARCH_X86_64_ARCH_INTRINSICS_H__ */
+#endif /* __ARCH_X86_64_ARCH_INTRINSICS_H__ */
--- a/crypto/ec/curve448/arch_x86_64/f_impl.c
+++ b/crypto/ec/curve448/arch_x86_64/f_impl.c
@ -16,10 +16,8 @@ void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
 {
    const uint64_t *a = as->limb, *b = bs->limb;
    uint64_t *c = cs->limb;
-
    __uint128_t accum0 = 0, accum1 = 0, accum2;
    uint64_t mask = (1ull << 56) - 1;
-
    uint64_t aa[4] VECTOR_ALIGNED, bb[4] VECTOR_ALIGNED, bbb[4] VECTOR_ALIGNED;

    /* For some reason clang doesn't vectorize this without prompting? */
@ -202,10 +200,8 @@ void gf_sqr(gf_s * __restrict__ cs, const gf as)
 {
    const uint64_t *a = as->limb;
    uint64_t *c = cs->limb;
-
    __uint128_t accum0 = 0, accum1 = 0, accum2;
    uint64_t mask = (1ull << 56) - 1;
-
    uint64_t aa[4] VECTOR_ALIGNED;

    /* For some reason clang doesn't vectorize this without prompting? */
--- a/crypto/ec/curve448/arch_x86_64/f_impl.h
+++ b/crypto/ec/curve448/arch_x86_64/f_impl.h
@ -20,10 +20,6 @@ void gf_add_RAW(gf out, const gf a, const gf b)
        ((uint64xn_t *) out)[i] =
            ((const uint64xn_t *)a)[i] + ((const uint64xn_t *)b)[i];
    }
-    /*
-     * unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-     * out->limb[i] = a->limb[i] + b->limb[i]; }
-     */
 }

 void gf_sub_RAW(gf out, const gf a, const gf b)
@ -32,10 +28,6 @@ void gf_sub_RAW(gf out, const gf a, const gf b)
        ((uint64xn_t *) out)[i] =
            ((const uint64xn_t *)a)[i] - ((const uint64xn_t *)b)[i];
    }
-    /*
-     * unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-     * out->limb[i] = a->limb[i] - b->limb[i]; }
-     */
 }

 void gf_bias(gf a, int amt)
@ -68,6 +60,7 @@ void gf_weak_reduce(gf a)
    /* PERF: use pshufb/palignr if anyone cares about speed of this */
    uint64_t mask = (1ull << 56) - 1;
    uint64_t tmp = a->limb[7] >> 56;
+
    a->limb[4] += tmp;
    for (unsigned int i = 7; i > 0; i--) {
        a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 56);
--- a/crypto/ec/curve448/constant_time.h
+++ b/crypto/ec/curve448/constant_time.h
@ -36,32 +36,32 @@
 * Instead, we're putting our trust in the loop unroller and unswitcher.
 */

-/**
+/*
 * Unaligned big (vector?) register.
 */
 typedef struct {
    big_register_t unaligned;
 } __attribute__ ((packed)) unaligned_br_t;

-/**
+/*
 * Unaligned word register, for architectures where that matters.
 */
 typedef struct {
    word_t unaligned;
 } __attribute__ ((packed)) unaligned_word_t;

-/**
- * @brief Constant-time conditional swap.
+/*
+ * Constant-time conditional swap.
 *
 * If doswap, then swap elem_bytes between *a and *b.
 *
 * *a and *b must not alias.  Also, they must be at least as aligned
 * as their sizes, if the CPU cares about that sort of thing.
 */
-static __inline__ void
-    __attribute__ ((unused, always_inline))
-    constant_time_cond_swap(void *__restrict__ a_,
-                        void *__restrict__ b_, word_t elem_bytes, mask_t doswap)
+static ossl_inline void constant_time_cond_swap(void *__restrict__ a_,
+                                                void *__restrict__ b_,
+                                                word_t elem_bytes,
+                                                mask_t doswap)
 {
    word_t k;
    unsigned char *a = (unsigned char *)a_;
@ -72,19 +72,19 @@ static __inline__ void
         k += sizeof(big_register_t)) {
        if (elem_bytes % sizeof(big_register_t)) {
            /* unaligned */
-            big_register_t xor =
-                ((unaligned_br_t *) (&a[k]))->unaligned
-                ^ ((unaligned_br_t *) (&b[k]))->unaligned;
+            big_register_t xor = ((unaligned_br_t *) (&a[k]))->unaligned
+                                 ^ ((unaligned_br_t *) (&b[k]))->unaligned;
+
            xor &= br_mask;
-            ((unaligned_br_t *) (&a[k]))->unaligned ^= xor;
-            ((unaligned_br_t *) (&b[k]))->unaligned ^= xor;
+            ((unaligned_br_t *)(&a[k]))->unaligned ^= xor;
+            ((unaligned_br_t *)(&b[k]))->unaligned ^= xor;
        } else {
            /* aligned */
            big_register_t xor = *((big_register_t *) (&a[k]))
-                ^ *((big_register_t *) (&b[k]));
+                                 ^ *((big_register_t *) (&b[k]));
            xor &= br_mask;
-            *((big_register_t *) (&a[k])) ^= xor;
-            *((big_register_t *) (&b[k])) ^= xor;
+            *((big_register_t *)(&a[k])) ^= xor;
+            *((big_register_t *)(&b[k])) ^= xor;
        }
    }

@ -92,19 +92,18 @@ static __inline__ void
        for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
            if (elem_bytes % sizeof(word_t)) {
                /* unaligned */
-                word_t xor =
-                    ((unaligned_word_t *) (&a[k]))->unaligned
-                    ^ ((unaligned_word_t *) (&b[k]))->unaligned;
+                word_t xor = ((unaligned_word_t *)(&a[k]))->unaligned
+                             ^ ((unaligned_word_t *)(&b[k]))->unaligned;
+
                xor &= doswap;
-                ((unaligned_word_t *) (&a[k]))->unaligned ^= xor;
-                ((unaligned_word_t *) (&b[k]))->unaligned ^= xor;
+                ((unaligned_word_t *)(&a[k]))->unaligned ^= xor;
+                ((unaligned_word_t *)(&b[k]))->unaligned ^= xor;
            } else {
                /* aligned */
-                word_t xor = *((word_t *) (&a[k]))
-                    ^ *((word_t *) (&b[k]));
+                word_t xor = *((word_t *) (&a[k])) ^ *((word_t *) (&b[k]));
                xor &= doswap;
-                *((word_t *) (&a[k])) ^= xor;
-                *((word_t *) (&b[k])) ^= xor;
+                *((word_t *)(&a[k])) ^= xor;
+                *((word_t *)(&b[k])) ^= xor;
            }
        }
    }
@ -112,6 +111,7 @@ static __inline__ void
    if (elem_bytes % sizeof(word_t)) {
        for (; k < elem_bytes; k += 1) {
            unsigned char xor = a[k] ^ b[k];
+
            xor &= doswap;
            a[k] ^= xor;
            b[k] ^= xor;
@ -119,23 +119,23 @@ static __inline__ void
    }
 }

-/**
- * @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
+/*
+ * Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
 *
 * The table must be at least as aligned as elem_bytes.  The output must be word aligned,
 * and if the input size is vector aligned it must also be vector aligned.
 *
 * The table and output must not alias.
 */
-static __inline__ void
-    __attribute__ ((unused, always_inline))
-    constant_time_lookup(void *__restrict__ out_,
-                     const void *table_,
-                     word_t elem_bytes, word_t n_table, word_t idx)
+static ossl_inline void constant_time_lookup(void *__restrict__ out_,
+                                             const void *table_,
+                                             word_t elem_bytes,
+                                             word_t n_table,
+                                             word_t idx)
 {
    big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);

-    /* Can't do pointer arithmetic on void* */
+    /* Can't do pointer arithmetic on void * */
    unsigned char *out = (unsigned char *)out_;
    const unsigned char *table = (const unsigned char *)table_;
    word_t j, k;
@ -149,16 +149,15 @@ static __inline__ void
             k += sizeof(big_register_t)) {
            if (elem_bytes % sizeof(big_register_t)) {
                /* unaligned */
-                ((unaligned_br_t *) (out + k))->unaligned
-                    |=
-                    br_mask &
-                    ((const unaligned_br_t
-                      *)(&table[k + j * elem_bytes]))->unaligned;
+                ((unaligned_br_t *)(out + k))->unaligned |=
+                        br_mask
+                        & ((const unaligned_br_t *)
+                           (&table[k + j * elem_bytes]))->unaligned;
            } else {
                /* aligned */
-                *(big_register_t *) (out + k) |=
-                    br_mask & *(const big_register_t
-                                *)(&table[k + j * elem_bytes]);
+                *(big_register_t *)(out + k) |=
+                        br_mask
+                        & *(const big_register_t *)(&table[k + j * elem_bytes]);
            }
        }

@ -167,14 +166,15 @@ static __inline__ void
            for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
                if (elem_bytes % sizeof(word_t)) {
                    /* input unaligned, output aligned */
-                    *(word_t *) (out + k) |=
-                        mask &
-                        ((const unaligned_word_t
-                          *)(&table[k + j * elem_bytes]))->unaligned;
+                    *(word_t *)(out + k) |=
+                            mask
+                            & ((const unaligned_word_t *)
+                               (&table[k + j * elem_bytes]))->unaligned;
                } else {
                    /* aligned */
-                    *(word_t *) (out + k) |=
-                        mask & *(const word_t *)(&table[k + j * elem_bytes]);
+                    *(word_t *)(out + k) |=
+                            mask
+                            & *(const word_t *)(&table[k + j * elem_bytes]);
                }
            }
        }
@ -187,8 +187,8 @@ static __inline__ void
    }
 }

-/**
- * @brief Constant-time a = mask ? bTrue : bFalse.
+/*
+ * Constant-time a = mask ? bTrue : bFalse.
 *
 * The input and output must be at least as aligned as alignment_bytes
 * or their size, whichever is smaller.
@ -196,12 +196,12 @@ static __inline__ void
 * Note that the output is not __restrict__, but if it overlaps either
 * input, it must be equal and not partially overlap.
 */
-static __inline__ void
-    __attribute__ ((unused, always_inline))
-    constant_time_select(void *a_,
-                     const void *bFalse_,
-                     const void *bTrue_,
-                     word_t elem_bytes, mask_t mask, size_t alignment_bytes)
+static ossl_inline void constant_time_select(void *a_,
+                                             const void *bFalse_,
+                                             const void *bTrue_,
+                                             word_t elem_bytes,
+                                             mask_t mask,
+                                             size_t alignment_bytes)
 {
    unsigned char *a = (unsigned char *)a_;
    const unsigned char *bTrue = (const unsigned char *)bTrue_;
@ -215,15 +215,15 @@ static __inline__ void
         k += sizeof(big_register_t)) {
        if (alignment_bytes % sizeof(big_register_t)) {
            /* unaligned */
-            ((unaligned_br_t *) (&a[k]))->unaligned =
-                (br_mask & ((const unaligned_br_t *)(&bTrue[k]))->unaligned)
-                | (~br_mask &
-                   ((const unaligned_br_t *)(&bFalse[k]))->unaligned);
+            ((unaligned_br_t *)(&a[k]))->unaligned =
+                    (br_mask & ((const unaligned_br_t *)(&bTrue[k]))->unaligned)
+                    | (~br_mask
+                       & ((const unaligned_br_t *)(&bFalse[k]))->unaligned);
        } else {
            /* aligned */
            *(big_register_t *) (a + k) =
-                (br_mask & *(const big_register_t *)(&bTrue[k]))
-                | (~br_mask & *(const big_register_t *)(&bFalse[k]));
+                    (br_mask & *(const big_register_t *)(&bTrue[k]))
+                    | (~br_mask & *(const big_register_t *)(&bFalse[k]));
        }
    }

--- a/crypto/ec/curve448/curve448.c
+++ b/crypto/ec/curve448/curve448.c
@ -28,16 +28,13 @@
 #define DECAF_WNAF_VAR_TABLE_BITS 3

 static const int EDWARDS_D = -39081;
-static const curve448_scalar_t precomputed_scalarmul_adjustment = { {{
-                                                                      SC_LIMB
-                                                                      (0xc873d6d54a7bb0cf),
-                                                                      SC_LIMB
-                                                                      (0xe933d8d723a70aad),
-                                                                      SC_LIMB
-                                                                      (0xbb124b65129c96fd),
-                                                                      SC_LIMB
-                                                                      (0x00000008335dc163)
-                                                                      }}
+static const curve448_scalar_t precomputed_scalarmul_adjustment = {
+    {
+        {
+            SC_LIMB(0xc873d6d54a7bb0cf), SC_LIMB(0xe933d8d723a70aad),
+            SC_LIMB(0xbb124b65129c96fd), SC_LIMB(0x00000008335dc163)
+        }
+    }
 };

 const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES] = { 0x05 };
@ -69,7 +66,7 @@ extern const gf curve448_precomputed_base_as_fe[];
 const curve448_precomputed_s *curve448_precomputed_base =
    (const curve448_precomputed_s *)&curve448_precomputed_base_as_fe;

-/** Inverse. */
+/* Inverse. */
 static void gf_invert(gf y, const gf x, int assert_nonzero)
 {
    mask_t ret;
@ -89,11 +86,11 @@ static void gf_invert(gf y, const gf x, int assert_nonzero)
 const curve448_point_t curve448_point_identity =
    { {{{{0}}}, {{{1}}}, {{{1}}}, {{{0}}}} };

-static void
-point_double_internal(curve448_point_t p,
-                      const curve448_point_t q, int before_double)
+static void point_double_internal(curve448_point_t p, const curve448_point_t q,
+                                  int before_double)
 {
    gf a, b, c, d;
+
    gf_sqr(c, q->x);
    gf_sqr(a, q->y);
    gf_add_nr(d, c, a);         /* 2+e */
@ -136,6 +133,7 @@ static void pt_to_pniels(pniels_t b, const curve448_point_t a)
 static void pniels_to_pt(curve448_point_t e, const pniels_t d)
 {
    gf eu;
+
    gf_add(eu, d->n->b, d->n->a);
    gf_sub(e->y, d->n->b, d->n->a);
    gf_mul(e->t, e->y, eu);
@ -152,10 +150,11 @@ static void niels_to_pt(curve448_point_t e, const niels_t n)
    gf_copy(e->z, ONE);
 }

-static void
-add_niels_to_pt(curve448_point_t d, const niels_t e, int before_double)
+static void add_niels_to_pt(curve448_point_t d, const niels_t e,
+                            int before_double)
 {
    gf a, b, c;
+
    gf_sub_nr(b, d->y, d->x);   /* 3+e */
    gf_mul(a, e->a, b);
    gf_add_nr(b, d->x, d->y);   /* 2+e */
@ -172,8 +171,8 @@ add_niels_to_pt(curve448_point_t d, const niels_t e, int before_double)
        gf_mul(d->t, b, c);
 }

-static void
-sub_niels_from_pt(curve448_point_t d, const niels_t e, int before_double)
+static void sub_niels_from_pt(curve448_point_t d, const niels_t e,
+                              int before_double)
 {
    gf a, b, c;
    gf_sub_nr(b, d->y, d->x);   /* 3+e */
@ -192,19 +191,21 @@ sub_niels_from_pt(curve448_point_t d, const niels_t e, int before_double)
        gf_mul(d->t, b, c);
 }

-static void
-add_pniels_to_pt(curve448_point_t p, const pniels_t pn, int before_double)
+static void add_pniels_to_pt(curve448_point_t p, const pniels_t pn,
+                             int before_double)
 {
    gf L0;
+
    gf_mul(L0, p->z, pn->z);
    gf_copy(p->z, L0);
    add_niels_to_pt(p, pn->n, before_double);
 }

-static void
-sub_pniels_from_pt(curve448_point_t p, const pniels_t pn, int before_double)
+static void sub_pniels_from_pt(curve448_point_t p, const pniels_t pn,
+                               int before_double)
 {
    gf L0;
+
    gf_mul(L0, p->z, pn->z);
    gf_copy(p->z, L0);
    sub_niels_from_pt(p, pn->n, before_double);
@ -244,9 +245,9 @@ decaf_bool_t curve448_point_valid(const curve448_point_t p)
    return mask_to_bool(out);
 }

-static ossl_inline void
-constant_time_lookup_niels(niels_s * __restrict__ ni,
-                           const niels_t * table, int nelts, int idx)
+static ossl_inline void constant_time_lookup_niels(niels_s * __restrict__ ni,
+                                                   const niels_t * table,
+                                                   int nelts, int idx)
 {
    constant_time_lookup(ni, table, sizeof(niels_s), nelts, idx);
 }
@ -300,10 +301,9 @@ void curve448_precomputed_scalarmul(curve448_point_t out,
    OPENSSL_cleanse(scalar1x, sizeof(scalar1x));
 }

-void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
-                                                       enc
-                                                       [DECAF_EDDSA_448_PUBLIC_BYTES],
-                                                       const curve448_point_t p)
+void curve448_point_mul_by_ratio_and_encode_like_eddsa(
+                                    uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES],
+                                    const curve448_point_t p)
 {

    /* The point is now on the twisted curve.  Move it to untwisted. */
@ -314,6 +314,7 @@ void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
    {
        /* 4-isogeny: 2xy/(y^+x^2), (y^2-x^2)/(2z^2-y^2+x^2) */
        gf u;
+
        gf_sqr(x, q->x);
        gf_sqr(t, q->y);
        gf_add(u, x, t);
@ -347,12 +348,9 @@ void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
    curve448_point_destroy(q);
 }

-decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(curve448_point_t
-                                                                p,
-                                                                const uint8_t
-                                                                enc
-                                                                [DECAF_EDDSA_448_PUBLIC_BYTES]
-    )
+decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(
+                                curve448_point_t p,
+                                const uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES])
 {
    uint8_t enc2[DECAF_EDDSA_448_PUBLIC_BYTES];
    mask_t low;
@ -411,8 +409,7 @@ decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(curve448_point_t

 decaf_error_t decaf_x448(uint8_t out[X_PUBLIC_BYTES],
                         const uint8_t base[X_PUBLIC_BYTES],
-                         const uint8_t scalar[X_PRIVATE_BYTES]
-    )
+                         const uint8_t scalar[X_PRIVATE_BYTES])
 {
    gf x1, x2, z2, x3, z3, t1, t2;
    int t;
@ -487,8 +484,7 @@ decaf_error_t decaf_x448(uint8_t out[X_PUBLIC_BYTES],
 /* Thanks Johan Pascal */
 void decaf_ed448_convert_public_key_to_x448(uint8_t x[DECAF_X448_PUBLIC_BYTES],
                                            const uint8_t
-                                            ed[DECAF_EDDSA_448_PUBLIC_BYTES]
-    )
+                                            ed[DECAF_EDDSA_448_PUBLIC_BYTES])
 {
    gf y;
    const uint8_t mask = (uint8_t)(0xFE << (7));
@ -527,8 +523,7 @@ void curve448_point_mul_by_ratio_and_encode_like_x448(uint8_t
 }

 void decaf_x448_derive_public_key(uint8_t out[X_PUBLIC_BYTES],
-                                  const uint8_t scalar[X_PRIVATE_BYTES]
-    )
+                                  const uint8_t scalar[X_PRIVATE_BYTES])
 {
    /* Scalar conditioning */
    uint8_t scalar2[X_PRIVATE_BYTES];
@ -553,17 +548,15 @@ void decaf_x448_derive_public_key(uint8_t out[X_PUBLIC_BYTES],
    curve448_point_destroy(p);
 }

-/**
- * @cond internal
- * Control for variable-time scalar multiply algorithms.
- */
+/* Control for variable-time scalar multiply algorithms. */
 struct smvt_control {
    int power, addend;
 };

-static int recode_wnaf(struct smvt_control *control, /* [nbits/(table_bits+1) +
-                                                      * 3] */
-                       const curve448_scalar_t scalar, unsigned int table_bits)
+static int recode_wnaf(struct smvt_control *control,
+                       /* [nbits/(table_bits + 1) + 3] */
+                       const curve448_scalar_t scalar,
+                       unsigned int table_bits)
 {
    unsigned int table_size = DECAF_448_SCALAR_BITS / (table_bits + 1) + 3;
    int position = table_size - 1; /* at the end */
@ -587,16 +580,13 @@ static int recode_wnaf(struct smvt_control *control, /* [nbits/(table_bits+1) +
    for (w = 1; w < (DECAF_448_SCALAR_BITS - 1) / 16 + 3; w++) {
        if (w < (DECAF_448_SCALAR_BITS - 1) / 16 + 1) {
            /* Refill the 16 high bits of current */
-            current +=
-                (uint32_t)((scalar->limb[w / B_OVER_16] >> (16 *
-                                                            (w %
-                                                             B_OVER_16))) <<
-                           16);
+            current += (uint32_t)((scalar->limb[w / B_OVER_16]
+                       >> (16 * (w %  B_OVER_16))) << 16);
        }

        while (current & 0xFFFF) {
-            uint32_t pos = __builtin_ctz((uint32_t)current), odd =
-                (uint32_t)current >> pos;
+            uint32_t pos = __builtin_ctz((uint32_t)current);
+            uint32_t odd = (uint32_t)current >> pos;
            int32_t delta = odd & mask;

            assert(position >= 0);
@ -619,9 +609,9 @@ static int recode_wnaf(struct smvt_control *control, /* [nbits/(table_bits+1) +
    return n - 1;
 }

-static void
-prepare_wnaf_table(pniels_t * output,
-                   const curve448_point_t working, unsigned int tbits)
+static void prepare_wnaf_table(pniels_t * output,
+                               const curve448_point_t working,
+                               unsigned int tbits)
 {
    curve448_point_t tmp;
    int i;
@ -698,12 +688,12 @@ void curve448_base_double_scalarmul_non_secret(curve448_point_t combo,

            if (control_var[contv].addend > 0) {
                add_pniels_to_pt(combo,
-                                 precmp_var[control_var[contv].addend >> 1], i
-                                 && !cp);
+                                 precmp_var[control_var[contv].addend >> 1],
+                                 i && !cp);
            } else {
                sub_pniels_from_pt(combo,
-                                   precmp_var[(-control_var[contv].addend) >>
-                                              1], i && !cp);
+                                   precmp_var[(-control_var[contv].addend)
+                                              >> 1], i && !cp);
            }
            contv++;
        }
@ -713,8 +703,8 @@ void curve448_base_double_scalarmul_non_secret(curve448_point_t combo,

            if (control_pre[contp].addend > 0) {
                add_niels_to_pt(combo,
-                                curve448_wnaf_base[control_pre[contp].addend >>
-                                                   1], i);
+                                curve448_wnaf_base[control_pre[contp].addend
+                                                   >> 1], i);
            } else {
                sub_niels_from_pt(combo,
                                  curve448_wnaf_base[(-control_pre
--- a/crypto/ec/curve448/curve448_tables.c
+++ b/crypto/ec/curve448/curve448_tables.c
--- a/crypto/ec/curve448/curve448utils.h
+++ b/crypto/ec/curve448/curve448utils.h
@ -27,48 +27,59 @@ extern "C" {
 * with arch_arm32.
 */
 # ifndef DECAF_WORD_BITS
-#  if (defined(__ILP64__) || defined(__amd64__) || defined(__x86_64__) || (((__UINT_FAST32_MAX__)>>30)>>30))
-#   define DECAF_WORD_BITS 64      /**< The number of bits in a word */
+#  if (defined(__ILP64__) || defined(__amd64__) || defined(__x86_64__) \
+      || (((__UINT_FAST32_MAX__)>>30)>>30))
+#   define DECAF_WORD_BITS 64      /* The number of bits in a word */
 #  else
-#   define DECAF_WORD_BITS 32      /**< The number of bits in a word */
+#   define DECAF_WORD_BITS 32      /* The number of bits in a word */
 #  endif
 # endif

 # if DECAF_WORD_BITS == 64
-typedef uint64_t decaf_word_t;      /**< Word size for internal computations */
-typedef int64_t decaf_sword_t;      /**< Signed word size for internal computations */
-typedef uint64_t decaf_bool_t;      /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
-typedef __uint128_t decaf_dword_t;  /**< Double-word size for internal computations */
-typedef __int128_t decaf_dsword_t;  /**< Signed double-word size for internal computations */
-# elif DECAF_WORD_BITS == 32        /**< The number of bits in a word */
-typedef uint32_t decaf_word_t;      /**< Word size for internal computations */
-typedef int32_t decaf_sword_t;      /**< Signed word size for internal computations */
-typedef uint32_t decaf_bool_t;      /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
-typedef uint64_t decaf_dword_t;     /**< Double-word size for internal computations */
-typedef int64_t decaf_dsword_t;     /**< Signed double-word size for internal computations */
+/* Word size for internal computations */
+typedef uint64_t decaf_word_t;
+/* Signed word size for internal computations */
+typedef int64_t decaf_sword_t;
+/* "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
+typedef uint64_t decaf_bool_t;
+/* Double-word size for internal computations */
+typedef __uint128_t decaf_dword_t;
+/* Signed double-word size for internal computations */
+typedef __int128_t decaf_dsword_t;
+# elif DECAF_WORD_BITS == 32
+/* Word size for internal computations */
+typedef uint32_t decaf_word_t;
+/* Signed word size for internal computations */
+typedef int32_t decaf_sword_t;
+/* "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
+typedef uint32_t decaf_bool_t;
+/* Double-word size for internal computations */
+typedef uint64_t decaf_dword_t;
+/* Signed double-word size for internal computations */
+typedef int64_t decaf_dsword_t;
 # else
 #  error "Only supporting DECAF_WORD_BITS = 32 or 64 for now"
 # endif

-/** DECAF_TRUE = -1 so that DECAF_TRUE & x = x */
+/* DECAF_TRUE = -1 so that DECAF_TRUE & x = x */
 static const decaf_bool_t DECAF_TRUE = -(decaf_bool_t) 1;

-/** DECAF_FALSE = 0 so that DECAF_FALSE & x = 0 */
+/* DECAF_FALSE = 0 so that DECAF_FALSE & x = 0 */
 static const decaf_bool_t DECAF_FALSE = 0;

-/** Another boolean type used to indicate success or failure. */
+/* Another boolean type used to indicate success or failure. */
 typedef enum {
    DECAF_SUCCESS = -1, /**< The operation succeeded. */
    DECAF_FAILURE = 0   /**< The operation failed. */
 } decaf_error_t;

-/** Return success if x is true */
+/* Return success if x is true */
 static ossl_inline decaf_error_t decaf_succeed_if(decaf_bool_t x)
 {
    return (decaf_error_t) x;
 }

-/** Return DECAF_TRUE iff x == DECAF_SUCCESS */
+/* Return DECAF_TRUE iff x == DECAF_SUCCESS */
 static ossl_inline decaf_bool_t decaf_successful(decaf_error_t e)
 {
    decaf_dword_t w = ((decaf_word_t) e) ^ ((decaf_word_t) DECAF_SUCCESS);
--- a/crypto/ec/curve448/ed448.h
+++ b/crypto/ec/curve448/ed448.h
@ -19,111 +19,106 @@
 extern "C" {
 #endif

-/** Number of bytes in an EdDSA public key. */
+/* Number of bytes in an EdDSA public key. */
 # define DECAF_EDDSA_448_PUBLIC_BYTES 57

-/** Number of bytes in an EdDSA private key. */
+/* Number of bytes in an EdDSA private key. */
 # define DECAF_EDDSA_448_PRIVATE_BYTES DECAF_EDDSA_448_PUBLIC_BYTES

-/** Number of bytes in an EdDSA private key. */
-# define DECAF_EDDSA_448_SIGNATURE_BYTES (DECAF_EDDSA_448_PUBLIC_BYTES + DECAF_EDDSA_448_PRIVATE_BYTES)
+/* Number of bytes in an EdDSA signature. */
+# define DECAF_EDDSA_448_SIGNATURE_BYTES (DECAF_EDDSA_448_PUBLIC_BYTES + \
+                                          DECAF_EDDSA_448_PRIVATE_BYTES)

-/** Does EdDSA support non-contextual signatures? */
+/* Does EdDSA support non-contextual signatures? */
 # define DECAF_EDDSA_448_SUPPORTS_CONTEXTLESS_SIGS 0

-/** EdDSA encoding ratio. */
+/* EdDSA encoding ratio. */
 # define DECAF_448_EDDSA_ENCODE_RATIO 4

-/** EdDSA decoding ratio. */
+/* EdDSA decoding ratio. */
 # define DECAF_448_EDDSA_DECODE_RATIO (4 / 4)

-/**
- * @brief EdDSA key generation.  This function uses a different (non-Decaf)
- * encoding.
+/*
+ * EdDSA key generation.  This function uses a different (non-Decaf) encoding.
 *
- * @param [out] pubkey The public key.
- * @param [in] privkey The private key.
+ * pubkey (out): The public key.
+ * privkey (in): The private key.
 */
-decaf_error_t decaf_ed448_derive_public_key(uint8_t
-                                            pubkey
-                                            [DECAF_EDDSA_448_PUBLIC_BYTES],
-                                            const uint8_t
-                                            privkey
-                                            [DECAF_EDDSA_448_PRIVATE_BYTES]
-    );
+decaf_error_t decaf_ed448_derive_public_key(
+                        uint8_t pubkey [DECAF_EDDSA_448_PUBLIC_BYTES],
+                        const uint8_t privkey [DECAF_EDDSA_448_PRIVATE_BYTES]);

-/**
- * @brief EdDSA signing.
+/*
+ * EdDSA signing.
 *
- * @param [out] signature The signature.
- * @param [in] privkey The private key.
- * @param [in] pubkey The public key.
- * @param [in] message The message to sign.
- * @param [in] message_len The length of the message.
- * @param [in] prehashed Nonzero if the message is actually the hash of something you want to sign.
- * @param [in] context A "context" for this signature of up to 255 bytes.
- * @param [in] context_len Length of the context.
+ * signature (out): The signature.
+ * privkey (in): The private key.
+ * pubkey (in): The public key.
+ * message (in): The message to sign.
+ * message_len (in): The length of the message.
+ * prehashed (in): Nonzero if the message is actually the hash of something
+ *                 you want to sign.
+ * context (in): A "context" for this signature of up to 255 bytes.
+ * context_len (in): Length of the context.
 *
- * @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
- * messages, at least without some very careful protocol-level disambiguation.  For Ed448 it is
- * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
- * you no seat belt.
+ * For Ed25519, it is unsafe to use the same key for both prehashed and
+ * non-prehashed messages, at least without some very careful protocol-level
+ * disambiguation.  For Ed448 it is safe.  The C++ wrapper is designed to make
+ * it harder to screw this up, but this C code gives you no seat belt.
 */
-decaf_error_t decaf_ed448_sign(uint8_t
-                               signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-                               const uint8_t
-                               privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
-                               const uint8_t
-                               pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-                               const uint8_t *message, size_t message_len,
-                               uint8_t prehashed, const uint8_t *context,
-                               size_t context_len)
-    __attribute__ ((nonnull(1, 2, 3)));
+decaf_error_t decaf_ed448_sign(
+                        uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                        const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
+                        const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                        const uint8_t *message, size_t message_len,
+                        uint8_t prehashed, const uint8_t *context,
+                        size_t context_len)
+                        __attribute__ ((nonnull(1, 2, 3)));

-/**
- * @brief EdDSA signing with prehash.
+/*
+ * EdDSA signing with prehash.
 *
- * @param [out] signature The signature.
- * @param [in] privkey The private key.
- * @param [in] pubkey The public key.
- * @param [in] hash The hash of the message.  This object will not be modified by the call.
- * @param [in] context A "context" for this signature of up to 255 bytes.  Must be the same as what was used for the prehash.
- * @param [in] context_len Length of the context.
+ * signature (out): The signature.
+ * privkey (in): The private key.
+ * pubkey (in): The public key.
+ * hash (in): The hash of the message.  This object will not be modified by the
+ *            call.
+ * context (in): A "context" for this signature of up to 255 bytes.  Must be the
+ *               same as what was used for the prehash.
+ * context_len (in): Length of the context.
 *
- * @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
- * messages, at least without some very careful protocol-level disambiguation.  For Ed448 it is
- * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
- * you no seat belt.
+ * For Ed25519, it is unsafe to use the same key for both prehashed and
+ * non-prehashed messages, at least without some very careful protocol-level
+ * disambiguation.  For Ed448 it is safe.  The C++ wrapper is designed to make
+ * it harder to screw this up, but this C code gives you no seat belt.
 */
-decaf_error_t decaf_ed448_sign_prehash(uint8_t
-                                       signature
-                                       [DECAF_EDDSA_448_SIGNATURE_BYTES],
-                                       const uint8_t
-                                       privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
-                                       const uint8_t
-                                       pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-                                       const uint8_t hash[64],
-                                       const uint8_t *context,
-                                       size_t context_len)
-    __attribute__ ((nonnull(1, 2, 3, 4)));
+decaf_error_t decaf_ed448_sign_prehash(
+                        uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                        const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
+                        const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                        const uint8_t hash[64],
+                        const uint8_t *context,
+                        size_t context_len)
+                        __attribute__ ((nonnull(1, 2, 3, 4)));

-/**
- * @brief EdDSA signature verification.
+/*
+ * EdDSA signature verification.
 *
 * Uses the standard (i.e. less-strict) verification formula.
 *
- * @param [in] signature The signature.
- * @param [in] pubkey The public key.
- * @param [in] message The message to verify.
- * @param [in] message_len The length of the message.
- * @param [in] prehashed Nonzero if the message is actually the hash of something you want to verify.
- * @param [in] context A "context" for this signature of up to 255 bytes.
- * @param [in] context_len Length of the context.
+ * signature (in): The signature.
+ * pubkey (in): The public key.
+ * message (in): The message to verify.
+ * message_len (in): The length of the message.
+ * prehashed (in): Nonzero if the message is actually the hash of something you
+ *                 want to verify.
+ * context (in): A "context" for this signature of up to 255 bytes.
+ * context_len (in): Length of the context.
 *
- * @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
- * messages, at least without some very careful protocol-level disambiguation.  For Ed448 it is
- * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
- * you no seat belt.
+ * For Ed25519, it is unsafe to use the same key for both prehashed and
+ * non-prehashed messages, at least without some very careful protocol-level
+ * disambiguation.  For Ed448 it is safe.  The C++ wrapper is designed to make
+ * it harder to screw this up, but this C code gives you no seat belt.
 */
 decaf_error_t decaf_ed448_verify(const uint8_t
                                 signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
@ -132,36 +127,36 @@ decaf_error_t decaf_ed448_verify(const uint8_t
                                 const uint8_t *message, size_t message_len,
                                 uint8_t prehashed, const uint8_t *context,
                                 uint8_t context_len)
-    __attribute__ ((nonnull(1, 2)));
+                                 __attribute__ ((nonnull(1, 2)));

-/**
- * @brief EdDSA signature verification.
+/*
+ * EdDSA signature verification.
 *
 * Uses the standard (i.e. less-strict) verification formula.
 *
- * @param [in] signature The signature.
- * @param [in] pubkey The public key.
- * @param [in] hash The hash of the message.  This object will not be modified by the call.
- * @param [in] context A "context" for this signature of up to 255 bytes.  Must be the same as what was used for the prehash.
- * @param [in] context_len Length of the context.
+ * signature (in): The signature.
+ * pubkey (in): The public key.
+ * hash (in): The hash of the message.  This object will not be modified by the
+ *            call.
+ * context (in): A "context" for this signature of up to 255 bytes.  Must be the
+ *               same as what was used for the prehash.
+ * context_len (in): Length of the context.
 *
- * @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
- * messages, at least without some very careful protocol-level disambiguation.  For Ed448 it is
- * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
- * you no seat belt.
+ * For Ed25519, it is unsafe to use the same key for both prehashed and
+ * non-prehashed messages, at least without some very careful protocol-level
+ * disambiguation.  For Ed448 it is safe.  The C++ wrapper is designed to make
+ * it harder to screw this up, but this C code gives you no seat belt.
 */
-decaf_error_t decaf_ed448_verify_prehash(const uint8_t
-                                         signature
-                                         [DECAF_EDDSA_448_SIGNATURE_BYTES],
-                                         const uint8_t
-                                         pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-                                         const uint8_t hash[64],
-                                         const uint8_t *context,
-                                         uint8_t context_len)
-    __attribute__ ((nonnull(1, 2)));
+decaf_error_t decaf_ed448_verify_prehash(
+                    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                    const uint8_t hash[64],
+                    const uint8_t *context,
+                    uint8_t context_len)
+                    __attribute__ ((nonnull(1, 2)));

-/**
- * @brief EdDSA point encoding.  Used internally, exposed externally.
+/*
+ * EdDSA point encoding.  Used internally, exposed externally.
 * Multiplies by DECAF_448_EDDSA_ENCODE_RATIO first.
 *
 * The multiplication is required because the EdDSA encoding represents
@ -181,62 +176,52 @@ decaf_error_t decaf_ed448_verify_prehash(const uint8_t
 * this function, you will get DECAF_448_EDDSA_ENCODE_RATIO times the
 * EdDSA base point.
 *
- * @param [out] enc The encoded point.
- * @param [in] p The point.
+ * enc (out): The encoded point.
+ * p (in): The point.
 */
-void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
-                                                       enc
-                                                       [DECAF_EDDSA_448_PUBLIC_BYTES],
-                                                       const curve448_point_t
-                                                       p);
+void curve448_point_mul_by_ratio_and_encode_like_eddsa(
+                                    uint8_t enc [DECAF_EDDSA_448_PUBLIC_BYTES],
+                                    const curve448_point_t p);

-/**
- * @brief EdDSA point decoding.  Multiplies by DECAF_448_EDDSA_DECODE_RATIO,
- * and ignores cofactor information.
+/*
+ * EdDSA point decoding.  Multiplies by DECAF_448_EDDSA_DECODE_RATIO, and
+ * ignores cofactor information.
 *
 * See notes on curve448_point_mul_by_ratio_and_encode_like_eddsa
 *
- * @param [out] enc The encoded point.
- * @param [in] p The point.
+ * enc (in): The encoded point.
+ * p (out): The point.
 */
-decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(curve448_point_t
-                                                                p,
-                                                                const uint8_t
-                                                                enc
-                                                                [DECAF_EDDSA_448_PUBLIC_BYTES]
-    );
+decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(
+                            curve448_point_t p,
+                            const uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES]);

-/**
- * @brief EdDSA to ECDH public key conversion
+/*
+ * EdDSA to ECDH public key conversion
 * Deserialize the point to get y on Edwards curve,
 * Convert it to u coordinate on Montgomery curve.
 *
- * @warning This function does not check that the public key being converted
- * is a valid EdDSA public key (FUTURE?)
+ * This function does not check that the public key being converted is a valid
+ * EdDSA public key (FUTURE?)
 *
- * @param[out] x The ECDH public key as in RFC7748(point on Montgomery curve)
- * @param[in] ed The EdDSA public key(point on Edwards curve)
+ * x (out): The ECDH public key as in RFC7748 (point on Montgomery curve)
+ * ed (in): The EdDSA public key (point on Edwards curve)
 */
-void decaf_ed448_convert_public_key_to_x448(uint8_t x[DECAF_X448_PUBLIC_BYTES],
-                                            const uint8_t
-                                            ed[DECAF_EDDSA_448_PUBLIC_BYTES]
-    );
+void decaf_ed448_convert_public_key_to_x448(
+                                uint8_t x[DECAF_X448_PUBLIC_BYTES],
+                                const uint8_t ed[DECAF_EDDSA_448_PUBLIC_BYTES]);

-/**
- * @brief EdDSA to ECDH private key conversion
+/*
+ * EdDSA to ECDH private key conversion
 * Using the appropriate hash function, hash the EdDSA private key
 * and keep only the lower bytes to get the ECDH private key
 *
- * @param[out] x The ECDH private key as in RFC7748
- * @param[in] ed The EdDSA private key
+ * x (out): The ECDH private key as in RFC7748
+ * ed (in): The EdDSA private key
 */
-decaf_error_t decaf_ed448_convert_private_key_to_x448(uint8_t
-                                                      x
-                                                      [DECAF_X448_PRIVATE_BYTES],
-                                                      const uint8_t
-                                                      ed
-                                                      [DECAF_EDDSA_448_PRIVATE_BYTES]
-    );
+decaf_error_t decaf_ed448_convert_private_key_to_x448(
+                            uint8_t x[DECAF_X448_PRIVATE_BYTES],
+                            const uint8_t ed[DECAF_EDDSA_448_PRIVATE_BYTES]);

 #ifdef __cplusplus
 } /* extern "C" */
--- a/crypto/ec/curve448/eddsa.c
+++ b/crypto/ec/curve448/eddsa.c
@ -55,8 +55,7 @@ static decaf_error_t oneshot_hash(uint8_t *out, size_t outlen,
    return DECAF_SUCCESS;
 }

-static void clamp(uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES]
-    )
+static void clamp(uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES])
 {
    uint8_t hibit = (1 << 0) >> 1;

@ -106,13 +105,9 @@ static decaf_error_t hash_init_with_dom(EVP_MD_CTX *hashctx,
 }

 /* In this file because it uses the hash */
-decaf_error_t decaf_ed448_convert_private_key_to_x448(uint8_t
-                                                      x
-                                                      [DECAF_X448_PRIVATE_BYTES],
-                                                      const uint8_t
-                                                      ed
-                                                      [DECAF_EDDSA_448_PRIVATE_BYTES]
-    )
+decaf_error_t decaf_ed448_convert_private_key_to_x448(
+                            uint8_t x[DECAF_X448_PRIVATE_BYTES],
+                            const uint8_t ed [DECAF_EDDSA_448_PRIVATE_BYTES])
 {
    /* pass the private key through oneshot_hash function */
    /* and keep the first DECAF_X448_PRIVATE_BYTES bytes */
@ -121,13 +116,9 @@ decaf_error_t decaf_ed448_convert_private_key_to_x448(uint8_t
                        ed, DECAF_EDDSA_448_PRIVATE_BYTES);
 }

-decaf_error_t decaf_ed448_derive_public_key(uint8_t
-                                            pubkey
-                                            [DECAF_EDDSA_448_PUBLIC_BYTES],
-                                            const uint8_t
-                                            privkey
-                                            [DECAF_EDDSA_448_PRIVATE_BYTES]
-    )
+decaf_error_t decaf_ed448_derive_public_key(
+                        uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                        const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES])
 {
    /* only this much used for keygen */
    uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES];
@ -136,9 +127,9 @@ decaf_error_t decaf_ed448_derive_public_key(uint8_t
    curve448_point_t p;

    if (!oneshot_hash(secret_scalar_ser, sizeof(secret_scalar_ser), privkey,
-                      DECAF_EDDSA_448_PRIVATE_BYTES)) {
+                      DECAF_EDDSA_448_PRIVATE_BYTES))
        return DECAF_FAILURE;
-    }
+
    clamp(secret_scalar_ser);

    curve448_scalar_decode_long(secret_scalar, secret_scalar_ser,
@ -152,9 +143,8 @@ decaf_error_t decaf_ed448_derive_public_key(uint8_t
     * converted it effectively picks up a factor of 2 from the isogenies.  So
     * we might start at 2 instead of 1.
     */
-    for (c = 1; c < DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1) {
+    for (c = 1; c < DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1)
        curve448_scalar_halve(secret_scalar, secret_scalar);
-    }

    curve448_precomputed_scalarmul(p, curve448_precomputed_base, secret_scalar);

@ -168,15 +158,13 @@ decaf_error_t decaf_ed448_derive_public_key(uint8_t
    return DECAF_SUCCESS;
 }

-decaf_error_t decaf_ed448_sign(uint8_t
-                               signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-                               const uint8_t
-                               privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
-                               const uint8_t
-                               pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-                               const uint8_t *message, size_t message_len,
-                               uint8_t prehashed, const uint8_t *context,
-                               size_t context_len)
+decaf_error_t decaf_ed448_sign(
+                        uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                        const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
+                        const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                        const uint8_t *message, size_t message_len,
+                        uint8_t prehashed, const uint8_t *context,
+                        size_t context_len)
 {
    curve448_scalar_t secret_scalar;
    EVP_MD_CTX *hashctx = EVP_MD_CTX_new();
@ -287,9 +275,6 @@ decaf_error_t decaf_ed448_sign_prehash(uint8_t
 {
    return decaf_ed448_sign(signature, privkey, pubkey, hash, 64, 1, context,
                            context_len);
-    /*
-     * OPENSSL_cleanse(hash,sizeof(hash));
-     */
 }

 decaf_error_t decaf_ed448_verify(const uint8_t
@ -307,15 +292,13 @@ decaf_error_t decaf_ed448_verify(const uint8_t
    curve448_scalar_t response_scalar;
    unsigned int c;

-    if (DECAF_SUCCESS != error) {
+    if (DECAF_SUCCESS != error)
        return error;
-    }

    error =
        curve448_point_decode_like_eddsa_and_mul_by_ratio(r_point, signature);
-    if (DECAF_SUCCESS != error) {
+    if (DECAF_SUCCESS != error)
        return error;
-    }

    {
        /* Compute the challenge */
@ -345,9 +328,8 @@ decaf_error_t decaf_ed448_verify(const uint8_t
                                &signature[DECAF_EDDSA_448_PUBLIC_BYTES],
                                DECAF_EDDSA_448_PRIVATE_BYTES);

-    for (c = 1; c < DECAF_448_EDDSA_DECODE_RATIO; c <<= 1) {
+    for (c = 1; c < DECAF_448_EDDSA_DECODE_RATIO; c <<= 1)
        curve448_scalar_add(response_scalar, response_scalar, response_scalar);
-    }

    /* pk_point = -c(x(P)) + (cx + k)G = kG */
    curve448_base_double_scalarmul_non_secret(pk_point,
@ -356,20 +338,16 @@ decaf_error_t decaf_ed448_verify(const uint8_t
    return decaf_succeed_if(curve448_point_eq(pk_point, r_point));
 }

-decaf_error_t decaf_ed448_verify_prehash(const uint8_t
-                                         signature
-                                         [DECAF_EDDSA_448_SIGNATURE_BYTES],
-                                         const uint8_t
-                                         pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-                                         const uint8_t hash[64],
-                                         const uint8_t *context,
-                                         uint8_t context_len)
+decaf_error_t decaf_ed448_verify_prehash(
+                    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                    const uint8_t hash[64], const uint8_t *context,
+                    uint8_t context_len)
 {
    decaf_error_t ret;

-    ret =
-        decaf_ed448_verify(signature, pubkey, hash, 64, 1, context,
-                           context_len);
+    ret = decaf_ed448_verify(signature, pubkey, hash, 64, 1, context,
+                             context_len);

    return ret;
 }
--- a/crypto/ec/curve448/f_generic.c
+++ b/crypto/ec/curve448/f_generic.c
@ -11,10 +11,10 @@
 */
 #include "field.h"

-static const gf MODULUS =
-    { FIELD_LITERAL(0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff,
-                    0xffffffffffffff, 0xfffffffffffffe, 0xffffffffffffff,
-                    0xffffffffffffff, 0xffffffffffffff)
+static const gf MODULUS = {
+    FIELD_LITERAL(0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff,
+                  0xffffffffffffff, 0xfffffffffffffe, 0xffffffffffffff,
+                  0xffffffffffffff, 0xffffffffffffff)
 };

 /** Serialize to wire format. */
@ -27,9 +27,8 @@ void gf_serialize(uint8_t serial[SER_BYTES], const gf x, int with_hibit)

    gf_copy(red, x);
    gf_strong_reduce(red);
-    if (!with_hibit) {
+    if (!with_hibit)
        assert(gf_hibit(red) == 0);
-    }

    UNROLL for (i = 0; i < (with_hibit ? X_SER_BYTES : SER_BYTES); i++) {
        if (fill < 8 && j < NLIMBS) {
@ -43,7 +42,7 @@ void gf_serialize(uint8_t serial[SER_BYTES], const gf x, int with_hibit)
    }
 }

-/** Return high bit of x = low bit of 2x mod p */
+/* Return high bit of x = low bit of 2x mod p */
 mask_t gf_hibit(const gf x)
 {
    gf y;
@ -52,7 +51,7 @@ mask_t gf_hibit(const gf x)
    return -(y->limb[0] & 1);
 }

-/** Return high bit of x = low bit of 2x mod p */
+/* Return low bit of x */
 mask_t gf_lobit(const gf x)
 {
    gf y;
@ -61,7 +60,7 @@ mask_t gf_lobit(const gf x)
    return -(y->limb[0] & 1);
 }

-/** Deserialize from wire format; return -1 on success and 0 on failure. */
+/* Deserialize from wire format; return -1 on success and 0 on failure. */
 mask_t gf_deserialize(gf x, const uint8_t serial[SER_BYTES], int with_hibit,
                      uint8_t hi_nmask)
 {
@ -93,7 +92,7 @@ mask_t gf_deserialize(gf x, const uint8_t serial[SER_BYTES], int with_hibit,
    return succ & word_is_zero(buffer) & ~word_is_zero(scarry);
 }

-/** Reduce to canonical form. */
+/* Reduce to canonical form. */
 void gf_strong_reduce(gf a)
 {
    dsword_t scarry;
@ -135,7 +134,7 @@ void gf_strong_reduce(gf a)
    assert(word_is_zero(carry + scarry_0));
 }

-/** Subtract two gf elements d=a-b */
+/* Subtract two gf elements d=a-b */
 void gf_sub(gf d, const gf a, const gf b)
 {
    gf_sub_RAW(d, a, b);
@ -143,14 +142,14 @@ void gf_sub(gf d, const gf a, const gf b)
    gf_weak_reduce(d);
 }

-/** Add two field elements d = a+b */
+/* Add two field elements d = a+b */
 void gf_add(gf d, const gf a, const gf b)
 {
    gf_add_RAW(d, a, b);
    gf_weak_reduce(d);
 }

-/** Compare a==b */
+/* Compare a==b */
 mask_t gf_eq(const gf a, const gf b)
 {
    gf c;
--- a/crypto/ec/curve448/field.h
+++ b/crypto/ec/curve448/field.h
@ -17,7 +17,7 @@
 # include "f_field.h"
 # include <string.h>

-/** Square x, n times. */
+/* Square x, n times. */
 static ossl_inline void gf_sqrn(gf_s * __restrict__ y, const gf x, int n)
 {
    gf tmp;
@ -38,7 +38,7 @@ static ossl_inline void gf_sqrn(gf_s * __restrict__ y, const gf x, int n)

 # define gf_add_nr gf_add_RAW

-/** Subtract mod p.  Bias by 2 and don't reduce  */
+/* Subtract mod p.  Bias by 2 and don't reduce  */
 static ossl_inline void gf_sub_nr(gf c, const gf a, const gf b)
 {
    gf_sub_RAW(c, a, b);
@ -47,7 +47,7 @@ static ossl_inline void gf_sub_nr(gf c, const gf a, const gf b)
        gf_weak_reduce(c);
 }

-/** Subtract mod p. Bias by amt but don't reduce.  */
+/* Subtract mod p. Bias by amt but don't reduce.  */
 static ossl_inline void gf_subx_nr(gf c, const gf a, const gf b, int amt)
 {
    gf_sub_RAW(c, a, b);
@ -56,7 +56,7 @@ static ossl_inline void gf_subx_nr(gf c, const gf a, const gf b, int amt)
        gf_weak_reduce(c);
 }

-/** Mul by signed int.  Not constant-time WRT the sign of that int. */
+/* Mul by signed int.  Not constant-time WRT the sign of that int. */
 static ossl_inline void gf_mulw(gf c, const gf a, int32_t w)
 {
    if (w > 0) {
@ -67,13 +67,13 @@ static ossl_inline void gf_mulw(gf c, const gf a, int32_t w)
    }
 }

-/** Constant time, x = is_z ? z : y */
+/* Constant time, x = is_z ? z : y */
 static ossl_inline void gf_cond_sel(gf x, const gf y, const gf z, mask_t is_z)
 {
    constant_time_select(x, y, z, sizeof(gf), is_z, 0);
 }

-/** Constant time, if (neg) x=-x; */
+/* Constant time, if (neg) x=-x; */
 static ossl_inline void gf_cond_neg(gf x, mask_t neg)
 {
    gf y;
@ -81,7 +81,7 @@ static ossl_inline void gf_cond_neg(gf x, mask_t neg)
    gf_cond_sel(x, x, y, neg);
 }

-/** Constant time, if (swap) (x,y) = (y,x); */
+/* Constant time, if (swap) (x,y) = (y,x); */
 static ossl_inline void gf_cond_swap(gf x, gf_s * __restrict__ y, mask_t swap)
 {
    constant_time_cond_swap(x, y, sizeof(gf_s), swap);
--- a/crypto/ec/curve448/point_448.h
+++ b/crypto/ec/curve448/point_448.h
@ -20,148 +20,146 @@
 extern "C" {
 #endif

-/** @cond internal */
 # define DECAF_448_SCALAR_LIMBS ((446-1)/DECAF_WORD_BITS+1)
-/** @endcond */

-/** The number of bits in a scalar */
+/* The number of bits in a scalar */
 # define DECAF_448_SCALAR_BITS 446

-/** Number of bytes in a serialized point. */
+/* Number of bytes in a serialized point. */
 # define DECAF_448_SER_BYTES 56

-/** Number of bytes in an elligated point.  For now set the same as SER_BYTES
+/*
+ * Number of bytes in an elligated point.  For now set the same as SER_BYTES
 * but could be different for other curves.
 */
 # define DECAF_448_HASH_BYTES 56

-/** Number of bytes in a serialized scalar. */
+/* Number of bytes in a serialized scalar. */
 # define DECAF_448_SCALAR_BYTES 56

-/** Number of bits in the "which" field of an elligator inverse */
+/* Number of bits in the "which" field of an elligator inverse */
 # define DECAF_448_INVERT_ELLIGATOR_WHICH_BITS 3

-/** The cofactor the curve would have, if we hadn't removed it */
+/* The cofactor the curve would have, if we hadn't removed it */
 # define DECAF_448_REMOVED_COFACTOR 4

-/** X448 encoding ratio. */
+/* X448 encoding ratio. */
 # define DECAF_X448_ENCODE_RATIO 2

-/** Number of bytes in an x448 public key */
+/* Number of bytes in an x448 public key */
 # define DECAF_X448_PUBLIC_BYTES 56

-/** Number of bytes in an x448 private key */
+/* Number of bytes in an x448 private key */
 # define DECAF_X448_PRIVATE_BYTES 56

-/** Twisted Edwards extended homogeneous coordinates */
+/* Twisted Edwards extended homogeneous coordinates */
 typedef struct curve448_point_s {
-    /** @cond internal */
    gf_448_t x, y, z, t;
-    /** @endcond */
 } curve448_point_t[1];

-/** Precomputed table based on a point.  Can be trivial implementation. */
+/* Precomputed table based on a point.  Can be trivial implementation. */
 struct curve448_precomputed_s;

-/** Precomputed table based on a point.  Can be trivial implementation. */
+/* Precomputed table based on a point.  Can be trivial implementation. */
 typedef struct curve448_precomputed_s curve448_precomputed_s;

-/** Scalar is stored packed, because we don't need the speed. */
+/* Scalar is stored packed, because we don't need the speed. */
 typedef struct curve448_scalar_s {
-    /** @cond internal */
    decaf_word_t limb[DECAF_448_SCALAR_LIMBS];
-    /** @endcond */
 } curve448_scalar_t[1];

-/** A scalar equal to 1. */
+/* A scalar equal to 1. */
 extern const curve448_scalar_t curve448_scalar_one;

-/** A scalar equal to 0. */
+/* A scalar equal to 0. */
 extern const curve448_scalar_t curve448_scalar_zero;

-/** The identity point on the curve. */
+/* The identity point on the curve. */
 extern const curve448_point_t curve448_point_identity;

-/** An arbitrarily chosen base point on the curve. */
+/* An arbitrarily chosen base point on the curve. */
 extern const curve448_point_t curve448_point_base;

-/** Precomputed table for the base point on the curve. */
+/* Precomputed table for the base point on the curve. */
 extern const struct curve448_precomputed_s *curve448_precomputed_base;

-/**
- * @brief Read a scalar from wire format or from bytes.
+/*
+ * Read a scalar from wire format or from bytes.
 *
- * @param [in] ser Serialized form of a scalar.
- * @param [out] out Deserialized form.
+ * ser (in): Serialized form of a scalar.
+ * out (out): Deserialized form.
 *
- * @retval DECAF_SUCCESS The scalar was correctly encoded.
- * @retval DECAF_FAILURE The scalar was greater than the modulus,
- * and has been reduced modulo that modulus.
+ * Returns:
+ * DECAF_SUCCESS: The scalar was correctly encoded.
+ * DECAF_FAILURE: The scalar was greater than the modulus, and has been reduced
+ * modulo that modulus.
 */
-__owur decaf_error_t curve448_scalar_decode(curve448_scalar_t out,
-                                            const unsigned char
-                                            ser[DECAF_448_SCALAR_BYTES]
-    );
+__owur decaf_error_t curve448_scalar_decode(
+                            curve448_scalar_t out,
+                            const unsigned char ser[DECAF_448_SCALAR_BYTES]);

-/**
- * @brief Read a scalar from wire format or from bytes.  Reduces mod
- * scalar prime.
+/*
+ * Read a scalar from wire format or from bytes.  Reduces mod scalar prime.
 *
- * @param [in] ser Serialized form of a scalar.
- * @param [in] ser_len Length of serialized form.
- * @param [out] out Deserialized form.
+ * ser (in): Serialized form of a scalar.
+ * ser_len (in): Length of serialized form.
+ * out (out): Deserialized form.
 */
 void curve448_scalar_decode_long(curve448_scalar_t out,
                                 const unsigned char *ser, size_t ser_len);

-/**
- * @brief Serialize a scalar to wire format.
+/*
+ * Serialize a scalar to wire format.
 *
- * @param [out] ser Serialized form of a scalar.
- * @param [in] s Deserialized scalar.
+ * ser (out): Serialized form of a scalar.
+ * s (in): Deserialized scalar.
 */
 void curve448_scalar_encode(unsigned char ser[DECAF_448_SCALAR_BYTES],
                            const curve448_scalar_t s);

-/**
- * @brief Add two scalars.  The scalars may use the same memory.
- * @param [in] a One scalar.
- * @param [in] b Another scalar.
- * @param [out] out a+b.
+/*
+ * Add two scalars. The scalars may use the same memory.
+ *
+ * a (in): One scalar.
+ * b (in): Another scalar.
+ * out (out): a+b.
 */
 void curve448_scalar_add(curve448_scalar_t out,
                         const curve448_scalar_t a, const curve448_scalar_t b);

-/**
- * @brief Subtract two scalars.  The scalars may use the same memory.
- * @param [in] a One scalar.
- * @param [in] b Another scalar.
- * @param [out] out a-b.
+/*
+ * Subtract two scalars.  The scalars may use the same memory.
+ * a (in): One scalar.
+ * b (in): Another scalar.
+ * out (out): a-b.
 */
 void curve448_scalar_sub(curve448_scalar_t out,
                         const curve448_scalar_t a, const curve448_scalar_t b);

-/**
- * @brief Multiply two scalars.  The scalars may use the same memory.
- * @param [in] a One scalar.
- * @param [in] b Another scalar.
- * @param [out] out a*b.
+/*
+ * Multiply two scalars. The scalars may use the same memory.
+ *
+ * a (in): One scalar.
+ * b (in): Another scalar.
+ * out (out): a*b.
 */
 void curve448_scalar_mul(curve448_scalar_t out,
                         const curve448_scalar_t a, const curve448_scalar_t b);

-/**
-* @brief Halve a scalar.  The scalars may use the same memory.
-* @param [in] a A scalar.
-* @param [out] out a/2.
+/*
+ * Halve a scalar.  The scalars may use the same memory.
+ *
+ * a (in): A scalar.
+ * out (out): a/2.
 */
 void curve448_scalar_halve(curve448_scalar_t out, const curve448_scalar_t a);

-/**
- * @brief Copy a scalar.  The scalars may use the same memory, in which
- * case this function does nothing.
- * @param [in] a A scalar.
- * @param [out] out Will become a copy of a.
+/*
+ * Copy a scalar.  The scalars may use the same memory, in which case this
+ * function does nothing.
+ *
+ * a (in): A scalar.
+ * out (out): Will become a copy of a.
 */
 static ossl_inline void curve448_scalar_copy(curve448_scalar_t out,
                                             const curve448_scalar_t a)
@ -169,12 +167,12 @@ static ossl_inline void curve448_scalar_copy(curve448_scalar_t out,
    *out = *a;
 }

-/**
- * @brief Copy a point.  The input and output may alias,
- * in which case this function does nothing.
+/*
+ * Copy a point.  The input and output may alias, in which case this function
+ * does nothing.
 *
- * @param [out] a A copy of the point.
- * @param [in] b Any point.
+ * a (out): A copy of the point.
+ * b (in): Any point.
 */
 static ossl_inline void curve448_point_copy(curve448_point_t a,
                                            const curve448_point_t b)
@ -182,47 +180,48 @@ static ossl_inline void curve448_point_copy(curve448_point_t a,
    *a = *b;
 }

-/**
- * @brief Test whether two points are equal.  If yes, return
- * DECAF_TRUE, else return DECAF_FALSE.
+/*
+ * Test whether two points are equal.  If yes, return DECAF_TRUE, else return
+ * DECAF_FALSE.
 *
- * @param [in] a A point.
- * @param [in] b Another point.
- * @retval DECAF_TRUE The points are equal.
- * @retval DECAF_FALSE The points are not equal.
+ * a (in): A point.
+ * b (in): Another point.
+ *
+ * Returns:
+ * DECAF_TRUE: The points are equal.
+ * DECAF_FALSE: The points are not equal.
 */
 __owur decaf_bool_t curve448_point_eq(const curve448_point_t a,
                                      const curve448_point_t b);

-/**
- * @brief Double a point.  Equivalent to
- * curve448_point_add(two_a,a,a), but potentially faster.
+/*
+ * Double a point. Equivalent to curve448_point_add(two_a,a,a), but potentially
+ * faster.
 *
- * @param [out] two_a The sum a+a.
- * @param [in] a A point.
+ * two_a (out): The sum a+a.
+ * a (in): A point.
 */
 void curve448_point_double(curve448_point_t two_a, const curve448_point_t a);

-/**
- * @brief RFC 7748 Diffie-Hellman scalarmul.  This function uses a different
+/*
+ * RFC 7748 Diffie-Hellman scalarmul.  This function uses a different
 * (non-Decaf) encoding.
 *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] base The point to be scaled.
- * @param [in] scalar The scalar to multiply by.
+ * out (out): The scaled point base*scalar
+ * base (in): The point to be scaled.
+ * scalar (in): The scalar to multiply by.
 *
- * @retval DECAF_SUCCESS The scalarmul succeeded.
- * @retval DECAF_FAILURE The scalarmul didn't succeed, because the base
- * point is in a small subgroup.
+ * Returns:
+ * DECAF_SUCCESS: The scalarmul succeeded.
+ * DECAF_FAILURE: The scalarmul didn't succeed, because the base point is in a
+ * small subgroup.
 */
 __owur decaf_error_t decaf_x448(uint8_t out[DECAF_X448_PUBLIC_BYTES],
                                const uint8_t base[DECAF_X448_PUBLIC_BYTES],
-                                const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
-    );
+                                const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]);

-/**
- * @brief Multiply a point by DECAF_X448_ENCODE_RATIO,
- * then encode it like RFC 7748.
+/*
+ * Multiply a point by DECAF_X448_ENCODE_RATIO, then encode it like RFC 7748.
 *
 * This function is mainly used internally, but is exported in case
 * it will be useful.
@ -237,83 +236,73 @@ __owur decaf_error_t decaf_x448(uint8_t out[DECAF_X448_PUBLIC_BYTES],
 * will be DECAF_X448_ENCODE_RATIO times the X448
 * base point.
 *
- * @param [out] out The scaled and encoded point.
- * @param [in] p The point to be scaled and encoded.
+ * out (out): The scaled and encoded point.
+ * p (in): The point to be scaled and encoded.
 */
-void curve448_point_mul_by_ratio_and_encode_like_x448(uint8_t
-                                                      out
-                                                      [DECAF_X448_PUBLIC_BYTES],
-                                                      const curve448_point_t p);
+void curve448_point_mul_by_ratio_and_encode_like_x448(
+                                        uint8_t out[DECAF_X448_PUBLIC_BYTES],
+                                        const curve448_point_t p);

-/** The base point for X448 Diffie-Hellman */
+/* The base point for X448 Diffie-Hellman */
 extern const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES];

-/**
- * @brief RFC 7748 Diffie-Hellman base point scalarmul.  This function uses
- * a different (non-Decaf) encoding.
- *
- * Does exactly the same thing as decaf_x448_generate_key,
- * but has a better name.
- *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] scalar The scalar to multiply by.
+/*
+ * RFC 7748 Diffie-Hellman base point scalarmul.  This function uses a different
+ * (non-Decaf) encoding.
+ *
+ * out (out): The scaled point base*scalar
+ * scalar (in): The scalar to multiply by.
 */
-void decaf_x448_derive_public_key(uint8_t out[DECAF_X448_PUBLIC_BYTES],
-                                  const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
-    );
+void decaf_x448_derive_public_key(
+                                uint8_t out[DECAF_X448_PUBLIC_BYTES],
+                                const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]);

-/**
- * @brief Multiply a precomputed base point by a scalar:
- * scaled = scalar*base.
- * Some implementations do not include precomputed points; for
- * those implementations, this function is the same as
- * curve448_point_scalarmul
+/*
+ * Multiply a precomputed base point by a scalar: scaled = scalar*base.
 *
- * @param [out] scaled The scaled point base*scalar
- * @param [in] base The point to be scaled.
- * @param [in] scalar The scalar to multiply by.
+ * scaled (out): The scaled point base*scalar
+ * base (in): The point to be scaled.
+ * scalar (in): The scalar to multiply by.
 */
 void curve448_precomputed_scalarmul(curve448_point_t scaled,
                                    const curve448_precomputed_s * base,
                                    const curve448_scalar_t scalar);

-/**
- * @brief Multiply two base points by two scalars:
- * scaled = scalar1*curve448_point_base + scalar2*base2.
+/*
+ * Multiply two base points by two scalars:
+ * combo = scalar1*curve448_point_base + scalar2*base2.
 *
 * Otherwise equivalent to curve448_point_double_scalarmul, but may be
 * faster at the expense of being variable time.
 *
- * @param [out] combo The linear combination scalar1*base + scalar2*base2.
- * @param [in] scalar1 A first scalar to multiply by.
- * @param [in] base2 A second point to be scaled.
- * @param [in] scalar2 A second scalar to multiply by.
+ * combo (out): The linear combination scalar1*base + scalar2*base2.
+ * scalar1 (in): A first scalar to multiply by.
+ * base2 (in): A second point to be scaled.
+ * scalar2 (in): A second scalar to multiply by.
 *
- * @warning: This function takes variable time, and may leak the scalars
- * used.  It is designed for signature verification.
+ * Warning: This function takes variable time, and may leak the scalars used.
+ * It is designed for signature verification.
 */
 void curve448_base_double_scalarmul_non_secret(curve448_point_t combo,
                                               const curve448_scalar_t scalar1,
                                               const curve448_point_t base2,
                                               const curve448_scalar_t scalar2);

-/**
- * @brief Test that a point is valid, for debugging purposes.
+/*
+ * Test that a point is valid, for debugging purposes.
 *
- * @param [in] to_test The point to test.
- * @retval DECAF_TRUE The point is valid.
- * @retval DECAF_FALSE The point is invalid.
+ * to_test (in): The point to test.
+ *
+ * Returns:
+ * DECAF_TRUE: The point is valid.
+ * DECAF_FALSE: The point is invalid.
 */
 __owur decaf_bool_t curve448_point_valid(const curve448_point_t to_test);

-/**
- * @brief Overwrite scalar with zeros.
- */
+/* Overwrite scalar with zeros. */
 void curve448_scalar_destroy(curve448_scalar_t scalar);

-/**
- * @brief Overwrite point with zeros.
- */
+/* Overwrite point with zeros. */
 void curve448_point_destroy(curve448_point_t point);

 #ifdef __cplusplus
--- a/crypto/ec/curve448/scalar.c
+++ b/crypto/ec/curve448/scalar.c
@ -16,31 +16,36 @@
 #include "point_448.h"

 static const decaf_word_t MONTGOMERY_FACTOR = (decaf_word_t) 0x3bd440fae918bc5;
-static const curve448_scalar_t sc_p = { {{
-                                          SC_LIMB(0x2378c292ab5844f3),
-                                          SC_LIMB(0x216cc2728dc58f55),
-                                          SC_LIMB(0xc44edb49aed63690),
-                                          SC_LIMB(0xffffffff7cca23e9),
-                                          SC_LIMB(0xffffffffffffffff),
-                                          SC_LIMB(0xffffffffffffffff),
-                                          SC_LIMB(0x3fffffffffffffff)
-                                          }}
-}, sc_r2 = { { {
+static const curve448_scalar_t sc_p = {
+    {
+        {
+            SC_LIMB(0x2378c292ab5844f3), SC_LIMB(0x216cc2728dc58f55),
+            SC_LIMB(0xc44edb49aed63690), SC_LIMB(0xffffffff7cca23e9),
+            SC_LIMB(0xffffffffffffffff), SC_LIMB(0xffffffffffffffff),
+            SC_LIMB(0x3fffffffffffffff)
+        }
+    }
+}, sc_r2 = {
+    {
+        {

            SC_LIMB(0xe3539257049b9b60), SC_LIMB(0x7af32c4bc1b195d9),
-                SC_LIMB(0x0d66de2388ea1859), SC_LIMB(0xae17cf725ee4d838),
-                SC_LIMB(0x1a9cc14ba3c47c44), SC_LIMB(0x2052bcb7e4d070af),
-                SC_LIMB(0x3402a939f823b729)
-}}};
+            SC_LIMB(0x0d66de2388ea1859), SC_LIMB(0xae17cf725ee4d838),
+            SC_LIMB(0x1a9cc14ba3c47c44), SC_LIMB(0x2052bcb7e4d070af),
+            SC_LIMB(0x3402a939f823b729)
+        }
+    }
+};

 /* End of template stuff */

 #define WBITS DECAF_WORD_BITS   /* NB this may be different from ARCH_WORD_BITS */

-const curve448_scalar_t curve448_scalar_one = { {{1}} }, curve448_scalar_zero = { { {
-0}}};
+const curve448_scalar_t curve448_scalar_one = {{{1}}};
+const curve448_scalar_t  curve448_scalar_zero = {{{0}}};

-/** {extra,accum} - sub +? p
+/*
+ * {extra,accum} - sub +? p
 * Must have extra <= 1
 */
 static void sc_subx(curve448_scalar_t out,
@ -67,8 +72,8 @@ static void sc_subx(curve448_scalar_t out,
    }
 }

-static void sc_montmul(curve448_scalar_t out,
-                       const curve448_scalar_t a, const curve448_scalar_t b)
+static void sc_montmul(curve448_scalar_t out, const curve448_scalar_t a,
+                       const curve448_scalar_t b)
 {
    unsigned int i, j;
    decaf_word_t accum[DECAF_448_SCALAR_LIMBS + 1] = { 0 };
@ -104,24 +109,25 @@ static void sc_montmul(curve448_scalar_t out,
    sc_subx(out, accum, sc_p, sc_p, hi_carry);
 }

-void curve448_scalar_mul(curve448_scalar_t out,
-                         const curve448_scalar_t a, const curve448_scalar_t b)
+void curve448_scalar_mul(curve448_scalar_t out, const curve448_scalar_t a,
+                         const curve448_scalar_t b)
 {
    sc_montmul(out, a, b);
    sc_montmul(out, out, sc_r2);
 }

-void curve448_scalar_sub(curve448_scalar_t out,
-                         const curve448_scalar_t a, const curve448_scalar_t b)
+void curve448_scalar_sub(curve448_scalar_t out, const curve448_scalar_t a,
+                         const curve448_scalar_t b)
 {
    sc_subx(out, a->limb, b, sc_p, 0);
 }

-void curve448_scalar_add(curve448_scalar_t out,
-                         const curve448_scalar_t a, const curve448_scalar_t b)
+void curve448_scalar_add(curve448_scalar_t out, const curve448_scalar_t a,
+                         const curve448_scalar_t b)
 {
    decaf_dword_t chain = 0;
    unsigned int i;
+
    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
        chain = (chain + a->limb[i]) + b->limb[i];
        out->limb[i] = chain;
@ -135,27 +141,26 @@ static ossl_inline void scalar_decode_short(curve448_scalar_t s,
                                            unsigned int nbytes)
 {
    unsigned int i, j, k = 0;
+
    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
        decaf_word_t out = 0;
-        for (j = 0; j < sizeof(decaf_word_t) && k < nbytes; j++, k++) {
+
+        for (j = 0; j < sizeof(decaf_word_t) && k < nbytes; j++, k++)
            out |= ((decaf_word_t) ser[k]) << (8 * j);
-        }
        s->limb[i] = out;
    }
 }

-decaf_error_t curve448_scalar_decode(curve448_scalar_t s,
-                                     const unsigned char
-                                     ser[DECAF_448_SCALAR_BYTES]
-    )
+decaf_error_t curve448_scalar_decode(
+                                curve448_scalar_t s,
+                                const unsigned char ser[DECAF_448_SCALAR_BYTES])
 {
    unsigned int i;
    decaf_dsword_t accum = 0;

    scalar_decode_short(s, ser, DECAF_448_SCALAR_BYTES);
-    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
+    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++)
        accum = (accum + s->limb[i] - sc_p->limb[i]) >> WBITS;
-    }
    /* Here accum == 0 or -1 */

    curve448_scalar_mul(s, s, curve448_scalar_one); /* ham-handed reduce */
@ -209,10 +214,10 @@ void curve448_scalar_encode(unsigned char ser[DECAF_448_SCALAR_BYTES],
                            const curve448_scalar_t s)
 {
    unsigned int i, j, k = 0;
+
    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
-        for (j = 0; j < sizeof(decaf_word_t); j++, k++) {
+        for (j = 0; j < sizeof(decaf_word_t); j++, k++)
            ser[k] = s->limb[i] >> (8 * j);
-        }
    }
 }

@ -226,8 +231,7 @@ void curve448_scalar_halve(curve448_scalar_t out, const curve448_scalar_t a)
        out->limb[i] = chain;
        chain >>= DECAF_WORD_BITS;
    }
-    for (i = 0; i < DECAF_448_SCALAR_LIMBS - 1; i++) {
+    for (i = 0; i < DECAF_448_SCALAR_LIMBS - 1; i++)
        out->limb[i] = out->limb[i] >> 1 | out->limb[i + 1] << (WBITS - 1);
-    }
    out->limb[i] = out->limb[i] >> 1 | chain << (WBITS - 1);
 }
--- a/crypto/ec/curve448/word.h
+++ b/crypto/ec/curve448/word.h
@ -162,7 +162,6 @@ static ossl_inline big_register_t br_is_zero(big_register_t x)
 static ossl_inline big_register_t br_is_zero(big_register_t x)
 {
    return (big_register_t) _mm_cmpeq_epi32((__m128i) x, _mm_setzero_si128());
-    // return (big_register_t)(x == br_set_to_mask(0));
 }
 # elif defined(__ARM_NEON__)
 static ossl_inline big_register_t br_is_zero(big_register_t x)
@ -196,7 +195,7 @@ static ossl_inline big_register_t br_is_zero(big_register_t x)
 */
 static ossl_inline decaf_bool_t mask_to_bool(mask_t m)
 {
-    return (decaf_sword_t) (sword_t) m;
+    return (decaf_sword_t)(sword_t)m;
 }

 static ossl_inline mask_t bool_to_mask(decaf_bool_t m)
@ -204,13 +203,13 @@ static ossl_inline mask_t bool_to_mask(decaf_bool_t m)
    /* On most arches this will be optimized to a simple cast. */
    mask_t ret = 0;
    unsigned int i;
-
    unsigned int limit = sizeof(decaf_bool_t) / sizeof(mask_t);
+
    if (limit < 1)
        limit = 1;
-    for (i = 0; i < limit; i++) {
+    for (i = 0; i < limit; i++)
        ret |= ~word_is_zero(m >> (i * 8 * sizeof(word_t)));
-    }
+
    return ret;
 }