Skip to content

Commit

Permalink
Aarch64 ASM: check CPU features before hw crypto instr use
Browse files Browse the repository at this point in the history
For SHA-256, SHA-512 and SHA3, get the CPU features to see if hardware
crypto is available. If not, then fall back to an alternate
implementation.
  • Loading branch information
SparkiDev committed Dec 24, 2024
1 parent a13d0fd commit e1851cd
Show file tree
Hide file tree
Showing 9 changed files with 654 additions and 551 deletions.
1,070 changes: 555 additions & 515 deletions wolfcrypt/src/port/arm/armv8-sha256.c

Large diffs are not rendered by default.

27 changes: 13 additions & 14 deletions wolfcrypt/src/port/arm/armv8-sha3-asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,15 @@ L_SHA3_transform_crypto_r:
.xword 0x8000000080008008
#ifndef __APPLE__
.text
.globl BlockSha3
.type BlockSha3,@function
.globl BlockSha3_crypto
.type BlockSha3_crypto,@function
.align 2
BlockSha3:
BlockSha3_crypto:
#else
.section __TEXT,__text
.globl _BlockSha3
.globl _BlockSha3_crypto
.p2align 2
_BlockSha3:
_BlockSha3_crypto:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-80]!
add x29, sp, #0
Expand Down Expand Up @@ -204,9 +204,9 @@ L_sha3_crypto_begin:
ldp x29, x30, [sp], #0x50
ret
#ifndef __APPLE__
.size BlockSha3,.-BlockSha3
.size BlockSha3_crypto,.-BlockSha3_crypto
#endif /* __APPLE__ */
#else
#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */
#ifndef __APPLE__
.text
.type L_SHA3_transform_base_r, %object
Expand Down Expand Up @@ -247,15 +247,15 @@ L_SHA3_transform_base_r:
.xword 0x8000000080008008
#ifndef __APPLE__
.text
.globl BlockSha3
.type BlockSha3,@function
.globl BlockSha3_base
.type BlockSha3_base,@function
.align 2
BlockSha3:
BlockSha3_base:
#else
.section __TEXT,__text
.globl _BlockSha3
.globl _BlockSha3_base
.p2align 2
_BlockSha3:
_BlockSha3_base:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-160]!
add x29, sp, #0
Expand Down Expand Up @@ -449,9 +449,8 @@ L_SHA3_transform_base_begin:
ldp x29, x30, [sp], #0xa0
ret
#ifndef __APPLE__
.size BlockSha3,.-BlockSha3
.size BlockSha3_base,.-BlockSha3_base
#endif /* __APPLE__ */
#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */
#endif /* WOLFSSL_SHA3 */
#endif /* __aarch64__ */
#endif /* WOLFSSL_ARMASM */
Expand Down
7 changes: 3 additions & 4 deletions wolfcrypt/src/port/arm/armv8-sha3-asm_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ static const word64 L_SHA3_transform_crypto_r[] = {
0x8000000080008008UL,
};

void BlockSha3(word64* state)
void BlockSha3_crypto(word64* state)
{
__asm__ __volatile__ (
#ifdef __APPLE__
Expand Down Expand Up @@ -181,7 +181,7 @@ void BlockSha3(word64* state)
);
}

#else
#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */
static const word64 L_SHA3_transform_base_r[] = {
0x1UL,
0x8082UL,
Expand Down Expand Up @@ -209,7 +209,7 @@ static const word64 L_SHA3_transform_base_r[] = {
0x8000000080008008UL,
};

void BlockSha3(word64* state)
void BlockSha3_base(word64* state)
{
__asm__ __volatile__ (
"stp x29, x30, [sp, #-64]!\n\t"
Expand Down Expand Up @@ -397,7 +397,6 @@ void BlockSha3(word64* state)
);
}

#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */
#endif /* WOLFSSL_SHA3 */
#endif /* __aarch64__ */
#endif /* WOLFSSL_ARMASM */
Expand Down
3 changes: 1 addition & 2 deletions wolfcrypt/src/port/arm/armv8-sha512-asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
#ifdef __aarch64__
#ifndef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_SHA512
#ifndef WOLFSSL_ARMASM_CRYPTO_SHA512
#ifndef __APPLE__
.text
.type L_SHA512_transform_neon_len_k, %object
Expand Down Expand Up @@ -1093,7 +1092,7 @@ L_sha512_len_neon_start:
#ifndef __APPLE__
.size Transform_Sha512_Len_neon,.-Transform_Sha512_Len_neon
#endif /* __APPLE__ */
#else
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
#ifndef __APPLE__
.text
.type L_SHA512_transform_crypto_len_k, %object
Expand Down
3 changes: 1 addition & 2 deletions wolfcrypt/src/port/arm/armv8-sha512-asm_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
#include <wolfssl/wolfcrypt/sha512.h>

#ifdef WOLFSSL_SHA512
#ifndef WOLFSSL_ARMASM_CRYPTO_SHA512
static const word64 L_SHA512_transform_neon_len_k[] = {
0x428a2f98d728ae22UL,
0x7137449123ef65cdUL,
Expand Down Expand Up @@ -1053,7 +1052,7 @@ void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len)
);
}

#else
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
static const word64 L_SHA512_transform_crypto_len_k[] = {
0x428a2f98d728ae22UL,
0x7137449123ef65cdUL,
Expand Down
37 changes: 37 additions & 0 deletions wolfcrypt/src/port/arm/armv8-sha512.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
}
#endif
#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/cpuid.h>
#include <wolfssl/wolfcrypt/hash.h>

#include <wolfssl/wolfcrypt/logging.h>
Expand All @@ -62,6 +63,11 @@
#include <wolfssl/wolfcrypt/cryptocb.h>
#endif

#if defined(__aarch64__) && defined(WOLFSSL_ARMASM_CRYPTO_SHA512)
/* Cached CPU feature flags. Assigned from cpuid_get_flags() by the init code
 * in this file and tested with IS_AARCH64_SHA512() to decide whether the
 * hardware crypto SHA-512 transform may be called. */
static word32 cpuid_flags = 0;
/* Non-zero once cpuid_flags has been filled in, so that the query is only
 * made the first time an init function runs.
 * NOTE(review): the visible init code reads and writes this flag without any
 * locking - confirm that concurrent first-time initialisation is acceptable. */
static int cpuid_flags_set = 0;
#endif

#ifdef WOLFSSL_SHA512

static int InitSha512(wc_Sha512* sha512)
Expand Down Expand Up @@ -198,6 +204,13 @@ static int InitSha512_Family(wc_Sha512* sha512, void* heap, int devId,
if (ret != 0)
return ret;

#if defined(__aarch64__) && defined(WOLFSSL_ARMASM_CRYPTO_SHA512)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
}
#endif

(void)devId;

return ret;
Expand Down Expand Up @@ -432,6 +445,22 @@ static void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len
}
#undef DATA

#elif defined(__aarch64__)

/* Run the SHA-512 transform over data using the implementation selected from
 * the cached CPU feature flags.
 *
 * When compiled with WOLFSSL_ARMASM_CRYPTO_SHA512 and IS_AARCH64_SHA512()
 * reports support in cpuid_flags, the hardware crypto transform is called.
 * In every other case the NEON transform is called.
 *
 * sha512  SHA-512 object, passed through to the selected transform.
 * data    Message data, passed through to the selected transform.
 * len     Length value, passed through to the selected transform.
 */
static WC_INLINE void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data,
    word32 len)
{
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
    /* Prefer the hardware crypto instructions when the CPU reports them. */
    if (IS_AARCH64_SHA512(cpuid_flags)) {
        Transform_Sha512_Len_crypto(sha512, data, len);
        return;
    }
#endif
    /* Fallback that does not use the SHA-512 crypto instructions path. */
    Transform_Sha512_Len_neon(sha512, data, len);
}

#endif


Expand Down Expand Up @@ -855,6 +884,14 @@ int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId)
return ret;
}
#endif

#if defined(__aarch64__) && defined(WOLFSSL_ARMASM_CRYPTO_SHA512)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
}
#endif

(void)devId;

return ret;
Expand Down
46 changes: 37 additions & 9 deletions wolfcrypt/src/sha3.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@
}
#endif

#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)

#ifdef USE_INTEL_SPEEDUP
#if defined(USE_INTEL_SPEEDUP) || (defined(__aarch64__) && \
defined(WOLFSSL_ARMASM))
#include <wolfssl/wolfcrypt/cpuid.h>

word32 cpuid_flags;
Expand All @@ -81,6 +81,8 @@
#endif
#endif

#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)

#ifdef WOLFSSL_SHA3_SMALL
/* Rotate a 64-bit value left.
*
Expand Down Expand Up @@ -659,11 +661,37 @@ static int InitSha3(wc_Sha3* sha3)
SHA3_BLOCK_N = NULL;
}
}
#define SHA3_FUNC_PTR
#endif
#if defined(__aarch64__) && defined(WOLFSSL_ARMASM)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
if (IS_AARCH64_SHA3(cpuid_flags)) {
SHA3_BLOCK = BlockSha3_crypto;
SHA3_BLOCK_N = NULL;
}
else
#endif
{
SHA3_BLOCK = BlockSha3_base;
SHA3_BLOCK_N = NULL;
}
}
#define SHA3_FUNC_PTR
#endif

return 0;
}

#if defined(__aarch64__) && defined(WOLFSSL_ARMASM)
/* Apply the SHA-3 block function to the state.
 *
 * Forwards to whichever implementation SHA3_BLOCK currently points at;
 * InitSha3 assigns it to BlockSha3_crypto or BlockSha3_base according to the
 * CPU feature flags.
 * NOTE(review): SHA3_BLOCK is not checked for NULL here - confirm every caller
 * of BlockSha3 runs after InitSha3 has assigned the pointer.
 *
 * s  State passed through to the selected block function.
 */
void BlockSha3(word64* s)
{
    SHA3_BLOCK(s);
}
#endif

/* Update the SHA-3 hash state with message data.
*
* sha3 wc_Sha3 object holding state.
Expand Down Expand Up @@ -700,7 +728,7 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p)
for (i = 0; i < p; i++) {
sha3->s[i] ^= Load64BitBigEndian(sha3->t + 8 * i);
}
#ifdef USE_INTEL_SPEEDUP
#ifdef SHA3_FUNC_PTR
(*SHA3_BLOCK)(sha3->s);
#else
BlockSha3(sha3->s);
Expand All @@ -709,7 +737,7 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p)
}
}
blocks = len / (p * 8U);
#ifdef USE_INTEL_SPEEDUP
#ifdef SHA3_FUNC_PTR
if ((SHA3_BLOCK_N != NULL) && (blocks > 0)) {
(*SHA3_BLOCK_N)(sha3->s, data, blocks, p * 8U);
len -= blocks * (p * 8U);
Expand All @@ -721,7 +749,7 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p)
for (i = 0; i < p; i++) {
sha3->s[i] ^= Load64Unaligned(data + 8 * i);
}
#ifdef USE_INTEL_SPEEDUP
#ifdef SHA3_FUNC_PTR
(*SHA3_BLOCK)(sha3->s);
#else
BlockSha3(sha3->s);
Expand Down Expand Up @@ -773,7 +801,7 @@ static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, word32 l)
#endif

for (j = 0; l - j >= rate; j += rate) {
#ifdef USE_INTEL_SPEEDUP
#ifdef SHA3_FUNC_PTR
(*SHA3_BLOCK)(sha3->s);
#else
BlockSha3(sha3->s);
Expand All @@ -785,7 +813,7 @@ static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, word32 l)
#endif
}
if (j != l) {
#ifdef USE_INTEL_SPEEDUP
#ifdef SHA3_FUNC_PTR
(*SHA3_BLOCK)(sha3->s);
#else
BlockSha3(sha3->s);
Expand Down Expand Up @@ -1503,7 +1531,7 @@ int wc_Shake128_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt)
SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
for (; (blockCnt > 0); blockCnt--) {
#ifdef USE_INTEL_SPEEDUP
#ifdef SHA3_FUNC_PTR
(*SHA3_BLOCK)(shake->s);
#else
BlockSha3(shake->s);
Expand Down Expand Up @@ -1641,7 +1669,7 @@ int wc_Shake256_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt)
SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
for (; (blockCnt > 0); blockCnt--) {
#ifdef USE_INTEL_SPEEDUP
#ifdef SHA3_FUNC_PTR
(*SHA3_BLOCK)(shake->s);
#else
BlockSha3(shake->s);
Expand Down
7 changes: 6 additions & 1 deletion wolfssl/wolfcrypt/sha3.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,13 @@ WOLFSSL_LOCAL void sha3_block_n_bmi2(word64* s, const byte* data, word32 n,
WOLFSSL_LOCAL void sha3_block_bmi2(word64* s);
WOLFSSL_LOCAL void sha3_block_avx2(word64* s);
WOLFSSL_LOCAL void BlockSha3(word64 *s);
#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM)
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
WOLFSSL_LOCAL void BlockSha3_crypto(word64 *s);
#endif
#if defined(WOLFSSL_ARMASM) || defined(WOLFSSL_RISCV_ASM)
WOLFSSL_LOCAL void BlockSha3_base(word64 *s);
WOLFSSL_LOCAL void BlockSha3(word64 *s);
#elif defined(WOLFSSL_ARMASM) || defined(WOLFSSL_RISCV_ASM)
WOLFSSL_LOCAL void BlockSha3(word64 *s);
#endif

Expand Down
5 changes: 1 addition & 4 deletions wolfssl/wolfcrypt/sha512.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,11 @@ struct wc_Sha512 {

#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifndef WOLFSSL_ARMASM_CRYPTO_SHA512
void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data,
word32 len);
#define Transform_Sha512_Len Transform_Sha512_Len_neon
#else
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data,
word32 len);
#define Transform_Sha512_Len Transform_Sha512_Len_crypto
#endif
#else
extern void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data,
Expand Down

0 comments on commit e1851cd

Please sign in to comment.