Add and remove libs and components for Arduino Core 3 (#400)

* Add and remove libs and components for Arduino Core 3

* Add back NimBLE-Arduino in resources
This commit is contained in:
iranl
2024-06-20 18:34:49 +02:00
committed by GitHub
parent 90d13068c9
commit b673fb4d5c
1217 changed files with 118233 additions and 140 deletions

View File

@@ -0,0 +1,3 @@
# Only sub-directory built from here: libsodium.
SUBDIRS = \
libsodium

View File

@@ -0,0 +1,314 @@
# The single libtool library produced by this directory.
lib_LTLIBRARIES = \
libsodium.la
# Sources and private headers that are always part of libsodium.la.
# The conditional sections further down append to this list with "+=".
libsodium_la_SOURCES = \
crypto_aead/aegis128l/aead_aegis128l.c \
crypto_aead/aegis128l/aegis128l_common.h \
crypto_aead/aegis128l/aegis128l_soft.c \
crypto_aead/aegis128l/aegis128l_soft.h \
crypto_aead/aegis128l/implementations.h \
crypto_aead/aegis256/aead_aegis256.c \
crypto_aead/aegis256/aegis256_common.h \
crypto_aead/aegis256/aegis256_soft.c \
crypto_aead/aegis256/aegis256_soft.h \
crypto_aead/aegis256/implementations.h \
crypto_aead/aes256gcm/aead_aes256gcm.c \
crypto_aead/chacha20poly1305/aead_chacha20poly1305.c \
crypto_aead/xchacha20poly1305/aead_xchacha20poly1305.c \
crypto_auth/crypto_auth.c \
crypto_auth/hmacsha256/auth_hmacsha256.c \
crypto_auth/hmacsha512/auth_hmacsha512.c \
crypto_auth/hmacsha512256/auth_hmacsha512256.c \
crypto_box/crypto_box.c \
crypto_box/crypto_box_easy.c \
crypto_box/crypto_box_seal.c \
crypto_box/curve25519xsalsa20poly1305/box_curve25519xsalsa20poly1305.c \
crypto_core/ed25519/ref10/ed25519_ref10.c \
crypto_core/hchacha20/core_hchacha20.c \
crypto_core/hsalsa20/ref2/core_hsalsa20_ref2.c \
crypto_core/hsalsa20/core_hsalsa20.c \
crypto_core/salsa/ref/core_salsa_ref.c \
crypto_core/softaes/softaes.c \
crypto_generichash/crypto_generichash.c \
crypto_generichash/blake2b/generichash_blake2.c \
crypto_generichash/blake2b/ref/blake2.h \
crypto_generichash/blake2b/ref/blake2b-compress-ref.c \
crypto_generichash/blake2b/ref/blake2b-load-sse2.h \
crypto_generichash/blake2b/ref/blake2b-load-sse41.h \
crypto_generichash/blake2b/ref/blake2b-load-avx2.h \
crypto_generichash/blake2b/ref/blake2b-ref.c \
crypto_generichash/blake2b/ref/generichash_blake2b.c \
crypto_hash/crypto_hash.c \
crypto_hash/sha256/hash_sha256.c \
crypto_hash/sha256/cp/hash_sha256_cp.c \
crypto_hash/sha512/hash_sha512.c \
crypto_hash/sha512/cp/hash_sha512_cp.c \
crypto_kdf/blake2b/kdf_blake2b.c \
crypto_kdf/crypto_kdf.c \
crypto_kdf/hkdf/kdf_hkdf_sha256.c \
crypto_kdf/hkdf/kdf_hkdf_sha512.c \
crypto_kx/crypto_kx.c \
crypto_onetimeauth/crypto_onetimeauth.c \
crypto_onetimeauth/poly1305/onetimeauth_poly1305.c \
crypto_onetimeauth/poly1305/onetimeauth_poly1305.h \
crypto_onetimeauth/poly1305/donna/poly1305_donna.h \
crypto_onetimeauth/poly1305/donna/poly1305_donna32.h \
crypto_onetimeauth/poly1305/donna/poly1305_donna64.h \
crypto_onetimeauth/poly1305/donna/poly1305_donna.c \
crypto_pwhash/argon2/argon2-core.c \
crypto_pwhash/argon2/argon2-core.h \
crypto_pwhash/argon2/argon2-encoding.c \
crypto_pwhash/argon2/argon2-encoding.h \
crypto_pwhash/argon2/argon2-fill-block-ref.c \
crypto_pwhash/argon2/argon2.c \
crypto_pwhash/argon2/argon2.h \
crypto_pwhash/argon2/blake2b-long.c \
crypto_pwhash/argon2/blake2b-long.h \
crypto_pwhash/argon2/blamka-round-ref.h \
crypto_pwhash/argon2/pwhash_argon2i.c \
crypto_pwhash/argon2/pwhash_argon2id.c \
crypto_pwhash/crypto_pwhash.c \
crypto_scalarmult/crypto_scalarmult.c \
crypto_scalarmult/curve25519/ref10/x25519_ref10.c \
crypto_scalarmult/curve25519/ref10/x25519_ref10.h \
crypto_scalarmult/curve25519/scalarmult_curve25519.c \
crypto_scalarmult/curve25519/scalarmult_curve25519.h \
crypto_secretbox/crypto_secretbox.c \
crypto_secretbox/crypto_secretbox_easy.c \
crypto_secretbox/xsalsa20poly1305/secretbox_xsalsa20poly1305.c \
crypto_secretstream/xchacha20poly1305/secretstream_xchacha20poly1305.c \
crypto_shorthash/crypto_shorthash.c \
crypto_shorthash/siphash24/shorthash_siphash24.c \
crypto_shorthash/siphash24/ref/shorthash_siphash24_ref.c \
crypto_shorthash/siphash24/ref/shorthash_siphash_ref.h \
crypto_sign/crypto_sign.c \
crypto_sign/ed25519/sign_ed25519.c \
crypto_sign/ed25519/ref10/keypair.c \
crypto_sign/ed25519/ref10/open.c \
crypto_sign/ed25519/ref10/sign.c \
crypto_sign/ed25519/ref10/sign_ed25519_ref10.h \
crypto_stream/chacha20/stream_chacha20.c \
crypto_stream/chacha20/stream_chacha20.h \
crypto_stream/chacha20/ref/chacha20_ref.h \
crypto_stream/chacha20/ref/chacha20_ref.c \
crypto_stream/crypto_stream.c \
crypto_stream/salsa20/stream_salsa20.c \
crypto_stream/salsa20/stream_salsa20.h \
crypto_stream/xsalsa20/stream_xsalsa20.c \
crypto_verify/verify.c \
include/sodium/private/asm_cet.h \
include/sodium/private/chacha20_ietf_ext.h \
include/sodium/private/common.h \
include/sodium/private/ed25519_ref10.h \
include/sodium/private/implementations.h \
include/sodium/private/mutex.h \
include/sodium/private/sse2_64_32.h \
include/sodium/private/softaes.h \
randombytes/randombytes.c \
sodium/codecs.c \
sodium/core.c \
sodium/runtime.c \
sodium/utils.c \
sodium/version.c
# ed25519 field-element headers: the fe_51 set is added when HAVE_TI_MODE is
# true, the fe_25_5 set otherwise. Exactly one of the two sets is listed.
if HAVE_TI_MODE
libsodium_la_SOURCES += \
crypto_core/ed25519/ref10/fe_51/base.h \
crypto_core/ed25519/ref10/fe_51/base2.h \
crypto_core/ed25519/ref10/fe_51/constants.h \
crypto_core/ed25519/ref10/fe_51/fe.h \
include/sodium/private/ed25519_ref10_fe_51.h
else
libsodium_la_SOURCES += \
crypto_core/ed25519/ref10/fe_25_5/base.h \
crypto_core/ed25519/ref10/fe_25_5/base2.h \
crypto_core/ed25519/ref10/fe_25_5/constants.h \
crypto_core/ed25519/ref10/fe_25_5/fe.h \
include/sodium/private/ed25519_ref10_fe_25_5.h
endif
# Salsa20 core: the xmm6 assembly variant when HAVE_AMD64_ASM is true,
# the reference C variant otherwise.
if HAVE_AMD64_ASM
libsodium_la_SOURCES += \
crypto_stream/salsa20/xmm6/salsa20_xmm6-asm.S \
crypto_stream/salsa20/xmm6/salsa20_xmm6.c \
crypto_stream/salsa20/xmm6/salsa20_xmm6.h
else
libsodium_la_SOURCES += \
crypto_stream/salsa20/ref/salsa20_ref.c \
crypto_stream/salsa20/ref/salsa20_ref.h
endif
# NOTE(review): these sandy2x .S files are listed as non-installed headers
# rather than as sources; presumably they are textually included by
# sandy2x.S (listed below) instead of being assembled on their own - confirm.
noinst_HEADERS = \
crypto_scalarmult/curve25519/sandy2x/consts.S \
crypto_scalarmult/curve25519/sandy2x/fe51_mul.S \
crypto_scalarmult/curve25519/sandy2x/fe51_nsquare.S \
crypto_scalarmult/curve25519/sandy2x/fe51_pack.S \
crypto_scalarmult/curve25519/sandy2x/ladder.S
# The sandy2x curve25519 implementation is only compiled when HAVE_AVX_ASM
# is true.
if HAVE_AVX_ASM
libsodium_la_SOURCES += \
crypto_scalarmult/curve25519/sandy2x/consts_namespace.h \
crypto_scalarmult/curve25519/sandy2x/curve25519_sandy2x.c \
crypto_scalarmult/curve25519/sandy2x/curve25519_sandy2x.h \
crypto_scalarmult/curve25519/sandy2x/fe.h \
crypto_scalarmult/curve25519/sandy2x/fe51.h \
crypto_scalarmult/curve25519/sandy2x/fe51_invert.c \
crypto_scalarmult/curve25519/sandy2x/fe51_namespace.h \
crypto_scalarmult/curve25519/sandy2x/fe_frombytes_sandy2x.c \
crypto_scalarmult/curve25519/sandy2x/ladder.h \
crypto_scalarmult/curve25519/sandy2x/ladder_namespace.h \
crypto_scalarmult/curve25519/sandy2x/sandy2x.S
endif
# Extra primitives that are left out when the MINIMAL conditional is true.
if !MINIMAL
libsodium_la_SOURCES += \
crypto_box/curve25519xchacha20poly1305/box_curve25519xchacha20poly1305.c \
crypto_box/curve25519xchacha20poly1305/box_seal_curve25519xchacha20poly1305.c \
crypto_core/ed25519/core_ed25519.c \
crypto_core/ed25519/core_ristretto255.c \
crypto_pwhash/scryptsalsa208sha256/crypto_scrypt-common.c \
crypto_pwhash/scryptsalsa208sha256/crypto_scrypt.h \
crypto_pwhash/scryptsalsa208sha256/scrypt_platform.c \
crypto_pwhash/scryptsalsa208sha256/pbkdf2-sha256.c \
crypto_pwhash/scryptsalsa208sha256/pbkdf2-sha256.h \
crypto_pwhash/scryptsalsa208sha256/pwhash_scryptsalsa208sha256.c \
crypto_pwhash/scryptsalsa208sha256/nosse/pwhash_scryptsalsa208sha256_nosse.c \
crypto_scalarmult/ed25519/ref10/scalarmult_ed25519_ref10.c \
crypto_scalarmult/ristretto255/ref10/scalarmult_ristretto255_ref10.c \
crypto_secretbox/xchacha20poly1305/secretbox_xchacha20poly1305.c \
crypto_shorthash/siphash24/shorthash_siphashx24.c \
crypto_shorthash/siphash24/ref/shorthash_siphashx24_ref.c \
crypto_sign/ed25519/ref10/obsolete.c \
crypto_stream/salsa2012/ref/stream_salsa2012_ref.c \
crypto_stream/salsa2012/stream_salsa2012.c \
crypto_stream/salsa208/ref/stream_salsa208_ref.c \
crypto_stream/salsa208/stream_salsa208.c \
crypto_stream/xchacha20/stream_xchacha20.c
endif
# NOTE(review): no target matching this variable's prefix is declared in this
# file (the rdrand code is built through librdrand.la below) - confirm whether
# this assignment still has any effect.
randombytes_internal_randombytes_internal_random_CFLAGS = @CFLAGS_RDRAND@
# Link flags for libsodium.la; every convenience library below reuses them.
libsodium_la_LDFLAGS = \
$(AM_LDFLAGS) \
-export-dynamic \
-no-undefined \
$(LIBTOOL_EXTRA_FLAGS)
# Preprocessor flags: the sodium include directory is searched in both the
# source tree and the build tree.
libsodium_la_CPPFLAGS = \
$(LTDLINCL) \
-I$(srcdir)/include/sodium \
-I$(builddir)/include/sodium
# When HAVE_LD_OUTPUT_DEF is true, ask the linker to write a .def file, install
# it into $(bindir) and remove it on "make clean". The final rule only records
# that the .def file depends on libsodium.la; it has no recipe of its own.
if HAVE_LD_OUTPUT_DEF
libsodium_la_LDFLAGS += -Wl,--output-def,libsodium-$(DLL_VERSION).def
defexecdir = $(bindir)
defexec_DATA = libsodium-$(DLL_VERSION).def
CLEANFILES = $(defexec_DATA)
libsodium-$(DLL_VERSION).def: libsodium.la
endif
# The include/ sub-directory is handled by its own Makefile.
SUBDIRS = \
include
# Non-installed convenience libraries, one per instruction-set extension, each
# compiled with its own CPPFLAGS and then folded into libsodium.la via LIBADD.
libsodium_la_LIBADD = libaesni.la libarmcrypto.la libsse2.la libssse3.la libsse41.la libavx2.la libavx512f.la
noinst_LTLIBRARIES = libaesni.la libarmcrypto.la libsse2.la libssse3.la libsse41.la libavx2.la libavx512f.la
# RDRAND-enabled build of the internal random generator.
librdrand_la_LDFLAGS = $(libsodium_la_LDFLAGS)
librdrand_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \
@CFLAGS_RDRAND@
librdrand_la_SOURCES = \
randombytes/internal/randombytes_internal_random.c
# librdrand.la and the sysrandom source are left out when EMSCRIPTEN is true.
if !EMSCRIPTEN
libsodium_la_LIBADD += librdrand.la
noinst_LTLIBRARIES += librdrand.la
libsodium_la_SOURCES += \
randombytes/sysrandom/randombytes_sysrandom.c
endif
# AEGIS and AES256-GCM sources compiled with the @CFLAGS_ARMCRYPTO@ flags.
libarmcrypto_la_LDFLAGS = $(libsodium_la_LDFLAGS)
libarmcrypto_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \
@CFLAGS_ARMCRYPTO@
libarmcrypto_la_SOURCES = \
crypto_aead/aegis128l/aegis128l_armcrypto.c \
crypto_aead/aegis128l/aegis128l_armcrypto.h \
crypto_aead/aegis256/aegis256_armcrypto.c \
crypto_aead/aegis256/aegis256_armcrypto.h \
crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c
# AEGIS and AES256-GCM sources compiled with the SSE2/SSSE3/AVX/AESNI/PCLMUL
# flag substitutions.
libaesni_la_LDFLAGS = $(libsodium_la_LDFLAGS)
libaesni_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \
@CFLAGS_SSE2@ @CFLAGS_SSSE3@ @CFLAGS_AVX@ @CFLAGS_AESNI@ @CFLAGS_PCLMUL@
libaesni_la_SOURCES = \
crypto_aead/aegis128l/aegis128l_aesni.c \
crypto_aead/aegis128l/aegis128l_aesni.h \
crypto_aead/aegis256/aegis256_aesni.c \
crypto_aead/aegis256/aegis256_aesni.h \
crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c
# Sources compiled with @CFLAGS_SSE2@.
libsse2_la_LDFLAGS = $(libsodium_la_LDFLAGS)
libsse2_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \
@CFLAGS_SSE2@
libsse2_la_SOURCES = \
crypto_onetimeauth/poly1305/sse2/poly1305_sse2.c \
crypto_onetimeauth/poly1305/sse2/poly1305_sse2.h
# The SSE scrypt variant follows the same !MINIMAL gate as the other scrypt
# sources.
if !MINIMAL
libsse2_la_SOURCES += \
crypto_pwhash/scryptsalsa208sha256/sse/pwhash_scryptsalsa208sha256_sse.c
endif
# The intrinsics-based Salsa20 is only added when the xmm6 assembly variant
# (HAVE_AMD64_ASM) is not in use.
if !HAVE_AMD64_ASM
libsse2_la_SOURCES += \
crypto_stream/salsa20/xmm6int/salsa20_xmm6int-sse2.c \
crypto_stream/salsa20/xmm6int/salsa20_xmm6int-sse2.h \
crypto_stream/salsa20/xmm6int/u0.h \
crypto_stream/salsa20/xmm6int/u1.h \
crypto_stream/salsa20/xmm6int/u4.h
endif
# Sources compiled with @CFLAGS_SSE2@ and @CFLAGS_SSSE3@.
libssse3_la_LDFLAGS = $(libsodium_la_LDFLAGS)
libssse3_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \
@CFLAGS_SSE2@ @CFLAGS_SSSE3@
libssse3_la_SOURCES = \
crypto_generichash/blake2b/ref/blake2b-compress-ssse3.c \
crypto_generichash/blake2b/ref/blake2b-compress-ssse3.h \
crypto_pwhash/argon2/argon2-fill-block-ssse3.c \
crypto_pwhash/argon2/blamka-round-ssse3.h \
crypto_stream/chacha20/dolbeau/chacha20_dolbeau-ssse3.c \
crypto_stream/chacha20/dolbeau/chacha20_dolbeau-ssse3.h \
crypto_stream/chacha20/dolbeau/u0.h \
crypto_stream/chacha20/dolbeau/u1.h \
crypto_stream/chacha20/dolbeau/u4.h
# Sources compiled with the SSE2, SSSE3 and SSE4.1 flag substitutions.
libsse41_la_LDFLAGS = $(libsodium_la_LDFLAGS)
libsse41_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \
@CFLAGS_SSE2@ @CFLAGS_SSSE3@ @CFLAGS_SSE41@
libsse41_la_SOURCES = \
crypto_generichash/blake2b/ref/blake2b-compress-sse41.c \
crypto_generichash/blake2b/ref/blake2b-compress-sse41.h
# Sources compiled with the flag substitutions up to and including AVX2.
# NOTE(review): the xmm6int u0/u1/u4 headers are also listed for libsse2.la
# when HAVE_AMD64_ASM is false - confirm the duplicate listing is intended.
libavx2_la_LDFLAGS = $(libsodium_la_LDFLAGS)
libavx2_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \
@CFLAGS_SSE2@ @CFLAGS_SSSE3@ @CFLAGS_SSE41@ @CFLAGS_AVX@ @CFLAGS_AVX2@
libavx2_la_SOURCES = \
crypto_generichash/blake2b/ref/blake2b-compress-avx2.c \
crypto_generichash/blake2b/ref/blake2b-compress-avx2.h \
crypto_pwhash/argon2/argon2-fill-block-avx2.c \
crypto_pwhash/argon2/blamka-round-avx2.h \
crypto_stream/chacha20/dolbeau/chacha20_dolbeau-avx2.c \
crypto_stream/chacha20/dolbeau/chacha20_dolbeau-avx2.h \
crypto_stream/chacha20/dolbeau/u8.h \
crypto_stream/salsa20/xmm6int/salsa20_xmm6int-avx2.c \
crypto_stream/salsa20/xmm6int/salsa20_xmm6int-avx2.h \
crypto_stream/salsa20/xmm6int/u0.h \
crypto_stream/salsa20/xmm6int/u1.h \
crypto_stream/salsa20/xmm6int/u4.h \
crypto_stream/salsa20/xmm6int/u8.h
# Sources compiled with the flag substitutions up to and including AVX512F.
libavx512f_la_LDFLAGS = $(libsodium_la_LDFLAGS)
libavx512f_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \
@CFLAGS_SSE2@ @CFLAGS_SSSE3@ @CFLAGS_SSE41@ @CFLAGS_AVX@ @CFLAGS_AVX2@ @CFLAGS_AVX512F@
libavx512f_la_SOURCES = \
crypto_pwhash/argon2/argon2-fill-block-avx512f.c \
crypto_pwhash/argon2/blamka-round-avx512f.h

View File

@@ -0,0 +1,159 @@
#include <errno.h>
#include <stdlib.h>
#include "core.h"
#include "crypto_aead_aegis128l.h"
#include "private/common.h"
#include "private/implementations.h"
#include "randombytes.h"
#include "runtime.h"
#include "aegis128l_soft.h"
#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN)
#include "aegis128l_armcrypto.h"
#endif
#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H)
#include "aegis128l_aesni.h"
#endif
/* Backend used by the detached encrypt/decrypt entry points below. It starts
 * out as the software implementation and is reassigned by
 * _crypto_aead_aegis128l_pick_best_implementation(). */
static const aegis128l_implementation *implementation = &aegis128l_soft_implementation;
/* Run-time accessor for the crypto_aead_aegis128l_KEYBYTES constant. */
size_t
crypto_aead_aegis128l_keybytes(void)
{
    const size_t key_len = crypto_aead_aegis128l_KEYBYTES;

    return key_len;
}
/* Run-time accessor for the crypto_aead_aegis128l_NSECBYTES constant. */
size_t
crypto_aead_aegis128l_nsecbytes(void)
{
    const size_t nsec_len = crypto_aead_aegis128l_NSECBYTES;

    return nsec_len;
}
/* Run-time accessor for the crypto_aead_aegis128l_NPUBBYTES constant. */
size_t
crypto_aead_aegis128l_npubbytes(void)
{
    const size_t npub_len = crypto_aead_aegis128l_NPUBBYTES;

    return npub_len;
}
/* Run-time accessor for the crypto_aead_aegis128l_ABYTES constant. */
size_t
crypto_aead_aegis128l_abytes(void)
{
    const size_t tag_len = crypto_aead_aegis128l_ABYTES;

    return tag_len;
}
/* Run-time accessor for the crypto_aead_aegis128l_MESSAGEBYTES_MAX constant. */
size_t
crypto_aead_aegis128l_messagebytes_max(void)
{
    const size_t max_len = crypto_aead_aegis128l_MESSAGEBYTES_MAX;

    return max_len;
}
/* Fill k with crypto_aead_aegis128l_KEYBYTES bytes from randombytes_buf(). */
void
crypto_aead_aegis128l_keygen(unsigned char k[crypto_aead_aegis128l_KEYBYTES])
{
    const size_t key_len = crypto_aead_aegis128l_KEYBYTES;

    randombytes_buf(k, key_len);
}
/*
 * Combined-mode encryption: the ciphertext is written to c and the tag
 * directly after it, at c + mlen.
 *
 * If clen_p is not NULL it receives mlen + crypto_aead_aegis128l_ABYTES when
 * the detached call returns 0, and 0 otherwise.
 * Returns whatever crypto_aead_aegis128l_encrypt_detached() returns.
 */
int
crypto_aead_aegis128l_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m,
                              unsigned long long mlen, const unsigned char *ad,
                              unsigned long long adlen, const unsigned char *nsec,
                              const unsigned char *npub, const unsigned char *k)
{
    const int status = crypto_aead_aegis128l_encrypt_detached(c, c + mlen, NULL, m, mlen, ad,
                                                              adlen, nsec, npub, k);

    if (clen_p != NULL) {
        *clen_p = (status == 0) ? mlen + crypto_aead_aegis128l_ABYTES : 0ULL;
    }
    return status;
}
/*
 * Combined-mode decryption: the last crypto_aead_aegis128l_ABYTES bytes of c
 * are treated as the tag and the bytes before them as the ciphertext.
 *
 * Inputs shorter than a tag are rejected with -1 without calling the detached
 * routine. If mlen_p is not NULL it receives the plaintext length on success
 * and 0 on any failure.
 */
int
crypto_aead_aegis128l_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec,
                              const unsigned char *c, unsigned long long clen,
                              const unsigned char *ad, unsigned long long adlen,
                              const unsigned char *npub, const unsigned char *k)
{
    unsigned long long plain_len = 0ULL;
    int                status    = -1;

    if (clen >= crypto_aead_aegis128l_ABYTES) {
        const unsigned long long body_len = clen - crypto_aead_aegis128l_ABYTES;

        status = crypto_aead_aegis128l_decrypt_detached(m, nsec, c, body_len, c + body_len, ad,
                                                        adlen, npub, k);
        if (status == 0) {
            plain_len = body_len;
        }
    }
    if (mlen_p != NULL) {
        *mlen_p = plain_len;
    }
    return status;
}
/*
 * Detached encryption: ciphertext goes to c, the
 * crypto_aead_aegis128l_ABYTES-byte tag to mac.
 *
 * If maclen_p is not NULL it receives the tag length before any validation.
 * sodium_misuse() is called when mlen or adlen exceeds
 * crypto_aead_aegis128l_MESSAGEBYTES_MAX. nsec is not used.
 * Returns the selected backend's encrypt_detached() result.
 */
int
crypto_aead_aegis128l_encrypt_detached(unsigned char *c, unsigned char *mac,
                                       unsigned long long *maclen_p, const unsigned char *m,
                                       unsigned long long mlen, const unsigned char *ad,
                                       unsigned long long adlen, const unsigned char *nsec,
                                       const unsigned char *npub, const unsigned char *k)
{
    const size_t tag_len   = crypto_aead_aegis128l_ABYTES;
    const int    oversized = mlen > crypto_aead_aegis128l_MESSAGEBYTES_MAX ||
                             adlen > crypto_aead_aegis128l_MESSAGEBYTES_MAX;

    if (maclen_p != NULL) {
        *maclen_p = tag_len;
    }
    if (oversized) {
        sodium_misuse();
    }
    return implementation->encrypt_detached(c, mac, tag_len, m, (size_t) mlen, ad,
                                            (size_t) adlen, npub, k);
}
/*
 * Detached decryption: verifies mac over (ad, c) and writes the plaintext
 * to m through the selected backend.
 *
 * Returns -1 without touching the backend when clen or adlen exceeds
 * crypto_aead_aegis128l_MESSAGEBYTES_MAX; otherwise returns the backend's
 * decrypt_detached() result. nsec is not used.
 */
int
crypto_aead_aegis128l_decrypt_detached(unsigned char *m, unsigned char *nsec,
                                       const unsigned char *c, unsigned long long clen,
                                       const unsigned char *mac, const unsigned char *ad,
                                       unsigned long long adlen, const unsigned char *npub,
                                       const unsigned char *k)
{
    const size_t tag_len   = crypto_aead_aegis128l_ABYTES;
    const int    oversized = clen > crypto_aead_aegis128l_MESSAGEBYTES_MAX ||
                             adlen > crypto_aead_aegis128l_MESSAGEBYTES_MAX;

    if (oversized) {
        return -1;
    }
    return implementation->decrypt_detached(m, c, (size_t) clen, mac, tag_len, ad,
                                            (size_t) adlen, npub, k);
}
/*
 * Select the AEGIS-128L backend used by this translation unit.
 *
 * The software implementation is installed first, then replaced by the first
 * hardware backend that is both compiled in and reported available by the
 * sodium_runtime_has_*() probes: ARM crypto first, then AES-NI with AVX.
 *
 * Always returns 0.
 */
int
_crypto_aead_aegis128l_pick_best_implementation(void)
{
    implementation = &aegis128l_soft_implementation;

#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN)
    if (sodium_runtime_has_armcrypto()) {
        implementation = &aegis128l_armcrypto_implementation;
        return 0;
    }
#endif

#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H)
    /* Logical AND, not bitwise: both probes are truth values, and `&` would
     * give 0 for two non-zero results that share no set bit. */
    if (sodium_runtime_has_aesni() && sodium_runtime_has_avx()) {
        implementation = &aegis128l_aesni_implementation;
        return 0;
    }
#endif
    return 0; /* LCOV_EXCL_LINE */
}

View File

@@ -0,0 +1,70 @@
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "core.h"
#include "crypto_aead_aegis128l.h"
#include "crypto_verify_16.h"
#include "crypto_verify_32.h"
#include "export.h"
#include "utils.h"
#include "private/common.h"
#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H)
#include "aegis128l_aesni.h"
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("aes,avx"))), apply_to = function)
#elif defined(__GNUC__)
#pragma GCC target("aes,avx")
#endif
#include "private/sse2_64_32.h"
#include <immintrin.h>
#include <wmmintrin.h>
/* Block abstraction consumed by aegis128l_common.h, mapped onto SSE/AES-NI
 * intrinsics. One block is a 16-byte __m128i. */
#define AES_BLOCK_LENGTH 16
typedef __m128i aes_block_t;
#define AES_BLOCK_XOR(A, B) _mm_xor_si128((A), (B))
#define AES_BLOCK_AND(A, B) _mm_and_si128((A), (B))
/* Load and store use the unaligned (loadu/storeu) intrinsics. */
#define AES_BLOCK_LOAD(A) _mm_loadu_si128((const aes_block_t *) (const void *) (A))
/* Build a block from two 64-bit integers; A is passed as the first argument
 * of _mm_set_epi64x and B as the second. */
#define AES_BLOCK_LOAD_64x2(A, B) _mm_set_epi64x((long long) (A), (long long) (B))
#define AES_BLOCK_STORE(A, B) _mm_storeu_si128((aes_block_t *) (void *) (A), (B))
/* One _mm_aesenc_si128 round applied to A with B as the round-key operand. */
#define AES_ENC(A, B) _mm_aesenc_si128((A), (B))
/*
 * One AEGIS-128L state update.
 *
 * Each of the eight state blocks is replaced by AES_ENC(previous block, own
 * block), with block 7 feeding block 0. d1 is XORed into the new block 0 and
 * d2 into the new block 4. Blocks are rewritten from index 7 downwards so
 * every AES_ENC still reads the pre-update value of its lower neighbour.
 */
static inline void
aegis128l_update(aes_block_t *const state, const aes_block_t d1, const aes_block_t d2)
{
    const aes_block_t last = state[7];

    state[7] = AES_ENC(state[6], state[7]);
    state[6] = AES_ENC(state[5], state[6]);
    state[5] = AES_ENC(state[4], state[5]);
    state[4] = AES_BLOCK_XOR(AES_ENC(state[3], state[4]), d2);
    state[3] = AES_ENC(state[2], state[3]);
    state[2] = AES_ENC(state[1], state[2]);
    state[1] = AES_ENC(state[0], state[1]);
    state[0] = AES_BLOCK_XOR(AES_ENC(last, state[0]), d1);
}
#include "aegis128l_common.h"
/* Function table exposing this file's AES-NI build of the encrypt_detached /
 * decrypt_detached routines pulled in from aegis128l_common.h. */
struct aegis128l_implementation aegis128l_aesni_implementation = { SODIUM_C99(.encrypt_detached =)
encrypt_detached,
SODIUM_C99(.decrypt_detached =)
decrypt_detached };
#ifdef __clang__
#pragma clang attribute pop
#endif
#endif

View File

@@ -0,0 +1,8 @@
#ifndef aegis128l_aesni_H
#define aegis128l_aesni_H
#include "implementations.h"
/* Function table for the AES-NI build of AEGIS-128L; declared here, defined
 * elsewhere. */
extern struct aegis128l_implementation aegis128l_aesni_implementation;
#endif

View File

@@ -0,0 +1,72 @@
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "core.h"
#include "crypto_aead_aegis128l.h"
#include "crypto_verify_16.h"
#include "crypto_verify_32.h"
#include "export.h"
#include "utils.h"
#include "private/common.h"
#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN)
#include "aegis128l_armcrypto.h"
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("neon,crypto,aes"))), apply_to = function)
#elif defined(__GNUC__)
#pragma GCC target("+simd+crypto")
#endif
#ifndef __ARM_FEATURE_CRYPTO
#define __ARM_FEATURE_CRYPTO 1
#endif
#ifndef __ARM_FEATURE_AES
#define __ARM_FEATURE_AES 1
#endif
#include <arm_neon.h>
/* Block abstraction consumed by aegis128l_common.h, mapped onto NEON / ARM
 * crypto intrinsics. One block is a 16-byte uint8x16_t. */
#define AES_BLOCK_LENGTH 16
typedef uint8x16_t aes_block_t;
#define AES_BLOCK_XOR(A, B) veorq_u8((A), (B))
#define AES_BLOCK_AND(A, B) vandq_u8((A), (B))
#define AES_BLOCK_LOAD(A) vld1q_u8(A)
/* Build a block from two 64-bit integers: B is broadcast to both lanes, then
 * lane 1 is overwritten with A. */
#define AES_BLOCK_LOAD_64x2(A, B) vreinterpretq_u8_u64(vsetq_lane_u64((A), vmovq_n_u64(B), 1))
#define AES_BLOCK_STORE(A, B) vst1q_u8((A), (B))
/* vaeseq_u8 with an all-zero key, then vaesmcq_u8, then XOR with B. */
#define AES_ENC(A, B) veorq_u8(vaesmcq_u8(vaeseq_u8((A), vmovq_n_u8(0))), (B))
/*
 * One AEGIS-128L state update.
 *
 * Every state block becomes AES_ENC(previous block, own block), with block 7
 * wrapping around to feed block 0. After the rotation, d1 is XORed into
 * block 0 and d2 into block 4.
 */
static inline void
aegis128l_update(aes_block_t *const state, const aes_block_t d1, const aes_block_t d2)
{
    const aes_block_t last = state[7];

    /* Walk downwards so each step still sees its lower neighbour's old value. */
    state[7] = AES_ENC(state[6], state[7]);
    state[6] = AES_ENC(state[5], state[6]);
    state[5] = AES_ENC(state[4], state[5]);
    state[4] = AES_ENC(state[3], state[4]);
    state[3] = AES_ENC(state[2], state[3]);
    state[2] = AES_ENC(state[1], state[2]);
    state[1] = AES_ENC(state[0], state[1]);
    state[0] = AES_ENC(last, state[0]);

    /* Inject the two data blocks. */
    state[0] = AES_BLOCK_XOR(state[0], d1);
    state[4] = AES_BLOCK_XOR(state[4], d2);
}
#include "aegis128l_common.h"
/* Function table exposing this file's ARM crypto build of the
 * encrypt_detached / decrypt_detached routines pulled in from
 * aegis128l_common.h. */
struct aegis128l_implementation aegis128l_armcrypto_implementation = {
SODIUM_C99(.encrypt_detached =) encrypt_detached,
SODIUM_C99(.decrypt_detached =) decrypt_detached
};
#ifdef __clang__
#pragma clang attribute pop
#endif
#endif

View File

@@ -0,0 +1,8 @@
#ifndef aegis128l_armcrypto_H
#define aegis128l_armcrypto_H
#include "implementations.h"
/* Function table for the ARM crypto build of AEGIS-128L; declared here,
 * defined elsewhere. */
extern struct aegis128l_implementation aegis128l_armcrypto_implementation;
#endif

View File

@@ -0,0 +1,229 @@
#define RATE 32
/*
 * Initialise the eight-block state from a key and a nonce.
 *
 * key and nonce are each read as one AES block. The state is seeded from
 * key ^ nonce, the two fixed constants and key ^ constant, then mixed with
 * ten update rounds that feed in (nonce, key).
 */
static void
aegis128l_init(const uint8_t *key, const uint8_t *nonce, aes_block_t *const state)
{
    static CRYPTO_ALIGN(AES_BLOCK_LENGTH)
    const uint8_t c0_bytes[AES_BLOCK_LENGTH] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
                                                 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 };
    static CRYPTO_ALIGN(AES_BLOCK_LENGTH)
    const uint8_t c1_bytes[AES_BLOCK_LENGTH] = { 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
                                                 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd };

    const aes_block_t c0       = AES_BLOCK_LOAD(c0_bytes);
    const aes_block_t c1       = AES_BLOCK_LOAD(c1_bytes);
    const aes_block_t k        = AES_BLOCK_LOAD(key);
    const aes_block_t n        = AES_BLOCK_LOAD(nonce);
    const aes_block_t k_xor_n  = AES_BLOCK_XOR(k, n);
    const aes_block_t k_xor_c0 = AES_BLOCK_XOR(k, c0);
    const aes_block_t k_xor_c1 = AES_BLOCK_XOR(k, c1);
    int               round;

    state[0] = k_xor_n;
    state[1] = c1;
    state[2] = c0;
    state[3] = c1;
    state[4] = k_xor_n;
    state[5] = k_xor_c0;
    state[6] = k_xor_c1;
    state[7] = k_xor_c0;

    for (round = 0; round < 10; round++) {
        aegis128l_update(state, n, k);
    }
}
/*
 * Finalise the state and write the authentication tag to mac.
 *
 * The bit lengths of the message and of the associated data (byte lengths
 * shifted left by 3) are packed into one block, XORed with state[2] and fed
 * through seven update rounds. The tag is then an XOR of state blocks:
 *   maclen == 16: blocks 0..6 folded into 16 bytes;
 *   maclen == 32: blocks 0..3 in the first 16 bytes, blocks 4..7 in the next;
 *   any other length: mac is filled with maclen zero bytes.
 */
static void
aegis128l_mac(uint8_t *mac, size_t maclen, size_t adlen, size_t mlen, aes_block_t *const state)
{
    aes_block_t acc;
    int         round;

    acc = AES_BLOCK_LOAD_64x2(((uint64_t) mlen) << 3, ((uint64_t) adlen) << 3);
    acc = AES_BLOCK_XOR(acc, state[2]);
    for (round = 0; round < 7; round++) {
        aegis128l_update(state, acc, acc);
    }

    switch (maclen) {
    case 16:
        acc = AES_BLOCK_XOR(state[6], AES_BLOCK_XOR(state[5], state[4]));
        acc = AES_BLOCK_XOR(acc, AES_BLOCK_XOR(state[3], state[2]));
        acc = AES_BLOCK_XOR(acc, AES_BLOCK_XOR(state[1], state[0]));
        AES_BLOCK_STORE(mac, acc);
        break;
    case 32:
        acc = AES_BLOCK_XOR(state[3], state[2]);
        acc = AES_BLOCK_XOR(acc, AES_BLOCK_XOR(state[1], state[0]));
        AES_BLOCK_STORE(mac, acc);
        acc = AES_BLOCK_XOR(state[7], state[6]);
        acc = AES_BLOCK_XOR(acc, AES_BLOCK_XOR(state[5], state[4]));
        AES_BLOCK_STORE(mac + 16, acc);
        break;
    default:
        memset(mac, 0, maclen);
        break;
    }
}
/* Feed two consecutive AES blocks read from src into the state without
 * producing any output. */
static inline void
aegis128l_absorb(const uint8_t *const src, aes_block_t *const state)
{
    const aes_block_t lo = AES_BLOCK_LOAD(src);
    const aes_block_t hi = AES_BLOCK_LOAD(src + AES_BLOCK_LENGTH);

    aegis128l_update(state, lo, hi);
}
/*
 * Encrypt two consecutive AES blocks from src into dst.
 *
 * Each plaintext block is XORed with a mask derived from the current state
 * (blocks 6, 1 and 2 & 3 for the first; blocks 5, 2 and 6 & 7 for the
 * second), then the plaintext is fed into the state update. Both input
 * blocks are loaded before anything is stored.
 */
static void
aegis128l_enc(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state)
{
    const aes_block_t in0 = AES_BLOCK_LOAD(src);
    const aes_block_t in1 = AES_BLOCK_LOAD(src + AES_BLOCK_LENGTH);
    aes_block_t       out0;
    aes_block_t       out1;

    out0 = AES_BLOCK_XOR(AES_BLOCK_XOR(in0, state[6]), state[1]);
    out0 = AES_BLOCK_XOR(out0, AES_BLOCK_AND(state[2], state[3]));

    out1 = AES_BLOCK_XOR(AES_BLOCK_XOR(in1, state[5]), state[2]);
    out1 = AES_BLOCK_XOR(out1, AES_BLOCK_AND(state[6], state[7]));

    AES_BLOCK_STORE(dst, out0);
    AES_BLOCK_STORE(dst + AES_BLOCK_LENGTH, out1);

    aegis128l_update(state, in0, in1);
}
/*
 * Decrypt two consecutive AES blocks from src into dst.
 *
 * The ciphertext blocks are XORed with the same state-derived masks that
 * aegis128l_enc() uses, and the recovered plaintext (not the ciphertext) is
 * fed into the state update.
 */
static void
aegis128l_dec(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state)
{
    const aes_block_t ct0 = AES_BLOCK_LOAD(src);
    const aes_block_t ct1 = AES_BLOCK_LOAD(src + AES_BLOCK_LENGTH);
    aes_block_t       pt0;
    aes_block_t       pt1;

    pt0 = AES_BLOCK_XOR(AES_BLOCK_XOR(ct0, state[6]), state[1]);
    pt0 = AES_BLOCK_XOR(pt0, AES_BLOCK_AND(state[2], state[3]));

    pt1 = AES_BLOCK_XOR(AES_BLOCK_XOR(ct1, state[5]), state[2]);
    pt1 = AES_BLOCK_XOR(pt1, AES_BLOCK_AND(state[6], state[7]));

    AES_BLOCK_STORE(dst, pt0);
    AES_BLOCK_STORE(dst + AES_BLOCK_LENGTH, pt1);

    aegis128l_update(state, pt0, pt1);
}
/*
 * Decrypt the final, partial chunk: len bytes of src are decrypted into dst
 * and the recovered plaintext, zero-padded to RATE bytes, is fed into the
 * state update. Callers in this file pass len = mlen % RATE.
 */
static void
aegis128l_declast(uint8_t *const dst, const uint8_t *const src, size_t len,
aes_block_t *const state)
{
uint8_t pad[RATE];
aes_block_t msg0, msg1;
/* Stage the ciphertext in a zero-filled RATE-byte buffer. */
memset(pad, 0, sizeof pad);
memcpy(pad, src, len);
msg0 = AES_BLOCK_LOAD(pad);
msg1 = AES_BLOCK_LOAD(pad + AES_BLOCK_LENGTH);
/* Same masks as aegis128l_dec(). */
msg0 = AES_BLOCK_XOR(msg0, state[6]);
msg0 = AES_BLOCK_XOR(msg0, state[1]);
msg1 = AES_BLOCK_XOR(msg1, state[5]);
msg1 = AES_BLOCK_XOR(msg1, state[2]);
msg0 = AES_BLOCK_XOR(msg0, AES_BLOCK_AND(state[2], state[3]));
msg1 = AES_BLOCK_XOR(msg1, AES_BLOCK_AND(state[6], state[7]));
AES_BLOCK_STORE(pad, msg0);
AES_BLOCK_STORE(pad + AES_BLOCK_LENGTH, msg1);
/* Bytes past len were zero ciphertext XOR mask, not plaintext: clear them so
 * the state absorbs the plaintext followed by zeros. */
memset(pad + len, 0, sizeof pad - len);
memcpy(dst, pad, len);
/* Reload the cleaned buffer and feed it into the state. */
msg0 = AES_BLOCK_LOAD(pad);
msg1 = AES_BLOCK_LOAD(pad + AES_BLOCK_LENGTH);
aegis128l_update(state, msg0, msg1);
}
/*
 * Backend entry point: encrypt mlen bytes of m into c and write a maclen-byte
 * tag to mac, authenticating adlen bytes of ad as well.
 *
 * Data is processed RATE bytes at a time; a trailing partial chunk of either
 * input is zero-padded in a local buffer, and only the real bytes of the
 * last ciphertext chunk are copied out. Always returns 0.
 */
static int
encrypt_detached(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size_t mlen,
                 const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k)
{
    aes_block_t                state[8];
    CRYPTO_ALIGN(RATE) uint8_t src[RATE];
    CRYPTO_ALIGN(RATE) uint8_t dst[RATE];
    const size_t               ad_tail  = adlen % RATE;
    const size_t               msg_tail = mlen % RATE;
    size_t                     i;

    aegis128l_init(k, npub, state);

    /* Associated data: full chunks, then the padded remainder. */
    i = 0;
    while (i + RATE <= adlen) {
        aegis128l_absorb(ad + i, state);
        i += RATE;
    }
    if (ad_tail != 0) {
        memset(src, 0, RATE);
        memcpy(src, ad + i, ad_tail);
        aegis128l_absorb(src, state);
    }

    /* Message: full chunks, then the padded remainder. */
    i = 0;
    while (i + RATE <= mlen) {
        aegis128l_enc(c + i, m + i, state);
        i += RATE;
    }
    if (msg_tail != 0) {
        memset(src, 0, RATE);
        memcpy(src, m + i, msg_tail);
        aegis128l_enc(dst, src, state);
        memcpy(c + i, dst, msg_tail);
    }

    aegis128l_mac(mac, maclen, adlen, mlen, state);

    return 0;
}
/*
 * Backend entry point: verify mac over (ad, c) and decrypt clen bytes of c.
 *
 * m may be NULL, in which case the plaintext is written to a scratch buffer
 * and discarded, so only the tag is checked. The tag is recomputed and
 * compared with crypto_verify_16() or crypto_verify_32() depending on maclen;
 * any other maclen yields -1. When verification fails and m is not NULL, the
 * mlen bytes already written to m are zeroed.
 *
 * Returns 0 when the tag matches, non-zero otherwise.
 */
static int
decrypt_detached(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac, size_t maclen,
                 const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k)
{
    aes_block_t                state[8];
    CRYPTO_ALIGN(RATE) uint8_t src[RATE];
    CRYPTO_ALIGN(RATE) uint8_t dst[RATE];
    CRYPTO_ALIGN(16) uint8_t   computed_mac[32];
    const size_t               mlen     = clen;
    const size_t               ad_tail  = adlen % RATE;
    const size_t               msg_tail = mlen % RATE;
    size_t                     i;
    int                        ret = -1;

    aegis128l_init(k, npub, state);

    /* Associated data: full chunks, then the padded remainder. */
    for (i = 0; i + RATE <= adlen; i += RATE) {
        aegis128l_absorb(ad + i, state);
    }
    if (ad_tail != 0) {
        memset(src, 0, RATE);
        memcpy(src, ad + i, ad_tail);
        aegis128l_absorb(src, state);
    }

    /* Ciphertext: output goes to m when provided, to the scratch buffer otherwise. */
    for (i = 0; i + RATE <= mlen; i += RATE) {
        aegis128l_dec(m != NULL ? m + i : dst, c + i, state);
    }
    if (msg_tail != 0) {
        aegis128l_declast(m != NULL ? m + i : dst, c + i, msg_tail, state);
    }

    COMPILER_ASSERT(sizeof computed_mac >= 32);
    aegis128l_mac(computed_mac, maclen, adlen, mlen, state);

    if (maclen == 16) {
        ret = crypto_verify_16(computed_mac, mac);
    } else if (maclen == 32) {
        ret = crypto_verify_32(computed_mac, mac);
    }
    if (ret != 0 && m != NULL) {
        memset(m, 0, mlen);
    }
    return ret;
}

View File

@@ -0,0 +1,59 @@
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "core.h"
#include "crypto_aead_aegis128l.h"
#include "crypto_verify_16.h"
#include "crypto_verify_32.h"
#include "export.h"
#include "utils.h"
#include "private/common.h"
#include "crypto_aead_aegis128l.h"
#include "private/softaes.h"
#if 1
#include "aegis128l_soft.h"
/* Block abstraction consumed by aegis128l_common.h, mapped onto the softaes_*
 * helpers. One block is a SoftAesBlock. */
#define AES_BLOCK_LENGTH 16
typedef SoftAesBlock aes_block_t;
#define AES_BLOCK_XOR(A, B) softaes_block_xor((A), (B))
#define AES_BLOCK_AND(A, B) softaes_block_and((A), (B))
#define AES_BLOCK_LOAD(A) softaes_block_load(A)
#define AES_BLOCK_LOAD_64x2(A, B) softaes_block_load64x2((A), (B))
#define AES_BLOCK_STORE(A, B) softaes_block_store((A), (B))
#define AES_ENC(A, B) softaes_block_encrypt((A), (B))
/*
 * One AEGIS-128L state update.
 *
 * Every state block becomes AES_ENC(previous block, own block), with block 7
 * wrapping around to feed block 0; d1 is then XORed into block 0 and d2 into
 * block 4.
 */
static inline void
aegis128l_update(aes_block_t *const state, const aes_block_t d1, const aes_block_t d2)
{
    const aes_block_t wrap = state[7];
    int               i;

    /* Highest index first, so state[i - 1] is still the pre-update value. */
    for (i = 7; i > 0; i--) {
        state[i] = AES_ENC(state[i - 1], state[i]);
    }
    state[0] = AES_ENC(wrap, state[0]);

    state[0] = AES_BLOCK_XOR(state[0], d1);
    state[4] = AES_BLOCK_XOR(state[4], d2);
}
#include "aegis128l_common.h"
/* Function table exposing this file's software (softaes) build of the
 * encrypt_detached / decrypt_detached routines pulled in from
 * aegis128l_common.h. */
struct aegis128l_implementation aegis128l_soft_implementation = { SODIUM_C99(.encrypt_detached =)
encrypt_detached,
SODIUM_C99(.decrypt_detached =)
decrypt_detached };
#endif

View File

@@ -0,0 +1,8 @@
#ifndef aegis128l_soft_H
#define aegis128l_soft_H
#include "implementations.h"
/* Function table for the software build of AEGIS-128L; declared here, defined
 * elsewhere. */
extern struct aegis128l_implementation aegis128l_soft_implementation;
#endif

View File

@@ -0,0 +1,17 @@
#ifndef aegis128l_implementations_H
#define aegis128l_implementations_H
#include <stddef.h>
#include <stdint.h>
#include "crypto_aead_aegis128l.h"
/* Function table that each AEGIS-128L backend fills in. */
typedef struct aegis128l_implementation {
/* Encrypts mlen bytes of m into c and writes a maclen-byte tag to mac;
 * ad/adlen is the associated data, npub the nonce, k the key. */
int (*encrypt_detached)(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size_t mlen,
const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k);
/* Decrypts clen bytes of c into m after checking the maclen-byte tag in mac;
 * returns an int status. */
int (*decrypt_detached)(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac,
size_t maclen, const uint8_t *ad, size_t adlen, const uint8_t *npub,
const uint8_t *k);
} aegis128l_implementation;
#endif

View File

@@ -0,0 +1,158 @@
#include <errno.h>
#include <stdlib.h>
#include "core.h"
#include "crypto_aead_aegis256.h"
#include "private/common.h"
#include "private/implementations.h"
#include "randombytes.h"
#include "runtime.h"
#include "aegis256_soft.h"
#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN)
#include "aegis256_armcrypto.h"
#endif
#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H)
#include "aegis256_aesni.h"
#endif
/* Backend used by the detached encrypt/decrypt entry points below. It starts
 * out as the software implementation and is reassigned by
 * _crypto_aead_aegis256_pick_best_implementation(). */
static const aegis256_implementation *implementation = &aegis256_soft_implementation;
/* Run-time accessor for the crypto_aead_aegis256_KEYBYTES constant. */
size_t
crypto_aead_aegis256_keybytes(void)
{
    const size_t key_len = crypto_aead_aegis256_KEYBYTES;

    return key_len;
}
/* Run-time accessor for the crypto_aead_aegis256_NSECBYTES constant. */
size_t
crypto_aead_aegis256_nsecbytes(void)
{
    const size_t nsec_len = crypto_aead_aegis256_NSECBYTES;

    return nsec_len;
}
/* Run-time accessor for the crypto_aead_aegis256_NPUBBYTES constant. */
size_t
crypto_aead_aegis256_npubbytes(void)
{
    const size_t npub_len = crypto_aead_aegis256_NPUBBYTES;

    return npub_len;
}
/* Run-time accessor for the crypto_aead_aegis256_ABYTES constant. */
size_t
crypto_aead_aegis256_abytes(void)
{
    const size_t tag_len = crypto_aead_aegis256_ABYTES;

    return tag_len;
}
/* Run-time accessor for the crypto_aead_aegis256_MESSAGEBYTES_MAX constant. */
size_t
crypto_aead_aegis256_messagebytes_max(void)
{
    const size_t max_len = crypto_aead_aegis256_MESSAGEBYTES_MAX;

    return max_len;
}
/* Fill k with crypto_aead_aegis256_KEYBYTES bytes from randombytes_buf(). */
void
crypto_aead_aegis256_keygen(unsigned char k[crypto_aead_aegis256_KEYBYTES])
{
    const size_t key_len = crypto_aead_aegis256_KEYBYTES;

    randombytes_buf(k, key_len);
}
/*
 * Combined-mode encryption: the ciphertext is written to c and the tag
 * directly after it, at c + mlen.
 *
 * If clen_p is not NULL it receives mlen + crypto_aead_aegis256_ABYTES when
 * the detached call returns 0, and 0 otherwise.
 * Returns whatever crypto_aead_aegis256_encrypt_detached() returns.
 */
int
crypto_aead_aegis256_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m,
                             unsigned long long mlen, const unsigned char *ad,
                             unsigned long long adlen, const unsigned char *nsec,
                             const unsigned char *npub, const unsigned char *k)
{
    const int status = crypto_aead_aegis256_encrypt_detached(c, c + mlen, NULL, m, mlen, ad,
                                                             adlen, nsec, npub, k);

    if (clen_p != NULL) {
        *clen_p = (status == 0) ? mlen + crypto_aead_aegis256_ABYTES : 0ULL;
    }
    return status;
}
/*
 * Combined-mode decryption: the last crypto_aead_aegis256_ABYTES bytes of c
 * are treated as the tag and the bytes before them as the ciphertext.
 *
 * Inputs shorter than a tag are rejected with -1 without calling the detached
 * routine. If mlen_p is not NULL it receives the plaintext length on success
 * and 0 on any failure.
 */
int
crypto_aead_aegis256_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec,
                             const unsigned char *c, unsigned long long clen,
                             const unsigned char *ad, unsigned long long adlen,
                             const unsigned char *npub, const unsigned char *k)
{
    unsigned long long plain_len = 0ULL;
    int                status    = -1;

    if (clen >= crypto_aead_aegis256_ABYTES) {
        const unsigned long long body_len = clen - crypto_aead_aegis256_ABYTES;

        status = crypto_aead_aegis256_decrypt_detached(m, nsec, c, body_len, c + body_len, ad,
                                                       adlen, npub, k);
        if (status == 0) {
            plain_len = body_len;
        }
    }
    if (mlen_p != NULL) {
        *mlen_p = plain_len;
    }
    return status;
}
/*
 * Detached encryption: ciphertext goes to c, the
 * crypto_aead_aegis256_ABYTES-byte tag to mac.
 *
 * If maclen_p is not NULL it receives the tag length before any validation.
 * sodium_misuse() is called when mlen or adlen exceeds
 * crypto_aead_aegis256_MESSAGEBYTES_MAX. nsec is not used.
 * Returns the selected backend's encrypt_detached() result.
 */
int
crypto_aead_aegis256_encrypt_detached(unsigned char *c, unsigned char *mac,
                                      unsigned long long *maclen_p, const unsigned char *m,
                                      unsigned long long mlen, const unsigned char *ad,
                                      unsigned long long adlen, const unsigned char *nsec,
                                      const unsigned char *npub, const unsigned char *k)
{
    const size_t tag_len   = crypto_aead_aegis256_ABYTES;
    const int    oversized = mlen > crypto_aead_aegis256_MESSAGEBYTES_MAX ||
                             adlen > crypto_aead_aegis256_MESSAGEBYTES_MAX;

    if (maclen_p != NULL) {
        *maclen_p = tag_len;
    }
    if (oversized) {
        sodium_misuse();
    }
    return implementation->encrypt_detached(c, mac, tag_len, m, (size_t) mlen, ad,
                                            (size_t) adlen, npub, k);
}
/*
 * Detached decryption: verifies mac over (ad, c) and writes the plaintext
 * to m through the selected backend.
 *
 * Returns -1 without touching the backend when clen or adlen exceeds
 * crypto_aead_aegis256_MESSAGEBYTES_MAX; otherwise returns the backend's
 * decrypt_detached() result. nsec is not used.
 */
int
crypto_aead_aegis256_decrypt_detached(unsigned char *m, unsigned char *nsec, const unsigned char *c,
                                      unsigned long long clen, const unsigned char *mac,
                                      const unsigned char *ad, unsigned long long adlen,
                                      const unsigned char *npub, const unsigned char *k)
{
    const size_t tag_len   = crypto_aead_aegis256_ABYTES;
    const int    oversized = clen > crypto_aead_aegis256_MESSAGEBYTES_MAX ||
                             adlen > crypto_aead_aegis256_MESSAGEBYTES_MAX;

    if (oversized) {
        return -1;
    }
    return implementation->decrypt_detached(m, c, (size_t) clen, mac, tag_len, ad,
                                            (size_t) adlen, npub, k);
}
/*
 * Select the AEGIS-256 backend used through the file-level `implementation`
 * pointer.
 *
 * The software backend is installed first. It is then replaced by the ARM
 * crypto backend or the AES-NI/AVX backend when that backend was compiled in
 * (see the #if guards) and the matching runtime probe reports support.
 * Always returns 0.
 */
int
_crypto_aead_aegis256_pick_best_implementation(void)
{
/* Default that is kept when no accelerated backend is usable. */
implementation = &aegis256_soft_implementation;
#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN)
if (sodium_runtime_has_armcrypto()) {
implementation = &aegis256_armcrypto_implementation;
return 0;
}
#endif
#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H)
/* NOTE(review): bitwise `&`, so both probes are always evaluated; this
 * presumably relies on each probe returning 0 or 1 -- confirm. */
if (sodium_runtime_has_aesni() & sodium_runtime_has_avx()) {
implementation = &aegis256_aesni_implementation;
return 0;
}
#endif
return 0; /* LCOV_EXCL_LINE */
}

View File

@@ -0,0 +1,65 @@
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "core.h"
#include "crypto_aead_aegis256.h"
#include "crypto_verify_16.h"
#include "crypto_verify_32.h"
#include "export.h"
#include "utils.h"
#include "private/common.h"
#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H)
#include "aegis256_aesni.h"
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("aes,avx"))), apply_to = function)
#elif defined(__GNUC__)
#pragma GCC target("aes,avx")
#endif
#include "private/sse2_64_32.h"
#include <immintrin.h>
#include <wmmintrin.h>
/* Size in bytes of one AES block, which is also one AEGIS state word. */
#define AES_BLOCK_LENGTH 16
/* In this backend a block lives in a 128-bit SSE register. */
typedef __m128i aes_block_t;
/* Bitwise XOR and AND of two blocks. */
#define AES_BLOCK_XOR(A, B) _mm_xor_si128((A), (B))
#define AES_BLOCK_AND(A, B) _mm_and_si128((A), (B))
/* Unaligned 16-byte load from address A. */
#define AES_BLOCK_LOAD(A) _mm_loadu_si128((const aes_block_t *) (const void *) (A))
/* Build a block from two 64-bit integers: A in the upper half, B in the lower half. */
#define AES_BLOCK_LOAD_64x2(A, B) _mm_set_epi64x((long long) (A), (long long) (B))
/* Unaligned 16-byte store of block B to address A. */
#define AES_BLOCK_STORE(A, B) _mm_storeu_si128((aes_block_t *) (void *) (A), (B))
/* One AES encryption round applied to A, with B as the round key. */
#define AES_ENC(A, B) _mm_aesenc_si128((A), (B))
/*
 * AEGIS-256 state update.
 *
 * Each of the six state words becomes AES_ENC(previous word, own old value),
 * with word 0 taking word 5 as its "previous" word and additionally having
 * the data block `d` XORed in.
 */
static inline void
aegis256_update(aes_block_t *const state, const aes_block_t d)
{
    /* Snapshot the old words so the new ones can be written front to back. */
    const aes_block_t s0 = state[0];
    const aes_block_t s1 = state[1];
    const aes_block_t s2 = state[2];
    const aes_block_t s3 = state[3];
    const aes_block_t s4 = state[4];
    const aes_block_t s5 = state[5];

    state[0] = AES_BLOCK_XOR(AES_ENC(s5, s0), d);
    state[1] = AES_ENC(s0, s1);
    state[2] = AES_ENC(s1, s2);
    state[3] = AES_ENC(s2, s3);
    state[4] = AES_ENC(s3, s4);
    state[5] = AES_ENC(s4, s5);
}
#include "aegis256_common.h"
/*
 * Function table for the AES-NI/AVX backend. It points at the
 * encrypt_detached/decrypt_detached functions that this translation unit
 * gets from "aegis256_common.h", instantiated with the macros above.
 * NOTE(review): SODIUM_C99() presumably emits the designated-initializer
 * prefix only for C99-capable compilers -- confirm in private/common.h.
 */
struct aegis256_implementation aegis256_aesni_implementation = { SODIUM_C99(.encrypt_detached =)
encrypt_detached,
SODIUM_C99(.decrypt_detached =)
decrypt_detached };
#ifdef __clang__
#pragma clang attribute pop
#endif
#endif

View File

@@ -0,0 +1,8 @@
/* Declares the function table of the AES-NI/AVX AEGIS-256 backend. */
#ifndef aegis256_aesni_H
#define aegis256_aesni_H
#include "implementations.h"
/* Defined in the AES-NI source file, only when that backend is compiled in. */
extern struct aegis256_implementation aegis256_aesni_implementation;
#endif

View File

@@ -0,0 +1,70 @@
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "core.h"
#include "crypto_aead_aegis256.h"
#include "crypto_verify_16.h"
#include "crypto_verify_32.h"
#include "export.h"
#include "utils.h"
#include "private/common.h"
#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN)
#include "aegis256_armcrypto.h"
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("neon,crypto,aes"))), apply_to = function)
#elif defined(__GNUC__)
#pragma GCC target("+simd+crypto")
#endif
#ifndef __ARM_FEATURE_CRYPTO
#define __ARM_FEATURE_CRYPTO 1
#endif
#ifndef __ARM_FEATURE_AES
#define __ARM_FEATURE_AES 1
#endif
#include <arm_neon.h>
/* Size in bytes of one AES block, which is also one AEGIS state word. */
#define AES_BLOCK_LENGTH 16
/* In this backend a block lives in a 128-bit NEON register of 16 bytes. */
typedef uint8x16_t aes_block_t;
/* Bitwise XOR and AND of two blocks. */
#define AES_BLOCK_XOR(A, B) veorq_u8((A), (B))
#define AES_BLOCK_AND(A, B) vandq_u8((A), (B))
/* 16-byte load from address A. */
#define AES_BLOCK_LOAD(A) vld1q_u8(A)
/* Both 64-bit lanes are first set to B, then lane 1 is overwritten with A. */
#define AES_BLOCK_LOAD_64x2(A, B) vreinterpretq_u8_u64(vsetq_lane_u64((A), vmovq_n_u64(B), 1))
/* 16-byte store of block B to address A. */
#define AES_BLOCK_STORE(A, B) vst1q_u8((A), (B))
/*
 * One AES round of A keyed with B: AESE with an all-zero key, then AESMC,
 * then the round key B is XORed in last.
 */
#define AES_ENC(A, B) veorq_u8(vaesmcq_u8(vaeseq_u8((A), vmovq_n_u8(0))), (B))
/*
 * AEGIS-256 state update.
 *
 * Each of the six state words becomes AES_ENC(previous word, own old value),
 * with word 0 taking word 5 as its "previous" word and additionally having
 * the data block `d` XORed in.
 */
static inline void
aegis256_update(aes_block_t *const state, const aes_block_t d)
{
    /* Snapshot the old words so the new ones can be written front to back. */
    const aes_block_t s0 = state[0];
    const aes_block_t s1 = state[1];
    const aes_block_t s2 = state[2];
    const aes_block_t s3 = state[3];
    const aes_block_t s4 = state[4];
    const aes_block_t s5 = state[5];

    state[0] = AES_BLOCK_XOR(AES_ENC(s5, s0), d);
    state[1] = AES_ENC(s0, s1);
    state[2] = AES_ENC(s1, s2);
    state[3] = AES_ENC(s2, s3);
    state[4] = AES_ENC(s3, s4);
    state[5] = AES_ENC(s4, s5);
}
#include "aegis256_common.h"
/*
 * Function table for the ARM crypto-extension backend. It points at the
 * encrypt_detached/decrypt_detached functions that this translation unit
 * gets from "aegis256_common.h", instantiated with the macros above.
 * NOTE(review): SODIUM_C99() presumably emits the designated-initializer
 * prefix only for C99-capable compilers -- confirm in private/common.h.
 */
struct aegis256_implementation aegis256_armcrypto_implementation = { SODIUM_C99(.encrypt_detached =)
encrypt_detached,
SODIUM_C99(.decrypt_detached =)
decrypt_detached };
#ifdef __clang__
#pragma clang attribute pop
#endif
#endif

View File

@@ -0,0 +1,8 @@
/* Declares the function table of the ARM crypto-extension AEGIS-256 backend. */
#ifndef aegis256_armcrypto_H
#define aegis256_armcrypto_H
#include "implementations.h"
/* Defined in the ARM crypto source file, only when that backend is compiled in. */
extern struct aegis256_implementation aegis256_armcrypto_implementation;
#endif

View File

@@ -0,0 +1,214 @@
/* Number of input bytes consumed per state update (one AES block). */
#define RATE 16
/*
 * Initialise the six-word AEGIS-256 state from a key and a nonce.
 *
 * Two consecutive blocks are read from `key` and two from `nonce`
 * (2 * AES_BLOCK_LENGTH bytes each). The state is seeded from them and from
 * the constants c0/c1, then mixed with 4 rounds of four updates.
 */
static void
aegis256_init(const uint8_t *key, const uint8_t *nonce, aes_block_t *const state)
{
/* c0_ and c1_ hold the Fibonacci numbers modulo 256 (0, 1, 1, 2, 3, 5, ...),
 * c1_ continuing where c0_ stops. */
static CRYPTO_ALIGN(AES_BLOCK_LENGTH)
const uint8_t c0_[AES_BLOCK_LENGTH] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 };
static CRYPTO_ALIGN(AES_BLOCK_LENGTH)
const uint8_t c1_[AES_BLOCK_LENGTH] = { 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd };
const aes_block_t c0 = AES_BLOCK_LOAD(c0_);
const aes_block_t c1 = AES_BLOCK_LOAD(c1_);
/* Key and nonce halves, plus their XOR combinations reused below. */
const aes_block_t k0 = AES_BLOCK_LOAD(key);
const aes_block_t k1 = AES_BLOCK_LOAD(key + AES_BLOCK_LENGTH);
const aes_block_t n0 = AES_BLOCK_LOAD(nonce);
const aes_block_t n1 = AES_BLOCK_LOAD(nonce + AES_BLOCK_LENGTH);
const aes_block_t k0_n0 = AES_BLOCK_XOR(k0, n0);
const aes_block_t k1_n1 = AES_BLOCK_XOR(k1, n1);
int i;
/* Initial state layout. */
state[0] = k0_n0;
state[1] = k1_n1;
state[2] = c1;
state[3] = c0;
state[4] = AES_BLOCK_XOR(k0, c0);
state[5] = AES_BLOCK_XOR(k1, c1);
/* 16 updates in total, cycling through k0, k1, k0^n0, k1^n1. */
for (i = 0; i < 4; i++) {
aegis256_update(state, k0);
aegis256_update(state, k1);
aegis256_update(state, k0_n0);
aegis256_update(state, k1_n1);
}
}
/*
 * Finalise the state and write the authentication tag to `mac`.
 *
 * A block built from the bit lengths of the message and of the additional
 * data (byte lengths shifted left by 3) is XORed with state[3] and fed to 7
 * state updates. The tag is then derived from the state:
 *   maclen == 16: XOR of all six state words;
 *   maclen == 32: state[0]^state[1]^state[2] followed by
 *                 state[3]^state[4]^state[5];
 *   otherwise:    `maclen` zero bytes are written.
 */
static void
aegis256_mac(uint8_t *mac, size_t maclen, size_t adlen, size_t mlen, aes_block_t *const state)
{
aes_block_t tmp;
int i;
/* Length block; which half each length lands in is decided by the macro. */
tmp = AES_BLOCK_LOAD_64x2(((uint64_t) mlen) << 3, ((uint64_t) adlen) << 3);
tmp = AES_BLOCK_XOR(tmp, state[3]);
for (i = 0; i < 7; i++) {
aegis256_update(state, tmp);
}
if (maclen == 16) {
tmp = AES_BLOCK_XOR(state[5], state[4]);
tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[3], state[2]));
tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[1], state[0]));
AES_BLOCK_STORE(mac, tmp);
} else if (maclen == 32) {
tmp = AES_BLOCK_XOR(AES_BLOCK_XOR(state[2], state[1]), state[0]);
AES_BLOCK_STORE(mac, tmp);
tmp = AES_BLOCK_XOR(AES_BLOCK_XOR(state[5], state[4]), state[3]);
AES_BLOCK_STORE(mac + 16, tmp);
} else {
/* Unsupported tag length: emit zeros instead of a tag. */
memset(mac, 0, maclen);
}
}
/* Absorb one full block (AES_BLOCK_LENGTH bytes at `src`) into the state. */
static inline void
aegis256_absorb(const uint8_t *const src, aes_block_t *const state)
{
    const aes_block_t block = AES_BLOCK_LOAD(src);

    aegis256_update(state, block);
}
/*
 * Encrypt one full block from `src` into `dst`, then update the state with
 * the plaintext block.
 *
 * The keystream block is state[1] ^ state[4] ^ state[5] ^
 * (state[2] & state[3]).
 */
static void
aegis256_enc(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state)
{
    const aes_block_t plain = AES_BLOCK_LOAD(src);
    aes_block_t       keystream;

    keystream = AES_BLOCK_XOR(state[5], state[4]);
    keystream = AES_BLOCK_XOR(keystream, state[1]);
    keystream = AES_BLOCK_XOR(keystream, AES_BLOCK_AND(state[2], state[3]));

    AES_BLOCK_STORE(dst, AES_BLOCK_XOR(plain, keystream));
    aegis256_update(state, plain);
}
/*
 * Decrypt one full block from `src` into `dst`, then update the state with
 * the recovered plaintext block.
 *
 * The keystream block is state[1] ^ state[4] ^ state[5] ^
 * (state[2] & state[3]).
 */
static void
aegis256_dec(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state)
{
    const aes_block_t cipher = AES_BLOCK_LOAD(src);
    aes_block_t       keystream;
    aes_block_t       plain;

    keystream = AES_BLOCK_XOR(state[5], state[4]);
    keystream = AES_BLOCK_XOR(keystream, state[1]);
    keystream = AES_BLOCK_XOR(keystream, AES_BLOCK_AND(state[2], state[3]));

    plain = AES_BLOCK_XOR(cipher, keystream);
    AES_BLOCK_STORE(dst, plain);
    aegis256_update(state, plain);
}
/*
 * Decrypt a final, partial block of `len` bytes from `src` into `dst`.
 *
 * The ciphertext is zero-padded to RATE bytes and decrypted as a full block.
 * The bytes beyond `len` are then cleared again, so the state is updated
 * with the zero-padded plaintext rather than with keystream residue.
 * NOTE(review): callers in this file pass len = mlen % RATE, i.e. len < RATE;
 * nothing here checks that bound.
 */
static void
aegis256_declast(uint8_t *const dst, const uint8_t *const src, size_t len, aes_block_t *const state)
{
uint8_t pad[RATE];
aes_block_t msg;
/* Zero-padded copy of the partial ciphertext block. */
memset(pad, 0, sizeof pad);
memcpy(pad, src, len);
msg = AES_BLOCK_LOAD(pad);
msg = AES_BLOCK_XOR(msg, state[5]);
msg = AES_BLOCK_XOR(msg, state[4]);
msg = AES_BLOCK_XOR(msg, state[1]);
msg = AES_BLOCK_XOR(msg, AES_BLOCK_AND(state[2], state[3]));
AES_BLOCK_STORE(pad, msg);
/* Discard the decrypted padding; only the first `len` bytes are plaintext. */
memset(pad + len, 0, sizeof pad - len);
memcpy(dst, pad, len);
msg = AES_BLOCK_LOAD(pad);
aegis256_update(state, msg);
}
/*
 * Backend-level AEGIS-256 encryption with a detached tag.
 *
 * Initialises the state from `k` and `npub`, absorbs the additional data,
 * encrypts `mlen` bytes of `m` into `c`, and writes a `maclen`-byte tag to
 * `mac`. Trailing partial blocks of both inputs are zero-padded to RATE
 * bytes before processing; only the real ciphertext bytes are copied out.
 * Always returns 0.
 */
static int
encrypt_detached(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size_t mlen,
                 const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k)
{
    aes_block_t                state[6];
    CRYPTO_ALIGN(RATE) uint8_t src[RATE];
    CRYPTO_ALIGN(RATE) uint8_t dst[RATE];
    const size_t               ad_tail  = adlen % RATE;
    const size_t               msg_tail = mlen % RATE;
    size_t                     off;

    aegis256_init(k, npub, state);

    /* Additional data: full blocks first, then the padded remainder. */
    off = 0;
    while (off + RATE <= adlen) {
        aegis256_absorb(ad + off, state);
        off += RATE;
    }
    if (ad_tail != 0) {
        memset(src, 0, RATE);
        memcpy(src, ad + off, ad_tail);
        aegis256_absorb(src, state);
    }

    /* Message: full blocks first, then the padded remainder. */
    off = 0;
    while (off + RATE <= mlen) {
        aegis256_enc(c + off, m + off, state);
        off += RATE;
    }
    if (msg_tail != 0) {
        memset(src, 0, RATE);
        memcpy(src, m + off, msg_tail);
        aegis256_enc(dst, src, state);
        memcpy(c + off, dst, msg_tail);
    }

    aegis256_mac(mac, maclen, adlen, mlen, state);

    return 0;
}
/*
 * Backend-level AEGIS-256 decryption and verification with a detached tag.
 *
 * Absorbs the additional data, decrypts `clen` bytes of `c`, recomputes the
 * tag and compares it with `mac` through crypto_verify_16/crypto_verify_32.
 * When `m` is NULL the plaintext is discarded into a scratch block, so only
 * verification takes place. Returns the comparison result, or -1 when
 * `maclen` is neither 16 nor 32. On any non-zero result a non-NULL `m` is
 * zeroed over its `mlen` bytes.
 */
static int
decrypt_detached(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac, size_t maclen,
const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k)
{
aes_block_t state[6];
CRYPTO_ALIGN(RATE) uint8_t src[RATE];
CRYPTO_ALIGN(RATE) uint8_t dst[RATE];
CRYPTO_ALIGN(16) uint8_t computed_mac[32];
const size_t mlen = clen;
size_t i;
int ret;
aegis256_init(k, npub, state);
/* Additional data: full blocks, then a zero-padded remainder. */
for (i = 0; i + RATE <= adlen; i += RATE) {
aegis256_absorb(ad + i, state);
}
if (adlen % RATE) {
memset(src, 0, RATE);
memcpy(src, ad + i, adlen % RATE);
aegis256_absorb(src, state);
}
/* Full ciphertext blocks; with m == NULL the output goes to scratch. */
if (m != NULL) {
for (i = 0; i + RATE <= mlen; i += RATE) {
aegis256_dec(m + i, c + i, state);
}
} else {
for (i = 0; i + RATE <= mlen; i += RATE) {
aegis256_dec(dst, c + i, state);
}
}
/* Trailing partial block, if any. */
if (mlen % RATE) {
if (m != NULL) {
aegis256_declast(m + i, c + i, mlen % RATE, state);
} else {
aegis256_declast(dst, c + i, mlen % RATE, state);
}
}
COMPILER_ASSERT(sizeof computed_mac >= 32);
aegis256_mac(computed_mac, maclen, adlen, mlen, state);
/* Any tag length other than 16 or 32 is treated as a failure. */
ret = -1;
if (maclen == 16) {
ret = crypto_verify_16(computed_mac, mac);
} else if (maclen == 32) {
ret = crypto_verify_32(computed_mac, mac);
}
/* Do not leave unauthenticated plaintext in the caller's buffer. */
if (ret != 0 && m != NULL) {
memset(m, 0, mlen);
}
return ret;
}

View File

@@ -0,0 +1,54 @@
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "core.h"
#include "crypto_aead_aegis256.h"
#include "crypto_verify_16.h"
#include "crypto_verify_32.h"
#include "export.h"
#include "utils.h"
#include "private/common.h"
#include "crypto_aead_aegis256.h"
#include "private/softaes.h"
#if 1
#include "aegis256_soft.h"
/* Size in bytes of one AES block, which is also one AEGIS state word. */
#define AES_BLOCK_LENGTH 16
/* In this backend a block is the software AES block type. */
typedef SoftAesBlock aes_block_t;
/* Block primitives, each mapped onto a softaes_* helper. */
#define AES_BLOCK_XOR(A, B) softaes_block_xor((A), (B))
#define AES_BLOCK_AND(A, B) softaes_block_and((A), (B))
#define AES_BLOCK_LOAD(A) softaes_block_load(A)
#define AES_BLOCK_LOAD_64x2(A, B) softaes_block_load64x2((A), (B))
#define AES_BLOCK_STORE(A, B) softaes_block_store((A), (B))
/* NOTE(review): presumably one AES round of A keyed with B, matching the
 * hardware backends -- confirm against private/softaes.h. */
#define AES_ENC(A, B) softaes_block_encrypt((A), (B))
/*
 * AEGIS-256 state update.
 *
 * Each of the six state words becomes AES_ENC(previous word, own old value),
 * with word 0 taking word 5 as its "previous" word and additionally having
 * the data block `d` XORed in.
 */
static inline void
aegis256_update(aes_block_t *const state, const aes_block_t d)
{
    /* Snapshot the old words so the new ones can be written front to back. */
    const aes_block_t s0 = state[0];
    const aes_block_t s1 = state[1];
    const aes_block_t s2 = state[2];
    const aes_block_t s3 = state[3];
    const aes_block_t s4 = state[4];
    const aes_block_t s5 = state[5];

    state[0] = AES_BLOCK_XOR(AES_ENC(s5, s0), d);
    state[1] = AES_ENC(s0, s1);
    state[2] = AES_ENC(s1, s2);
    state[3] = AES_ENC(s2, s3);
    state[4] = AES_ENC(s3, s4);
    state[5] = AES_ENC(s4, s5);
}
#include "aegis256_common.h"
/*
 * Function table for the portable software backend. It points at the
 * encrypt_detached/decrypt_detached functions that this translation unit
 * gets from "aegis256_common.h", instantiated with the macros above.
 * NOTE(review): SODIUM_C99() presumably emits the designated-initializer
 * prefix only for C99-capable compilers -- confirm in private/common.h.
 */
struct aegis256_implementation aegis256_soft_implementation = { SODIUM_C99(.encrypt_detached =)
encrypt_detached,
SODIUM_C99(.decrypt_detached =)
decrypt_detached };
#endif

View File

@@ -0,0 +1,8 @@
/* Declares the function table of the portable software AEGIS-256 backend. */
#ifndef aegis256_soft_H
#define aegis256_soft_H
#include "implementations.h"
/* Defined in the software backend source file. */
extern struct aegis256_implementation aegis256_soft_implementation;
#endif

View File

@@ -0,0 +1,17 @@
#ifndef aegis256_implementations_H
#define aegis256_implementations_H
#include <stddef.h>
#include <stdint.h>
#include "crypto_aead_aegis256.h"
/*
 * Function table that every AEGIS-256 backend provides. Both entries take
 * size_t lengths and an explicit tag length `maclen`, and return an int
 * status.
 */
typedef struct aegis256_implementation {
/* Encrypt `m` into `c` and write a `maclen`-byte tag to `mac`. */
int (*encrypt_detached)(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size_t mlen,
const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k);
/* Decrypt `c` into `m` and check it against the `maclen`-byte tag `mac`. */
int (*decrypt_detached)(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac,
size_t maclen, const uint8_t *ad, size_t adlen, const uint8_t *npub,
const uint8_t *k);
} aegis256_implementation;
#endif

View File

@@ -0,0 +1,157 @@
#include <errno.h>
#include <stdlib.h>
#include "crypto_aead_aes256gcm.h"
#include "private/common.h"
#include "randombytes.h"
/* Runtime accessor for crypto_aead_aes256gcm_KEYBYTES. */
size_t
crypto_aead_aes256gcm_keybytes(void)
{
return crypto_aead_aes256gcm_KEYBYTES;
}
/* Runtime accessor for crypto_aead_aes256gcm_NSECBYTES. */
size_t
crypto_aead_aes256gcm_nsecbytes(void)
{
return crypto_aead_aes256gcm_NSECBYTES;
}
/* Runtime accessor for crypto_aead_aes256gcm_NPUBBYTES. */
size_t
crypto_aead_aes256gcm_npubbytes(void)
{
return crypto_aead_aes256gcm_NPUBBYTES;
}
/* Runtime accessor for crypto_aead_aes256gcm_ABYTES. */
size_t
crypto_aead_aes256gcm_abytes(void)
{
return crypto_aead_aes256gcm_ABYTES;
}
/* Size of crypto_aead_aes256gcm_state, rounded up to a multiple of 16 bytes. */
size_t
crypto_aead_aes256gcm_statebytes(void)
{
return (sizeof(crypto_aead_aes256gcm_state) + (size_t) 15U) & ~(size_t) 15U;
}
/* Runtime accessor for crypto_aead_aes256gcm_MESSAGEBYTES_MAX. */
size_t
crypto_aead_aes256gcm_messagebytes_max(void)
{
return crypto_aead_aes256gcm_MESSAGEBYTES_MAX;
}
/* Fill `k` with crypto_aead_aes256gcm_KEYBYTES bytes from randombytes_buf(). */
void
crypto_aead_aes256gcm_keygen(unsigned char k[crypto_aead_aes256gcm_KEYBYTES])
{
randombytes_buf(k, crypto_aead_aes256gcm_KEYBYTES);
}
#if !((defined(HAVE_ARMCRYPTO) && defined(__clang__) && defined(NATIVE_LITTLE_ENDIAN)) || \
(defined(HAVE_TMMINTRIN_H) && defined(HAVE_WMMINTRIN_H)))
#ifndef ENOSYS
#define ENOSYS ENXIO
#endif
/*
 * Stub used when no hardware AES-GCM backend is compiled in (see the
 * enclosing #if): ignores all arguments, sets errno to ENOSYS, returns -1.
 */
int
crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac,
unsigned long long *maclen_p, const unsigned char *m,
unsigned long long mlen, const unsigned char *ad,
unsigned long long adlen, const unsigned char *nsec,
const unsigned char *npub, const unsigned char *k)
{
errno = ENOSYS;
return -1;
}
/*
 * Stub used when no hardware AES-GCM backend is compiled in (see the
 * enclosing #if): ignores all arguments, sets errno to ENOSYS, returns -1.
 */
int
crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m,
unsigned long long mlen, const unsigned char *ad,
unsigned long long adlen, const unsigned char *nsec,
const unsigned char *npub, const unsigned char *k)
{
errno = ENOSYS;
return -1;
}
/*
 * Stub used when no hardware AES-GCM backend is compiled in (see the
 * enclosing #if): ignores all arguments, sets errno to ENOSYS, returns -1.
 */
int
crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *mac, const unsigned char *ad,
unsigned long long adlen, const unsigned char *npub,
const unsigned char *k)
{
errno = ENOSYS;
return -1;
}
/*
 * Stub used when no hardware AES-GCM backend is compiled in (see the
 * enclosing #if): ignores all arguments, sets errno to ENOSYS, returns -1.
 */
int
crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k)
{
errno = ENOSYS;
return -1;
}
/*
 * Stub used when no hardware AES-GCM backend is compiled in (see the
 * enclosing #if): leaves `st_` untouched, sets errno to ENOSYS, returns -1.
 */
int
crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k)
{
errno = ENOSYS;
return -1;
}
/*
 * Stub used when no hardware AES-GCM backend is compiled in (see the
 * enclosing #if): ignores all arguments, sets errno to ENOSYS, returns -1.
 */
int
crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac,
unsigned long long *maclen_p, const unsigned char *m,
unsigned long long mlen, const unsigned char *ad,
unsigned long long adlen, const unsigned char *nsec,
const unsigned char *npub,
const crypto_aead_aes256gcm_state *st_)
{
errno = ENOSYS;
return -1;
}
/*
 * Stub used when no hardware AES-GCM backend is compiled in (see the
 * enclosing #if): ignores all arguments, sets errno to ENOSYS, returns -1.
 */
int
crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const crypto_aead_aes256gcm_state *st_)
{
errno = ENOSYS;
return -1;
}
/*
 * Stub used when no hardware AES-GCM backend is compiled in (see the
 * enclosing #if): ignores all arguments, sets errno to ENOSYS, returns -1.
 */
int
crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *mac, const unsigned char *ad,
unsigned long long adlen, const unsigned char *npub,
const crypto_aead_aes256gcm_state *st_)
{
errno = ENOSYS;
return -1;
}
/*
 * Stub used when no hardware AES-GCM backend is compiled in (see the
 * enclosing #if): ignores all arguments, sets errno to ENOSYS, returns -1.
 */
int
crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p,
unsigned char *nsec, const unsigned char *c,
unsigned long long clen, const unsigned char *ad,
unsigned long long adlen, const unsigned char *npub,
const crypto_aead_aes256gcm_state *st_)
{
errno = ENOSYS;
return -1;
}
/*
 * Stub variant, compiled only when no hardware AES-GCM backend is built
 * (see the enclosing #if): reports AES256-GCM as unavailable by returning 0.
 */
int
crypto_aead_aes256gcm_is_available(void)
{
return 0;
}
#endif

View File

@@ -0,0 +1,400 @@
#include <stdint.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include "core.h"
#include "crypto_aead_chacha20poly1305.h"
#include "crypto_onetimeauth_poly1305.h"
#include "crypto_stream_chacha20.h"
#include "crypto_verify_16.h"
#include "randombytes.h"
#include "utils.h"
#include "private/chacha20_ietf_ext.h"
#include "private/common.h"
/* Zero bytes fed to Poly1305 to pad its input to a multiple of 16 (IETF variant). */
static const unsigned char _pad0[16] = { 0 };
/*
 * ChaCha20-Poly1305 (original construction) encryption with a detached tag.
 *
 * Encrypts `mlen` bytes of `m` into `c` and writes the Poly1305 tag to
 * `mac`. The tag covers, in this order: `ad`, the 8-byte STORE64_LE
 * encoding of `adlen`, the ciphertext, and the 8-byte encoding of `mlen`.
 * `nsec` is ignored. If `maclen_p` is not NULL it receives
 * crypto_aead_chacha20poly1305_ABYTES. Always returns 0.
 */
int
crypto_aead_chacha20poly1305_encrypt_detached(unsigned char *c,
unsigned char *mac,
unsigned long long *maclen_p,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *ad,
unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k)
{
crypto_onetimeauth_poly1305_state state;
unsigned char block0[64U];
unsigned char slen[8U];
(void) nsec;
/* 64 bytes of ChaCha20 output seed the Poly1305 state, then are wiped. */
crypto_stream_chacha20(block0, sizeof block0, npub, k);
crypto_onetimeauth_poly1305_init(&state, block0);
sodium_memzero(block0, sizeof block0);
crypto_onetimeauth_poly1305_update(&state, ad, adlen);
STORE64_LE(slen, (uint64_t) adlen);
crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
/* Encrypt with the initial counter argument set to 1U. */
crypto_stream_chacha20_xor_ic(c, m, mlen, npub, 1U, k);
crypto_onetimeauth_poly1305_update(&state, c, mlen);
STORE64_LE(slen, (uint64_t) mlen);
crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
crypto_onetimeauth_poly1305_final(&state, mac);
sodium_memzero(&state, sizeof state);
if (maclen_p != NULL) {
*maclen_p = crypto_aead_chacha20poly1305_ABYTES;
}
return 0;
}
/*
 * Combined-mode ChaCha20-Poly1305 (original construction) encryption.
 *
 * Writes the ciphertext to `c` with the tag appended at `c + mlen`. Calls
 * sodium_misuse() when `mlen` exceeds
 * crypto_aead_chacha20poly1305_MESSAGEBYTES_MAX. When `clen_p` is not NULL
 * it receives mlen + crypto_aead_chacha20poly1305_ABYTES on success and 0
 * otherwise.
 */
int
crypto_aead_chacha20poly1305_encrypt(unsigned char *c,
                                     unsigned long long *clen_p,
                                     const unsigned char *m,
                                     unsigned long long mlen,
                                     const unsigned char *ad,
                                     unsigned long long adlen,
                                     const unsigned char *nsec,
                                     const unsigned char *npub,
                                     const unsigned char *k)
{
    int status;

    if (mlen > crypto_aead_chacha20poly1305_MESSAGEBYTES_MAX) {
        sodium_misuse();
    }
    status = crypto_aead_chacha20poly1305_encrypt_detached(c, c + mlen, NULL, m, mlen,
                                                           ad, adlen, nsec, npub, k);
    if (clen_p != NULL) {
        *clen_p = (status == 0) ? mlen + crypto_aead_chacha20poly1305_ABYTES : 0ULL;
    }
    return status;
}
/*
 * ChaCha20-Poly1305 (IETF construction) encryption with a detached tag.
 *
 * Encrypts `mlen` bytes of `m` into `c` and writes the Poly1305 tag to
 * `mac`. The tag covers, in this order: `ad`, zero padding up to a multiple
 * of 16 bytes, the ciphertext, zero padding up to a multiple of 16 bytes,
 * then the 8-byte STORE64_LE encodings of `adlen` and `mlen`.
 * `nsec` is ignored. If `maclen_p` is not NULL it receives
 * crypto_aead_chacha20poly1305_ietf_ABYTES. Always returns 0.
 */
int
crypto_aead_chacha20poly1305_ietf_encrypt_detached(unsigned char *c,
unsigned char *mac,
unsigned long long *maclen_p,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *ad,
unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k)
{
crypto_onetimeauth_poly1305_state state;
unsigned char block0[64U];
unsigned char slen[8U];
(void) nsec;
/* 64 bytes of ChaCha20 output seed the Poly1305 state, then are wiped. */
crypto_stream_chacha20_ietf(block0, sizeof block0, npub, k);
crypto_onetimeauth_poly1305_init(&state, block0);
sodium_memzero(block0, sizeof block0);
crypto_onetimeauth_poly1305_update(&state, ad, adlen);
/* (0x10 - len) & 0xf is the number of bytes missing to the next multiple of 16. */
crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf);
/* Encrypt with the initial counter argument set to 1U. */
crypto_stream_chacha20_ietf_xor_ic(c, m, mlen, npub, 1U, k);
crypto_onetimeauth_poly1305_update(&state, c, mlen);
crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf);
STORE64_LE(slen, (uint64_t) adlen);
crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
STORE64_LE(slen, (uint64_t) mlen);
crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
crypto_onetimeauth_poly1305_final(&state, mac);
sodium_memzero(&state, sizeof state);
if (maclen_p != NULL) {
*maclen_p = crypto_aead_chacha20poly1305_ietf_ABYTES;
}
return 0;
}
/*
 * Combined-mode ChaCha20-Poly1305 (IETF construction) encryption.
 *
 * Writes the ciphertext to `c` with the tag appended at `c + mlen`. Calls
 * sodium_misuse() when `mlen` exceeds
 * crypto_aead_chacha20poly1305_ietf_MESSAGEBYTES_MAX. When `clen_p` is not
 * NULL it receives mlen + crypto_aead_chacha20poly1305_ietf_ABYTES on
 * success and 0 otherwise.
 */
int
crypto_aead_chacha20poly1305_ietf_encrypt(unsigned char *c,
                                          unsigned long long *clen_p,
                                          const unsigned char *m,
                                          unsigned long long mlen,
                                          const unsigned char *ad,
                                          unsigned long long adlen,
                                          const unsigned char *nsec,
                                          const unsigned char *npub,
                                          const unsigned char *k)
{
    int status;

    if (mlen > crypto_aead_chacha20poly1305_ietf_MESSAGEBYTES_MAX) {
        sodium_misuse();
    }
    status = crypto_aead_chacha20poly1305_ietf_encrypt_detached(c, c + mlen, NULL, m, mlen,
                                                                ad, adlen, nsec, npub, k);
    if (clen_p != NULL) {
        *clen_p = (status == 0) ? mlen + crypto_aead_chacha20poly1305_ietf_ABYTES : 0ULL;
    }
    return status;
}
/*
 * ChaCha20-Poly1305 (original construction) verification and decryption
 * with a detached tag.
 *
 * The tag is recomputed over `ad`, the encoded `adlen`, the ciphertext and
 * the encoded `clen`, then compared with `mac` via crypto_verify_16 before
 * anything is decrypted. With m == NULL only the comparison result is
 * returned. Otherwise a mismatch zeroes `m` and returns -1, and a match
 * decrypts `c` into `m` and returns 0. `nsec` is ignored.
 */
int
crypto_aead_chacha20poly1305_decrypt_detached(unsigned char *m,
unsigned char *nsec,
const unsigned char *c,
unsigned long long clen,
const unsigned char *mac,
const unsigned char *ad,
unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k)
{
crypto_onetimeauth_poly1305_state state;
unsigned char block0[64U];
unsigned char slen[8U];
unsigned char computed_mac[crypto_aead_chacha20poly1305_ABYTES];
unsigned long long mlen;
int ret;
(void) nsec;
/* 64 bytes of ChaCha20 output seed the Poly1305 state, then are wiped. */
crypto_stream_chacha20(block0, sizeof block0, npub, k);
crypto_onetimeauth_poly1305_init(&state, block0);
sodium_memzero(block0, sizeof block0);
crypto_onetimeauth_poly1305_update(&state, ad, adlen);
STORE64_LE(slen, (uint64_t) adlen);
crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
mlen = clen;
crypto_onetimeauth_poly1305_update(&state, c, mlen);
STORE64_LE(slen, (uint64_t) mlen);
crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
crypto_onetimeauth_poly1305_final(&state, computed_mac);
sodium_memzero(&state, sizeof state);
COMPILER_ASSERT(sizeof computed_mac == 16U);
ret = crypto_verify_16(computed_mac, mac);
sodium_memzero(computed_mac, sizeof computed_mac);
/* Verification-only mode: no output buffer supplied. */
if (m == NULL) {
return ret;
}
if (ret != 0) {
memset(m, 0, mlen);
return -1;
}
/* The tag matched; only now is the plaintext produced. */
crypto_stream_chacha20_xor_ic(m, c, mlen, npub, 1U, k);
return 0;
}
/*
 * Combined-mode ChaCha20-Poly1305 (original construction) decryption.
 *
 * `c` holds the ciphertext followed by a
 * crypto_aead_chacha20poly1305_ABYTES tag. Inputs shorter than the tag fail
 * with -1 without calling the detached routine. When `mlen_p` is not NULL
 * it receives the plaintext length on success and 0 on failure.
 */
int
crypto_aead_chacha20poly1305_decrypt(unsigned char *m,
                                     unsigned long long *mlen_p,
                                     unsigned char *nsec,
                                     const unsigned char *c,
                                     unsigned long long clen,
                                     const unsigned char *ad,
                                     unsigned long long adlen,
                                     const unsigned char *npub,
                                     const unsigned char *k)
{
    int status = -1;

    if (clen >= crypto_aead_chacha20poly1305_ABYTES) {
        /* Split the input into ciphertext body and trailing tag. */
        const unsigned long long body_len = clen - crypto_aead_chacha20poly1305_ABYTES;

        status = crypto_aead_chacha20poly1305_decrypt_detached(m, nsec, c, body_len,
                                                               c + body_len, ad, adlen,
                                                               npub, k);
    }
    if (mlen_p != NULL) {
        *mlen_p = (status == 0) ? clen - crypto_aead_chacha20poly1305_ABYTES : 0ULL;
    }
    return status;
}
/*
 * ChaCha20-Poly1305 (IETF construction) verification and decryption with a
 * detached tag.
 *
 * The tag is recomputed over `ad`, zero padding, the ciphertext, zero
 * padding, and the encoded `adlen` and `clen`, then compared with `mac` via
 * crypto_verify_16 before anything is decrypted. With m == NULL only the
 * comparison result is returned. Otherwise a mismatch zeroes `m` and
 * returns -1, and a match decrypts `c` into `m` and returns 0. `nsec` is
 * ignored.
 */
int
crypto_aead_chacha20poly1305_ietf_decrypt_detached(unsigned char *m,
unsigned char *nsec,
const unsigned char *c,
unsigned long long clen,
const unsigned char *mac,
const unsigned char *ad,
unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k)
{
crypto_onetimeauth_poly1305_state state;
unsigned char block0[64U];
unsigned char slen[8U];
unsigned char computed_mac[crypto_aead_chacha20poly1305_ietf_ABYTES];
unsigned long long mlen;
int ret;
(void) nsec;
/* 64 bytes of ChaCha20 output seed the Poly1305 state, then are wiped. */
crypto_stream_chacha20_ietf(block0, sizeof block0, npub, k);
crypto_onetimeauth_poly1305_init(&state, block0);
sodium_memzero(block0, sizeof block0);
crypto_onetimeauth_poly1305_update(&state, ad, adlen);
/* (0x10 - len) & 0xf is the number of bytes missing to the next multiple of 16. */
crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf);
mlen = clen;
crypto_onetimeauth_poly1305_update(&state, c, mlen);
crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf);
STORE64_LE(slen, (uint64_t) adlen);
crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
STORE64_LE(slen, (uint64_t) mlen);
crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
crypto_onetimeauth_poly1305_final(&state, computed_mac);
sodium_memzero(&state, sizeof state);
COMPILER_ASSERT(sizeof computed_mac == 16U);
ret = crypto_verify_16(computed_mac, mac);
sodium_memzero(computed_mac, sizeof computed_mac);
/* Verification-only mode: no output buffer supplied. */
if (m == NULL) {
return ret;
}
if (ret != 0) {
memset(m, 0, mlen);
return -1;
}
/* The tag matched; only now is the plaintext produced. */
crypto_stream_chacha20_ietf_xor_ic(m, c, mlen, npub, 1U, k);
return 0;
}
/*
 * Combined-mode ChaCha20-Poly1305 (IETF construction) decryption.
 *
 * `c` holds the ciphertext followed by a
 * crypto_aead_chacha20poly1305_ietf_ABYTES tag. Inputs shorter than the tag
 * fail with -1 without calling the detached routine. When `mlen_p` is not
 * NULL it receives the plaintext length on success and 0 on failure.
 */
int
crypto_aead_chacha20poly1305_ietf_decrypt(unsigned char *m,
                                          unsigned long long *mlen_p,
                                          unsigned char *nsec,
                                          const unsigned char *c,
                                          unsigned long long clen,
                                          const unsigned char *ad,
                                          unsigned long long adlen,
                                          const unsigned char *npub,
                                          const unsigned char *k)
{
    int status = -1;

    if (clen >= crypto_aead_chacha20poly1305_ietf_ABYTES) {
        /* Split the input into ciphertext body and trailing tag. */
        const unsigned long long body_len = clen - crypto_aead_chacha20poly1305_ietf_ABYTES;

        status = crypto_aead_chacha20poly1305_ietf_decrypt_detached(m, nsec, c, body_len,
                                                                    c + body_len, ad, adlen,
                                                                    npub, k);
    }
    if (mlen_p != NULL) {
        *mlen_p = (status == 0) ? clen - crypto_aead_chacha20poly1305_ietf_ABYTES : 0ULL;
    }
    return status;
}
/* Runtime accessor for crypto_aead_chacha20poly1305_ietf_KEYBYTES. */
size_t
crypto_aead_chacha20poly1305_ietf_keybytes(void)
{
return crypto_aead_chacha20poly1305_ietf_KEYBYTES;
}
/* Runtime accessor for crypto_aead_chacha20poly1305_ietf_NPUBBYTES. */
size_t
crypto_aead_chacha20poly1305_ietf_npubbytes(void)
{
return crypto_aead_chacha20poly1305_ietf_NPUBBYTES;
}
/* Runtime accessor for crypto_aead_chacha20poly1305_ietf_NSECBYTES. */
size_t
crypto_aead_chacha20poly1305_ietf_nsecbytes(void)
{
return crypto_aead_chacha20poly1305_ietf_NSECBYTES;
}
/* Runtime accessor for crypto_aead_chacha20poly1305_ietf_ABYTES. */
size_t
crypto_aead_chacha20poly1305_ietf_abytes(void)
{
return crypto_aead_chacha20poly1305_ietf_ABYTES;
}
/* Runtime accessor for crypto_aead_chacha20poly1305_ietf_MESSAGEBYTES_MAX. */
size_t
crypto_aead_chacha20poly1305_ietf_messagebytes_max(void)
{
return crypto_aead_chacha20poly1305_ietf_MESSAGEBYTES_MAX;
}
/* Fill `k` with crypto_aead_chacha20poly1305_ietf_KEYBYTES bytes from randombytes_buf(). */
void
crypto_aead_chacha20poly1305_ietf_keygen(unsigned char k[crypto_aead_chacha20poly1305_ietf_KEYBYTES])
{
randombytes_buf(k, crypto_aead_chacha20poly1305_ietf_KEYBYTES);
}
/* Runtime accessor for crypto_aead_chacha20poly1305_KEYBYTES. */
size_t
crypto_aead_chacha20poly1305_keybytes(void)
{
return crypto_aead_chacha20poly1305_KEYBYTES;
}
/* Runtime accessor for crypto_aead_chacha20poly1305_NPUBBYTES. */
size_t
crypto_aead_chacha20poly1305_npubbytes(void)
{
return crypto_aead_chacha20poly1305_NPUBBYTES;
}
/* Runtime accessor for crypto_aead_chacha20poly1305_NSECBYTES. */
size_t
crypto_aead_chacha20poly1305_nsecbytes(void)
{
return crypto_aead_chacha20poly1305_NSECBYTES;
}
/* Runtime accessor for crypto_aead_chacha20poly1305_ABYTES. */
size_t
crypto_aead_chacha20poly1305_abytes(void)
{
return crypto_aead_chacha20poly1305_ABYTES;
}
/* Runtime accessor for crypto_aead_chacha20poly1305_MESSAGEBYTES_MAX. */
size_t
crypto_aead_chacha20poly1305_messagebytes_max(void)
{
return crypto_aead_chacha20poly1305_MESSAGEBYTES_MAX;
}
/* Fill `k` with crypto_aead_chacha20poly1305_KEYBYTES bytes from randombytes_buf(). */
void
crypto_aead_chacha20poly1305_keygen(unsigned char k[crypto_aead_chacha20poly1305_KEYBYTES])
{
randombytes_buf(k, crypto_aead_chacha20poly1305_KEYBYTES);
}

View File

@@ -0,0 +1,262 @@
#include <stdint.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include "core.h"
#include "crypto_aead_chacha20poly1305.h"
#include "crypto_aead_xchacha20poly1305.h"
#include "crypto_core_hchacha20.h"
#include "crypto_onetimeauth_poly1305.h"
#include "crypto_stream_chacha20.h"
#include "crypto_verify_16.h"
#include "randombytes.h"
#include "utils.h"
#include "private/chacha20_ietf_ext.h"
#include "private/common.h"
/* Zero bytes fed to Poly1305 to pad its input to a multiple of 16. */
static const unsigned char _pad0[16] = { 0 };
/*
 * Internal detached encryption, called by the public XChaCha20-Poly1305
 * function with a derived key and nonce.
 *
 * Uses the crypto_stream_chacha20_ietf_ext* stream functions. The Poly1305
 * tag covers, in this order: `ad`, zero padding to a multiple of 16 bytes,
 * the ciphertext, zero padding to a multiple of 16 bytes, then the 8-byte
 * STORE64_LE encodings of `adlen` and `mlen`. `nsec` is ignored. If
 * `maclen_p` is not NULL it receives
 * crypto_aead_chacha20poly1305_ietf_ABYTES. Always returns 0.
 */
static int
_encrypt_detached(unsigned char *c,
unsigned char *mac,
unsigned long long *maclen_p,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *ad,
unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k)
{
crypto_onetimeauth_poly1305_state state;
unsigned char block0[64U];
unsigned char slen[8U];
(void) nsec;
/* 64 bytes of stream output seed the Poly1305 state, then are wiped. */
crypto_stream_chacha20_ietf_ext(block0, sizeof block0, npub, k);
crypto_onetimeauth_poly1305_init(&state, block0);
sodium_memzero(block0, sizeof block0);
crypto_onetimeauth_poly1305_update(&state, ad, adlen);
/* (0x10 - len) & 0xf is the number of bytes missing to the next multiple of 16. */
crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf);
/* Encrypt with the initial counter argument set to 1U. */
crypto_stream_chacha20_ietf_ext_xor_ic(c, m, mlen, npub, 1U, k);
crypto_onetimeauth_poly1305_update(&state, c, mlen);
crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf);
STORE64_LE(slen, (uint64_t) adlen);
crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
STORE64_LE(slen, (uint64_t) mlen);
crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
crypto_onetimeauth_poly1305_final(&state, mac);
sodium_memzero(&state, sizeof state);
if (maclen_p != NULL) {
*maclen_p = crypto_aead_chacha20poly1305_ietf_ABYTES;
}
return 0;
}
/*
 * Internal detached verification and decryption, called by the public
 * XChaCha20-Poly1305 function with a derived key and nonce.
 *
 * The tag is recomputed over `ad`, zero padding, the ciphertext, zero
 * padding, and the encoded `adlen` and `clen`, then compared with `mac` via
 * crypto_verify_16 before anything is decrypted. With m == NULL only the
 * comparison result is returned. Otherwise a mismatch zeroes `m` and
 * returns -1, and a match decrypts `c` into `m` and returns 0. `nsec` is
 * ignored.
 */
static int
_decrypt_detached(unsigned char *m,
unsigned char *nsec,
const unsigned char *c,
unsigned long long clen,
const unsigned char *mac,
const unsigned char *ad,
unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k)
{
crypto_onetimeauth_poly1305_state state;
unsigned char block0[64U];
unsigned char slen[8U];
unsigned char computed_mac[crypto_aead_chacha20poly1305_ietf_ABYTES];
unsigned long long mlen;
int ret;
(void) nsec;
/* 64 bytes of stream output seed the Poly1305 state, then are wiped. */
crypto_stream_chacha20_ietf_ext(block0, sizeof block0, npub, k);
crypto_onetimeauth_poly1305_init(&state, block0);
sodium_memzero(block0, sizeof block0);
crypto_onetimeauth_poly1305_update(&state, ad, adlen);
/* (0x10 - len) & 0xf is the number of bytes missing to the next multiple of 16. */
crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf);
mlen = clen;
crypto_onetimeauth_poly1305_update(&state, c, mlen);
crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf);
STORE64_LE(slen, (uint64_t) adlen);
crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
STORE64_LE(slen, (uint64_t) mlen);
crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
crypto_onetimeauth_poly1305_final(&state, computed_mac);
sodium_memzero(&state, sizeof state);
COMPILER_ASSERT(sizeof computed_mac == 16U);
ret = crypto_verify_16(computed_mac, mac);
sodium_memzero(computed_mac, sizeof computed_mac);
/* Verification-only mode: no output buffer supplied. */
if (m == NULL) {
return ret;
}
if (ret != 0) {
memset(m, 0, mlen);
return -1;
}
/* The tag matched; only now is the plaintext produced. */
crypto_stream_chacha20_ietf_ext_xor_ic(m, c, mlen, npub, 1U, k);
return 0;
}
/*
 * XChaCha20-Poly1305 encryption with a detached tag.
 *
 * A subkey is derived with crypto_core_hchacha20() from `k` and the start
 * of `npub`. The nonce bytes located after
 * crypto_core_hchacha20_INPUTBYTES are placed behind a 4-byte zero prefix
 * to form the inner nonce. The internal _encrypt_detached() does the rest,
 * and the subkey is wiped before returning its result.
 */
int
crypto_aead_xchacha20poly1305_ietf_encrypt_detached(unsigned char *c,
                                                    unsigned char *mac,
                                                    unsigned long long *maclen_p,
                                                    const unsigned char *m,
                                                    unsigned long long mlen,
                                                    const unsigned char *ad,
                                                    unsigned long long adlen,
                                                    const unsigned char *nsec,
                                                    const unsigned char *npub,
                                                    const unsigned char *k)
{
    unsigned char subkey[crypto_core_hchacha20_OUTPUTBYTES];
    unsigned char subnonce[crypto_aead_chacha20poly1305_ietf_NPUBBYTES] = { 0 };
    int           status;

    crypto_core_hchacha20(subkey, npub, k, NULL);
    /* The first 4 bytes of the inner nonce stay zero. */
    memcpy(&subnonce[4], &npub[crypto_core_hchacha20_INPUTBYTES], sizeof subnonce - 4);

    status = _encrypt_detached(c, mac, maclen_p, m, mlen, ad, adlen, nsec, subnonce, subkey);
    sodium_memzero(subkey, sizeof subkey);

    return status;
}
/*
 * Combined-mode XChaCha20-Poly1305 encryption.
 *
 * Writes the ciphertext to `c` with the tag appended at `c + mlen`. Calls
 * sodium_misuse() when `mlen` exceeds
 * crypto_aead_xchacha20poly1305_ietf_MESSAGEBYTES_MAX. When `clen_p` is not
 * NULL it receives mlen + crypto_aead_xchacha20poly1305_ietf_ABYTES on
 * success and 0 otherwise.
 */
int
crypto_aead_xchacha20poly1305_ietf_encrypt(unsigned char *c,
                                           unsigned long long *clen_p,
                                           const unsigned char *m,
                                           unsigned long long mlen,
                                           const unsigned char *ad,
                                           unsigned long long adlen,
                                           const unsigned char *nsec,
                                           const unsigned char *npub,
                                           const unsigned char *k)
{
    int status;

    if (mlen > crypto_aead_xchacha20poly1305_ietf_MESSAGEBYTES_MAX) {
        sodium_misuse();
    }
    status = crypto_aead_xchacha20poly1305_ietf_encrypt_detached(c, c + mlen, NULL, m, mlen,
                                                                 ad, adlen, nsec, npub, k);
    if (clen_p != NULL) {
        *clen_p = (status == 0) ? mlen + crypto_aead_xchacha20poly1305_ietf_ABYTES : 0ULL;
    }
    return status;
}
/*
 * XChaCha20-Poly1305 verification and decryption with a detached tag.
 *
 * Derives the subkey and inner nonce in the same way as the encryption
 * side (crypto_core_hchacha20() plus a 4-byte zero nonce prefix), hands the
 * work to the internal _decrypt_detached(), and wipes the subkey before
 * returning its result.
 */
int
crypto_aead_xchacha20poly1305_ietf_decrypt_detached(unsigned char *m,
                                                    unsigned char *nsec,
                                                    const unsigned char *c,
                                                    unsigned long long clen,
                                                    const unsigned char *mac,
                                                    const unsigned char *ad,
                                                    unsigned long long adlen,
                                                    const unsigned char *npub,
                                                    const unsigned char *k)
{
    unsigned char subkey[crypto_core_hchacha20_OUTPUTBYTES];
    unsigned char subnonce[crypto_aead_chacha20poly1305_ietf_NPUBBYTES] = { 0 };
    int           status;

    crypto_core_hchacha20(subkey, npub, k, NULL);
    /* The first 4 bytes of the inner nonce stay zero. */
    memcpy(&subnonce[4], &npub[crypto_core_hchacha20_INPUTBYTES], sizeof subnonce - 4);

    status = _decrypt_detached(m, nsec, c, clen, mac, ad, adlen, subnonce, subkey);
    sodium_memzero(subkey, sizeof subkey);

    return status;
}
/*
 * Combined-mode XChaCha20-Poly1305 decryption.
 *
 * `c` holds the ciphertext followed by a
 * crypto_aead_xchacha20poly1305_ietf_ABYTES tag. Inputs shorter than the
 * tag fail with -1 without calling the detached routine. When `mlen_p` is
 * not NULL it receives the plaintext length on success and 0 on failure.
 */
int
crypto_aead_xchacha20poly1305_ietf_decrypt(unsigned char *m,
                                           unsigned long long *mlen_p,
                                           unsigned char *nsec,
                                           const unsigned char *c,
                                           unsigned long long clen,
                                           const unsigned char *ad,
                                           unsigned long long adlen,
                                           const unsigned char *npub,
                                           const unsigned char *k)
{
    int status = -1;

    if (clen >= crypto_aead_xchacha20poly1305_ietf_ABYTES) {
        /* Split the input into ciphertext body and trailing tag. */
        const unsigned long long body_len = clen - crypto_aead_xchacha20poly1305_ietf_ABYTES;

        status = crypto_aead_xchacha20poly1305_ietf_decrypt_detached(m, nsec, c, body_len,
                                                                     c + body_len, ad, adlen,
                                                                     npub, k);
    }
    if (mlen_p != NULL) {
        *mlen_p = (status == 0) ? clen - crypto_aead_xchacha20poly1305_ietf_ABYTES : 0ULL;
    }
    return status;
}
/* Runtime accessor for crypto_aead_xchacha20poly1305_ietf_KEYBYTES. */
size_t
crypto_aead_xchacha20poly1305_ietf_keybytes(void)
{
return crypto_aead_xchacha20poly1305_ietf_KEYBYTES;
}
/* Runtime accessor for crypto_aead_xchacha20poly1305_ietf_NPUBBYTES. */
size_t
crypto_aead_xchacha20poly1305_ietf_npubbytes(void)
{
return crypto_aead_xchacha20poly1305_ietf_NPUBBYTES;
}
/* Runtime accessor for crypto_aead_xchacha20poly1305_ietf_NSECBYTES. */
size_t
crypto_aead_xchacha20poly1305_ietf_nsecbytes(void)
{
return crypto_aead_xchacha20poly1305_ietf_NSECBYTES;
}
/* Runtime accessor for crypto_aead_xchacha20poly1305_ietf_ABYTES. */
size_t
crypto_aead_xchacha20poly1305_ietf_abytes(void)
{
return crypto_aead_xchacha20poly1305_ietf_ABYTES;
}
/* Runtime accessor for crypto_aead_xchacha20poly1305_ietf_MESSAGEBYTES_MAX. */
size_t
crypto_aead_xchacha20poly1305_ietf_messagebytes_max(void)
{
return crypto_aead_xchacha20poly1305_ietf_MESSAGEBYTES_MAX;
}
/* Fill `k` with crypto_aead_xchacha20poly1305_ietf_KEYBYTES bytes from randombytes_buf(). */
void
crypto_aead_xchacha20poly1305_ietf_keygen(unsigned char k[crypto_aead_xchacha20poly1305_ietf_KEYBYTES])
{
randombytes_buf(k, crypto_aead_xchacha20poly1305_ietf_KEYBYTES);
}

View File

@@ -0,0 +1,41 @@
#include "crypto_auth.h"
#include "randombytes.h"
/* Runtime accessor for crypto_auth_BYTES. */
size_t
crypto_auth_bytes(void)
{
return crypto_auth_BYTES;
}
/* Runtime accessor for crypto_auth_KEYBYTES. */
size_t
crypto_auth_keybytes(void)
{
return crypto_auth_KEYBYTES;
}
/* Runtime accessor for the crypto_auth_PRIMITIVE string. */
const char *
crypto_auth_primitive(void)
{
return crypto_auth_PRIMITIVE;
}
/*
 * Computes an authentication tag for `in` (`inlen` bytes) under key `k` into
 * `out` by forwarding to crypto_auth_hmacsha512256(); returns its result.
 */
int
crypto_auth(unsigned char *out, const unsigned char *in,
unsigned long long inlen, const unsigned char *k)
{
return crypto_auth_hmacsha512256(out, in, inlen, k);
}
/*
 * Checks tag `h` against `in` (`inlen` bytes) under key `k` by forwarding to
 * crypto_auth_hmacsha512256_verify(); returns its result.
 */
int
crypto_auth_verify(const unsigned char *h, const unsigned char *in,
unsigned long long inlen,const unsigned char *k)
{
return crypto_auth_hmacsha512256_verify(h, in, inlen, k);
}
/* Fills `k` with crypto_auth_KEYBYTES bytes obtained from randombytes_buf(). */
void
crypto_auth_keygen(unsigned char k[crypto_auth_KEYBYTES])
{
randombytes_buf(k, crypto_auth_KEYBYTES);
}

View File

@@ -0,0 +1,118 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "crypto_auth_hmacsha256.h"
#include "crypto_hash_sha256.h"
#include "crypto_verify_32.h"
#include "randombytes.h"
#include "utils.h"
/* Runtime accessor for crypto_auth_hmacsha256_BYTES. */
size_t
crypto_auth_hmacsha256_bytes(void)
{
return crypto_auth_hmacsha256_BYTES;
}
/* Runtime accessor for crypto_auth_hmacsha256_KEYBYTES. */
size_t
crypto_auth_hmacsha256_keybytes(void)
{
return crypto_auth_hmacsha256_KEYBYTES;
}
/* Size in bytes of a crypto_auth_hmacsha256_state object. */
size_t
crypto_auth_hmacsha256_statebytes(void)
{
return sizeof(crypto_auth_hmacsha256_state);
}
/*
 * Fills `k` with crypto_auth_hmacsha256_KEYBYTES bytes obtained from
 * randombytes_buf().
 */
void
crypto_auth_hmacsha256_keygen(unsigned char k[crypto_auth_hmacsha256_KEYBYTES])
{
randombytes_buf(k, crypto_auth_hmacsha256_KEYBYTES);
}
/*
 * Prepares `state` for a streaming HMAC-SHA256 computation keyed with
 * `key` (`keylen` bytes).
 *
 * Keys longer than the 64-byte block are first replaced by their SHA-256
 * digest. The inner context is then seeded with (key XOR 0x36-padding) and
 * the outer context with (key XOR 0x5c-padding). The temporary block and the
 * hashed key are wiped before returning. Always returns 0.
 */
int
crypto_auth_hmacsha256_init(crypto_auth_hmacsha256_state *state,
                            const unsigned char *key, size_t keylen)
{
    unsigned char block[64];
    unsigned char hashed_key[32];
    size_t        pos;

    if (keylen > sizeof block) {
        /* ictx is only borrowed here; it is re-initialized just below. */
        crypto_hash_sha256_init(&state->ictx);
        crypto_hash_sha256_update(&state->ictx, key, keylen);
        crypto_hash_sha256_final(&state->ictx, hashed_key);
        key    = hashed_key;
        keylen = sizeof hashed_key;
    }

    /* Inner context: block = key XOR ipad, remainder is the bare pad byte. */
    for (pos = 0; pos < keylen; pos++) {
        block[pos] = (unsigned char) (key[pos] ^ 0x36);
    }
    for (; pos < sizeof block; pos++) {
        block[pos] = 0x36;
    }
    crypto_hash_sha256_init(&state->ictx);
    crypto_hash_sha256_update(&state->ictx, block, sizeof block);

    /* Outer context: block = key XOR opad. */
    for (pos = 0; pos < keylen; pos++) {
        block[pos] = (unsigned char) (key[pos] ^ 0x5c);
    }
    for (; pos < sizeof block; pos++) {
        block[pos] = 0x5c;
    }
    crypto_hash_sha256_init(&state->octx);
    crypto_hash_sha256_update(&state->octx, block, sizeof block);

    sodium_memzero(block, sizeof block);
    sodium_memzero(hashed_key, sizeof hashed_key);

    return 0;
}
/*
 * Feeds `inlen` bytes from `in` into the inner SHA-256 context of `state`.
 * Always returns 0.
 */
int
crypto_auth_hmacsha256_update(crypto_auth_hmacsha256_state *state,
const unsigned char *in, unsigned long long inlen)
{
crypto_hash_sha256_update(&state->ictx, in, inlen);
return 0;
}
/*
 * Completes the HMAC: finalizes the inner hash, feeds that 32-byte digest to
 * the outer context, and writes the outer digest to `out`. The intermediate
 * digest is wiped afterwards. Always returns 0.
 */
int
crypto_auth_hmacsha256_final(crypto_auth_hmacsha256_state *state,
                             unsigned char *out)
{
    unsigned char inner_digest[32];

    crypto_hash_sha256_final(&state->ictx, inner_digest);
    crypto_hash_sha256_update(&state->octx, inner_digest,
                              sizeof inner_digest);
    crypto_hash_sha256_final(&state->octx, out);
    sodium_memzero(inner_digest, sizeof inner_digest);

    return 0;
}
/*
 * One-shot HMAC-SHA256 of `in` (`inlen` bytes) with a key of exactly
 * crypto_auth_hmacsha256_KEYBYTES bytes at `k`; the tag is written to `out`.
 * Runs init/update/final on a local state. Always returns 0.
 */
int
crypto_auth_hmacsha256(unsigned char *out, const unsigned char *in,
                       unsigned long long inlen, const unsigned char *k)
{
    crypto_auth_hmacsha256_state ctx;

    (void) crypto_auth_hmacsha256_init(&ctx, k,
                                       crypto_auth_hmacsha256_KEYBYTES);
    (void) crypto_auth_hmacsha256_update(&ctx, in, inlen);
    (void) crypto_auth_hmacsha256_final(&ctx, out);

    return 0;
}
/*
 * Recomputes the HMAC-SHA256 tag of `in` under `k` and compares it to `h`.
 *
 * The result is the bitwise OR of three checks: crypto_verify_32(), a guard
 * that yields -1 if `h` aliases the local buffer, and sodium_memcmp(). It is
 * therefore 0 only when every check reports a match.
 */
int
crypto_auth_hmacsha256_verify(const unsigned char *h, const unsigned char *in,
                              unsigned long long inlen, const unsigned char *k)
{
    unsigned char expected[32];
    int           mismatch;

    crypto_auth_hmacsha256(expected, in, inlen, k);

    mismatch  = crypto_verify_32(h, expected);
    mismatch |= -(h == expected);
    mismatch |= sodium_memcmp(expected, h, 32);

    return mismatch;
}

View File

@@ -0,0 +1,118 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "crypto_auth_hmacsha512.h"
#include "crypto_hash_sha512.h"
#include "crypto_verify_64.h"
#include "randombytes.h"
#include "utils.h"
/* Runtime accessor for crypto_auth_hmacsha512_BYTES. */
size_t
crypto_auth_hmacsha512_bytes(void)
{
return crypto_auth_hmacsha512_BYTES;
}
/* Runtime accessor for crypto_auth_hmacsha512_KEYBYTES. */
size_t
crypto_auth_hmacsha512_keybytes(void)
{
return crypto_auth_hmacsha512_KEYBYTES;
}
/* Size in bytes of a crypto_auth_hmacsha512_state object. */
size_t
crypto_auth_hmacsha512_statebytes(void)
{
return sizeof(crypto_auth_hmacsha512_state);
}
/*
 * Fills `k` with crypto_auth_hmacsha512_KEYBYTES bytes obtained from
 * randombytes_buf().
 */
void
crypto_auth_hmacsha512_keygen(unsigned char k[crypto_auth_hmacsha512_KEYBYTES])
{
randombytes_buf(k, crypto_auth_hmacsha512_KEYBYTES);
}
/*
 * Prepares `state` for a streaming HMAC-SHA512 computation keyed with
 * `key` (`keylen` bytes).
 *
 * Keys longer than the 128-byte block are first replaced by their SHA-512
 * digest. The inner context is then seeded with (key XOR 0x36-padding) and
 * the outer context with (key XOR 0x5c-padding). The temporary block and the
 * hashed key are wiped before returning. Always returns 0.
 */
int
crypto_auth_hmacsha512_init(crypto_auth_hmacsha512_state *state,
                            const unsigned char *key, size_t keylen)
{
    unsigned char block[128];
    unsigned char hashed_key[64];
    size_t        pos;

    if (keylen > sizeof block) {
        /* ictx is only borrowed here; it is re-initialized just below. */
        crypto_hash_sha512_init(&state->ictx);
        crypto_hash_sha512_update(&state->ictx, key, keylen);
        crypto_hash_sha512_final(&state->ictx, hashed_key);
        key    = hashed_key;
        keylen = sizeof hashed_key;
    }

    /* Inner context: block = key XOR ipad, remainder is the bare pad byte. */
    for (pos = 0; pos < keylen; pos++) {
        block[pos] = (unsigned char) (key[pos] ^ 0x36);
    }
    for (; pos < sizeof block; pos++) {
        block[pos] = 0x36;
    }
    crypto_hash_sha512_init(&state->ictx);
    crypto_hash_sha512_update(&state->ictx, block, sizeof block);

    /* Outer context: block = key XOR opad. */
    for (pos = 0; pos < keylen; pos++) {
        block[pos] = (unsigned char) (key[pos] ^ 0x5c);
    }
    for (; pos < sizeof block; pos++) {
        block[pos] = 0x5c;
    }
    crypto_hash_sha512_init(&state->octx);
    crypto_hash_sha512_update(&state->octx, block, sizeof block);

    sodium_memzero(block, sizeof block);
    sodium_memzero(hashed_key, sizeof hashed_key);

    return 0;
}
/*
 * Feeds `inlen` bytes from `in` into the inner SHA-512 context of `state`.
 * Always returns 0.
 */
int
crypto_auth_hmacsha512_update(crypto_auth_hmacsha512_state *state,
const unsigned char *in, unsigned long long inlen)
{
crypto_hash_sha512_update(&state->ictx, in, inlen);
return 0;
}
/*
 * Completes the HMAC: finalizes the inner hash, feeds that 64-byte digest to
 * the outer context, and writes the outer digest to `out`. The intermediate
 * digest is wiped afterwards. Always returns 0.
 */
int
crypto_auth_hmacsha512_final(crypto_auth_hmacsha512_state *state,
                             unsigned char *out)
{
    unsigned char inner_digest[64];

    crypto_hash_sha512_final(&state->ictx, inner_digest);
    crypto_hash_sha512_update(&state->octx, inner_digest,
                              sizeof inner_digest);
    crypto_hash_sha512_final(&state->octx, out);
    sodium_memzero(inner_digest, sizeof inner_digest);

    return 0;
}
/*
 * One-shot HMAC-SHA512 of `in` (`inlen` bytes) with a key of exactly
 * crypto_auth_hmacsha512_KEYBYTES bytes at `k`; the tag is written to `out`.
 * Runs init/update/final on a local state. Always returns 0.
 */
int
crypto_auth_hmacsha512(unsigned char *out, const unsigned char *in,
                       unsigned long long inlen, const unsigned char *k)
{
    crypto_auth_hmacsha512_state ctx;

    (void) crypto_auth_hmacsha512_init(&ctx, k,
                                       crypto_auth_hmacsha512_KEYBYTES);
    (void) crypto_auth_hmacsha512_update(&ctx, in, inlen);
    (void) crypto_auth_hmacsha512_final(&ctx, out);

    return 0;
}
/*
 * Recomputes the HMAC-SHA512 tag of `in` under `k` and compares it to `h`.
 *
 * The result is the bitwise OR of three checks: crypto_verify_64(), a guard
 * that yields -1 if `h` aliases the local buffer, and sodium_memcmp(). It is
 * therefore 0 only when every check reports a match.
 */
int
crypto_auth_hmacsha512_verify(const unsigned char *h, const unsigned char *in,
                              unsigned long long inlen, const unsigned char *k)
{
    unsigned char expected[64];
    int           mismatch;

    crypto_auth_hmacsha512(expected, in, inlen, k);

    mismatch  = crypto_verify_64(h, expected);
    mismatch |= -(h == expected);
    mismatch |= sodium_memcmp(expected, h, 64);

    return mismatch;
}

View File

@@ -0,0 +1,93 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "crypto_auth_hmacsha512.h"
#include "crypto_auth_hmacsha512256.h"
#include "crypto_hash_sha512.h"
#include "crypto_verify_32.h"
#include "randombytes.h"
#include "utils.h"
/* Runtime accessor for crypto_auth_hmacsha512256_BYTES. */
size_t
crypto_auth_hmacsha512256_bytes(void)
{
return crypto_auth_hmacsha512256_BYTES;
}
/* Runtime accessor for crypto_auth_hmacsha512256_KEYBYTES. */
size_t
crypto_auth_hmacsha512256_keybytes(void)
{
return crypto_auth_hmacsha512256_KEYBYTES;
}
/* Size in bytes of a crypto_auth_hmacsha512256_state object. */
size_t
crypto_auth_hmacsha512256_statebytes(void)
{
return sizeof(crypto_auth_hmacsha512256_state);
}
/*
 * Fills `k` with crypto_auth_hmacsha512256_KEYBYTES bytes obtained from
 * randombytes_buf().
 */
void
crypto_auth_hmacsha512256_keygen(
unsigned char k[crypto_auth_hmacsha512256_KEYBYTES])
{
randombytes_buf(k, crypto_auth_hmacsha512256_KEYBYTES);
}
/*
 * Initializes `state` by treating it as a crypto_auth_hmacsha512_state and
 * forwarding to crypto_auth_hmacsha512_init(); returns that call's result.
 */
int
crypto_auth_hmacsha512256_init(crypto_auth_hmacsha512256_state *state,
const unsigned char *key, size_t keylen)
{
return crypto_auth_hmacsha512_init((crypto_auth_hmacsha512_state *) state,
key, keylen);
}
/*
 * Absorbs `inlen` bytes from `in` by treating `state` as a
 * crypto_auth_hmacsha512_state and forwarding to
 * crypto_auth_hmacsha512_update(); returns that call's result.
 */
int
crypto_auth_hmacsha512256_update(crypto_auth_hmacsha512256_state *state,
const unsigned char *in,
unsigned long long inlen)
{
return crypto_auth_hmacsha512_update((crypto_auth_hmacsha512_state *) state,
in, inlen);
}
/*
 * Completes the computation and writes the tag to `out`: the full 64-byte
 * HMAC-SHA512 output is produced in a local buffer and its first 32 bytes
 * are copied out.
 *
 * The local buffer is wiped before returning so the 32 discarded bytes of
 * the untruncated MAC do not linger on the stack, matching how the sibling
 * _final functions treat their intermediate digest. Always returns 0.
 */
int
crypto_auth_hmacsha512256_final(crypto_auth_hmacsha512256_state *state,
                                unsigned char *out)
{
    unsigned char out0[64];

    crypto_auth_hmacsha512_final((crypto_auth_hmacsha512_state *) state, out0);
    memcpy(out, out0, 32);
    sodium_memzero(out0, sizeof out0);

    return 0;
}
/*
 * One-shot HMAC-SHA512-256 of `in` (`inlen` bytes) with a key of exactly
 * crypto_auth_hmacsha512256_KEYBYTES bytes at `k`; the tag is written to
 * `out`. Runs init/update/final on a local state. Always returns 0.
 */
int
crypto_auth_hmacsha512256(unsigned char *out, const unsigned char *in,
                          unsigned long long inlen, const unsigned char *k)
{
    crypto_auth_hmacsha512256_state ctx;

    (void) crypto_auth_hmacsha512256_init(&ctx, k,
                                          crypto_auth_hmacsha512256_KEYBYTES);
    (void) crypto_auth_hmacsha512256_update(&ctx, in, inlen);
    (void) crypto_auth_hmacsha512256_final(&ctx, out);

    return 0;
}
/*
 * Recomputes the HMAC-SHA512-256 tag of `in` under `k` and compares it to
 * `h`.
 *
 * The result is the bitwise OR of three checks: crypto_verify_32(), a guard
 * that yields -1 if `h` aliases the local buffer, and sodium_memcmp(). It is
 * therefore 0 only when every check reports a match.
 */
int
crypto_auth_hmacsha512256_verify(const unsigned char *h,
                                 const unsigned char *in,
                                 unsigned long long inlen,
                                 const unsigned char *k)
{
    unsigned char expected[32];
    int           mismatch;

    crypto_auth_hmacsha512256(expected, in, inlen, k);

    mismatch  = crypto_verify_32(h, expected);
    mismatch |= -(h == expected);
    mismatch |= sodium_memcmp(expected, h, 32);

    return mismatch;
}

View File

@@ -0,0 +1,114 @@
#include "crypto_box.h"
/* Runtime accessor for crypto_box_SEEDBYTES. */
size_t
crypto_box_seedbytes(void)
{
return crypto_box_SEEDBYTES;
}
/* Runtime accessor for crypto_box_PUBLICKEYBYTES. */
size_t
crypto_box_publickeybytes(void)
{
return crypto_box_PUBLICKEYBYTES;
}
/* Runtime accessor for crypto_box_SECRETKEYBYTES. */
size_t
crypto_box_secretkeybytes(void)
{
return crypto_box_SECRETKEYBYTES;
}
/* Runtime accessor for crypto_box_BEFORENMBYTES. */
size_t
crypto_box_beforenmbytes(void)
{
return crypto_box_BEFORENMBYTES;
}
/* Runtime accessor for crypto_box_NONCEBYTES. */
size_t
crypto_box_noncebytes(void)
{
return crypto_box_NONCEBYTES;
}
/* Runtime accessor for crypto_box_ZEROBYTES. */
size_t
crypto_box_zerobytes(void)
{
return crypto_box_ZEROBYTES;
}
/* Runtime accessor for crypto_box_BOXZEROBYTES. */
size_t
crypto_box_boxzerobytes(void)
{
return crypto_box_BOXZEROBYTES;
}
/* Runtime accessor for crypto_box_MACBYTES. */
size_t
crypto_box_macbytes(void)
{
return crypto_box_MACBYTES;
}
/* Runtime accessor for crypto_box_MESSAGEBYTES_MAX. */
size_t
crypto_box_messagebytes_max(void)
{
return crypto_box_MESSAGEBYTES_MAX;
}
/* Runtime accessor for the crypto_box_PRIMITIVE string. */
const char *
crypto_box_primitive(void)
{
return crypto_box_PRIMITIVE;
}
/*
 * The functions below are the generic crypto_box entry points. Each one
 * forwards its arguments unchanged to the matching
 * crypto_box_curve25519xsalsa20poly1305_* function and returns its result.
 */

/* Derives a key pair (`pk`, `sk`) from `seed`. */
int
crypto_box_seed_keypair(unsigned char *pk, unsigned char *sk,
const unsigned char *seed)
{
return crypto_box_curve25519xsalsa20poly1305_seed_keypair(pk, sk, seed);
}
/* Generates a key pair into `pk` and `sk`. */
int
crypto_box_keypair(unsigned char *pk, unsigned char *sk)
{
return crypto_box_curve25519xsalsa20poly1305_keypair(pk, sk);
}
/* Computes the precomputed shared key `k` from `pk` and `sk`. */
int
crypto_box_beforenm(unsigned char *k, const unsigned char *pk,
const unsigned char *sk)
{
return crypto_box_curve25519xsalsa20poly1305_beforenm(k, pk, sk);
}
/* Boxes `m` (`mlen` bytes) into `c` using nonce `n` and precomputed key `k`. */
int
crypto_box_afternm(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
const unsigned char *k)
{
return crypto_box_curve25519xsalsa20poly1305_afternm(c, m, mlen, n, k);
}
/* Opens `c` (`clen` bytes) into `m` using nonce `n` and precomputed key `k`. */
int
crypto_box_open_afternm(unsigned char *m, const unsigned char *c,
unsigned long long clen, const unsigned char *n,
const unsigned char *k)
{
return crypto_box_curve25519xsalsa20poly1305_open_afternm(m, c, clen, n, k);
}
/* Boxes `m` (`mlen` bytes) into `c` using nonce `n` and the keys `pk`/`sk`. */
int
crypto_box(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
const unsigned char *pk, const unsigned char *sk)
{
return crypto_box_curve25519xsalsa20poly1305(c, m, mlen, n, pk, sk);
}
/* Opens `c` (`clen` bytes) into `m` using nonce `n` and the keys `pk`/`sk`. */
int
crypto_box_open(unsigned char *m, const unsigned char *c,
unsigned long long clen, const unsigned char *n,
const unsigned char *pk, const unsigned char *sk)
{
return crypto_box_curve25519xsalsa20poly1305_open(m, c, clen, n, pk, sk);
}

View File

@@ -0,0 +1,115 @@
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include "core.h"
#include "crypto_box.h"
#include "crypto_secretbox.h"
#include "private/common.h"
#include "utils.h"
/*
 * Encrypts `m` (`mlen` bytes) with the precomputed key `k` and nonce `n`,
 * writing ciphertext to `c` and the tag to `mac`. Forwards unchanged to
 * crypto_secretbox_detached() and returns its result.
 */
int
crypto_box_detached_afternm(unsigned char *c, unsigned char *mac,
const unsigned char *m, unsigned long long mlen,
const unsigned char *n, const unsigned char *k)
{
return crypto_secretbox_detached(c, mac, m, mlen, n, k);
}
/*
 * Detached-mode box: derives the shared key from `pk`/`sk` with
 * crypto_box_beforenm(), then encrypts through
 * crypto_box_detached_afternm().
 *
 * Returns -1 if key derivation fails, otherwise the encryption result. The
 * derived key is wiped once it has been used.
 */
int
crypto_box_detached(unsigned char *c, unsigned char *mac,
                    const unsigned char *m, unsigned long long mlen,
                    const unsigned char *n, const unsigned char *pk,
                    const unsigned char *sk)
{
    unsigned char shared_key[crypto_box_BEFORENMBYTES];
    int           status = -1;

    COMPILER_ASSERT(crypto_box_BEFORENMBYTES >= crypto_secretbox_KEYBYTES);
    if (crypto_box_beforenm(shared_key, pk, sk) == 0) {
        status = crypto_box_detached_afternm(c, mac, m, mlen, n, shared_key);
        sodium_memzero(shared_key, sizeof shared_key);
    }
    return status;
}
/*
 * Combined-mode box with a precomputed key: the tag occupies the first
 * crypto_box_MACBYTES bytes of `c`, the ciphertext follows it.
 *
 * Calls sodium_misuse() when `mlen` exceeds crypto_box_MESSAGEBYTES_MAX;
 * otherwise returns the result of crypto_box_detached_afternm().
 */
int
crypto_box_easy_afternm(unsigned char *c, const unsigned char *m,
                        unsigned long long mlen, const unsigned char *n,
                        const unsigned char *k)
{
    unsigned char *tag;
    unsigned char *body;

    if (mlen > crypto_box_MESSAGEBYTES_MAX) {
        sodium_misuse();
    }
    tag  = c;
    body = c + crypto_box_MACBYTES;

    return crypto_box_detached_afternm(body, tag, m, mlen, n, k);
}
/*
 * Combined-mode box: the tag occupies the first crypto_box_MACBYTES bytes
 * of `c`, the ciphertext follows it.
 *
 * Calls sodium_misuse() when `mlen` exceeds crypto_box_MESSAGEBYTES_MAX;
 * otherwise returns the result of crypto_box_detached().
 */
int
crypto_box_easy(unsigned char *c, const unsigned char *m,
                unsigned long long mlen, const unsigned char *n,
                const unsigned char *pk, const unsigned char *sk)
{
    unsigned char *tag;
    unsigned char *body;

    if (mlen > crypto_box_MESSAGEBYTES_MAX) {
        sodium_misuse();
    }
    tag  = c;
    body = c + crypto_box_MACBYTES;

    return crypto_box_detached(body, tag, m, mlen, n, pk, sk);
}
/*
 * Verifies `mac` and decrypts `c` (`clen` bytes) into `m` with the
 * precomputed key `k` and nonce `n`. Forwards unchanged to
 * crypto_secretbox_open_detached() and returns its result.
 */
int
crypto_box_open_detached_afternm(unsigned char *m, const unsigned char *c,
const unsigned char *mac,
unsigned long long clen,
const unsigned char *n,
const unsigned char *k)
{
return crypto_secretbox_open_detached(m, c, mac, clen, n, k);
}
/*
 * Detached-mode open: derives the shared key from `pk`/`sk` with
 * crypto_box_beforenm(), then decrypts through
 * crypto_box_open_detached_afternm().
 *
 * Returns -1 if key derivation fails, otherwise the decryption result. The
 * derived key is wiped once it has been used.
 */
int
crypto_box_open_detached(unsigned char *m, const unsigned char *c,
                         const unsigned char *mac,
                         unsigned long long clen, const unsigned char *n,
                         const unsigned char *pk, const unsigned char *sk)
{
    unsigned char shared_key[crypto_box_BEFORENMBYTES];
    int           status = -1;

    if (crypto_box_beforenm(shared_key, pk, sk) == 0) {
        status = crypto_box_open_detached_afternm(m, c, mac, clen, n,
                                                  shared_key);
        sodium_memzero(shared_key, sizeof shared_key);
    }
    return status;
}
/*
 * Combined-mode open with a precomputed key: the first crypto_box_MACBYTES
 * bytes of `c` are the tag, the rest is the ciphertext.
 *
 * Returns -1 when `clen` is shorter than the tag, otherwise the result of
 * crypto_box_open_detached_afternm().
 */
int
crypto_box_open_easy_afternm(unsigned char *m, const unsigned char *c,
                             unsigned long long clen, const unsigned char *n,
                             const unsigned char *k)
{
    if (clen >= crypto_box_MACBYTES) {
        const unsigned char *tag  = c;
        const unsigned char *body = c + crypto_box_MACBYTES;

        return crypto_box_open_detached_afternm(
            m, body, tag, clen - crypto_box_MACBYTES, n, k);
    }
    return -1;
}
/*
 * Combined-mode open: the first crypto_box_MACBYTES bytes of `c` are the
 * tag, the rest is the ciphertext.
 *
 * Returns -1 when `clen` is shorter than the tag, otherwise the result of
 * crypto_box_open_detached().
 */
int
crypto_box_open_easy(unsigned char *m, const unsigned char *c,
                     unsigned long long clen, const unsigned char *n,
                     const unsigned char *pk, const unsigned char *sk)
{
    if (clen >= crypto_box_MACBYTES) {
        const unsigned char *tag  = c;
        const unsigned char *body = c + crypto_box_MACBYTES;

        return crypto_box_open_detached(
            m, body, tag, clen - crypto_box_MACBYTES, n, pk, sk);
    }
    return -1;
}

View File

@@ -0,0 +1,68 @@
#include <string.h>
#include "crypto_box.h"
#include "crypto_generichash.h"
#include "private/common.h"
#include "utils.h"
/*
 * Derives the sealed-box nonce: an unkeyed crypto_generichash of `pk1`
 * followed by `pk2` (crypto_box_PUBLICKEYBYTES each), producing
 * crypto_box_NONCEBYTES bytes in `nonce`. Always returns 0.
 */
static int
_crypto_box_seal_nonce(unsigned char *nonce,
                       const unsigned char *pk1, const unsigned char *pk2)
{
    crypto_generichash_state hasher;

    (void) crypto_generichash_init(&hasher, NULL, 0U, crypto_box_NONCEBYTES);
    (void) crypto_generichash_update(&hasher, pk1, crypto_box_PUBLICKEYBYTES);
    (void) crypto_generichash_update(&hasher, pk2, crypto_box_PUBLICKEYBYTES);
    (void) crypto_generichash_final(&hasher, nonce, crypto_box_NONCEBYTES);

    return 0;
}
/*
 * Seals `m` (`mlen` bytes) for the recipient public key `pk`.
 *
 * A fresh key pair is generated, the nonce is derived from the ephemeral
 * public key and `pk`, and the message is boxed with crypto_box_easy().
 * Output layout in `c`: ephemeral public key (crypto_box_PUBLICKEYBYTES)
 * followed by the box. All local key material and the nonce are wiped.
 *
 * Returns -1 if key generation fails, otherwise the crypto_box_easy()
 * result.
 */
int
crypto_box_seal(unsigned char *c, const unsigned char *m,
                unsigned long long mlen, const unsigned char *pk)
{
    unsigned char eph_pk[crypto_box_PUBLICKEYBYTES];
    unsigned char eph_sk[crypto_box_SECRETKEYBYTES];
    unsigned char seal_nonce[crypto_box_NONCEBYTES];
    int           status;

    if (crypto_box_keypair(eph_pk, eph_sk) != 0) {
        return -1; /* LCOV_EXCL_LINE */
    }
    _crypto_box_seal_nonce(seal_nonce, eph_pk, pk);

    /* Box first, then prepend the ephemeral public key. */
    status = crypto_box_easy(c + crypto_box_PUBLICKEYBYTES, m, mlen,
                             seal_nonce, pk, eph_sk);
    memcpy(c, eph_pk, crypto_box_PUBLICKEYBYTES);

    sodium_memzero(eph_sk, sizeof eph_sk);
    sodium_memzero(eph_pk, sizeof eph_pk);
    sodium_memzero(seal_nonce, sizeof seal_nonce);

    return status;
}
/*
 * Opens a sealed box `c` (`clen` bytes) into `m` using the recipient key
 * pair `pk`/`sk`.
 *
 * The leading crypto_box_PUBLICKEYBYTES bytes of `c` are used as the
 * sender's public key, both for nonce derivation and for
 * crypto_box_open_easy(). Returns -1 when `clen` is below
 * crypto_box_SEALBYTES, otherwise the crypto_box_open_easy() result.
 */
int
crypto_box_seal_open(unsigned char *m, const unsigned char *c,
                     unsigned long long clen,
                     const unsigned char *pk, const unsigned char *sk)
{
    unsigned char        seal_nonce[crypto_box_NONCEBYTES];
    const unsigned char *sender_pk = c;

    COMPILER_ASSERT(crypto_box_PUBLICKEYBYTES < crypto_box_SEALBYTES);
    if (clen >= crypto_box_SEALBYTES) {
        _crypto_box_seal_nonce(seal_nonce, sender_pk, pk);

        return crypto_box_open_easy(m, c + crypto_box_PUBLICKEYBYTES,
                                    clen - crypto_box_PUBLICKEYBYTES,
                                    seal_nonce, sender_pk, sk);
    }
    return -1;
}
/* Runtime accessor for crypto_box_SEALBYTES. */
size_t
crypto_box_sealbytes(void)
{
return crypto_box_SEALBYTES;
}

View File

@@ -0,0 +1,204 @@
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "core.h"
#include "crypto_box_curve25519xchacha20poly1305.h"
#include "crypto_core_hchacha20.h"
#include "crypto_hash_sha512.h"
#include "crypto_scalarmult_curve25519.h"
#include "crypto_secretbox_xchacha20poly1305.h"
#include "private/common.h"
#include "randombytes.h"
#include "utils.h"
/*
 * Deterministic key pair: the secret key is the first 32 bytes of the
 * SHA-512 digest of the 32-byte `seed`; the public key is computed from it
 * with crypto_scalarmult_curve25519_base(). The digest buffer is wiped.
 * Returns the scalar multiplication result.
 */
int
crypto_box_curve25519xchacha20poly1305_seed_keypair(unsigned char *pk,
                                                    unsigned char *sk,
                                                    const unsigned char *seed)
{
    unsigned char digest[64];
    int           status;

    crypto_hash_sha512(digest, seed, 32);
    memcpy(sk, digest, 32);
    sodium_memzero(digest, sizeof digest);

    status = crypto_scalarmult_curve25519_base(pk, sk);

    return status;
}
/*
 * Random key pair: fills `sk` with 32 bytes from randombytes_buf() and
 * computes `pk` with crypto_scalarmult_curve25519_base(), whose result is
 * returned.
 */
int
crypto_box_curve25519xchacha20poly1305_keypair(unsigned char *pk,
unsigned char *sk)
{
randombytes_buf(sk, 32);
return crypto_scalarmult_curve25519_base(pk, sk);
}
/*
 * Precomputes the shared key `k` for the pair (`pk`, `sk`): the output of
 * crypto_scalarmult_curve25519() is passed through crypto_core_hchacha20()
 * with an all-zero 16-byte input.
 *
 * Returns -1 if the scalar multiplication fails, otherwise the
 * crypto_core_hchacha20() result. The raw scalar multiplication output is a
 * secret in its own right, so it is wiped from the stack on every path
 * before returning.
 */
int
crypto_box_curve25519xchacha20poly1305_beforenm(unsigned char *k,
                                                const unsigned char *pk,
                                                const unsigned char *sk)
{
    static const unsigned char zero[16] = { 0 };
    unsigned char              s[32];
    int                        ret = -1;

    if (crypto_scalarmult_curve25519(s, sk, pk) == 0) {
        ret = crypto_core_hchacha20(k, zero, s, NULL);
    }
    sodium_memzero(s, sizeof s);

    return ret;
}
/*
 * Encrypts `m` (`mlen` bytes) with the precomputed key `k` and nonce `n`,
 * writing ciphertext to `c` and the tag to `mac`. Forwards unchanged to
 * crypto_secretbox_xchacha20poly1305_detached() and returns its result.
 */
int
crypto_box_curve25519xchacha20poly1305_detached_afternm(
unsigned char *c, unsigned char *mac, const unsigned char *m,
unsigned long long mlen, const unsigned char *n, const unsigned char *k)
{
return crypto_secretbox_xchacha20poly1305_detached(c, mac, m, mlen, n, k);
}
/*
 * Detached-mode box: derives the shared key from `pk`/`sk` with the
 * _beforenm() function, then encrypts through _detached_afternm().
 *
 * Returns -1 if key derivation fails, otherwise the encryption result. The
 * derived key is wiped once it has been used.
 */
int
crypto_box_curve25519xchacha20poly1305_detached(
    unsigned char *c, unsigned char *mac, const unsigned char *m,
    unsigned long long mlen, const unsigned char *n, const unsigned char *pk,
    const unsigned char *sk)
{
    unsigned char shared_key[crypto_box_curve25519xchacha20poly1305_BEFORENMBYTES];
    int           status = -1;

    COMPILER_ASSERT(crypto_box_curve25519xchacha20poly1305_BEFORENMBYTES >=
                    crypto_secretbox_xchacha20poly1305_KEYBYTES);
    if (crypto_box_curve25519xchacha20poly1305_beforenm(shared_key, pk,
                                                        sk) == 0) {
        status = crypto_box_curve25519xchacha20poly1305_detached_afternm(
            c, mac, m, mlen, n, shared_key);
        sodium_memzero(shared_key, sizeof shared_key);
    }
    return status;
}
/*
 * Combined-mode box with a precomputed key: the tag occupies the first
 * crypto_box_curve25519xchacha20poly1305_MACBYTES bytes of `c`, the
 * ciphertext follows it.
 *
 * Calls sodium_misuse() when `mlen` exceeds the MESSAGEBYTES_MAX constant;
 * otherwise returns the result of the _detached_afternm() function.
 */
int
crypto_box_curve25519xchacha20poly1305_easy_afternm(unsigned char *c,
                                                    const unsigned char *m,
                                                    unsigned long long mlen,
                                                    const unsigned char *n,
                                                    const unsigned char *k)
{
    unsigned char *tag;
    unsigned char *body;

    if (mlen > crypto_box_curve25519xchacha20poly1305_MESSAGEBYTES_MAX) {
        sodium_misuse();
    }
    tag  = c;
    body = c + crypto_box_curve25519xchacha20poly1305_MACBYTES;

    return crypto_box_curve25519xchacha20poly1305_detached_afternm(
        body, tag, m, mlen, n, k);
}
/*
 * Combined-mode box: the tag occupies the first
 * crypto_box_curve25519xchacha20poly1305_MACBYTES bytes of `c`, the
 * ciphertext follows it.
 *
 * Calls sodium_misuse() when `mlen` exceeds the MESSAGEBYTES_MAX constant;
 * otherwise returns the result of the _detached() function.
 */
int
crypto_box_curve25519xchacha20poly1305_easy(
    unsigned char *c, const unsigned char *m, unsigned long long mlen,
    const unsigned char *n, const unsigned char *pk, const unsigned char *sk)
{
    unsigned char *tag;
    unsigned char *body;

    if (mlen > crypto_box_curve25519xchacha20poly1305_MESSAGEBYTES_MAX) {
        sodium_misuse();
    }
    tag  = c;
    body = c + crypto_box_curve25519xchacha20poly1305_MACBYTES;

    return crypto_box_curve25519xchacha20poly1305_detached(
        body, tag, m, mlen, n, pk, sk);
}
/*
 * Verifies `mac` and decrypts `c` (`clen` bytes) into `m` with the
 * precomputed key `k` and nonce `n`. Forwards unchanged to
 * crypto_secretbox_xchacha20poly1305_open_detached() and returns its result.
 */
int
crypto_box_curve25519xchacha20poly1305_open_detached_afternm(
unsigned char *m, const unsigned char *c, const unsigned char *mac,
unsigned long long clen, const unsigned char *n, const unsigned char *k)
{
return crypto_secretbox_xchacha20poly1305_open_detached(m, c, mac, clen, n,
k);
}
/*
 * Detached-mode open: derives the shared key from `pk`/`sk` with the
 * _beforenm() function, then decrypts through _open_detached_afternm().
 *
 * Returns -1 if key derivation fails, otherwise the decryption result. The
 * derived key is wiped once it has been used.
 */
int
crypto_box_curve25519xchacha20poly1305_open_detached(
    unsigned char *m, const unsigned char *c, const unsigned char *mac,
    unsigned long long clen, const unsigned char *n, const unsigned char *pk,
    const unsigned char *sk)
{
    unsigned char shared_key[crypto_box_curve25519xchacha20poly1305_BEFORENMBYTES];
    int           status = -1;

    if (crypto_box_curve25519xchacha20poly1305_beforenm(shared_key, pk,
                                                        sk) == 0) {
        status = crypto_box_curve25519xchacha20poly1305_open_detached_afternm(
            m, c, mac, clen, n, shared_key);
        sodium_memzero(shared_key, sizeof shared_key);
    }
    return status;
}
/*
 * Combined-mode open with a precomputed key: the first
 * crypto_box_curve25519xchacha20poly1305_MACBYTES bytes of `c` are the tag,
 * the rest is the ciphertext.
 *
 * Returns -1 when `clen` is shorter than the tag, otherwise the result of
 * the _open_detached_afternm() function.
 */
int
crypto_box_curve25519xchacha20poly1305_open_easy_afternm(
    unsigned char *m, const unsigned char *c, unsigned long long clen,
    const unsigned char *n, const unsigned char *k)
{
    if (clen >= crypto_box_curve25519xchacha20poly1305_MACBYTES) {
        const unsigned char *tag  = c;
        const unsigned char *body =
            c + crypto_box_curve25519xchacha20poly1305_MACBYTES;

        return crypto_box_curve25519xchacha20poly1305_open_detached_afternm(
            m, body, tag,
            clen - crypto_box_curve25519xchacha20poly1305_MACBYTES, n, k);
    }
    return -1;
}
/*
 * Combined-mode open: the first
 * crypto_box_curve25519xchacha20poly1305_MACBYTES bytes of `c` are the tag,
 * the rest is the ciphertext.
 *
 * Returns -1 when `clen` is shorter than the tag, otherwise the result of
 * the _open_detached() function.
 */
int
crypto_box_curve25519xchacha20poly1305_open_easy(
    unsigned char *m, const unsigned char *c, unsigned long long clen,
    const unsigned char *n, const unsigned char *pk, const unsigned char *sk)
{
    if (clen >= crypto_box_curve25519xchacha20poly1305_MACBYTES) {
        const unsigned char *tag  = c;
        const unsigned char *body =
            c + crypto_box_curve25519xchacha20poly1305_MACBYTES;

        return crypto_box_curve25519xchacha20poly1305_open_detached(
            m, body, tag,
            clen - crypto_box_curve25519xchacha20poly1305_MACBYTES, n, pk, sk);
    }
    return -1;
}
/* Runtime accessor for crypto_box_curve25519xchacha20poly1305_SEEDBYTES. */
size_t
crypto_box_curve25519xchacha20poly1305_seedbytes(void)
{
return crypto_box_curve25519xchacha20poly1305_SEEDBYTES;
}
/* Runtime accessor for crypto_box_curve25519xchacha20poly1305_PUBLICKEYBYTES. */
size_t
crypto_box_curve25519xchacha20poly1305_publickeybytes(void)
{
return crypto_box_curve25519xchacha20poly1305_PUBLICKEYBYTES;
}
/* Runtime accessor for crypto_box_curve25519xchacha20poly1305_SECRETKEYBYTES. */
size_t
crypto_box_curve25519xchacha20poly1305_secretkeybytes(void)
{
return crypto_box_curve25519xchacha20poly1305_SECRETKEYBYTES;
}
/* Runtime accessor for crypto_box_curve25519xchacha20poly1305_BEFORENMBYTES. */
size_t
crypto_box_curve25519xchacha20poly1305_beforenmbytes(void)
{
return crypto_box_curve25519xchacha20poly1305_BEFORENMBYTES;
}
/* Runtime accessor for crypto_box_curve25519xchacha20poly1305_NONCEBYTES. */
size_t
crypto_box_curve25519xchacha20poly1305_noncebytes(void)
{
return crypto_box_curve25519xchacha20poly1305_NONCEBYTES;
}
/* Runtime accessor for crypto_box_curve25519xchacha20poly1305_MACBYTES. */
size_t
crypto_box_curve25519xchacha20poly1305_macbytes(void)
{
return crypto_box_curve25519xchacha20poly1305_MACBYTES;
}
/* Runtime accessor for crypto_box_curve25519xchacha20poly1305_MESSAGEBYTES_MAX. */
size_t
crypto_box_curve25519xchacha20poly1305_messagebytes_max(void)
{
return crypto_box_curve25519xchacha20poly1305_MESSAGEBYTES_MAX;
}

View File

@@ -0,0 +1,79 @@
#include <string.h>
#include "crypto_box_curve25519xchacha20poly1305.h"
#include "crypto_generichash.h"
#include "private/common.h"
#include "utils.h"
/*
 * Derives the sealed-box nonce: an unkeyed crypto_generichash of `pk1`
 * followed by `pk2` (PUBLICKEYBYTES each), producing
 * crypto_box_curve25519xchacha20poly1305_NONCEBYTES bytes in `nonce`.
 * Always returns 0.
 */
static int
_crypto_box_curve25519xchacha20poly1305_seal_nonce(unsigned char *nonce,
                                                   const unsigned char *pk1,
                                                   const unsigned char *pk2)
{
    crypto_generichash_state hasher;

    (void) crypto_generichash_init(
        &hasher, NULL, 0U, crypto_box_curve25519xchacha20poly1305_NONCEBYTES);
    (void) crypto_generichash_update(
        &hasher, pk1, crypto_box_curve25519xchacha20poly1305_PUBLICKEYBYTES);
    (void) crypto_generichash_update(
        &hasher, pk2, crypto_box_curve25519xchacha20poly1305_PUBLICKEYBYTES);
    (void) crypto_generichash_final(
        &hasher, nonce, crypto_box_curve25519xchacha20poly1305_NONCEBYTES);

    return 0;
}
/*
 * Seals `m` (`mlen` bytes) for the recipient public key `pk`.
 *
 * A fresh key pair is generated, the nonce is derived from the ephemeral
 * public key and `pk`, and the message is boxed with the _easy() function.
 * Output layout in `c`: ephemeral public key (PUBLICKEYBYTES) followed by
 * the box. All local key material and the nonce are wiped.
 *
 * Returns -1 if key generation fails, otherwise the _easy() result.
 */
int
crypto_box_curve25519xchacha20poly1305_seal(unsigned char *c, const unsigned char *m,
                                            unsigned long long mlen,
                                            const unsigned char *pk)
{
    unsigned char eph_pk[crypto_box_curve25519xchacha20poly1305_PUBLICKEYBYTES];
    unsigned char eph_sk[crypto_box_curve25519xchacha20poly1305_SECRETKEYBYTES];
    unsigned char seal_nonce[crypto_box_curve25519xchacha20poly1305_NONCEBYTES];
    int           status;

    if (crypto_box_curve25519xchacha20poly1305_keypair(eph_pk, eph_sk) != 0) {
        return -1; /* LCOV_EXCL_LINE */
    }
    _crypto_box_curve25519xchacha20poly1305_seal_nonce(seal_nonce, eph_pk, pk);

    /* Box first, then prepend the ephemeral public key. */
    status = crypto_box_curve25519xchacha20poly1305_easy(
        c + crypto_box_curve25519xchacha20poly1305_PUBLICKEYBYTES, m, mlen,
        seal_nonce, pk, eph_sk);
    memcpy(c, eph_pk, crypto_box_curve25519xchacha20poly1305_PUBLICKEYBYTES);

    sodium_memzero(eph_sk, sizeof eph_sk);
    sodium_memzero(eph_pk, sizeof eph_pk);
    sodium_memzero(seal_nonce, sizeof seal_nonce);

    return status;
}
/*
 * Opens a sealed box `c` (`clen` bytes) into `m` using the recipient key
 * pair `pk`/`sk`.
 *
 * The leading PUBLICKEYBYTES bytes of `c` are used as the sender's public
 * key, both for nonce derivation and for the _open_easy() call. Returns -1
 * when `clen` is below the SEALBYTES constant, otherwise the _open_easy()
 * result.
 */
int
crypto_box_curve25519xchacha20poly1305_seal_open(unsigned char *m, const unsigned char *c,
                                                 unsigned long long clen,
                                                 const unsigned char *pk,
                                                 const unsigned char *sk)
{
    unsigned char        seal_nonce[crypto_box_curve25519xchacha20poly1305_NONCEBYTES];
    const unsigned char *sender_pk = c;

    COMPILER_ASSERT(crypto_box_curve25519xchacha20poly1305_PUBLICKEYBYTES <
                    crypto_box_curve25519xchacha20poly1305_SEALBYTES);
    if (clen >= crypto_box_curve25519xchacha20poly1305_SEALBYTES) {
        _crypto_box_curve25519xchacha20poly1305_seal_nonce(seal_nonce,
                                                           sender_pk, pk);
        return crypto_box_curve25519xchacha20poly1305_open_easy(
            m, c + crypto_box_curve25519xchacha20poly1305_PUBLICKEYBYTES,
            clen - crypto_box_curve25519xchacha20poly1305_PUBLICKEYBYTES,
            seal_nonce, sender_pk, sk);
    }
    return -1;
}
/* Runtime accessor for crypto_box_curve25519xchacha20poly1305_SEALBYTES. */
size_t
crypto_box_curve25519xchacha20poly1305_sealbytes(void)
{
return crypto_box_curve25519xchacha20poly1305_SEALBYTES;
}

View File

@@ -0,0 +1,156 @@
#include <string.h>
#include "crypto_box_curve25519xsalsa20poly1305.h"
#include "crypto_core_hsalsa20.h"
#include "crypto_hash_sha512.h"
#include "crypto_scalarmult_curve25519.h"
#include "crypto_secretbox_xsalsa20poly1305.h"
#include "randombytes.h"
#include "utils.h"
/*
 * Deterministic key pair: the secret key is the first 32 bytes of the
 * SHA-512 digest of the 32-byte `seed`; the public key is computed from it
 * with crypto_scalarmult_curve25519_base(). The digest buffer is wiped.
 * Returns the scalar multiplication result.
 */
int
crypto_box_curve25519xsalsa20poly1305_seed_keypair(unsigned char *pk,
                                                   unsigned char *sk,
                                                   const unsigned char *seed)
{
    unsigned char digest[64];
    int           status;

    crypto_hash_sha512(digest, seed, 32);
    memcpy(sk, digest, 32);
    sodium_memzero(digest, sizeof digest);

    status = crypto_scalarmult_curve25519_base(pk, sk);

    return status;
}
/*
 * Random key pair: fills `sk` with 32 bytes from randombytes_buf() and
 * computes `pk` with crypto_scalarmult_curve25519_base(), whose result is
 * returned.
 */
int
crypto_box_curve25519xsalsa20poly1305_keypair(unsigned char *pk,
unsigned char *sk)
{
randombytes_buf(sk, 32);
return crypto_scalarmult_curve25519_base(pk, sk);
}
/*
 * Precomputes the shared key `k` for the pair (`pk`, `sk`): the output of
 * crypto_scalarmult_curve25519() is passed through crypto_core_hsalsa20()
 * with an all-zero 16-byte input.
 *
 * Returns -1 if the scalar multiplication fails, otherwise the
 * crypto_core_hsalsa20() result. The raw scalar multiplication output is a
 * secret in its own right, so it is wiped from the stack on every path
 * before returning.
 */
int
crypto_box_curve25519xsalsa20poly1305_beforenm(unsigned char *k,
                                               const unsigned char *pk,
                                               const unsigned char *sk)
{
    static const unsigned char zero[16] = { 0 };
    unsigned char              s[32];
    int                        ret = -1;

    if (crypto_scalarmult_curve25519(s, sk, pk) == 0) {
        ret = crypto_core_hsalsa20(k, zero, s, NULL);
    }
    sodium_memzero(s, sizeof s);

    return ret;
}
/*
 * Boxes `m` (`mlen` bytes) into `c` with the precomputed key `k` and nonce
 * `n`. Forwards unchanged to crypto_secretbox_xsalsa20poly1305() and returns
 * its result.
 */
int
crypto_box_curve25519xsalsa20poly1305_afternm(unsigned char *c,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *n,
const unsigned char *k)
{
return crypto_secretbox_xsalsa20poly1305(c, m, mlen, n, k);
}
/*
 * Opens `c` (`clen` bytes) into `m` with the precomputed key `k` and nonce
 * `n`. Forwards unchanged to crypto_secretbox_xsalsa20poly1305_open() and
 * returns its result.
 */
int
crypto_box_curve25519xsalsa20poly1305_open_afternm(unsigned char *m,
const unsigned char *c,
unsigned long long clen,
const unsigned char *n,
const unsigned char *k)
{
return crypto_secretbox_xsalsa20poly1305_open(m, c, clen, n, k);
}
/*
 * Boxes `m` (`mlen` bytes) into `c`: derives the shared key from `pk`/`sk`
 * with the _beforenm() function, then encrypts through _afternm().
 *
 * Returns -1 if key derivation fails, otherwise the encryption result. The
 * derived key is wiped once it has been used.
 */
int
crypto_box_curve25519xsalsa20poly1305(unsigned char *c, const unsigned char *m,
                                      unsigned long long mlen,
                                      const unsigned char *n,
                                      const unsigned char *pk,
                                      const unsigned char *sk)
{
    unsigned char shared_key[crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES];
    int           status = -1;

    if (crypto_box_curve25519xsalsa20poly1305_beforenm(shared_key, pk,
                                                       sk) == 0) {
        status = crypto_box_curve25519xsalsa20poly1305_afternm(c, m, mlen, n,
                                                               shared_key);
        sodium_memzero(shared_key, sizeof shared_key);
    }
    return status;
}
/*
 * Opens `c` (`clen` bytes) into `m`: derives the shared key from `pk`/`sk`
 * with the _beforenm() function, then decrypts through _open_afternm().
 *
 * Returns -1 if key derivation fails, otherwise the decryption result. The
 * derived key is wiped once it has been used.
 */
int
crypto_box_curve25519xsalsa20poly1305_open(
    unsigned char *m, const unsigned char *c, unsigned long long clen,
    const unsigned char *n, const unsigned char *pk, const unsigned char *sk)
{
    unsigned char shared_key[crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES];
    int           status = -1;

    if (crypto_box_curve25519xsalsa20poly1305_beforenm(shared_key, pk,
                                                       sk) == 0) {
        status = crypto_box_curve25519xsalsa20poly1305_open_afternm(
            m, c, clen, n, shared_key);
        sodium_memzero(shared_key, sizeof shared_key);
    }
    return status;
}
/* Runtime accessor for crypto_box_curve25519xsalsa20poly1305_SEEDBYTES. */
size_t
crypto_box_curve25519xsalsa20poly1305_seedbytes(void)
{
return crypto_box_curve25519xsalsa20poly1305_SEEDBYTES;
}
/* Runtime accessor for crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES. */
size_t
crypto_box_curve25519xsalsa20poly1305_publickeybytes(void)
{
return crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES;
}
/* Runtime accessor for crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES. */
size_t
crypto_box_curve25519xsalsa20poly1305_secretkeybytes(void)
{
return crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES;
}
/* Runtime accessor for crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES. */
size_t
crypto_box_curve25519xsalsa20poly1305_beforenmbytes(void)
{
return crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES;
}
/* Runtime accessor for crypto_box_curve25519xsalsa20poly1305_NONCEBYTES. */
size_t
crypto_box_curve25519xsalsa20poly1305_noncebytes(void)
{
return crypto_box_curve25519xsalsa20poly1305_NONCEBYTES;
}
/* Runtime accessor for crypto_box_curve25519xsalsa20poly1305_ZEROBYTES. */
size_t
crypto_box_curve25519xsalsa20poly1305_zerobytes(void)
{
return crypto_box_curve25519xsalsa20poly1305_ZEROBYTES;
}
/* Runtime accessor for crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES. */
size_t
crypto_box_curve25519xsalsa20poly1305_boxzerobytes(void)
{
return crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES;
}
/* Runtime accessor for crypto_box_curve25519xsalsa20poly1305_MACBYTES. */
size_t
crypto_box_curve25519xsalsa20poly1305_macbytes(void)
{
return crypto_box_curve25519xsalsa20poly1305_MACBYTES;
}
/* Runtime accessor for crypto_box_curve25519xsalsa20poly1305_MESSAGEBYTES_MAX. */
size_t
crypto_box_curve25519xsalsa20poly1305_messagebytes_max(void)
{
return crypto_box_curve25519xsalsa20poly1305_MESSAGEBYTES_MAX;
}

View File

@@ -0,0 +1,225 @@
#include <stdint.h>
#include "crypto_core_ed25519.h"
#include "private/common.h"
#include "private/ed25519_ref10.h"
#include "randombytes.h"
#include "utils.h"
/*
 * Returns 1 when the encoded point `p` passes every check below, 0 as soon
 * as one fails. The checks run in this order: canonical encoding, not of
 * small order, decodable, on the curve, in the main subgroup.
 */
int
crypto_core_ed25519_is_valid_point(const unsigned char *p)
{
    ge25519_p3 point;

    if (ge25519_is_canonical(p) == 0) {
        return 0;
    }
    if (ge25519_has_small_order(p) != 0) {
        return 0;
    }
    if (ge25519_frombytes(&point, p) != 0) {
        return 0;
    }
    if (ge25519_is_on_curve(&point) == 0) {
        return 0;
    }
    if (ge25519_is_on_main_subgroup(&point) == 0) {
        return 0;
    }
    return 1;
}
/*
 * Writes the encoding of p + q to `r`.
 *
 * Both inputs are decoded with ge25519_frombytes() and must pass
 * ge25519_is_on_curve(); `p` is checked before `q`. Returns -1 if either
 * input is rejected, 0 otherwise.
 */
int
crypto_core_ed25519_add(unsigned char *r,
                        const unsigned char *p, const unsigned char *q)
{
    ge25519_p3     lhs, rhs, sum;
    ge25519_p1p1   sum_p1p1;
    ge25519_cached rhs_cached;

    if (ge25519_frombytes(&lhs, p) != 0 || ge25519_is_on_curve(&lhs) == 0) {
        return -1;
    }
    if (ge25519_frombytes(&rhs, q) != 0 || ge25519_is_on_curve(&rhs) == 0) {
        return -1;
    }
    ge25519_p3_to_cached(&rhs_cached, &rhs);
    ge25519_add(&sum_p1p1, &lhs, &rhs_cached);
    ge25519_p1p1_to_p3(&sum, &sum_p1p1);
    ge25519_p3_tobytes(r, &sum);

    return 0;
}
/*
 * Writes the encoding of p - q to `r`.
 *
 * Both inputs are decoded with ge25519_frombytes() and must pass
 * ge25519_is_on_curve(); `p` is checked before `q`. Returns -1 if either
 * input is rejected, 0 otherwise.
 */
int
crypto_core_ed25519_sub(unsigned char *r,
                        const unsigned char *p, const unsigned char *q)
{
    ge25519_p3     lhs, rhs, diff;
    ge25519_p1p1   diff_p1p1;
    ge25519_cached rhs_cached;

    if (ge25519_frombytes(&lhs, p) != 0 || ge25519_is_on_curve(&lhs) == 0) {
        return -1;
    }
    if (ge25519_frombytes(&rhs, q) != 0 || ge25519_is_on_curve(&rhs) == 0) {
        return -1;
    }
    ge25519_p3_to_cached(&rhs_cached, &rhs);
    ge25519_sub(&diff_p1p1, &lhs, &rhs_cached);
    ge25519_p1p1_to_p3(&diff, &diff_p1p1);
    ge25519_p3_tobytes(r, &diff);

    return 0;
}
/*
 * Maps the input `r` to an encoded point in `p` by calling
 * ge25519_from_uniform(). Always returns 0.
 */
int
crypto_core_ed25519_from_uniform(unsigned char *p, const unsigned char *r)
{
ge25519_from_uniform(p, r);
return 0;
}
/*
 * Maps the input `h` to an encoded point in `p` by calling
 * ge25519_from_hash(). Always returns 0.
 */
int
crypto_core_ed25519_from_hash(unsigned char *p, const unsigned char *h)
{
ge25519_from_hash(p, h);
return 0;
}
/*
 * Writes a random encoded point to `p`: draws
 * crypto_core_ed25519_UNIFORMBYTES bytes from randombytes_buf() and maps
 * them with crypto_core_ed25519_from_uniform().
 */
void
crypto_core_ed25519_random(unsigned char *p)
{
unsigned char h[crypto_core_ed25519_UNIFORMBYTES];
randombytes_buf(h, sizeof h);
(void) crypto_core_ed25519_from_uniform(p, h);
}
/*
 * Fills `r` with a random scalar by rejection sampling: draw
 * crypto_core_ed25519_SCALARBYTES random bytes, clear the top three bits of
 * the last byte, and retry until the candidate is accepted by
 * sc25519_is_canonical() and is not all-zero.
 */
void
crypto_core_ed25519_scalar_random(unsigned char *r)
{
    for (;;) {
        randombytes_buf(r, crypto_core_ed25519_SCALARBYTES);
        r[crypto_core_ed25519_SCALARBYTES - 1] &= 0x1f;
        if (sc25519_is_canonical(r) != 0 &&
            !sodium_is_zero(r, crypto_core_ed25519_SCALARBYTES)) {
            break;
        }
    }
}
/*
 * Writes the result of sc25519_invert() on `s` to `recip`.
 *
 * Returns -1 when the SCALARBYTES bytes at `s` are all zero, 0 otherwise.
 * The zero test runs after the inversion, exactly as in the original
 * statement order.
 */
int
crypto_core_ed25519_scalar_invert(unsigned char *recip, const unsigned char *s)
{
    int input_is_zero;

    sc25519_invert(recip, s);
    input_is_zero = sodium_is_zero(s, crypto_core_ed25519_SCALARBYTES);

    return -input_is_zero;
}
/*
 * L = 2^252+27742317777372353535851937790883648493, stored as 32 bytes with
 * the least-significant byte first. The scalar negate/complement helpers in
 * this file place it in the upper half of a wide buffer before subtracting
 * and reducing.
 */
static const unsigned char L[] = {
0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 0xd6, 0x9c, 0xf7,
0xa2, 0xde, 0xf9, 0xde, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
};
/*
 * Writes the negation of scalar `s` to `neg`.
 *
 * A wide buffer is built with L in its upper SCALARBYTES bytes and zeros
 * elsewhere; the zero-extended `s` is subtracted from it with sodium_sub()
 * and the difference is passed through sc25519_reduce(). The low
 * SCALARBYTES bytes of the result are copied out.
 */
void
crypto_core_ed25519_scalar_negate(unsigned char *neg, const unsigned char *s)
{
    unsigned char acc[crypto_core_ed25519_NONREDUCEDSCALARBYTES]    = { 0 };
    unsigned char wide_s[crypto_core_ed25519_NONREDUCEDSCALARBYTES] = { 0 };

    COMPILER_ASSERT(crypto_core_ed25519_NONREDUCEDSCALARBYTES >=
                    2 * crypto_core_ed25519_SCALARBYTES);

    memcpy(wide_s, s, crypto_core_ed25519_SCALARBYTES);
    memcpy(acc + crypto_core_ed25519_SCALARBYTES, L,
           crypto_core_ed25519_SCALARBYTES);

    sodium_sub(acc, wide_s, sizeof acc);
    sc25519_reduce(acc);

    memcpy(neg, acc, crypto_core_ed25519_SCALARBYTES);
}
/*
 * Writes the complement of scalar `s` (1 - s after reduction) to `comp`.
 *
 * A wide buffer is built with the value 1 in its lowest byte and L in its
 * upper SCALARBYTES bytes; the zero-extended `s` is subtracted from it with
 * sodium_sub() and the difference is passed through sc25519_reduce(). The
 * low SCALARBYTES bytes of the result are copied out.
 */
void
crypto_core_ed25519_scalar_complement(unsigned char *comp,
                                      const unsigned char *s)
{
    unsigned char acc[crypto_core_ed25519_NONREDUCEDSCALARBYTES]    = { 0 };
    unsigned char wide_s[crypto_core_ed25519_NONREDUCEDSCALARBYTES] = { 0 };

    COMPILER_ASSERT(crypto_core_ed25519_NONREDUCEDSCALARBYTES >=
                    2 * crypto_core_ed25519_SCALARBYTES);

    acc[0] = 1;
    memcpy(acc + crypto_core_ed25519_SCALARBYTES, L,
           crypto_core_ed25519_SCALARBYTES);
    memcpy(wide_s, s, crypto_core_ed25519_SCALARBYTES);

    sodium_sub(acc, wide_s, sizeof acc);
    sc25519_reduce(acc);

    memcpy(comp, acc, crypto_core_ed25519_SCALARBYTES);
}
/*
 * Writes the reduced sum x + y to `z`.
 *
 * Both scalars are zero-extended into wide buffers, added over the low
 * SCALARBYTES bytes with sodium_add(), and the wide sum is handed to
 * crypto_core_ed25519_scalar_reduce().
 */
void
crypto_core_ed25519_scalar_add(unsigned char *z, const unsigned char *x,
                               const unsigned char *y)
{
    unsigned char sum[crypto_core_ed25519_NONREDUCEDSCALARBYTES]    = { 0 };
    unsigned char addend[crypto_core_ed25519_NONREDUCEDSCALARBYTES] = { 0 };

    memcpy(sum, x, crypto_core_ed25519_SCALARBYTES);
    memcpy(addend, y, crypto_core_ed25519_SCALARBYTES);

    sodium_add(sum, addend, crypto_core_ed25519_SCALARBYTES);
    crypto_core_ed25519_scalar_reduce(z, sum);
}
/*
 * Writes x - y to `z`, computed as x + (-y): `y` is negated with
 * crypto_core_ed25519_scalar_negate() and the result is added to `x` with
 * crypto_core_ed25519_scalar_add().
 */
void
crypto_core_ed25519_scalar_sub(unsigned char *z, const unsigned char *x,
const unsigned char *y)
{
unsigned char yn[crypto_core_ed25519_SCALARBYTES];
crypto_core_ed25519_scalar_negate(yn, y);
crypto_core_ed25519_scalar_add(z, x, yn);
}
/* Writes the product of scalars `x` and `y` to `z` by calling sc25519_mul(). */
void
crypto_core_ed25519_scalar_mul(unsigned char *z, const unsigned char *x,
const unsigned char *y)
{
sc25519_mul(z, x, y);
}
/*
 * Reduces the wide scalar `s` (crypto_core_ed25519_NONREDUCEDSCALARBYTES
 * bytes) and writes crypto_core_ed25519_SCALARBYTES bytes to `r`.
 *
 * Works on a local copy so `s` is never modified; the copy is reduced in
 * place by sc25519_reduce() and wiped before returning.
 */
void
crypto_core_ed25519_scalar_reduce(unsigned char *r,
const unsigned char *s)
{
unsigned char t[crypto_core_ed25519_NONREDUCEDSCALARBYTES];
memcpy(t, s, sizeof t);
sc25519_reduce(t);
memcpy(r, t, crypto_core_ed25519_SCALARBYTES);
sodium_memzero(t, sizeof t);
}
/* Runtime accessor for crypto_core_ed25519_BYTES. */
size_t
crypto_core_ed25519_bytes(void)
{
return crypto_core_ed25519_BYTES;
}
/* Runtime accessor for crypto_core_ed25519_NONREDUCEDSCALARBYTES. */
size_t
crypto_core_ed25519_nonreducedscalarbytes(void)
{
return crypto_core_ed25519_NONREDUCEDSCALARBYTES;
}
/* Runtime accessor for crypto_core_ed25519_UNIFORMBYTES. */
size_t
crypto_core_ed25519_uniformbytes(void)
{
return crypto_core_ed25519_UNIFORMBYTES;
}
/* Runtime accessor for crypto_core_ed25519_HASHBYTES. */
size_t
crypto_core_ed25519_hashbytes(void)
{
return crypto_core_ed25519_HASHBYTES;
}
/* Runtime accessor for crypto_core_ed25519_SCALARBYTES. */
size_t
crypto_core_ed25519_scalarbytes(void)
{
return crypto_core_ed25519_SCALARBYTES;
}

View File

@@ -0,0 +1,156 @@
#include <stdint.h>
#include "crypto_core_ed25519.h"
#include "crypto_core_ristretto255.h"
#include "private/common.h"
#include "private/ed25519_ref10.h"
#include "randombytes.h"
#include "utils.h"
/*
 * Returns 1 when ristretto255_frombytes() accepts the encoding `p`
 * (i.e. returns 0), and 0 otherwise.
 */
int
crypto_core_ristretto255_is_valid_point(const unsigned char *p)
{
    ge25519_p3 decoded;

    return ristretto255_frombytes(&decoded, p) == 0;
}
/*
 * Writes the ristretto255 encoding of p + q to `r`.
 *
 * Both inputs are decoded with ristretto255_frombytes(), `p` before `q`.
 * Returns -1 if either decoding fails, 0 otherwise.
 */
int
crypto_core_ristretto255_add(unsigned char *r,
                             const unsigned char *p, const unsigned char *q)
{
    ge25519_p3     lhs, rhs, sum;
    ge25519_p1p1   sum_p1p1;
    ge25519_cached rhs_cached;

    if (ristretto255_frombytes(&lhs, p) != 0) {
        return -1;
    }
    if (ristretto255_frombytes(&rhs, q) != 0) {
        return -1;
    }
    ge25519_p3_to_cached(&rhs_cached, &rhs);
    ge25519_add(&sum_p1p1, &lhs, &rhs_cached);
    ge25519_p1p1_to_p3(&sum, &sum_p1p1);
    ristretto255_p3_tobytes(r, &sum);

    return 0;
}
/*
 * Decode the encoded points p and q, subtract q from p, and write the
 * encoding of the difference to r.
 *
 * Returns 0 on success, or -1 (leaving r untouched) when either input is
 * rejected by ristretto255_frombytes(). q is only decoded if p was accepted.
 */
int
crypto_core_ristretto255_sub(unsigned char *r,
                             const unsigned char *p, const unsigned char *q)
{
    ge25519_p3     minuend;
    ge25519_p3     subtrahend;
    ge25519_p3     diff;
    ge25519_p1p1   diff_p1p1;
    ge25519_cached subtrahend_cached;

    if (ristretto255_frombytes(&minuend, p) != 0) {
        return -1;
    }
    if (ristretto255_frombytes(&subtrahend, q) != 0) {
        return -1;
    }
    /* The subtraction routine takes its second operand in "cached" form and
     * produces a p1p1 result that has to be converted back before encoding. */
    ge25519_p3_to_cached(&subtrahend_cached, &subtrahend);
    ge25519_sub(&diff_p1p1, &minuend, &subtrahend_cached);
    ge25519_p1p1_to_p3(&diff, &diff_p1p1);
    ristretto255_p3_tobytes(r, &diff);

    return 0;
}
/*
 * Map the hash r to an encoded point p by forwarding to
 * ristretto255_from_hash(). Always returns 0.
 * NOTE(review): r is presumably crypto_core_ristretto255_HASHBYTES long, as
 * in crypto_core_ristretto255_random() - confirm against the header.
 */
int
crypto_core_ristretto255_from_hash(unsigned char *p, const unsigned char *r)
{
ristretto255_from_hash(p, r);
return 0;
}
/*
 * Write a random encoded point to p: crypto_core_ristretto255_HASHBYTES
 * random bytes are drawn with randombytes_buf() and mapped to a point with
 * ristretto255_from_hash().
 */
void
crypto_core_ristretto255_random(unsigned char *p)
{
    unsigned char seed[crypto_core_ristretto255_HASHBYTES];

    randombytes_buf(seed, sizeof seed);
    /* Same work as crypto_core_ristretto255_from_hash(), whose return value
     * is always 0 and was ignored here anyway. */
    ristretto255_from_hash(p, seed);
}
/* Forwards to crypto_core_ed25519_scalar_random(); r receives the scalar. */
void
crypto_core_ristretto255_scalar_random(unsigned char *r)
{
crypto_core_ed25519_scalar_random(r);
}
/*
 * Forwards to crypto_core_ed25519_scalar_invert() and returns its result
 * unchanged; recip receives the output for the scalar s.
 */
int
crypto_core_ristretto255_scalar_invert(unsigned char *recip,
const unsigned char *s)
{
return crypto_core_ed25519_scalar_invert(recip, s);
}
/* Forwards to crypto_core_ed25519_scalar_negate(); neg receives the output. */
void
crypto_core_ristretto255_scalar_negate(unsigned char *neg,
const unsigned char *s)
{
crypto_core_ed25519_scalar_negate(neg, s);
}
/*
 * Forwards to crypto_core_ed25519_scalar_complement(); comp receives the
 * output.
 */
void
crypto_core_ristretto255_scalar_complement(unsigned char *comp,
const unsigned char *s)
{
crypto_core_ed25519_scalar_complement(comp, s);
}
/* Forwards to crypto_core_ed25519_scalar_add(); z receives the sum of x and y. */
void
crypto_core_ristretto255_scalar_add(unsigned char *z, const unsigned char *x,
const unsigned char *y)
{
crypto_core_ed25519_scalar_add(z, x, y);
}
/* Forwards to crypto_core_ed25519_scalar_sub(); z receives x minus y. */
void
crypto_core_ristretto255_scalar_sub(unsigned char *z, const unsigned char *x,
const unsigned char *y)
{
crypto_core_ed25519_scalar_sub(z, x, y);
}
/*
 * Multiply the scalars x and y into z. Unlike the other scalar helpers in
 * this file, this one calls sc25519_mul() directly instead of going through
 * the crypto_core_ed25519_* wrapper.
 */
void
crypto_core_ristretto255_scalar_mul(unsigned char *z, const unsigned char *x,
const unsigned char *y)
{
sc25519_mul(z, x, y);
}
/*
 * Forwards to crypto_core_ed25519_scalar_reduce(); r receives the reduced
 * form of s.
 */
void
crypto_core_ristretto255_scalar_reduce(unsigned char *r,
const unsigned char *s)
{
crypto_core_ed25519_scalar_reduce(r, s);
}
/* Runtime accessor for crypto_core_ristretto255_BYTES. */
size_t
crypto_core_ristretto255_bytes(void)
{
return crypto_core_ristretto255_BYTES;
}
/* Runtime accessor for crypto_core_ristretto255_NONREDUCEDSCALARBYTES. */
size_t
crypto_core_ristretto255_nonreducedscalarbytes(void)
{
return crypto_core_ristretto255_NONREDUCEDSCALARBYTES;
}
/* Runtime accessor for crypto_core_ristretto255_HASHBYTES. */
size_t
crypto_core_ristretto255_hashbytes(void)
{
return crypto_core_ristretto255_HASHBYTES;
}
/* Runtime accessor for crypto_core_ristretto255_SCALARBYTES. */
size_t
crypto_core_ristretto255_scalarbytes(void)
{
return crypto_core_ristretto255_SCALARBYTES;
}

View File

@@ -0,0 +1,40 @@
{
{ 25967493, -14356035, 29566456, 3660896, -12694345, 4014787, 27544626, -11754271, -6079156, 2047605 },
{ -12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692, 5043384, 19500929, -15469378 },
{ -8738181, 4489570, 9688441, -14785194, 10184609, -12363380, 29287919, 11864899, -24514362, -4438546 }
},
{
{ 15636291, -9688557, 24204773, -7912398, 616977, -16685262, 27787600, -14772189, 28944400, -1550024 },
{ 16568933, 4717097, -11556148, -1102322, 15682896, -11807043, 16354577, -11775962, 7689662, 11199574 },
{ 30464156, -5976125, -11779434, -15670865, 23220365, 15915852, 7512774, 10017326, -17749093, -9920357 }
},
{
{ 10861363, 11473154, 27284546, 1981175, -30064349, 12577861, 32867885, 14515107, -15438304, 10819380 },
{ 4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668, 12483688, -12668491, 5581306 },
{ 19563160, 16186464, -29386857, 4097519, 10237984, -4348115, 28542350, 13850243, -23678021, -15815942 }
},
{
{ 5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852, 5230134, -23952439, -15175766 },
{ -30269007, -3463509, 7665486, 10083793, 28475525, 1649722, 20654025, 16520125, 30598449, 7715701 },
{ 28881845, 14381568, 9657904, 3680757, -20181635, 7843316, -31400660, 1370708, 29794553, -1409300 }
},
{
{ -22518993, -6692182, 14201702, -8745502, -23510406, 8844726, 18474211, -1361450, -13062696, 13821877 },
{ -6455177, -7839871, 3374702, -4740862, -27098617, -10571707, 31655028, -7212327, 18853322, -14220951 },
{ 4566830, -12963868, -28974889, -12240689, -7602672, -2830569, -8514358, -10431137, 2207753, -3209784 }
},
{
{ -25154831, -4185821, 29681144, 7868801, -6854661, -9423865, -12437364, -663000, -31111463, -16132436 },
{ 25576264, -2703214, 7349804, -11814844, 16472782, 9300885, 3844789, 15725684, 171356, 6466918 },
{ 23103977, 13316479, 9739013, -16149481, 817875, -15038942, 8965339, -14088058, -30714912, 16193877 }
},
{
{ -33521811, 3180713, -2394130, 14003687, -16903474, -16270840, 17238398, 4729455, -18074513, 9256800 },
{ -25182317, -4174131, 32336398, 5036987, -21236817, 11360617, 22616405, 9761698, -19827198, 630305 },
{ -13720693, 2639453, -24237460, -7406481, 9494427, -5774029, -6554551, -15960994, -2449256, -14291300 }
},
{
{ -3151181, -5046075, 9282714, 6866145, -31907062, -863023, -18940575, 15033784, 25105118, -7894876 },
{ -24326370, 15950226, -31801215, -14592823, -11662737, -5090925, 1573892, -2625887, 2198790, -15804619 },
{ -3099351, 10324967, -2241613, 7453183, -5446979, -2735503, -13812022, -16236442, -32461234, -12290683 }
}

View File

@@ -0,0 +1,40 @@
/*
 * Precomputed field-element constants, each written as ten signed limbs.
 * NOTE(review): limb order and radix are fixed by the fe25519 typedef, which
 * is not visible here; curve25519_A below suggests least-significant limb
 * first - confirm.
 */
/* d = 37095705934669439343138083508754565189542113879843219016388785533085940283555 */
static const fe25519 d = {
-10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116
};
/* 2 * d =
* 16295367250680780974490674513165176452449235426866156013048779062215315747161
*/
static const fe25519 d2 = {
-21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199 };
/* sqrt(-1) */
static const fe25519 sqrtm1 = {
-32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482
};
/* A = 486662 */
static const fe25519 curve25519_A = {
486662, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* sqrt(ad - 1) with a = -1 (mod p) */
static const fe25519 sqrtadm1 = {
24849947, -153582, -23613485, 6347715, -21072328, -667138, -25271143, -15367704, -870347, 14525639
};
/* 1 / sqrt(a - d) */
static const fe25519 invsqrtamd = {
6111485, 4156064, -27798727, 12243468, -25904040, 120897, 20826367, -7060776, 6093568, -1986012
};
/* 1 - d ^ 2 */
static const fe25519 onemsqd = {
6275446, -16617371, -22938544, -3773710, 11667077, 7397348, -27922721, 1766195, -24433858, 672203
};
/* (d - 1) ^ 2 */
static const fe25519 sqdmone = {
15551795, -11097455, -13425098, -10125071, -11896535, 10178284, -26634327, 4729244, -5282110, -10116402
};

View File

@@ -0,0 +1,220 @@
/*
 * Unpack the byte string s (bytes s[0]..s[31] are read) into the ten-limb
 * field element h.
 *
 * Ignores top bit of s: only the low 23 bits of the last 3-byte load are kept
 * (8388607 == 2^23 - 1).
 *
 * NOTE(review): load_3()/load_4() are defined elsewhere; presumably they read
 * 3 and 4 bytes in little-endian order - confirm.
 */
void
fe25519_frombytes(fe25519 h, const unsigned char *s)
{
/* Each load is shifted so that the limb lines up with its bit position. */
int64_t h0 = load_4(s);
int64_t h1 = load_3(s + 4) << 6;
int64_t h2 = load_3(s + 7) << 5;
int64_t h3 = load_3(s + 10) << 3;
int64_t h4 = load_3(s + 13) << 2;
int64_t h5 = load_4(s + 16);
int64_t h6 = load_3(s + 20) << 7;
int64_t h7 = load_3(s + 23) << 5;
int64_t h8 = load_3(s + 26) << 4;
int64_t h9 = (load_3(s + 29) & 8388607) << 2;
int64_t carry0;
int64_t carry1;
int64_t carry2;
int64_t carry3;
int64_t carry4;
int64_t carry5;
int64_t carry6;
int64_t carry7;
int64_t carry8;
int64_t carry9;
/*
 * First pass: odd limbs. Adding 2^24 before the shift by 25 rounds the carry
 * to nearest, so what stays in the limb lies in [-2^24, 2^24). The carry out
 * of h9 wraps around into h0 multiplied by 19. Each "carry * 2^k" is written
 * as a multiplication by an unsigned constant rather than as a left shift of
 * the signed carry.
 */
carry9 = (h9 + (int64_t)(1L << 24)) >> 25;
h0 += carry9 * 19;
h9 -= carry9 * ((uint64_t) 1L << 25);
carry1 = (h1 + (int64_t)(1L << 24)) >> 25;
h2 += carry1;
h1 -= carry1 * ((uint64_t) 1L << 25);
carry3 = (h3 + (int64_t)(1L << 24)) >> 25;
h4 += carry3;
h3 -= carry3 * ((uint64_t) 1L << 25);
carry5 = (h5 + (int64_t)(1L << 24)) >> 25;
h6 += carry5;
h5 -= carry5 * ((uint64_t) 1L << 25);
carry7 = (h7 + (int64_t)(1L << 24)) >> 25;
h8 += carry7;
h7 -= carry7 * ((uint64_t) 1L << 25);
/* Second pass: even limbs, same scheme with 2^25 rounding and a shift by 26. */
carry0 = (h0 + (int64_t)(1L << 25)) >> 26;
h1 += carry0;
h0 -= carry0 * ((uint64_t) 1L << 26);
carry2 = (h2 + (int64_t)(1L << 25)) >> 26;
h3 += carry2;
h2 -= carry2 * ((uint64_t) 1L << 26);
carry4 = (h4 + (int64_t)(1L << 25)) >> 26;
h5 += carry4;
h4 -= carry4 * ((uint64_t) 1L << 26);
carry6 = (h6 + (int64_t)(1L << 25)) >> 26;
h7 += carry6;
h6 -= carry6 * ((uint64_t) 1L << 26);
carry8 = (h8 + (int64_t)(1L << 25)) >> 26;
h9 += carry8;
h8 -= carry8 * ((uint64_t) 1L << 26);
/* Narrow the carried limbs to the 32-bit storage of the field element. */
h[0] = (int32_t) h0;
h[1] = (int32_t) h1;
h[2] = (int32_t) h2;
h[3] = (int32_t) h3;
h[4] = (int32_t) h4;
h[5] = (int32_t) h5;
h[6] = (int32_t) h6;
h[7] = (int32_t) h7;
h[8] = (int32_t) h8;
h[9] = (int32_t) h9;
}
/*
Fully reduce the field element f and store the result in h: the output
represents h-(2^255-19)q, a value between 0 and 2^255-20.

Preconditions:
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
Write p=2^255-19; q=floor(h/p).
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
Proof:
Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
Then 0<y<1.
Write r=h-pq.
Have 0<=r<=p-1=2^255-20.
Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
Write x=r+19(2^-255)r+y.
Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
*/
static void
fe25519_reduce(fe25519 h, const fe25519 f)
{
int32_t h0 = f[0];
int32_t h1 = f[1];
int32_t h2 = f[2];
int32_t h3 = f[3];
int32_t h4 = f[4];
int32_t h5 = f[5];
int32_t h6 = f[6];
int32_t h7 = f[7];
int32_t h8 = f[8];
int32_t h9 = f[9];
int32_t q;
int32_t carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9;
/*
 * Evaluate the quotient formula from the header comment: start from the
 * rounded 19*h9 term and ripple it through every limb, alternating shifts
 * of 26 and 25 bits. Only the final value of q is used.
 */
q = (19 * h9 + ((uint32_t) 1L << 24)) >> 25;
q = (h0 + q) >> 26;
q = (h1 + q) >> 25;
q = (h2 + q) >> 26;
q = (h3 + q) >> 25;
q = (h4 + q) >> 26;
q = (h5 + q) >> 25;
q = (h6 + q) >> 26;
q = (h7 + q) >> 25;
q = (h8 + q) >> 26;
q = (h9 + q) >> 25;
/* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
h0 += 19 * q;
/* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
/*
 * Plain (non-rounding) carry chain from h0 up to h9. The carry out of h9 is
 * subtracted from h9 but not propagated any further: that is the 2^255 q
 * term being dropped.
 */
carry0 = h0 >> 26;
h1 += carry0;
h0 -= carry0 * ((uint32_t) 1L << 26);
carry1 = h1 >> 25;
h2 += carry1;
h1 -= carry1 * ((uint32_t) 1L << 25);
carry2 = h2 >> 26;
h3 += carry2;
h2 -= carry2 * ((uint32_t) 1L << 26);
carry3 = h3 >> 25;
h4 += carry3;
h3 -= carry3 * ((uint32_t) 1L << 25);
carry4 = h4 >> 26;
h5 += carry4;
h4 -= carry4 * ((uint32_t) 1L << 26);
carry5 = h5 >> 25;
h6 += carry5;
h5 -= carry5 * ((uint32_t) 1L << 25);
carry6 = h6 >> 26;
h7 += carry6;
h6 -= carry6 * ((uint32_t) 1L << 26);
carry7 = h7 >> 25;
h8 += carry7;
h7 -= carry7 * ((uint32_t) 1L << 25);
carry8 = h8 >> 26;
h9 += carry8;
h8 -= carry8 * ((uint32_t) 1L << 26);
carry9 = h9 >> 25;
h9 -= carry9 * ((uint32_t) 1L << 25);
h[0] = h0;
h[1] = h1;
h[2] = h2;
h[3] = h3;
h[4] = h4;
h[5] = h5;
h[6] = h6;
h[7] = h7;
h[8] = h8;
h[9] = h9;
}
/*
Serialize the field element h into the 32 bytes s[0]..s[31]. The value is
first brought to its reduced form with fe25519_reduce(); the ten limbs are
then packed back to back, lowest byte first.

Original derivation notes:
Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
Have h0+...+2^230 h9 between 0 and 2^255-1;
evidently 2^255 h10-2^255 q = 0.
Goal: Output h0+...+2^230 h9.
*/
void
fe25519_tobytes(unsigned char *s, const fe25519 h)
{
fe25519 t;
fe25519_reduce(t, h);
/*
 * Bytes that straddle two limbs OR the top bits of the lower limb with the
 * bottom bits of the next one; the next limb is moved into place by an
 * unsigned multiplication by a power of two instead of a shift.
 */
s[0] = t[0] >> 0;
s[1] = t[0] >> 8;
s[2] = t[0] >> 16;
s[3] = (t[0] >> 24) | (t[1] * ((uint32_t) 1 << 2));
s[4] = t[1] >> 6;
s[5] = t[1] >> 14;
s[6] = (t[1] >> 22) | (t[2] * ((uint32_t) 1 << 3));
s[7] = t[2] >> 5;
s[8] = t[2] >> 13;
s[9] = (t[2] >> 21) | (t[3] * ((uint32_t) 1 << 5));
s[10] = t[3] >> 3;
s[11] = t[3] >> 11;
s[12] = (t[3] >> 19) | (t[4] * ((uint32_t) 1 << 6));
s[13] = t[4] >> 2;
s[14] = t[4] >> 10;
s[15] = t[4] >> 18;
/* Limb 5 starts exactly on a byte boundary (byte 16). */
s[16] = t[5] >> 0;
s[17] = t[5] >> 8;
s[18] = t[5] >> 16;
s[19] = (t[5] >> 24) | (t[6] * ((uint32_t) 1 << 1));
s[20] = t[6] >> 7;
s[21] = t[6] >> 15;
s[22] = (t[6] >> 23) | (t[7] * ((uint32_t) 1 << 3));
s[23] = t[7] >> 5;
s[24] = t[7] >> 13;
s[25] = (t[7] >> 21) | (t[8] * ((uint32_t) 1 << 4));
s[26] = t[8] >> 4;
s[27] = t[8] >> 12;
s[28] = (t[8] >> 20) | (t[9] * ((uint32_t) 1 << 6));
s[29] = t[9] >> 2;
s[30] = t[9] >> 10;
s[31] = t[9] >> 18;
}

View File

@@ -0,0 +1,40 @@
{
{ 1288382639258501, 245678601348599, 269427782077623, 1462984067271730, 137412439391563 },
{ 62697248952638, 204681361388450, 631292143396476, 338455783676468, 1213667448819585 },
{ 301289933810280, 1259582250014073, 1422107436869536, 796239922652654, 1953934009299142 }
},
{
{ 1601611775252272, 1720807796594148, 1132070835939856, 1260455018889551, 2147779492816911 },
{ 316559037616741, 2177824224946892, 1459442586438991, 1461528397712656, 751590696113597 },
{ 1850748884277385, 1200145853858453, 1068094770532492, 672251375690438, 1586055907191707 }
},
{
{ 769950342298419, 132954430919746, 844085933195555, 974092374476333, 726076285546016 },
{ 425251763115706, 608463272472562, 442562545713235, 837766094556764, 374555092627893 },
{ 1086255230780037, 274979815921559, 1960002765731872, 929474102396301, 1190409889297339 }
},
{
{ 665000864555967, 2065379846933859, 370231110385876, 350988370788628, 1233371373142985 },
{ 2019367628972465, 676711900706637, 110710997811333, 1108646842542025, 517791959672113 },
{ 965130719900578, 247011430587952, 526356006571389, 91986625355052, 2157223321444601 }
},
{
{ 1802695059465007, 1664899123557221, 593559490740857, 2160434469266659, 927570450755031 },
{ 1725674970513508, 1933645953859181, 1542344539275782, 1767788773573747, 1297447965928905 },
{ 1381809363726107, 1430341051343062, 2061843536018959, 1551778050872521, 2036394857967624 }
},
{
{ 1970894096313054, 528066325833207, 1619374932191227, 2207306624415883, 1169170329061080 },
{ 2070390218572616, 1458919061857835, 624171843017421, 1055332792707765, 433987520732508 },
{ 893653801273833, 1168026499324677, 1242553501121234, 1306366254304474, 1086752658510815 }
},
{
{ 213454002618221, 939771523987438, 1159882208056014, 317388369627517, 621213314200687 },
{ 1971678598905747, 338026507889165, 762398079972271, 655096486107477, 42299032696322 },
{ 177130678690680, 1754759263300204, 1864311296286618, 1180675631479880, 1292726903152791 }
},
{
{ 1913163449625248, 460779200291993, 2193883288642314, 1008900146920800, 1721983679009502 },
{ 1070401523076875, 1272492007800961, 1910153608563310, 2075579521696771, 1191169788841221 },
{ 692896803108118, 500174642072499, 2068223309439677, 1162190621851337, 1426986007309901 }
}

View File

@@ -0,0 +1,41 @@
/*
 * Precomputed field-element constants, each written as five non-negative
 * limbs.
 * NOTE(review): limb order and radix are fixed by the fe25519 typedef, which
 * is not visible here; curve25519_A below suggests least-significant limb
 * first - confirm.
 */
/* d = 37095705934669439343138083508754565189542113879843219016388785533085940283555 */
static const fe25519 d = {
929955233495203, 466365720129213, 1662059464998953, 2033849074728123, 1442794654840575
};
/* 2 * d =
* 16295367250680780974490674513165176452449235426866156013048779062215315747161
*/
static const fe25519 d2 = {
1859910466990425, 932731440258426, 1072319116312658, 1815898335770999, 633789495995903
};
/* sqrt(-1) */
static const fe25519 sqrtm1 = {
1718705420411056, 234908883556509, 2233514472574048, 2117202627021982, 765476049583133
};
/* A = 486662 */
static const fe25519 curve25519_A = {
486662, 0, 0, 0, 0
};
/* sqrt(ad - 1) with a = -1 (mod p) */
static const fe25519 sqrtadm1 = {
2241493124984347, 425987919032274, 2207028919301688, 1220490630685848, 974799131293748
};
/* 1 / sqrt(a - d) */
static const fe25519 invsqrtamd = {
278908739862762, 821645201101625, 8113234426968, 1777959178193151, 2118520810568447
};
/* 1 - d ^ 2 */
static const fe25519 onemsqd = {
1136626929484150, 1998550399581263, 496427632559748, 118527312129759, 45110755273534
};
/* (d - 1) ^ 2 */
static const fe25519 sqdmone = {
1507062230895904, 1572317787530805, 683053064812840, 317374165784489, 1572899562415810
};

View File

@@ -0,0 +1,116 @@
/*
 * Unpack the byte string s (32 bytes are read) into the five-limb field
 * element h, 51 bits per limb.
 *
 * The top bit of s is ignored: the last limb takes bits 12..62 of the 64-bit
 * word loaded at s + 24, so bit 63 of that word never reaches h.
 */
void
fe25519_frombytes(fe25519 h, const unsigned char *s)
{
    const uint64_t limb_mask = 0x7ffffffffffffULL;
    uint64_t       limb[5];
    int            i;

    /* All loads happen before the first store into h. */
    limb[0] = LOAD64_LE(s) & limb_mask;
    limb[1] = (LOAD64_LE(s + 6) >> 3) & limb_mask;
    limb[2] = (LOAD64_LE(s + 12) >> 6) & limb_mask;
    limb[3] = (LOAD64_LE(s + 19) >> 1) & limb_mask;
    limb[4] = (LOAD64_LE(s + 24) >> 12) & limb_mask;

    for (i = 0; i < 5; i++) {
        h[i] = limb[i];
    }
}
/*
 * Reduce the five-limb field element f and store the result in h, with every
 * limb masked to 51 bits.
 *
 * The work is done in 128-bit temporaries. Carries leaving the top limb are
 * folded back into the bottom limb multiplied by 19. The final value is
 * selected without branching by adding 19, folding once more, and then adding
 * 2^255 - 19 and dropping the top bit (see the inline comments).
 */
static void
fe25519_reduce(fe25519 h, const fe25519 f)
{
const uint64_t mask = 0x7ffffffffffffULL;
uint128_t t[5];
t[0] = f[0];
t[1] = f[1];
t[2] = f[2];
t[3] = f[3];
t[4] = f[4];
/* First carry pass, with wrap-around of the top carry times 19. */
t[1] += t[0] >> 51;
t[0] &= mask;
t[2] += t[1] >> 51;
t[1] &= mask;
t[3] += t[2] >> 51;
t[2] &= mask;
t[4] += t[3] >> 51;
t[3] &= mask;
t[0] += 19 * (t[4] >> 51);
t[4] &= mask;
/* Second carry pass, absorbing what the wrap-around added to t[0]. */
t[1] += t[0] >> 51;
t[0] &= mask;
t[2] += t[1] >> 51;
t[1] &= mask;
t[3] += t[2] >> 51;
t[2] &= mask;
t[4] += t[3] >> 51;
t[3] &= mask;
t[0] += 19 * (t[4] >> 51);
t[4] &= mask;
/* now t is between 0 and 2^255-1, properly carried. */
/* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
/*
 * Add 19 and fold: in case 2 the sum reaches 2^255, so the top carry is 1 and
 * another 19 is added while bit 255 is dropped.
 */
t[0] += 19ULL;
t[1] += t[0] >> 51;
t[0] &= mask;
t[2] += t[1] >> 51;
t[1] &= mask;
t[3] += t[2] >> 51;
t[2] &= mask;
t[4] += t[3] >> 51;
t[3] &= mask;
t[0] += 19ULL * (t[4] >> 51);
t[4] &= mask;
/* now between 19 and 2^255-1 in both cases, and offset by 19. */
/* The five addends below sum to 2^255 - 19 across the limbs. */
t[0] += 0x8000000000000 - 19ULL;
t[1] += 0x8000000000000 - 1ULL;
t[2] += 0x8000000000000 - 1ULL;
t[3] += 0x8000000000000 - 1ULL;
t[4] += 0x8000000000000 - 1ULL;
/* now between 2^255 and 2^256-20, and offset by 2^255. */
/* Final carry pass; masking t[4] discards the 2^255 offset. */
t[1] += t[0] >> 51;
t[0] &= mask;
t[2] += t[1] >> 51;
t[1] &= mask;
t[3] += t[2] >> 51;
t[2] &= mask;
t[4] += t[3] >> 51;
t[3] &= mask;
t[4] &= mask;
h[0] = t[0];
h[1] = t[1];
h[2] = t[2];
h[3] = t[3];
h[4] = t[4];
}
/*
 * Serialize the field element h into the 32 bytes at s.
 *
 * h is first reduced with fe25519_reduce(); the five 51-bit limbs of the
 * result are then repacked into four 64-bit words, which are written out
 * with STORE64_LE at offsets 0, 8, 16 and 24.
 */
void
fe25519_tobytes(unsigned char *s, const fe25519 h)
{
    fe25519  reduced;
    uint64_t word[4];

    fe25519_reduce(reduced, h);

    /* Each word takes what is left of one limb plus the low bits of the
     * next: 51 + 13, 38 + 26, 25 + 39 and 12 + 51 bits (top bit clear). */
    word[0] = reduced[0] | (reduced[1] << 51);
    word[1] = (reduced[1] >> 13) | (reduced[2] << 38);
    word[2] = (reduced[2] >> 26) | (reduced[3] << 25);
    word[3] = (reduced[3] >> 39) | (reduced[4] << 12);

    STORE64_LE(s + 0, word[0]);
    STORE64_LE(s + 8, word[1]);
    STORE64_LE(s + 16, word[2]);
    STORE64_LE(s + 24, word[3]);
}

View File

@@ -0,0 +1,93 @@
#include <stdint.h>
#include <stdlib.h>
#include "crypto_core_hchacha20.h"
#include "private/common.h"
/*
 * One add/xor/rotate quarter round on the four 32-bit words A, B, C, D, with
 * rotation amounts 16, 12, 8 and 7. All four arguments are modified in place
 * and each is evaluated several times, so they must be plain lvalues without
 * side effects. Wrapped in do { } while(0) so it behaves as one statement.
 */
#define QUARTERROUND(A, B, C, D) \
do { \
A += B; D = ROTL32(D ^ A, 16); \
C += D; B = ROTL32(B ^ C, 12); \
A += B; D = ROTL32(D ^ A, 8); \
C += D; B = ROTL32(B ^ C, 7); \
} while(0)
/*
 * HChaCha20 core.
 *
 * out: receives 32 bytes - state words 0..3 followed by words 12..15 after
 *      the rounds, stored with STORE32_LE.
 * in:  16 input bytes, loaded into state words 12..15.
 * k:   32 key bytes, loaded into state words 4..11.
 * c:   16 constant bytes for state words 0..3, or NULL to use the built-in
 *      constants.
 *
 * The state goes through 10 iterations of four column quarter rounds
 * followed by four diagonal quarter rounds. Always returns 0.
 */
int
crypto_core_hchacha20(unsigned char *out, const unsigned char *in,
                      const unsigned char *k, const unsigned char *c)
{
    uint32_t st[16];
    int      iter;
    int      w;

    if (c != NULL) {
        for (w = 0; w < 4; w++) {
            st[w] = LOAD32_LE(c + 4 * w);
        }
    } else {
        st[0] = 0x61707865;
        st[1] = 0x3320646e;
        st[2] = 0x79622d32;
        st[3] = 0x6b206574;
    }
    for (w = 0; w < 8; w++) {
        st[4 + w] = LOAD32_LE(k + 4 * w);
    }
    for (w = 0; w < 4; w++) {
        st[12 + w] = LOAD32_LE(in + 4 * w);
    }

    for (iter = 0; iter < 10; iter++) {
        /* columns */
        QUARTERROUND(st[0], st[4], st[8], st[12]);
        QUARTERROUND(st[1], st[5], st[9], st[13]);
        QUARTERROUND(st[2], st[6], st[10], st[14]);
        QUARTERROUND(st[3], st[7], st[11], st[15]);
        /* diagonals */
        QUARTERROUND(st[0], st[5], st[10], st[15]);
        QUARTERROUND(st[1], st[6], st[11], st[12]);
        QUARTERROUND(st[2], st[7], st[8], st[13]);
        QUARTERROUND(st[3], st[4], st[9], st[14]);
    }

    /* Only the first and the last state rows are output; no feed-forward. */
    for (w = 0; w < 4; w++) {
        STORE32_LE(out + 4 * w, st[w]);
    }
    for (w = 0; w < 4; w++) {
        STORE32_LE(out + 16 + 4 * w, st[12 + w]);
    }

    return 0;
}
/* Runtime accessor for crypto_core_hchacha20_OUTPUTBYTES. */
size_t
crypto_core_hchacha20_outputbytes(void)
{
return crypto_core_hchacha20_OUTPUTBYTES;
}
/* Runtime accessor for crypto_core_hchacha20_INPUTBYTES. */
size_t
crypto_core_hchacha20_inputbytes(void)
{
return crypto_core_hchacha20_INPUTBYTES;
}
/* Runtime accessor for crypto_core_hchacha20_KEYBYTES. */
size_t
crypto_core_hchacha20_keybytes(void)
{
return crypto_core_hchacha20_KEYBYTES;
}
/* Runtime accessor for crypto_core_hchacha20_CONSTBYTES. */
size_t
crypto_core_hchacha20_constbytes(void)
{
return crypto_core_hchacha20_CONSTBYTES;
}

View File

@@ -0,0 +1,21 @@
#include "crypto_core_hsalsa20.h"
/* Runtime accessor for crypto_core_hsalsa20_OUTPUTBYTES. */
size_t
crypto_core_hsalsa20_outputbytes(void) {
return crypto_core_hsalsa20_OUTPUTBYTES;
}
/* Runtime accessor for crypto_core_hsalsa20_INPUTBYTES. */
size_t
crypto_core_hsalsa20_inputbytes(void) {
return crypto_core_hsalsa20_INPUTBYTES;
}
/* Runtime accessor for crypto_core_hsalsa20_KEYBYTES. */
size_t
crypto_core_hsalsa20_keybytes(void) {
return crypto_core_hsalsa20_KEYBYTES;
}
/* Runtime accessor for crypto_core_hsalsa20_CONSTBYTES. */
size_t
crypto_core_hsalsa20_constbytes(void) {
return crypto_core_hsalsa20_CONSTBYTES;
}

View File

@@ -0,0 +1,95 @@
/*
version 20080912
D. J. Bernstein
Public domain.
*/
#include <stdint.h>
#include <stdlib.h>
#include "crypto_core_hsalsa20.h"
#include "private/common.h"
/* Number of rounds; the main loop below consumes them two at a time. */
#define ROUNDS 20
/* Append the U (unsigned) suffix to the integer literal v by token pasting. */
#define U32C(v) (v##U)
/*
 * HSalsa20 core.
 *
 * out: receives 32 bytes - state words x0, x5, x10, x15, x6, x7, x8, x9 after
 *      the rounds, in that order. The initial state is not added back.
 * in:  16 input bytes, loaded into x6..x9.
 * k:   32 key bytes, loaded into x1..x4 and x11..x14.
 * c:   16 constant bytes for x0, x5, x10, x15, or NULL to use the built-in
 *      constants.
 *
 * Always returns 0.
 */
int
crypto_core_hsalsa20(unsigned char *out,
const unsigned char *in,
const unsigned char *k,
const unsigned char *c)
{
uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8,
x9, x10, x11, x12, x13, x14, x15;
int i;
/* The four constant words sit at state positions 0, 5, 10 and 15. */
if (c == NULL) {
x0 = U32C(0x61707865);
x5 = U32C(0x3320646e);
x10 = U32C(0x79622d32);
x15 = U32C(0x6b206574);
} else {
x0 = LOAD32_LE(c + 0);
x5 = LOAD32_LE(c + 4);
x10 = LOAD32_LE(c + 8);
x15 = LOAD32_LE(c + 12);
}
x1 = LOAD32_LE(k + 0);
x2 = LOAD32_LE(k + 4);
x3 = LOAD32_LE(k + 8);
x4 = LOAD32_LE(k + 12);
x11 = LOAD32_LE(k + 16);
x12 = LOAD32_LE(k + 20);
x13 = LOAD32_LE(k + 24);
x14 = LOAD32_LE(k + 28);
x6 = LOAD32_LE(in + 0);
x7 = LOAD32_LE(in + 4);
x8 = LOAD32_LE(in + 8);
x9 = LOAD32_LE(in + 12);
/*
 * ROUNDS / 2 iterations. Each one runs two groups of four quarter rounds
 * (rotations 7, 9, 13, 18): the first group mixes words with indices
 * {0,4,8,12}, {5,9,13,1}, {10,14,2,6}, {15,3,7,11}, the second one
 * {0,1,2,3}, {5,6,7,4}, {10,11,8,9}, {15,12,13,14}. Every statement depends
 * on the ones before it, so the order must not change.
 */
for (i = ROUNDS; i > 0; i -= 2) {
x4 ^= ROTL32(x0 + x12, 7);
x8 ^= ROTL32(x4 + x0, 9);
x12 ^= ROTL32(x8 + x4, 13);
x0 ^= ROTL32(x12 + x8, 18);
x9 ^= ROTL32(x5 + x1, 7);
x13 ^= ROTL32(x9 + x5, 9);
x1 ^= ROTL32(x13 + x9, 13);
x5 ^= ROTL32(x1 + x13, 18);
x14 ^= ROTL32(x10 + x6, 7);
x2 ^= ROTL32(x14 + x10, 9);
x6 ^= ROTL32(x2 + x14, 13);
x10 ^= ROTL32(x6 + x2, 18);
x3 ^= ROTL32(x15 + x11, 7);
x7 ^= ROTL32(x3 + x15, 9);
x11 ^= ROTL32(x7 + x3, 13);
x15 ^= ROTL32(x11 + x7, 18);
x1 ^= ROTL32(x0 + x3, 7);
x2 ^= ROTL32(x1 + x0, 9);
x3 ^= ROTL32(x2 + x1, 13);
x0 ^= ROTL32(x3 + x2, 18);
x6 ^= ROTL32(x5 + x4, 7);
x7 ^= ROTL32(x6 + x5, 9);
x4 ^= ROTL32(x7 + x6, 13);
x5 ^= ROTL32(x4 + x7, 18);
x11 ^= ROTL32(x10 + x9, 7);
x8 ^= ROTL32(x11 + x10, 9);
x9 ^= ROTL32(x8 + x11, 13);
x10 ^= ROTL32(x9 + x8, 18);
x12 ^= ROTL32(x15 + x14, 7);
x13 ^= ROTL32(x12 + x15, 9);
x14 ^= ROTL32(x13 + x12, 13);
x15 ^= ROTL32(x14 + x13, 18);
}
/* Output the words that held the constants, then the ones that held in. */
STORE32_LE(out + 0, x0);
STORE32_LE(out + 4, x5);
STORE32_LE(out + 8, x10);
STORE32_LE(out + 12, x15);
STORE32_LE(out + 16, x6);
STORE32_LE(out + 20, x7);
STORE32_LE(out + 24, x8);
STORE32_LE(out + 28, x9);
return 0;
}

View File

@@ -0,0 +1,195 @@
#include <stdint.h>
#include <stdlib.h>
#include "crypto_core_salsa20.h"
#include "crypto_core_salsa2012.h"
#include "crypto_core_salsa208.h"
#include "private/common.h"
/*
 * Shared Salsa20 core for the reduced- and full-round variants below.
 *
 * out:    receives 64 bytes - every state word after the rounds, added to its
 *         initial value (x_i + j_i), stored with STORE32_LE.
 * in:     16 input bytes, loaded into words 6..9.
 * k:      32 key bytes, loaded into words 1..4 and 11..14.
 * c:      16 constant bytes for words 0, 5, 10, 15, or NULL to keep the
 *         built-in constants.
 * rounds: number of rounds; the loop consumes two per iteration, so an odd
 *         value is effectively rounded up to the next even one.
 */
static void
crypto_core_salsa(unsigned char *out, const unsigned char *in,
const unsigned char *k, const unsigned char *c,
const int rounds)
{
uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14,
x15;
uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14,
j15;
int i;
/* x* is the working state, j* keeps the initial state for the final add. */
j0 = x0 = 0x61707865;
j5 = x5 = 0x3320646e;
j10 = x10 = 0x79622d32;
j15 = x15 = 0x6b206574;
/* Caller-supplied constants replace the defaults set just above. */
if (c != NULL) {
j0 = x0 = LOAD32_LE(c + 0);
j5 = x5 = LOAD32_LE(c + 4);
j10 = x10 = LOAD32_LE(c + 8);
j15 = x15 = LOAD32_LE(c + 12);
}
j1 = x1 = LOAD32_LE(k + 0);
j2 = x2 = LOAD32_LE(k + 4);
j3 = x3 = LOAD32_LE(k + 8);
j4 = x4 = LOAD32_LE(k + 12);
j11 = x11 = LOAD32_LE(k + 16);
j12 = x12 = LOAD32_LE(k + 20);
j13 = x13 = LOAD32_LE(k + 24);
j14 = x14 = LOAD32_LE(k + 28);
j6 = x6 = LOAD32_LE(in + 0);
j7 = x7 = LOAD32_LE(in + 4);
j8 = x8 = LOAD32_LE(in + 8);
j9 = x9 = LOAD32_LE(in + 12);
/*
 * Each iteration runs two groups of four quarter rounds (rotations 7, 9, 13,
 * 18). Every statement depends on the ones before it, so the order must not
 * change.
 */
for (i = 0; i < rounds; i += 2) {
x4 ^= ROTL32(x0 + x12, 7);
x8 ^= ROTL32(x4 + x0, 9);
x12 ^= ROTL32(x8 + x4, 13);
x0 ^= ROTL32(x12 + x8, 18);
x9 ^= ROTL32(x5 + x1, 7);
x13 ^= ROTL32(x9 + x5, 9);
x1 ^= ROTL32(x13 + x9, 13);
x5 ^= ROTL32(x1 + x13, 18);
x14 ^= ROTL32(x10 + x6, 7);
x2 ^= ROTL32(x14 + x10, 9);
x6 ^= ROTL32(x2 + x14, 13);
x10 ^= ROTL32(x6 + x2, 18);
x3 ^= ROTL32(x15 + x11, 7);
x7 ^= ROTL32(x3 + x15, 9);
x11 ^= ROTL32(x7 + x3, 13);
x15 ^= ROTL32(x11 + x7, 18);
x1 ^= ROTL32(x0 + x3, 7);
x2 ^= ROTL32(x1 + x0, 9);
x3 ^= ROTL32(x2 + x1, 13);
x0 ^= ROTL32(x3 + x2, 18);
x6 ^= ROTL32(x5 + x4, 7);
x7 ^= ROTL32(x6 + x5, 9);
x4 ^= ROTL32(x7 + x6, 13);
x5 ^= ROTL32(x4 + x7, 18);
x11 ^= ROTL32(x10 + x9, 7);
x8 ^= ROTL32(x11 + x10, 9);
x9 ^= ROTL32(x8 + x11, 13);
x10 ^= ROTL32(x9 + x8, 18);
x12 ^= ROTL32(x15 + x14, 7);
x13 ^= ROTL32(x12 + x15, 9);
x14 ^= ROTL32(x13 + x12, 13);
x15 ^= ROTL32(x14 + x13, 18);
}
/* Feed-forward: add the initial state back in before storing. */
STORE32_LE(out + 0, x0 + j0);
STORE32_LE(out + 4, x1 + j1);
STORE32_LE(out + 8, x2 + j2);
STORE32_LE(out + 12, x3 + j3);
STORE32_LE(out + 16, x4 + j4);
STORE32_LE(out + 20, x5 + j5);
STORE32_LE(out + 24, x6 + j6);
STORE32_LE(out + 28, x7 + j7);
STORE32_LE(out + 32, x8 + j8);
STORE32_LE(out + 36, x9 + j9);
STORE32_LE(out + 40, x10 + j10);
STORE32_LE(out + 44, x11 + j11);
STORE32_LE(out + 48, x12 + j12);
STORE32_LE(out + 52, x13 + j13);
STORE32_LE(out + 56, x14 + j14);
STORE32_LE(out + 60, x15 + j15);
}
/*
 * Salsa20 core: crypto_core_salsa() with 20 rounds. out receives 64 bytes;
 * c may be NULL to use the built-in constants. Always returns 0.
 */
int
crypto_core_salsa20(unsigned char *out, const unsigned char *in,
const unsigned char *k, const unsigned char *c)
{
crypto_core_salsa(out, in, k, c, 20);
return 0;
}
/* Runtime accessor for crypto_core_salsa20_OUTPUTBYTES. */
size_t
crypto_core_salsa20_outputbytes(void)
{
return crypto_core_salsa20_OUTPUTBYTES;
}
/* Runtime accessor for crypto_core_salsa20_INPUTBYTES. */
size_t
crypto_core_salsa20_inputbytes(void)
{
return crypto_core_salsa20_INPUTBYTES;
}
/* Runtime accessor for crypto_core_salsa20_KEYBYTES. */
size_t
crypto_core_salsa20_keybytes(void)
{
return crypto_core_salsa20_KEYBYTES;
}
/* Runtime accessor for crypto_core_salsa20_CONSTBYTES. */
size_t
crypto_core_salsa20_constbytes(void)
{
return crypto_core_salsa20_CONSTBYTES;
}
#ifndef MINIMAL
/* LCOV_EXCL_START */
/*
 * Salsa20/12 core: crypto_core_salsa() with 12 rounds. out receives 64 bytes;
 * c may be NULL to use the built-in constants. Always returns 0.
 * Compiled only when MINIMAL is not defined.
 */
int
crypto_core_salsa2012(unsigned char *out, const unsigned char *in,
const unsigned char *k, const unsigned char *c)
{
crypto_core_salsa(out, in, k, c, 12);
return 0;
}
/* Runtime accessor for crypto_core_salsa2012_OUTPUTBYTES. */
size_t
crypto_core_salsa2012_outputbytes(void)
{
return crypto_core_salsa2012_OUTPUTBYTES;
}
/* Runtime accessor for crypto_core_salsa2012_INPUTBYTES. */
size_t
crypto_core_salsa2012_inputbytes(void)
{
return crypto_core_salsa2012_INPUTBYTES;
}
/* Runtime accessor for crypto_core_salsa2012_KEYBYTES. */
size_t
crypto_core_salsa2012_keybytes(void)
{
return crypto_core_salsa2012_KEYBYTES;
}
/* Runtime accessor for crypto_core_salsa2012_CONSTBYTES. */
size_t
crypto_core_salsa2012_constbytes(void)
{
return crypto_core_salsa2012_CONSTBYTES;
}
/*
 * Salsa20/8 core: crypto_core_salsa() with 8 rounds. out receives 64 bytes;
 * c may be NULL to use the built-in constants. Always returns 0.
 * Compiled only when MINIMAL is not defined.
 */
int
crypto_core_salsa208(unsigned char *out, const unsigned char *in,
const unsigned char *k, const unsigned char *c)
{
crypto_core_salsa(out, in, k, c, 8);
return 0;
}
/* Runtime accessor for crypto_core_salsa208_OUTPUTBYTES. */
size_t
crypto_core_salsa208_outputbytes(void)
{
return crypto_core_salsa208_OUTPUTBYTES;
}
/* Runtime accessor for crypto_core_salsa208_INPUTBYTES. */
size_t
crypto_core_salsa208_inputbytes(void)
{
return crypto_core_salsa208_INPUTBYTES;
}
/* Runtime accessor for crypto_core_salsa208_KEYBYTES. */
size_t
crypto_core_salsa208_keybytes(void)
{
return crypto_core_salsa208_KEYBYTES;
}
/* Runtime accessor for crypto_core_salsa208_CONSTBYTES. */
size_t
crypto_core_salsa208_constbytes(void)
{
return crypto_core_salsa208_CONSTBYTES;
}
/* LCOV_EXCL_END */
#endif

View File

@@ -0,0 +1,143 @@
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "private/common.h"
#include "private/softaes.h"
/*
 * 256-entry table of 32-bit words, indexed by one byte. _encrypt() XORs four
 * entries per output word after rotating them left by 0, 8, 16 and 24 bits.
 * The symbol has external linkage but hidden visibility.
 * NOTE(review): presumably each entry combines the AES S-box output with the
 * MixColumns multipliers (a classic "T-table") - confirm.
 */
uint32_t _aes_lut[256] __attribute__ ((visibility ("hidden"))) = {
0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec,
0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b,
0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83,
0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f,
0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea,
0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413,
0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6,
0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511,
0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b,
0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf,
0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e,
0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b,
0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad,
0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2,
0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949,
0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697,
0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f,
0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27,
0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433,
0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0,
0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c
};
/* Read-only view of _aes_lut used by the code in this file. */
static const uint32_t * const LUT = _aes_lut;
/*
 * SOFTAES_STRIDE splits the 256-entry table into 256 / SOFTAES_STRIDE
 * segments for _encrypt(). It can be predefined by the build; otherwise it is
 * 256 (a single segment, i.e. one direct lookup per index) when
 * FAVOR_PERFORMANCE is defined, and 16 (sixteen segments) when it is not.
 */
#ifndef SOFTAES_STRIDE
# ifdef FAVOR_PERFORMANCE
# define SOFTAES_STRIDE 256
# else
# define SOFTAES_STRIDE 16
# endif
#endif
/*
 * Look up the 16 table words selected by the index bytes in ix0..ix3 (four
 * bytes each, element j belonging to output word j) and combine them: output
 * word j is the XOR of the entries for ix0[j], ix1[j], ix2[j] and ix3[j],
 * rotated left by 0, 8, 16 and 24 bits respectively.
 *
 * The table is not indexed with the full byte directly. For every index the
 * gather loop reads one entry from each SOFTAES_STRIDE-sized segment of the
 * table (at offset index % SOFTAES_STRIDE), and the wanted entry is then
 * picked from the local copy t using index / SOFTAES_STRIDE.
 * NOTE(review): presumably a cache-timing mitigation - confirm.
 */
static SoftAesBlock
_encrypt(const uint8_t ix0[4], const uint8_t ix1[4], const uint8_t ix2[4], const uint8_t ix3[4])
{
CRYPTO_ALIGN(64) uint32_t t[4][4][256 / SOFTAES_STRIDE];
CRYPTO_ALIGN(64) uint8_t of[4][4];
CRYPTO_ALIGN(64) SoftAesBlock out;
size_t i;
size_t j;
/* of[j][k]: offset of index ixk[j] within a table segment. */
for (j = 0; j < 4; j++) {
of[j][0] = ix0[j] % SOFTAES_STRIDE;
of[j][1] = ix1[j] % SOFTAES_STRIDE;
of[j][2] = ix2[j] % SOFTAES_STRIDE;
of[j][3] = ix3[j] % SOFTAES_STRIDE;
}
/* Gather: for each segment i, fetch the entry at every index's offset. */
for (i = 0; i < 256 / SOFTAES_STRIDE; i++) {
for (j = 0; j < 4; j++) {
t[j][0][i] = LUT[(i * SOFTAES_STRIDE) | of[j][0]];
t[j][1][i] = LUT[(i * SOFTAES_STRIDE) | of[j][1]];
t[j][2][i] = LUT[(i * SOFTAES_STRIDE) | of[j][2]];
t[j][3][i] = LUT[(i * SOFTAES_STRIDE) | of[j][3]];
}
}
#ifdef HAVE_INLINE_ASM
/*
 * Empty asm statement taking t as an input, with a "memory" clobber.
 * NOTE(review): presumably keeps the compiler from fusing the gather above
 * with the selection below - confirm.
 */
__asm__ __volatile__("" : : "r"(t) : "memory");
#endif
/* Select the wanted segment for each index and combine with rotations. */
out.w0 = t[0][0][ix0[0] / SOFTAES_STRIDE];
out.w0 ^= ROTL32(t[0][1][ix1[0] / SOFTAES_STRIDE], 8);
out.w0 ^= ROTL32(t[0][2][ix2[0] / SOFTAES_STRIDE], 16);
out.w0 ^= ROTL32(t[0][3][ix3[0] / SOFTAES_STRIDE], 24);
out.w1 = t[1][0][ix0[1] / SOFTAES_STRIDE];
out.w1 ^= ROTL32(t[1][1][ix1[1] / SOFTAES_STRIDE], 8);
out.w1 ^= ROTL32(t[1][2][ix2[1] / SOFTAES_STRIDE], 16);
out.w1 ^= ROTL32(t[1][3][ix3[1] / SOFTAES_STRIDE], 24);
out.w2 = t[2][0][ix0[2] / SOFTAES_STRIDE];
out.w2 ^= ROTL32(t[2][1][ix1[2] / SOFTAES_STRIDE], 8);
out.w2 ^= ROTL32(t[2][2][ix2[2] / SOFTAES_STRIDE], 16);
out.w2 ^= ROTL32(t[2][3][ix3[2] / SOFTAES_STRIDE], 24);
out.w3 = t[3][0][ix0[3] / SOFTAES_STRIDE];
out.w3 ^= ROTL32(t[3][1][ix1[3] / SOFTAES_STRIDE], 8);
out.w3 ^= ROTL32(t[3][2][ix2[3] / SOFTAES_STRIDE], 16);
out.w3 ^= ROTL32(t[3][3][ix3[3] / SOFTAES_STRIDE], 24);
return out;
}
SoftAesBlock
softaes_block_encrypt(const SoftAesBlock block, const SoftAesBlock rk)
{
CRYPTO_ALIGN(64) SoftAesBlock out;
CRYPTO_ALIGN(64) uint8_t ix0[4], ix1[4], ix2[4], ix3[4];
const uint32_t s0 = block.w0;
const uint32_t s1 = block.w1;
const uint32_t s2 = block.w2;
const uint32_t s3 = block.w3;
ix0[0] = (uint8_t) s0;
ix0[1] = (uint8_t) s1;
ix0[2] = (uint8_t) s2;
ix0[3] = (uint8_t) s3;
ix1[0] = (uint8_t) (s1 >> 8);
ix1[1] = (uint8_t) (s2 >> 8);
ix1[2] = (uint8_t) (s3 >> 8);
ix1[3] = (uint8_t) (s0 >> 8);
ix2[0] = (uint8_t) (s2 >> 16);
ix2[1] = (uint8_t) (s3 >> 16);
ix2[2] = (uint8_t) (s0 >> 16);
ix2[3] = (uint8_t) (s1 >> 16);
ix3[0] = (uint8_t) (s3 >> 24);
ix3[1] = (uint8_t) (s0 >> 24);
ix3[2] = (uint8_t) (s1 >> 24);
ix3[3] = (uint8_t) (s2 >> 24);
out = _encrypt(ix0, ix1, ix2, ix3);
out.w0 ^= rk.w0;
out.w1 ^= rk.w1;
out.w2 ^= rk.w2;
out.w3 ^= rk.w3;
return out;
}

View File

@@ -0,0 +1,55 @@
#include "crypto_generichash_blake2b.h"
#include "randombytes.h"
/* Runtime accessor for crypto_generichash_blake2b_BYTES_MIN. */
size_t
crypto_generichash_blake2b_bytes_min(void)
{
    const size_t bytes_min = crypto_generichash_blake2b_BYTES_MIN;

    return bytes_min;
}
/* Runtime accessor for crypto_generichash_blake2b_BYTES_MAX. */
size_t
crypto_generichash_blake2b_bytes_max(void)
{
    const size_t bytes_max = crypto_generichash_blake2b_BYTES_MAX;

    return bytes_max;
}
/* Runtime accessor for crypto_generichash_blake2b_BYTES. */
size_t
crypto_generichash_blake2b_bytes(void)
{
    const size_t bytes = crypto_generichash_blake2b_BYTES;

    return bytes;
}
/* Runtime accessor for crypto_generichash_blake2b_KEYBYTES_MIN. */
size_t
crypto_generichash_blake2b_keybytes_min(void)
{
    const size_t keybytes_min = crypto_generichash_blake2b_KEYBYTES_MIN;

    return keybytes_min;
}
/* Runtime accessor for crypto_generichash_blake2b_KEYBYTES_MAX. */
size_t
crypto_generichash_blake2b_keybytes_max(void)
{
    const size_t keybytes_max = crypto_generichash_blake2b_KEYBYTES_MAX;

    return keybytes_max;
}
/* Runtime accessor for crypto_generichash_blake2b_KEYBYTES. */
size_t
crypto_generichash_blake2b_keybytes(void)
{
    const size_t keybytes = crypto_generichash_blake2b_KEYBYTES;

    return keybytes;
}
/* Runtime accessor for crypto_generichash_blake2b_SALTBYTES. */
size_t
crypto_generichash_blake2b_saltbytes(void)
{
    const size_t saltbytes = crypto_generichash_blake2b_SALTBYTES;

    return saltbytes;
}
/* Runtime accessor for crypto_generichash_blake2b_PERSONALBYTES. */
size_t
crypto_generichash_blake2b_personalbytes(void)
{
    const size_t personalbytes = crypto_generichash_blake2b_PERSONALBYTES;

    return personalbytes;
}
/*
 * Return sizeof(crypto_generichash_blake2b_state) rounded up to the next
 * multiple of 64 bytes.
 */
size_t
crypto_generichash_blake2b_statebytes(void)
{
    const size_t round_mask = (size_t) 63U;
    const size_t raw_size   = sizeof(crypto_generichash_blake2b_state);

    return (raw_size + round_mask) & ~round_mask;
}
/*
 * Fill `k` with crypto_generichash_blake2b_KEYBYTES bytes obtained from
 * randombytes_buf().
 */
void
crypto_generichash_blake2b_keygen(unsigned char k[crypto_generichash_blake2b_KEYBYTES])
{
    const size_t key_len = crypto_generichash_blake2b_KEYBYTES;

    randombytes_buf(k, key_len);
}

View File

@@ -0,0 +1,106 @@
/*
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
All code is triple-licensed under the
[CC0](http://creativecommons.org/publicdomain/zero/1.0), the
[OpenSSL Licence](https://www.openssl.org/source/license.html), or
the [Apache Public License 2.0](http://www.apache.org/licenses/LICENSE-2.0),
at your choosing.
*/
#ifndef blake2_H
#define blake2_H
#include <stddef.h>
#include <stdint.h>
#include "crypto_generichash_blake2b.h"
#include "export.h"
/* BLAKE2b size constants, all expressed in bytes. */
enum blake2b_constant {
    BLAKE2B_BLOCKBYTES    = 128,
    BLAKE2B_OUTBYTES      = 64,
    BLAKE2B_KEYBYTES      = 64,
    BLAKE2B_SALTBYTES     = 16,
    BLAKE2B_PERSONALBYTES = 16
};

/*
 * The two structures below are declared with 1-byte packing so that no
 * padding is inserted between members; the original packing is restored
 * right after them.
 */
#ifdef __IBMC__
# pragma pack(1)
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
# pragma pack(1)
#else
# pragma pack(push, 1)
#endif

/*
 * Parameter block handed to blake2b_init_param().
 * The trailing comment on each member is the cumulative size in bytes up to
 * and including that member; the whole block is 64 bytes.
 */
typedef struct blake2b_param_ {
    uint8_t digest_length;                   /*  1 */
    uint8_t key_length;                      /*  2 */
    uint8_t fanout;                          /*  3 */
    uint8_t depth;                           /*  4 */
    uint8_t leaf_length[4];                  /*  8 */
    uint8_t node_offset[8];                  /* 16 */
    uint8_t node_depth;                      /* 17 */
    uint8_t inner_length;                    /* 18 */
    uint8_t reserved[14];                    /* 32 */
    uint8_t salt[BLAKE2B_SALTBYTES];         /* 48 */
    uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
} blake2b_param;

/*
 * Hashing state shared by every blake2b_compress_* backend.
 *
 *   h - chaining value; read at the start of a compression and updated
 *       with the feed-forward XOR at the end.
 *   t - two words XORed into IV words 4 and 5 by the compressors
 *       (presumably the byte counter - confirm against blake2b-ref.c).
 *   f - two words XORed into IV words 6 and 7 by the compressors
 *       (presumably the finalization flags - confirm against blake2b-ref.c).
 *   buf, buflen, last_node - input buffering; not touched by the
 *       compressors. buf holds two message blocks.
 */
typedef struct blake2b_state {
    uint64_t h[8];
    uint64_t t[2];
    uint64_t f[2];
    uint8_t  buf[2 * BLAKE2B_BLOCKBYTES];
    size_t   buflen;
    uint8_t  last_node;
} blake2b_state;

#ifdef __IBMC__
# pragma pack(pop)
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
# pragma pack()
#else
# pragma pack(pop)
#endif
/*
 * Streaming API: initialise a blake2b_state with one of the init variants,
 * feed input with blake2b_update(), then obtain the digest with
 * blake2b_final(). All functions return an int status; the definitions are
 * not part of this header, so see the implementation for the exact values.
 */
int blake2b_init(blake2b_state *S, const uint8_t outlen);
/* As blake2b_init(), additionally taking salt and personalization inputs. */
int blake2b_init_salt_personal(blake2b_state *S, const uint8_t outlen,
const void *salt, const void *personal);
/* As blake2b_init(), additionally taking a key of keylen bytes. */
int blake2b_init_key(blake2b_state *S, const uint8_t outlen, const void *key,
const uint8_t keylen);
/* Keyed initialisation that also takes salt and personalization inputs. */
int blake2b_init_key_salt_personal(blake2b_state *S, const uint8_t outlen,
const void *key, const uint8_t keylen,
const void *salt, const void *personal);
/* Initialise S from a caller-provided parameter block P. */
int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
int blake2b_update(blake2b_state *S, const uint8_t *in, uint64_t inlen);
int blake2b_final(blake2b_state *S, uint8_t *out, uint8_t outlen);
/*
 * Simple API: one-call variants taking the output buffer, the input and an
 * optional key (and, for the second form, salt and personalization) at once.
 * Note the parameter order: outlen precedes inlen.
 */
int blake2b(uint8_t *out, const void *in, const void *key, const uint8_t outlen,
const uint64_t inlen, uint8_t keylen);
int blake2b_salt_personal(uint8_t *out, const void *in, const void *key,
const uint8_t outlen, const uint64_t inlen,
uint8_t keylen, const void *salt,
const void *personal);
/*
 * Signature shared by all compression backends: absorb one
 * BLAKE2B_BLOCKBYTES-byte block into the state S.
 */
typedef int (*blake2b_compress_fn)(blake2b_state *S,
const uint8_t block[BLAKE2B_BLOCKBYTES]);
/*
 * NOTE(review): presumably selects one of the blake2b_compress_* backends
 * below at run time; the definition is not in this header - confirm.
 */
int blake2b_pick_best_implementation(void);
/* Portable C backend. */
int blake2b_compress_ref(blake2b_state *S,
const uint8_t block[BLAKE2B_BLOCKBYTES]);
/* SSSE3 backend (only compiled when the matching intrinsics headers exist). */
int blake2b_compress_ssse3(blake2b_state *S,
const uint8_t block[BLAKE2B_BLOCKBYTES]);
/* SSE4.1 backend (only compiled when the matching intrinsics headers exist). */
int blake2b_compress_sse41(blake2b_state *S,
const uint8_t block[BLAKE2B_BLOCKBYTES]);
/* AVX2 backend (only compiled when the matching intrinsics headers exist). */
int blake2b_compress_avx2(blake2b_state *S,
const uint8_t block[BLAKE2B_BLOCKBYTES]);
#endif

View File

@@ -0,0 +1,49 @@
#define BLAKE2_USE_SSSE3
#define BLAKE2_USE_SSE41
#define BLAKE2_USE_AVX2
#include <stdint.h>
#include <string.h>
#include "blake2.h"
#include "private/common.h"
#if defined(HAVE_AVX2INTRIN_H) && defined(HAVE_EMMINTRIN_H) && \
defined(HAVE_TMMINTRIN_H) && defined(HAVE_SMMINTRIN_H)
# ifdef __GNUC__
# pragma GCC target("sse2")
# pragma GCC target("ssse3")
# pragma GCC target("sse4.1")
# pragma GCC target("avx2")
# endif
# include <emmintrin.h>
# include <immintrin.h>
# include <smmintrin.h>
# include <tmmintrin.h>
# include "private/sse2_64_32.h"
# include "blake2b-compress-avx2.h"
/*
 * BLAKE2b initialization vector. Kept 64-byte aligned because
 * BLAKE2B_COMPRESS_V1 reads it with the aligned LOAD() macro.
 */
CRYPTO_ALIGN(64)
static const uint64_t blake2b_IV[8] = {
    UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
    UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
    UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
    UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)
};
/*
 * AVX2 BLAKE2b compression: absorb one BLAKE2B_BLOCKBYTES-byte `block` into
 * the chaining value S->h, using S->t and S->f as the extra inputs mixed
 * into the lower half of the working state. Always returns 0.
 *
 * The eight 64-bit words of S->h are held in two 256-bit registers
 * (words 0..3 and words 4..7); BLAKE2B_COMPRESS_V1 updates both in place.
 */
int
blake2b_compress_avx2(blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES])
{
    __m256i h_lo = LOADU(&S->h[0]);
    __m256i h_hi = LOADU(&S->h[4]);

    BLAKE2B_COMPRESS_V1(h_lo, h_hi, block, S->t[0], S->t[1], S->f[0], S->f[1]);

    STOREU(&S->h[0], h_lo);
    STOREU(&S->h[4], h_hi);

    return 0;
}
#endif

View File

@@ -0,0 +1,142 @@
#ifndef blake2b_compress_avx2_H
#define blake2b_compress_avx2_H
/*
 * Load/store helpers.
 * LOADU128/STOREU128 and LOADU/STOREU are the unaligned 128-bit and 256-bit
 * forms. LOAD/STORE map to the aligned 256-bit intrinsics on the listed
 * compilers (the pointer must then be suitably aligned) and fall back to
 * the unaligned forms everywhere else.
 */
#define LOADU128(p) _mm_loadu_si128((const __m128i *) (p))
#define STOREU128(p, r) _mm_storeu_si128((__m128i *) (p), r)
#define LOADU(p) _mm256_loadu_si256((const __m256i *) (p))
#define STOREU(p, r) _mm256_storeu_si256((__m256i *) (p), r)
#if defined(__INTEL_COMPILER) || defined(_MSC_VER) || defined(__GNUC__)
# define LOAD(p) _mm256_load_si256((const __m256i *) (p))
# define STORE(p, r) _mm256_store_si256((__m256i *) (p), r)
#else
# define LOAD(p) LOADU(p)
# define STORE(p, r) STOREU(p, r)
#endif
/*
 * Read a 64-bit value in native byte order from `p`, which may be
 * unaligned. The bytes are copied with memcpy() so no alignment or aliasing
 * assumption is made about the source.
 */
static inline uint64_t
LOADU64(const void *p)
{
    uint64_t word;

    memcpy(&word, p, sizeof(uint64_t));

    return word;
}
/*
 * Byte-shuffle masks for _mm256_shuffle_epi8: within every 64-bit element,
 * destination byte i takes source byte (i + 2) mod 8, i.e. each element is
 * rotated right by 16 bits.
 */
#define ROTATE16 \
_mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, \
3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)
/* Same idea with a 3-byte offset: rotate each 64-bit element right by 24. */
#define ROTATE24 \
_mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, \
4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)
/* Short names for the 256-bit integer intrinsics (64-bit lanes for ADD/SUB). */
#define ADD(a, b) _mm256_add_epi64(a, b)
#define SUB(a, b) _mm256_sub_epi64(a, b)
#define XOR(a, b) _mm256_xor_si256(a, b)
#define AND(a, b) _mm256_and_si256(a, b)
#define OR(a, b) _mm256_or_si256(a, b)
/* ROTn(x): rotate every 64-bit element of x right by n bits. */
/* ROT32 swaps the two 32-bit halves of each 64-bit element. */
#define ROT32(x) _mm256_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))
#define ROT24(x) _mm256_shuffle_epi8((x), ROTATE24)
#define ROT16(x) _mm256_shuffle_epi8((x), ROTATE16)
/* (x >> 63) | (x + x): x + x is x << 1, so this is a rotate right by 63. */
#define ROT63(x) _mm256_or_si256(_mm256_srli_epi64((x), 63), ADD((x), (x)))
/*
 * First half of the BLAKE2b G function applied to four columns at once:
 * a += m + b; d = ROT32(d ^ a); c += d; b = ROT24(b ^ c).
 * a, b, c, d are modified in place and must be lvalues.
 */
#define BLAKE2B_G1_V1(a, b, c, d, m) \
do { \
a = ADD(a, m); \
a = ADD(a, b); \
d = XOR(d, a); \
d = ROT32(d); \
c = ADD(c, d); \
b = XOR(b, c); \
b = ROT24(b); \
} while (0)
/*
 * Second half of G; identical data flow to BLAKE2B_G1_V1 but with
 * rotations by 16 and 63 instead of 32 and 24.
 */
#define BLAKE2B_G2_V1(a, b, c, d, m) \
do { \
a = ADD(a, m); \
a = ADD(a, b); \
d = XOR(d, a); \
d = ROT16(d); \
c = ADD(c, d); \
b = XOR(b, c); \
b = ROT63(b); \
} while (0)
/*
 * Rearrange the working state so that the next G1/G2 pair operates on the
 * diagonals: rows a, d and c have their 64-bit elements permuted while row b
 * is left untouched.
 */
#define BLAKE2B_DIAG_V1(a, b, c, d) \
do { \
a = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(2, 1, 0, 3)); \
d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(1, 0, 3, 2)); \
c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(0, 3, 2, 1)); \
} while(0)
/*
 * Inverse of BLAKE2B_DIAG_V1: the permutations applied to a and c are
 * swapped (d's half-swap is its own inverse), restoring column order.
 */
#define BLAKE2B_UNDIAG_V1(a, b, c, d) \
do { \
a = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(0, 3, 2, 1)); \
d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(1, 0, 3, 2)); \
c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(2, 1, 0, 3)); \
} while(0)
#include "blake2b-load-avx2.h"
/*
 * One full round r: G1/G2 on the columns, diagonalize, G1/G2 on the
 * diagonals, undiagonalize. r must be a literal round number because it is
 * token-pasted into the BLAKE2B_LOAD_MSG_<r>_<n> macro names. The m
 * parameter is not referenced in the expansion; the message words are taken
 * from the m0..m7 variables of the expanding scope by the load macros.
 */
#define BLAKE2B_ROUND_V1(a, b, c, d, r, m) \
do { \
__m256i b0; \
BLAKE2B_LOAD_MSG_##r##_1(b0); \
BLAKE2B_G1_V1(a, b, c, d, b0); \
BLAKE2B_LOAD_MSG_##r##_2(b0); \
BLAKE2B_G2_V1(a, b, c, d, b0); \
BLAKE2B_DIAG_V1(a, b, c, d); \
BLAKE2B_LOAD_MSG_##r##_3(b0); \
BLAKE2B_G1_V1(a, b, c, d, b0); \
BLAKE2B_LOAD_MSG_##r##_4(b0); \
BLAKE2B_G2_V1(a, b, c, d, b0); \
BLAKE2B_UNDIAG_V1(a, b, c, d); \
} while (0)
/* The twelve BLAKE2b rounds, fully unrolled (rounds 0 through 11). */
#define BLAKE2B_ROUNDS_V1(a, b, c, d, m) \
do { \
BLAKE2B_ROUND_V1(a, b, c, d, 0, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 1, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 2, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 3, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 4, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 5, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 6, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 7, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 8, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 9, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 10, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 11, (m)); \
} while (0)
/*
 * Declare the locals the BLAKE2B_LOAD_MSG_* macros rely on: m0..m7 each
 * hold one 16-byte slice of the message block m (byte offsets 0, 16, ...,
 * 112) broadcast to both 128-bit lanes, and t0/t1 are scratch registers.
 * This expands to declarations, so it must appear where declarations are
 * allowed.
 */
#define DECLARE_MESSAGE_WORDS(m) \
const __m256i m0 = _mm256_broadcastsi128_si256(LOADU128((m) + 0)); \
const __m256i m1 = _mm256_broadcastsi128_si256(LOADU128((m) + 16)); \
const __m256i m2 = _mm256_broadcastsi128_si256(LOADU128((m) + 32)); \
const __m256i m3 = _mm256_broadcastsi128_si256(LOADU128((m) + 48)); \
const __m256i m4 = _mm256_broadcastsi128_si256(LOADU128((m) + 64)); \
const __m256i m5 = _mm256_broadcastsi128_si256(LOADU128((m) + 80)); \
const __m256i m6 = _mm256_broadcastsi128_si256(LOADU128((m) + 96)); \
const __m256i m7 = _mm256_broadcastsi128_si256(LOADU128((m) + 112)); \
__m256i t0, t1;
/*
 * Full compression of one block. a and b hold the chaining value (words
 * 0..3 and 4..7) and are updated in place; c is IV[0..3]; d is IV[4..7]
 * XORed with (t0, t1, f0, f1) in element order 0..3. After the rounds the
 * result is a ^= c ^ original a, b ^= d ^ original b. blake2b_IV must be
 * visible and aligned for LOAD(). The caller's variables must not be named
 * c, d, iv0, iv1, b0, t0, t1 or m0..m7, which are declared inside the
 * expansion.
 */
#define BLAKE2B_COMPRESS_V1(a, b, m, t0, t1, f0, f1) \
do { \
DECLARE_MESSAGE_WORDS(m) \
const __m256i iv0 = a; \
const __m256i iv1 = b; \
__m256i c = LOAD(&blake2b_IV[0]); \
__m256i d = \
XOR(LOAD(&blake2b_IV[4]), _mm256_set_epi64x(f1, f0, t1, t0)); \
BLAKE2B_ROUNDS_V1(a, b, c, d, m); \
a = XOR(a, c); \
b = XOR(b, d); \
a = XOR(a, iv0); \
b = XOR(b, iv1); \
} while (0)
#endif

View File

@@ -0,0 +1,93 @@
#include <stdint.h>
#include <string.h>
#include "blake2.h"
#include "private/common.h"
/* BLAKE2b initialization vector; fills v[8..15] in blake2b_compress_ref(). */
CRYPTO_ALIGN(64)
static const uint64_t blake2b_IV[8] = {
    UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
    UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
    UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
    UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)
};
/*
 * Message word schedule: blake2b_sigma[r][k] is the index into m[] used for
 * the k-th message input of round r. Rows 10 and 11 repeat rows 0 and 1.
 */
static const uint8_t blake2b_sigma[12][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
};
/*
 * Portable BLAKE2b compression function.
 *
 * Absorbs one BLAKE2B_BLOCKBYTES-byte `block` into the chaining value S->h.
 * S->t and S->f are read (mixed into the working vector) but not modified.
 * Always returns 0.
 */
int
blake2b_compress_ref(blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES])
{
    uint64_t m[16];
    uint64_t v[16];
    int      i;

    /* Decode the block into sixteen little-endian 64-bit message words. */
    for (i = 0; i < 16; ++i) {
        m[i] = LOAD64_LE(block + i * sizeof m[i]);
    }

    /* Working vector: chaining value in v[0..7], IV in v[8..15]... */
    for (i = 0; i < 8; ++i) {
        v[i]     = S->h[i];
        v[i + 8] = blake2b_IV[i];
    }
    /* ...with t and f folded into the last four words. */
    v[12] ^= S->t[0];
    v[13] ^= S->t[1];
    v[14] ^= S->f[0];
    v[15] ^= S->f[1];

/* Mixing function: two message words per call, rotations 32/24/16/63. */
#define G(r, i, a, b, c, d)                          \
    do {                                             \
        a = a + b + m[blake2b_sigma[r][2 * i]];      \
        d = ROTR64(d ^ a, 32);                       \
        c = c + d;                                   \
        b = ROTR64(b ^ c, 24);                       \
        a = a + b + m[blake2b_sigma[r][2 * i + 1]];  \
        d = ROTR64(d ^ a, 16);                       \
        c = c + d;                                   \
        b = ROTR64(b ^ c, 63);                       \
    } while (0)

/* One round: four column mixes followed by four diagonal mixes. */
#define ROUND(r)                           \
    do {                                   \
        G(r, 0, v[0], v[4], v[8], v[12]);  \
        G(r, 1, v[1], v[5], v[9], v[13]);  \
        G(r, 2, v[2], v[6], v[10], v[14]); \
        G(r, 3, v[3], v[7], v[11], v[15]); \
        G(r, 4, v[0], v[5], v[10], v[15]); \
        G(r, 5, v[1], v[6], v[11], v[12]); \
        G(r, 6, v[2], v[7], v[8], v[13]);  \
        G(r, 7, v[3], v[4], v[9], v[14]);  \
    } while (0)

    ROUND(0);
    ROUND(1);
    ROUND(2);
    ROUND(3);
    ROUND(4);
    ROUND(5);
    ROUND(6);
    ROUND(7);
    ROUND(8);
    ROUND(9);
    ROUND(10);
    ROUND(11);

    /* Feed-forward: fold both halves of the working vector into h. */
    for (i = 0; i < 8; ++i) {
        S->h[i] ^= v[i] ^ v[i + 8];
    }

#undef G
#undef ROUND

    return 0;
}

View File

@@ -0,0 +1,87 @@
#define BLAKE2_USE_SSSE3
#define BLAKE2_USE_SSE41
#include <stdint.h>
#include <string.h>
#include "blake2.h"
#include "private/common.h"
#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && \
defined(HAVE_SMMINTRIN_H)
# ifdef __GNUC__
# pragma GCC target("sse2")
# pragma GCC target("ssse3")
# pragma GCC target("sse4.1")
# endif
# include <emmintrin.h>
# include <smmintrin.h>
# include <tmmintrin.h>
# include "private/sse2_64_32.h"
# include "blake2b-compress-sse41.h"
/* BLAKE2b initialization vector; seeds rows 3 and 4 of the SSE4.1 state. */
CRYPTO_ALIGN(64)
static const uint64_t blake2b_IV[8] = {
    UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
    UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
    UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
    UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)
};
/*
 * SSE4.1 BLAKE2b compression: absorb one BLAKE2B_BLOCKBYTES-byte block into
 * S->h. S->t and S->f are only read. Always returns 0.
 *
 * The local names below (rowNl/rowNh, b0, b1, t0, t1, r16, r24, m0..m7) are
 * referenced directly by the ROUND, G1/G2, DIAGONALIZE, _mm_roti_epi64 and
 * LOAD_MSG_* macros, so they must not be renamed.
 */
int
blake2b_compress_sse41(blake2b_state *S,
const uint8_t block[BLAKE2B_BLOCKBYTES])
{
/* Each 4x64-bit state row is split into a low and a high 128-bit half. */
__m128i row1l, row1h;
__m128i row2l, row2h;
__m128i row3l, row3h;
__m128i row4l, row4h;
__m128i b0, b1;
__m128i t0, t1;
/* Byte-shuffle masks used by _mm_roti_epi64 for the 16- and 24-bit rotates. */
const __m128i r16 =
_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
const __m128i r24 =
_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
/* The message block as eight unaligned 16-byte loads. */
const __m128i m0 = LOADU(block + 00);
const __m128i m1 = LOADU(block + 16);
const __m128i m2 = LOADU(block + 32);
const __m128i m3 = LOADU(block + 48);
const __m128i m4 = LOADU(block + 64);
const __m128i m5 = LOADU(block + 80);
const __m128i m6 = LOADU(block + 96);
const __m128i m7 = LOADU(block + 112);
/* Rows 1-2: chaining value; row 3: IV[0..3]; row 4: IV[4..7] ^ (t, f). */
row1l = LOADU(&S->h[0]);
row1h = LOADU(&S->h[2]);
row2l = LOADU(&S->h[4]);
row2h = LOADU(&S->h[6]);
row3l = LOADU(&blake2b_IV[0]);
row3h = LOADU(&blake2b_IV[2]);
row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
ROUND(0);
ROUND(1);
ROUND(2);
ROUND(3);
ROUND(4);
ROUND(5);
ROUND(6);
ROUND(7);
ROUND(8);
ROUND(9);
ROUND(10);
ROUND(11);
/* Feed-forward: h[0..3] ^= row1 ^ row3, h[4..7] ^= row2 ^ row4. */
row1l = _mm_xor_si128(row3l, row1l);
row1h = _mm_xor_si128(row3h, row1h);
STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
row2l = _mm_xor_si128(row4l, row2l);
row2h = _mm_xor_si128(row4h, row2h);
STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
return 0;
}
#endif

View File

@@ -0,0 +1,106 @@
#ifndef blake2b_compress_sse41_H
#define blake2b_compress_sse41_H
/* Unaligned 128-bit load/store; the void cast hides the pointer-type change. */
#define LOADU(p) _mm_loadu_si128((const __m128i *) (const void *) (p))
#define STOREU(p, r) _mm_storeu_si128((__m128i *) (void *) (p), r)
/*
 * Fallback for _mm_roti_epi64 when the XOP version is not available.
 * Callers pass a negative count c; -(c) is the right-rotation amount applied
 * to every 64-bit element. 32, 24, 16 and 63 get dedicated fast paths, any
 * other amount uses the generic shift pair. The 24- and 16-bit paths use
 * the shuffle masks r24 and r16, which must be in scope at the point of
 * use. The expansion is a bare conditional expression with no outer
 * parentheses, so use it only as a complete right-hand side.
 */
#if !(defined(_mm_roti_epi64) && defined(__XOP__))
#undef _mm_roti_epi64
#define _mm_roti_epi64(x, c) \
(-(c) == 32) \
? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
: (-(c) == 24) \
? _mm_shuffle_epi8((x), r24) \
: (-(c) == 16) \
? _mm_shuffle_epi8((x), r16) \
: (-(c) == 63) \
? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_add_epi64((x), (x))) \
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_slli_epi64((x), 64 - (-(c))))
#endif
/*
 * First half of the BLAKE2b G function on all four columns, with each row
 * split into low/high 128-bit halves: row1 += msg + row2;
 * row4 = ror(row4 ^ row1, 32); row3 += row4; row2 = ror(row2 ^ row3, 24).
 * Expands to a bare statement sequence (no do/while wrapper).
 */
#define G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -32); \
row4h = _mm_roti_epi64(row4h, -32); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -24); \
row2h = _mm_roti_epi64(row2h, -24);
/* Second half of G: same data flow as G1 with rotations by 16 and 63. */
#define G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -16); \
row4h = _mm_roti_epi64(row4h, -16); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -63); \
row2h = _mm_roti_epi64(row2h, -63);
/*
 * Re-align rows 2, 3 and 4 so the following G1/G2 pair mixes diagonals
 * instead of columns; row 1 is left in place. Row 3 has its two halves
 * swapped; rows 2 and 4 are rebuilt from 8-byte-shifted concatenations of
 * their halves. Uses t0/t1 from the expanding scope as scratch.
 */
#define DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
row4l = t1; \
row4h = t0;
/*
 * Mirror image of DIAGONALIZE (the _mm_alignr_epi8 operand order is swapped
 * for rows 2 and 4), returning the state to column order.
 */
#define UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
row4l = t1; \
row4h = t0;
#include "blake2b-load-sse41.h"
/*
 * One full BLAKE2b round r: column step (G1, G2), DIAGONALIZE, diagonal
 * step (G1, G2), UNDIAGONALIZE. r must be a literal because it is pasted
 * into the LOAD_MSG_<r>_<n> macro names. The macro operates on the
 * rowNl/rowNh, b0 and b1 variables of the expanding scope and expands to
 * several statements, so it must not be the unbraced body of an if or loop.
 */
#define ROUND(r) \
LOAD_MSG_##r##_1(b0, b1); \
G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
LOAD_MSG_##r##_2(b0, b1); \
G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \
LOAD_MSG_##r##_3(b0, b1); \
G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
LOAD_MSG_##r##_4(b0, b1); \
G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h);
#endif

View File

@@ -0,0 +1,90 @@
#include <stdint.h>
#include <string.h>
#include "blake2.h"
#include "private/common.h"
#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)
# ifdef __GNUC__
# pragma GCC target("sse2")
# pragma GCC target("ssse3")
# endif
# include <emmintrin.h>
# include <tmmintrin.h>
# include "private/sse2_64_32.h"
# include "blake2b-compress-ssse3.h"
/* BLAKE2b initialization vector; seeds rows 3 and 4 of the SSSE3 state. */
CRYPTO_ALIGN(64)
static const uint64_t blake2b_IV[8] = {
    UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
    UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
    UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
    UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)
};
/*
 * SSSE3 BLAKE2b compression: absorb one BLAKE2B_BLOCKBYTES-byte block into
 * S->h. S->t and S->f are only read. Always returns 0.
 *
 * The local names below (rowNl/rowNh, b0, b1, t0, t1, r16, r24, m0..m15)
 * are referenced directly by the ROUND, G1/G2, DIAGONALIZE, _mm_roti_epi64
 * and LOAD_MSG_* macros, so they must not be renamed.
 */
int
blake2b_compress_ssse3(blake2b_state *S,
                       const uint8_t block[BLAKE2B_BLOCKBYTES])
{
    /* Each 4x64-bit state row is split into a low and a high 128-bit half. */
    __m128i row1l, row1h;
    __m128i row2l, row2h;
    __m128i row3l, row3h;
    __m128i row4l, row4h;
    __m128i b0, b1;
    __m128i t0, t1;
    /* Byte-shuffle masks used by _mm_roti_epi64 for the 16/24-bit rotates. */
    const __m128i r16 =
        _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
    const __m128i r24 =
        _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
    /*
     * Message words. `block` is a plain byte buffer with no alignment
     * guarantee, so it is read through LOAD64_LE (as the reference
     * implementation does) rather than through a uint64_t pointer cast,
     * which would violate the aliasing and alignment rules. On the
     * little-endian targets this file is built for the values are identical.
     */
    const uint64_t m0  = LOAD64_LE(block + 8 * 0);
    const uint64_t m1  = LOAD64_LE(block + 8 * 1);
    const uint64_t m2  = LOAD64_LE(block + 8 * 2);
    const uint64_t m3  = LOAD64_LE(block + 8 * 3);
    const uint64_t m4  = LOAD64_LE(block + 8 * 4);
    const uint64_t m5  = LOAD64_LE(block + 8 * 5);
    const uint64_t m6  = LOAD64_LE(block + 8 * 6);
    const uint64_t m7  = LOAD64_LE(block + 8 * 7);
    const uint64_t m8  = LOAD64_LE(block + 8 * 8);
    const uint64_t m9  = LOAD64_LE(block + 8 * 9);
    const uint64_t m10 = LOAD64_LE(block + 8 * 10);
    const uint64_t m11 = LOAD64_LE(block + 8 * 11);
    const uint64_t m12 = LOAD64_LE(block + 8 * 12);
    const uint64_t m13 = LOAD64_LE(block + 8 * 13);
    const uint64_t m14 = LOAD64_LE(block + 8 * 14);
    const uint64_t m15 = LOAD64_LE(block + 8 * 15);

    /* Rows 1-2: chaining value; row 3: IV[0..3]; row 4: IV[4..7] ^ (t, f). */
    row1l = LOADU(&S->h[0]);
    row1h = LOADU(&S->h[2]);
    row2l = LOADU(&S->h[4]);
    row2h = LOADU(&S->h[6]);
    row3l = LOADU(&blake2b_IV[0]);
    row3h = LOADU(&blake2b_IV[2]);
    row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
    row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));

    ROUND(0);
    ROUND(1);
    ROUND(2);
    ROUND(3);
    ROUND(4);
    ROUND(5);
    ROUND(6);
    ROUND(7);
    ROUND(8);
    ROUND(9);
    ROUND(10);
    ROUND(11);

    /* Feed-forward: h[0..3] ^= row1 ^ row3, h[4..7] ^= row2 ^ row4. */
    row1l = _mm_xor_si128(row3l, row1l);
    row1h = _mm_xor_si128(row3h, row1h);
    STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
    STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
    row2l = _mm_xor_si128(row4l, row2l);
    row2h = _mm_xor_si128(row4h, row2h);
    STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
    STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));

    return 0;
}
#endif

View File

@@ -0,0 +1,106 @@
#ifndef blake2b_compress_ssse3_H
#define blake2b_compress_ssse3_H
/* Unaligned 128-bit load/store; the void cast hides the pointer-type change. */
#define LOADU(p) _mm_loadu_si128((const __m128i *) (const void *) (p))
#define STOREU(p, r) _mm_storeu_si128((__m128i *) (void *) (p), r)
/*
 * Fallback for _mm_roti_epi64 when the XOP version is not available.
 * Callers pass a negative count c; -(c) is the right-rotation amount applied
 * to every 64-bit element. 32, 24, 16 and 63 get dedicated fast paths, any
 * other amount uses the generic shift pair. The 24- and 16-bit paths use
 * the shuffle masks r24 and r16, which must be in scope at the point of
 * use. The expansion is a bare conditional expression with no outer
 * parentheses, so use it only as a complete right-hand side.
 */
#if !(defined(_mm_roti_epi64) && defined(__XOP__))
#undef _mm_roti_epi64
#define _mm_roti_epi64(x, c) \
(-(c) == 32) \
? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
: (-(c) == 24) \
? _mm_shuffle_epi8((x), r24) \
: (-(c) == 16) \
? _mm_shuffle_epi8((x), r16) \
: (-(c) == 63) \
? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_add_epi64((x), (x))) \
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_slli_epi64((x), 64 - (-(c))))
#endif
/*
 * First half of the BLAKE2b G function on all four columns, with each row
 * split into low/high 128-bit halves: row1 += msg + row2;
 * row4 = ror(row4 ^ row1, 32); row3 += row4; row2 = ror(row2 ^ row3, 24).
 * Expands to a bare statement sequence (no do/while wrapper).
 */
#define G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -32); \
row4h = _mm_roti_epi64(row4h, -32); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -24); \
row2h = _mm_roti_epi64(row2h, -24);
/* Second half of G: same data flow as G1 with rotations by 16 and 63. */
#define G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -16); \
row4h = _mm_roti_epi64(row4h, -16); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -63); \
row2h = _mm_roti_epi64(row2h, -63);
/*
 * Re-align rows 2, 3 and 4 so the following G1/G2 pair mixes diagonals
 * instead of columns; row 1 is left in place. Row 3 has its two halves
 * swapped; rows 2 and 4 are rebuilt from 8-byte-shifted concatenations of
 * their halves. Uses t0/t1 from the expanding scope as scratch.
 */
#define DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
row4l = t1; \
row4h = t0;
/*
 * Mirror image of DIAGONALIZE (the _mm_alignr_epi8 operand order is swapped
 * for rows 2 and 4), returning the state to column order.
 */
#define UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
row4l = t1; \
row4h = t0;
#include "blake2b-load-sse2.h"
/*
 * One full BLAKE2b round r: column step (G1, G2), DIAGONALIZE, diagonal
 * step (G1, G2), UNDIAGONALIZE. r must be a literal because it is pasted
 * into the LOAD_MSG_<r>_<n> macro names. The macro operates on the
 * rowNl/rowNh, b0 and b1 variables of the expanding scope and expands to
 * several statements, so it must not be the unbraced body of an if or loop.
 */
#define ROUND(r) \
LOAD_MSG_##r##_1(b0, b1); \
G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
LOAD_MSG_##r##_2(b0, b1); \
G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \
LOAD_MSG_##r##_3(b0, b1); \
G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
LOAD_MSG_##r##_4(b0, b1); \
G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h);
#endif

View File

@@ -0,0 +1,340 @@
#ifndef blake2b_load_avx2_H
#define blake2b_load_avx2_H
/*
 * Round 0 message vectors.
 * Each BLAKE2B_LOAD_MSG_<r>_<n>(b0) macro builds one __m256i in b0 from the
 * message registers m0..m7, using t0 and t1 as scratch; all of those must
 * already be declared where the macro expands. The closing blend with mask
 * 0xF0 takes the low four 32-bit elements from t0 and the high four from
 * t1. Suffixes _1/_2 feed the column step and _3/_4 the diagonal step.
 */
#define BLAKE2B_LOAD_MSG_0_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m0, m1); \
t1 = _mm256_unpacklo_epi64(m2, m3); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_0_2(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m0, m1); \
t1 = _mm256_unpackhi_epi64(m2, m3); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_0_3(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m7, m4); \
t1 = _mm256_unpacklo_epi64(m5, m6); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_0_4(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m7, m4); \
t1 = _mm256_unpackhi_epi64(m5, m6); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
/*
 * Round 1 message vectors: each macro builds b0 from m0..m7 via the scratch
 * registers t0/t1 (all taken from the expanding scope).
 */
#define BLAKE2B_LOAD_MSG_1_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m7, m2); \
t1 = _mm256_unpackhi_epi64(m4, m6); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_1_2(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m5, m4); \
t1 = _mm256_alignr_epi8(m3, m7, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_1_3(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m2, m0); \
t1 = _mm256_blend_epi32(m5, m0, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_1_4(b0) \
do { \
t0 = _mm256_alignr_epi8(m6, m1, 8); \
t1 = _mm256_blend_epi32(m3, m1, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
/*
 * Round 2 message vectors: each macro builds b0 from m0..m7 via the scratch
 * registers t0/t1 (all taken from the expanding scope).
 */
#define BLAKE2B_LOAD_MSG_2_1(b0) \
do { \
t0 = _mm256_alignr_epi8(m6, m5, 8); \
t1 = _mm256_unpackhi_epi64(m2, m7); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_2_2(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m4, m0); \
t1 = _mm256_blend_epi32(m6, m1, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_2_3(b0) \
do { \
t0 = _mm256_alignr_epi8(m5, m4, 8); \
t1 = _mm256_unpackhi_epi64(m1, m3); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_2_4(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m2, m7); \
t1 = _mm256_blend_epi32(m0, m3, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
/*
 * Round 3 message vectors: each macro builds b0 from m0..m7 via the scratch
 * registers t0/t1 (all taken from the expanding scope).
 */
#define BLAKE2B_LOAD_MSG_3_1(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m3, m1); \
t1 = _mm256_unpackhi_epi64(m6, m5); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_3_2(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m4, m0); \
t1 = _mm256_unpacklo_epi64(m6, m7); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_3_3(b0) \
do { \
t0 = _mm256_alignr_epi8(m1, m7, 8); \
t1 = _mm256_shuffle_epi32(m2, _MM_SHUFFLE(1, 0, 3, 2)); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_3_4(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m4, m3); \
t1 = _mm256_unpacklo_epi64(m5, m0); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
/*
 * Round 4 message vectors: each macro builds b0 from m0..m7 via the scratch
 * registers t0/t1 (all taken from the expanding scope).
 */
#define BLAKE2B_LOAD_MSG_4_1(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m4, m2); \
t1 = _mm256_unpacklo_epi64(m1, m5); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_4_2(b0) \
do { \
t0 = _mm256_blend_epi32(m3, m0, 0x33); \
t1 = _mm256_blend_epi32(m7, m2, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_4_3(b0) \
do { \
t0 = _mm256_alignr_epi8(m7, m1, 8); \
t1 = _mm256_alignr_epi8(m3, m5, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_4_4(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m6, m0); \
t1 = _mm256_unpacklo_epi64(m6, m4); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
/*
 * Round 5 message vectors: each macro builds b0 from m0..m7 via the scratch
 * registers t0/t1 (all taken from the expanding scope).
 */
#define BLAKE2B_LOAD_MSG_5_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m1, m3); \
t1 = _mm256_unpacklo_epi64(m0, m4); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_5_2(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m6, m5); \
t1 = _mm256_unpackhi_epi64(m5, m1); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_5_3(b0) \
do { \
t0 = _mm256_alignr_epi8(m2, m0, 8); \
t1 = _mm256_unpackhi_epi64(m3, m7); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_5_4(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m4, m6); \
t1 = _mm256_alignr_epi8(m7, m2, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
/*
 * Round 6 message vectors: each macro builds b0 from m0..m7 via the scratch
 * registers t0/t1 (all taken from the expanding scope).
 */
#define BLAKE2B_LOAD_MSG_6_1(b0) \
do { \
t0 = _mm256_blend_epi32(m0, m6, 0x33); \
t1 = _mm256_unpacklo_epi64(m7, m2); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_6_2(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m2, m7); \
t1 = _mm256_alignr_epi8(m5, m6, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_6_3(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m4, m0); \
t1 = _mm256_blend_epi32(m4, m3, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_6_4(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m5, m3); \
t1 = _mm256_shuffle_epi32(m1, _MM_SHUFFLE(1, 0, 3, 2)); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
/*
 * Round 7 message vectors: each macro builds b0 from m0..m7 via the scratch
 * registers t0/t1 (all taken from the expanding scope).
 */
#define BLAKE2B_LOAD_MSG_7_1(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m6, m3); \
t1 = _mm256_blend_epi32(m1, m6, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_7_2(b0) \
do { \
t0 = _mm256_alignr_epi8(m7, m5, 8); \
t1 = _mm256_unpackhi_epi64(m0, m4); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_7_3(b0) \
do { \
t0 = _mm256_blend_epi32(m2, m1, 0x33); \
t1 = _mm256_alignr_epi8(m4, m7, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_7_4(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m5, m0); \
t1 = _mm256_unpacklo_epi64(m2, m3); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
/*
 * Round 8 message vectors: each macro builds b0 from m0..m7 via the scratch
 * registers t0/t1 (all taken from the expanding scope).
 */
#define BLAKE2B_LOAD_MSG_8_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m3, m7); \
t1 = _mm256_alignr_epi8(m0, m5, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_8_2(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m7, m4); \
t1 = _mm256_alignr_epi8(m4, m1, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_8_3(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m5, m6); \
t1 = _mm256_unpackhi_epi64(m6, m0); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_8_4(b0) \
do { \
t0 = _mm256_alignr_epi8(m1, m2, 8); \
t1 = _mm256_alignr_epi8(m2, m3, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
/*
 * Round 9 message vectors: each macro builds b0 from m0..m7 via the scratch
 * registers t0/t1 (all taken from the expanding scope).
 */
#define BLAKE2B_LOAD_MSG_9_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m5, m4); \
t1 = _mm256_unpackhi_epi64(m3, m0); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_9_2(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m1, m2); \
t1 = _mm256_blend_epi32(m2, m3, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_9_3(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m6, m7); \
t1 = _mm256_unpackhi_epi64(m4, m1); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_9_4(b0) \
do { \
t0 = _mm256_blend_epi32(m5, m0, 0x33); \
t1 = _mm256_unpacklo_epi64(m7, m6); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
/*
 * Round 10 message vectors. The bodies are identical to the round 0 macros
 * (BLAKE2B_LOAD_MSG_0_*): round 10 reuses round 0's message schedule.
 */
#define BLAKE2B_LOAD_MSG_10_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m0, m1); \
t1 = _mm256_unpacklo_epi64(m2, m3); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_10_2(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m0, m1); \
t1 = _mm256_unpackhi_epi64(m2, m3); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_10_3(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m7, m4); \
t1 = _mm256_unpacklo_epi64(m5, m6); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_10_4(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m7, m4); \
t1 = _mm256_unpackhi_epi64(m5, m6); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
/*
 * Round 11 message vectors. The bodies are identical to the round 1 macros
 * (BLAKE2B_LOAD_MSG_1_*): round 11 reuses round 1's message schedule.
 */
#define BLAKE2B_LOAD_MSG_11_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m7, m2); \
t1 = _mm256_unpackhi_epi64(m4, m6); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_11_2(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m5, m4); \
t1 = _mm256_alignr_epi8(m3, m7, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_11_3(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m2, m0); \
t1 = _mm256_blend_epi32(m5, m0, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#define BLAKE2B_LOAD_MSG_11_4(b0) \
do { \
t0 = _mm256_alignr_epi8(m6, m1, 8); \
t1 = _mm256_blend_epi32(m3, m1, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while (0)
#endif

View File

@@ -0,0 +1,164 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along
with
this software. If not, see
<http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef blake2b_load_sse2_H
#define blake2b_load_sse2_H
/*
 * SSE2 message-load macros LOAD_MSG_<r>_<n>, r = 0..9, n = 1..4.
 *
 * Each macro packs two pairs of 64-bit message words into b0 and b1 with
 * _mm_set_epi64x(high, low). m0..m15 are not macro parameters; they must be
 * visible at the expansion site.
 *
 * Every macro is wrapped in do { ... } while (0) so that it expands to a
 * single statement and stays correct under an unbraced if/else.
 */
#define LOAD_MSG_0_1(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m2, m0); \
        b1 = _mm_set_epi64x(m6, m4); \
    } while (0)
#define LOAD_MSG_0_2(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m3, m1); \
        b1 = _mm_set_epi64x(m7, m5); \
    } while (0)
#define LOAD_MSG_0_3(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m10, m8); \
        b1 = _mm_set_epi64x(m14, m12); \
    } while (0)
#define LOAD_MSG_0_4(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m11, m9); \
        b1 = _mm_set_epi64x(m15, m13); \
    } while (0)
#define LOAD_MSG_1_1(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m4, m14); \
        b1 = _mm_set_epi64x(m13, m9); \
    } while (0)
#define LOAD_MSG_1_2(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m8, m10); \
        b1 = _mm_set_epi64x(m6, m15); \
    } while (0)
#define LOAD_MSG_1_3(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m0, m1); \
        b1 = _mm_set_epi64x(m5, m11); \
    } while (0)
#define LOAD_MSG_1_4(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m2, m12); \
        b1 = _mm_set_epi64x(m3, m7); \
    } while (0)
#define LOAD_MSG_2_1(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m12, m11); \
        b1 = _mm_set_epi64x(m15, m5); \
    } while (0)
#define LOAD_MSG_2_2(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m0, m8); \
        b1 = _mm_set_epi64x(m13, m2); \
    } while (0)
#define LOAD_MSG_2_3(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m3, m10); \
        b1 = _mm_set_epi64x(m9, m7); \
    } while (0)
#define LOAD_MSG_2_4(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m6, m14); \
        b1 = _mm_set_epi64x(m4, m1); \
    } while (0)
#define LOAD_MSG_3_1(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m3, m7); \
        b1 = _mm_set_epi64x(m11, m13); \
    } while (0)
#define LOAD_MSG_3_2(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m1, m9); \
        b1 = _mm_set_epi64x(m14, m12); \
    } while (0)
#define LOAD_MSG_3_3(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m5, m2); \
        b1 = _mm_set_epi64x(m15, m4); \
    } while (0)
#define LOAD_MSG_3_4(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m10, m6); \
        b1 = _mm_set_epi64x(m8, m0); \
    } while (0)
#define LOAD_MSG_4_1(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m5, m9); \
        b1 = _mm_set_epi64x(m10, m2); \
    } while (0)
#define LOAD_MSG_4_2(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m7, m0); \
        b1 = _mm_set_epi64x(m15, m4); \
    } while (0)
#define LOAD_MSG_4_3(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m11, m14); \
        b1 = _mm_set_epi64x(m3, m6); \
    } while (0)
#define LOAD_MSG_4_4(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m12, m1); \
        b1 = _mm_set_epi64x(m13, m8); \
    } while (0)
#define LOAD_MSG_5_1(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m6, m2); \
        b1 = _mm_set_epi64x(m8, m0); \
    } while (0)
#define LOAD_MSG_5_2(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m10, m12); \
        b1 = _mm_set_epi64x(m3, m11); \
    } while (0)
#define LOAD_MSG_5_3(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m7, m4); \
        b1 = _mm_set_epi64x(m1, m15); \
    } while (0)
#define LOAD_MSG_5_4(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m5, m13); \
        b1 = _mm_set_epi64x(m9, m14); \
    } while (0)
#define LOAD_MSG_6_1(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m1, m12); \
        b1 = _mm_set_epi64x(m4, m14); \
    } while (0)
#define LOAD_MSG_6_2(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m15, m5); \
        b1 = _mm_set_epi64x(m10, m13); \
    } while (0)
#define LOAD_MSG_6_3(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m6, m0); \
        b1 = _mm_set_epi64x(m8, m9); \
    } while (0)
#define LOAD_MSG_6_4(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m3, m7); \
        b1 = _mm_set_epi64x(m11, m2); \
    } while (0)
#define LOAD_MSG_7_1(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m7, m13); \
        b1 = _mm_set_epi64x(m3, m12); \
    } while (0)
#define LOAD_MSG_7_2(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m14, m11); \
        b1 = _mm_set_epi64x(m9, m1); \
    } while (0)
#define LOAD_MSG_7_3(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m15, m5); \
        b1 = _mm_set_epi64x(m2, m8); \
    } while (0)
#define LOAD_MSG_7_4(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m4, m0); \
        b1 = _mm_set_epi64x(m10, m6); \
    } while (0)
#define LOAD_MSG_8_1(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m14, m6); \
        b1 = _mm_set_epi64x(m0, m11); \
    } while (0)
#define LOAD_MSG_8_2(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m9, m15); \
        b1 = _mm_set_epi64x(m8, m3); \
    } while (0)
#define LOAD_MSG_8_3(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m13, m12); \
        b1 = _mm_set_epi64x(m10, m1); \
    } while (0)
#define LOAD_MSG_8_4(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m7, m2); \
        b1 = _mm_set_epi64x(m5, m4); \
    } while (0)
#define LOAD_MSG_9_1(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m8, m10); \
        b1 = _mm_set_epi64x(m1, m7); \
    } while (0)
#define LOAD_MSG_9_2(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m4, m2); \
        b1 = _mm_set_epi64x(m5, m6); \
    } while (0)
#define LOAD_MSG_9_3(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m9, m15); \
        b1 = _mm_set_epi64x(m13, m3); \
    } while (0)
#define LOAD_MSG_9_4(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m14, m11); \
        b1 = _mm_set_epi64x(m0, m12); \
    } while (0)
/*
 * SSE2 message-load macros LOAD_MSG_10_* and LOAD_MSG_11_*.
 *
 * They select the same message words as LOAD_MSG_0_* and LOAD_MSG_1_*
 * respectively. m0..m15 are not macro parameters; they must be visible at
 * the expansion site.
 *
 * Every macro is wrapped in do { ... } while (0) so that it expands to a
 * single statement and stays correct under an unbraced if/else.
 */
#define LOAD_MSG_10_1(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m2, m0); \
        b1 = _mm_set_epi64x(m6, m4); \
    } while (0)
#define LOAD_MSG_10_2(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m3, m1); \
        b1 = _mm_set_epi64x(m7, m5); \
    } while (0)
#define LOAD_MSG_10_3(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m10, m8); \
        b1 = _mm_set_epi64x(m14, m12); \
    } while (0)
#define LOAD_MSG_10_4(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m11, m9); \
        b1 = _mm_set_epi64x(m15, m13); \
    } while (0)
#define LOAD_MSG_11_1(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m4, m14); \
        b1 = _mm_set_epi64x(m13, m9); \
    } while (0)
#define LOAD_MSG_11_2(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m8, m10); \
        b1 = _mm_set_epi64x(m6, m15); \
    } while (0)
#define LOAD_MSG_11_3(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m0, m1); \
        b1 = _mm_set_epi64x(m5, m11); \
    } while (0)
#define LOAD_MSG_11_4(b0, b1) \
    do { \
        b0 = _mm_set_epi64x(m2, m12); \
        b1 = _mm_set_epi64x(m3, m7); \
    } while (0)
#endif

View File

@@ -0,0 +1,307 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along
with
this software. If not, see
<http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef blake2b_load_sse41_H
#define blake2b_load_sse41_H
/*
 * SSE4.1 message-load macros LOAD_MSG_<r>_<n>, r = 0..9, n = 1..4.
 *
 * Each macro assigns b0 and b1 by combining the m0..m7 vectors with
 * unpack / alignr / blend / shuffle intrinsics. m0..m7 are not macro
 * parameters; they must be visible at the expansion site.
 * NOTE(review): presumably each mN holds two consecutive 64-bit message
 * words (2N and 2N+1) -- confirm against the compress header.
 *
 * Every macro is a single do { ... } while (0) statement.
 */
#define LOAD_MSG_0_1(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while (0)
#define LOAD_MSG_0_2(b0, b1) \
do { \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while (0)
#define LOAD_MSG_0_3(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while (0)
#define LOAD_MSG_0_4(b0, b1) \
do { \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while (0)
#define LOAD_MSG_1_1(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while (0)
#define LOAD_MSG_1_2(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while (0)
#define LOAD_MSG_1_3(b0, b1) \
do { \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while (0)
#define LOAD_MSG_1_4(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while (0)
#define LOAD_MSG_2_1(b0, b1) \
do { \
b0 = _mm_alignr_epi8(m6, m5, 8); \
b1 = _mm_unpackhi_epi64(m2, m7); \
} while (0)
#define LOAD_MSG_2_2(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m4, m0); \
b1 = _mm_blend_epi16(m1, m6, 0xF0); \
} while (0)
#define LOAD_MSG_2_3(b0, b1) \
do { \
b0 = _mm_blend_epi16(m5, m1, 0xF0); \
b1 = _mm_unpackhi_epi64(m3, m4); \
} while (0)
#define LOAD_MSG_2_4(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m7, m3); \
b1 = _mm_alignr_epi8(m2, m0, 8); \
} while (0)
#define LOAD_MSG_3_1(b0, b1) \
do { \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_unpackhi_epi64(m6, m5); \
} while (0)
#define LOAD_MSG_3_2(b0, b1) \
do { \
b0 = _mm_unpackhi_epi64(m4, m0); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while (0)
#define LOAD_MSG_3_3(b0, b1) \
do { \
b0 = _mm_blend_epi16(m1, m2, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while (0)
#define LOAD_MSG_3_4(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m3, m5); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while (0)
#define LOAD_MSG_4_1(b0, b1) \
do { \
b0 = _mm_unpackhi_epi64(m4, m2); \
b1 = _mm_unpacklo_epi64(m1, m5); \
} while (0)
#define LOAD_MSG_4_2(b0, b1) \
do { \
b0 = _mm_blend_epi16(m0, m3, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while (0)
#define LOAD_MSG_4_3(b0, b1) \
do { \
b0 = _mm_blend_epi16(m7, m5, 0xF0); \
b1 = _mm_blend_epi16(m3, m1, 0xF0); \
} while (0)
#define LOAD_MSG_4_4(b0, b1) \
do { \
b0 = _mm_alignr_epi8(m6, m0, 8); \
b1 = _mm_blend_epi16(m4, m6, 0xF0); \
} while (0)
#define LOAD_MSG_5_1(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m1, m3); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while (0)
#define LOAD_MSG_5_2(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m6, m5); \
b1 = _mm_unpackhi_epi64(m5, m1); \
} while (0)
#define LOAD_MSG_5_3(b0, b1) \
do { \
b0 = _mm_blend_epi16(m2, m3, 0xF0); \
b1 = _mm_unpackhi_epi64(m7, m0); \
} while (0)
#define LOAD_MSG_5_4(b0, b1) \
do { \
b0 = _mm_unpackhi_epi64(m6, m2); \
b1 = _mm_blend_epi16(m7, m4, 0xF0); \
} while (0)
#define LOAD_MSG_6_1(b0, b1) \
do { \
b0 = _mm_blend_epi16(m6, m0, 0xF0); \
b1 = _mm_unpacklo_epi64(m7, m2); \
} while (0)
#define LOAD_MSG_6_2(b0, b1) \
do { \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_alignr_epi8(m5, m6, 8); \
} while (0)
#define LOAD_MSG_6_3(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m0, m3); \
b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \
} while (0)
#define LOAD_MSG_6_4(b0, b1) \
do { \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_blend_epi16(m1, m5, 0xF0); \
} while (0)
#define LOAD_MSG_7_1(b0, b1) \
do { \
b0 = _mm_unpackhi_epi64(m6, m3); \
b1 = _mm_blend_epi16(m6, m1, 0xF0); \
} while (0)
#define LOAD_MSG_7_2(b0, b1) \
do { \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpackhi_epi64(m0, m4); \
} while (0)
#define LOAD_MSG_7_3(b0, b1) \
do { \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_unpacklo_epi64(m4, m1); \
} while (0)
#define LOAD_MSG_7_4(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m0, m2); \
b1 = _mm_unpacklo_epi64(m3, m5); \
} while (0)
#define LOAD_MSG_8_1(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m3, m7); \
b1 = _mm_alignr_epi8(m0, m5, 8); \
} while (0)
#define LOAD_MSG_8_2(b0, b1) \
do { \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_alignr_epi8(m4, m1, 8); \
} while (0)
#define LOAD_MSG_8_3(b0, b1) \
do { \
b0 = m6; \
b1 = _mm_alignr_epi8(m5, m0, 8); \
} while (0)
#define LOAD_MSG_8_4(b0, b1) \
do { \
b0 = _mm_blend_epi16(m1, m3, 0xF0); \
b1 = m2; \
} while (0)
#define LOAD_MSG_9_1(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_unpackhi_epi64(m3, m0); \
} while (0)
#define LOAD_MSG_9_2(b0, b1) \
do { \
b0 = _mm_unpacklo_epi64(m1, m2); \
b1 = _mm_blend_epi16(m3, m2, 0xF0); \
} while (0)
#define LOAD_MSG_9_3(b0, b1) \
do { \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_unpackhi_epi64(m1, m6); \
} while (0)
#define LOAD_MSG_9_4(b0, b1) \
do { \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpacklo_epi64(m6, m0); \
} while (0)
/*
 * LOAD_MSG_10_* and LOAD_MSG_11_* select exactly the same message words as
 * LOAD_MSG_0_* and LOAD_MSG_1_*, so they are defined as aliases of those
 * macros (which are already single do { ... } while (0) statements).
 */
#define LOAD_MSG_10_1(b0, b1) LOAD_MSG_0_1(b0, b1)
#define LOAD_MSG_10_2(b0, b1) LOAD_MSG_0_2(b0, b1)
#define LOAD_MSG_10_3(b0, b1) LOAD_MSG_0_3(b0, b1)
#define LOAD_MSG_10_4(b0, b1) LOAD_MSG_0_4(b0, b1)
#define LOAD_MSG_11_1(b0, b1) LOAD_MSG_1_1(b0, b1)
#define LOAD_MSG_11_2(b0, b1) LOAD_MSG_1_2(b0, b1)
#define LOAD_MSG_11_3(b0, b1) LOAD_MSG_1_3(b0, b1)
#define LOAD_MSG_11_4(b0, b1) LOAD_MSG_1_4(b0, b1)
#endif

View File

@@ -0,0 +1,438 @@
/*
BLAKE2 reference source code package - C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along
with
this software. If not, see
<http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "blake2.h"
#include "core.h"
#include "private/common.h"
#include "runtime.h"
#include "utils.h"
/*
 * Compression routine used by blake2b_update() and blake2b_final().
 * Starts out as the portable reference implementation and is reassigned by
 * blake2b_pick_best_implementation().
 */
static blake2b_compress_fn blake2b_compress = blake2b_compress_ref;
/*
 * BLAKE2b initialization vector: blake2b_init0() copies it into S->h, and
 * blake2b_init_param() then XORs the parameter block into it.
 */
static const uint64_t blake2b_IV[8] = {
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
/* LCOV_EXCL_START */
/* Set the "last node" finalization flag (f[1]) of the state. Always returns 0. */
static inline int
blake2b_set_lastnode(blake2b_state *S)
{
    S->f[1] = -1;

    return 0;
}
/* LCOV_EXCL_STOP */
/* Return 1 when the "last block" flag (f[0]) has been set, 0 otherwise. */
static inline int
blake2b_is_lastblock(const blake2b_state *S)
{
    if (S->f[0] != 0) {
        return 1;
    }
    return 0;
}
/*
 * Set the "last block" flag (f[0]); when the state is marked as a last
 * node, the "last node" flag is set first. Always returns 0.
 */
static inline int
blake2b_set_lastblock(blake2b_state *S)
{
    if (S->last_node != 0) {
        blake2b_set_lastnode(S);
    }
    S->f[0] = -1;

    return 0;
}
/*
 * Add `inc` to the 128-bit counter stored as t[0] (low 64 bits) and t[1]
 * (high 64 bits). Always returns 0.
 */
static inline int
blake2b_increment_counter(blake2b_state *S, const uint64_t inc)
{
#ifdef HAVE_TI_MODE
    /* Native 128-bit arithmetic: rebuild the counter, add, split again. */
    uint128_t counter = ((uint128_t) S->t[1] << 64) | S->t[0];

    counter += inc;
    S->t[0] = (uint64_t) counter;
    S->t[1] = (uint64_t) (counter >> 64);
#else
    S->t[0] += inc;
    /* The low word wrapped around iff it is now smaller than the addend. */
    if (S->t[0] < inc) {
        S->t[1] += 1;
    }
#endif
    return 0;
}
/* Parameter-related functions */
/* Copy BLAKE2B_SALTBYTES bytes of salt into the parameter block. Always returns 0. */
static inline int
blake2b_param_set_salt(blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES])
{
    (void) memcpy(P->salt, salt, BLAKE2B_SALTBYTES);

    return 0;
}
/*
 * Copy BLAKE2B_PERSONALBYTES bytes of personalization data into the
 * parameter block. Always returns 0.
 */
static inline int
blake2b_param_set_personal(blake2b_param *P,
                           const uint8_t personal[BLAKE2B_PERSONALBYTES])
{
    (void) memcpy(P->personal, personal, BLAKE2B_PERSONALBYTES);

    return 0;
}
/*
 * Reset a state: load the IV into h[] and zero every member from .t up to
 * and including .last_node. Always returns 0.
 */
static inline int
blake2b_init0(blake2b_state *S)
{
    const size_t zero_begin = offsetof(blake2b_state, t);
    const size_t zero_end =
        offsetof(blake2b_state, last_node) + sizeof(S->last_node);
    int          word = 0;

    while (word < 8) {
        S->h[word] = blake2b_IV[word];
        word++;
    }
    /* zero everything between .t and .last_node */
    memset((void *) &S->t, 0, zero_end - zero_begin);

    return 0;
}
/* init xors IV with input parameter block */
/*
 * Initialize S from a fully populated parameter block: reset the state,
 * then XOR the 64-byte block, read as eight little-endian 64-bit words,
 * into h[]. Always returns 0.
 */
int
blake2b_init_param(blake2b_state *S, const blake2b_param *P)
{
    const uint8_t *param_bytes = (const uint8_t *) (P);
    size_t         word;

    COMPILER_ASSERT(sizeof *P == 64);

    blake2b_init0(S);

    /* IV XOR ParamBlock */
    for (word = 0; word < 8; word++) {
        S->h[word] ^= LOAD64_LE(&param_bytes[word * sizeof(S->h[word])]);
    }
    return 0;
}
/*
 * Initialize S for unkeyed, sequential hashing with an `outlen`-byte
 * digest. An outlen of 0 or above BLAKE2B_OUTBYTES is reported through
 * sodium_misuse(). Returns the result of blake2b_init_param().
 */
int
blake2b_init(blake2b_state *S, const uint8_t outlen)
{
    blake2b_param P[1];

    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        sodium_misuse();
    }
    /* Zero-valued fields. */
    memset(P->reserved, 0, sizeof(P->reserved));
    memset(P->salt, 0, sizeof(P->salt));
    memset(P->personal, 0, sizeof(P->personal));
    STORE32_LE(P->leaf_length, 0);
    STORE64_LE(P->node_offset, 0);
    P->key_length   = 0;
    P->node_depth   = 0;
    P->inner_length = 0;
    /* Sequential mode: fanout = depth = 1. */
    P->fanout        = 1;
    P->depth         = 1;
    P->digest_length = outlen;

    return blake2b_init_param(S, P);
}
/*
 * Like blake2b_init(), with optional salt and personalization.
 * A NULL `salt` or `personal` leaves the corresponding field all-zero;
 * otherwise BLAKE2B_SALTBYTES / BLAKE2B_PERSONALBYTES bytes are copied.
 * Invalid `outlen` is reported through sodium_misuse().
 */
int
blake2b_init_salt_personal(blake2b_state *S, const uint8_t outlen,
                           const void *salt, const void *personal)
{
    blake2b_param P[1];

    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        sodium_misuse();
    }
    memset(P->reserved, 0, sizeof(P->reserved));
    STORE32_LE(P->leaf_length, 0);
    STORE64_LE(P->node_offset, 0);
    P->digest_length = outlen;
    P->key_length    = 0;
    P->fanout        = 1;
    P->depth         = 1;
    P->node_depth    = 0;
    P->inner_length  = 0;

    if (salt == NULL) {
        memset(P->salt, 0, sizeof(P->salt));
    } else {
        blake2b_param_set_salt(P, (const uint8_t *) salt);
    }
    if (personal == NULL) {
        memset(P->personal, 0, sizeof(P->personal));
    } else {
        blake2b_param_set_personal(P, (const uint8_t *) personal);
    }
    return blake2b_init_param(S, P);
}
/*
 * Initialize S for keyed hashing. The key is zero-padded to one full block
 * and absorbed as the first block of input; the padded copy is wiped
 * afterwards. Invalid `outlen`, a NULL key, or a key length of 0 or above
 * BLAKE2B_KEYBYTES is reported through sodium_misuse(). Returns 0.
 */
int
blake2b_init_key(blake2b_state *S, const uint8_t outlen, const void *key,
                 const uint8_t keylen)
{
    blake2b_param P[1];
    uint8_t       key_block[BLAKE2B_BLOCKBYTES];

    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        sodium_misuse();
    }
    if (key == NULL || keylen == 0 || keylen > BLAKE2B_KEYBYTES) {
        sodium_misuse(); /* does not return */
    }
    memset(P->reserved, 0, sizeof(P->reserved));
    memset(P->salt, 0, sizeof(P->salt));
    memset(P->personal, 0, sizeof(P->personal));
    STORE32_LE(P->leaf_length, 0);
    STORE64_LE(P->node_offset, 0);
    P->digest_length = outlen;
    P->key_length    = keylen;
    P->fanout        = 1;
    P->depth         = 1;
    P->node_depth    = 0;
    P->inner_length  = 0;

    if (blake2b_init_param(S, P) < 0) {
        sodium_misuse();
    }
    /* key and keylen cannot be 0 */
    memcpy(key_block, key, keylen);
    memset(key_block + keylen, 0, BLAKE2B_BLOCKBYTES - keylen);
    blake2b_update(S, key_block, BLAKE2B_BLOCKBYTES);
    sodium_memzero(key_block, BLAKE2B_BLOCKBYTES); /* Burn the key from stack */

    return 0;
}
/*
 * Keyed initialization with optional salt and personalization.
 * Combines blake2b_init_key() and blake2b_init_salt_personal(): NULL
 * `salt` / `personal` leave those fields zeroed, and the zero-padded key is
 * absorbed as the first block and then wiped. Invalid lengths or a NULL
 * key are reported through sodium_misuse(). Returns 0.
 */
int
blake2b_init_key_salt_personal(blake2b_state *S, const uint8_t outlen,
                               const void *key, const uint8_t keylen,
                               const void *salt, const void *personal)
{
    blake2b_param P[1];
    uint8_t       key_block[BLAKE2B_BLOCKBYTES];

    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        sodium_misuse();
    }
    if (key == NULL || keylen == 0 || keylen > BLAKE2B_KEYBYTES) {
        sodium_misuse(); /* does not return */
    }
    memset(P->reserved, 0, sizeof(P->reserved));
    STORE32_LE(P->leaf_length, 0);
    STORE64_LE(P->node_offset, 0);
    P->digest_length = outlen;
    P->key_length    = keylen;
    P->fanout        = 1;
    P->depth         = 1;
    P->node_depth    = 0;
    P->inner_length  = 0;

    if (salt == NULL) {
        memset(P->salt, 0, sizeof(P->salt));
    } else {
        blake2b_param_set_salt(P, (const uint8_t *) salt);
    }
    if (personal == NULL) {
        memset(P->personal, 0, sizeof(P->personal));
    } else {
        blake2b_param_set_personal(P, (const uint8_t *) personal);
    }
    if (blake2b_init_param(S, P) < 0) {
        sodium_misuse();
    }
    /* key and keylen cannot be 0 */
    memcpy(key_block, key, keylen);
    memset(key_block + keylen, 0, BLAKE2B_BLOCKBYTES - keylen);
    blake2b_update(S, key_block, BLAKE2B_BLOCKBYTES);
    sodium_memzero(key_block, BLAKE2B_BLOCKBYTES); /* Burn the key from stack */

    return 0;
}
/* inlen now in bytes */
/*
 * Absorb `inlen` bytes from `in`.
 *
 * The state buffers up to two blocks. Input is appended to the buffer; a
 * block is compressed only when more input remains than fits, so the final
 * bytes always stay buffered for blake2b_final(). Always returns 0.
 */
int
blake2b_update(blake2b_state *S, const uint8_t *in, uint64_t inlen)
{
    while (inlen > 0) {
        const size_t used = S->buflen;
        const size_t room = 2 * BLAKE2B_BLOCKBYTES - used;

        if (inlen <= room) {
            /* Everything left fits: be lazy, do not compress. */
            memcpy(S->buf + used, in, inlen);
            S->buflen += inlen;
            break;
        }
        memcpy(S->buf + used, in, room); /* Fill buffer */
        S->buflen += room;
        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
        blake2b_compress(S, S->buf); /* Compress the first block */
        /* Shift the second block to the front of the buffer. */
        memcpy(S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES);
        S->buflen -= BLAKE2B_BLOCKBYTES;
        in += room;
        inlen -= room;
    }
    return 0;
}
/*
 * Finish hashing and write `outlen` bytes of digest to `out`.
 *
 * Returns -1 if the state was already finalized, 0 on success. An `outlen`
 * of 0 or above BLAKE2B_OUTBYTES is reported through sodium_misuse().
 * On success the chaining value and the input buffer in S are wiped, as is
 * the local copy of the full 64-byte digest (which may hold more bytes than
 * the caller asked for).
 */
int
blake2b_final(blake2b_state *S, uint8_t *out, uint8_t outlen)
{
    unsigned char buffer[BLAKE2B_OUTBYTES];

    if (!outlen || outlen > BLAKE2B_OUTBYTES) {
        sodium_misuse();
    }
    if (blake2b_is_lastblock(S)) {
        return -1;
    }
    /* More than one block buffered: compress the first, keep the rest. */
    if (S->buflen > BLAKE2B_BLOCKBYTES) {
        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
        blake2b_compress(S, S->buf);
        S->buflen -= BLAKE2B_BLOCKBYTES;
        assert(S->buflen <= BLAKE2B_BLOCKBYTES);
        memcpy(S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen);
    }

    blake2b_increment_counter(S, S->buflen);
    blake2b_set_lastblock(S);
    memset(S->buf + S->buflen, 0,
           2 * BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
    blake2b_compress(S, S->buf);

    /* Serialize the chaining value as little-endian bytes. */
    COMPILER_ASSERT(sizeof buffer == 64U);
    STORE64_LE(buffer + 8 * 0, S->h[0]);
    STORE64_LE(buffer + 8 * 1, S->h[1]);
    STORE64_LE(buffer + 8 * 2, S->h[2]);
    STORE64_LE(buffer + 8 * 3, S->h[3]);
    STORE64_LE(buffer + 8 * 4, S->h[4]);
    STORE64_LE(buffer + 8 * 5, S->h[5]);
    STORE64_LE(buffer + 8 * 6, S->h[6]);
    STORE64_LE(buffer + 8 * 7, S->h[7]);
    memcpy(out, buffer, outlen); /* outlen <= BLAKE2B_OUTBYTES (64) */

    /* Do not leave the untruncated digest on the stack. */
    sodium_memzero(buffer, sizeof buffer);
    sodium_memzero(S->h, sizeof S->h);
    sodium_memzero(S->buf, sizeof S->buf);

    return 0;
}
/* inlen, at least, should be uint64_t. Others can be size_t. */
/*
 * One-shot BLAKE2b: hash `inlen` bytes of `in` into `outlen` bytes at
 * `out`, keyed when keylen > 0.
 *
 * Any invalid argument (NULL `in` with inlen > 0, NULL `out`, outlen of 0
 * or above BLAKE2B_OUTBYTES, NULL `key` with keylen > 0, keylen above
 * BLAKE2B_KEYBYTES) is reported through sodium_misuse(). Returns 0.
 */
int
blake2b(uint8_t *out, const void *in, const void *key, const uint8_t outlen,
        const uint64_t inlen, uint8_t keylen)
{
    CRYPTO_ALIGN(64) blake2b_state S[1];
    int                            init_rc;

    /* Verify parameters */
    if ((in == NULL && inlen > 0) || out == NULL ||
        outlen == 0 || outlen > BLAKE2B_OUTBYTES ||
        (key == NULL && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
        sodium_misuse();
    }
    if (keylen > 0) {
        init_rc = blake2b_init_key(S, outlen, key, keylen);
    } else {
        init_rc = blake2b_init(S, outlen);
    }
    if (init_rc < 0) {
        sodium_misuse();
    }
    blake2b_update(S, (const uint8_t *) in, inlen);
    blake2b_final(S, out, outlen);

    return 0;
}
/*
 * One-shot BLAKE2b with optional salt and personalization.
 *
 * Same argument checks as blake2b(); `salt` and `personal` are forwarded to
 * the matching init function, which treats NULL as "all zero". Returns 0.
 */
int
blake2b_salt_personal(uint8_t *out, const void *in, const void *key,
                      const uint8_t outlen, const uint64_t inlen,
                      uint8_t keylen, const void *salt, const void *personal)
{
    CRYPTO_ALIGN(64) blake2b_state S[1];
    int                            init_rc;

    /* Verify parameters */
    if ((in == NULL && inlen > 0) || out == NULL ||
        outlen == 0 || outlen > BLAKE2B_OUTBYTES ||
        (key == NULL && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
        sodium_misuse();
    }
    if (keylen > 0) {
        init_rc = blake2b_init_key_salt_personal(S, outlen, key, keylen, salt,
                                                 personal);
    } else {
        init_rc = blake2b_init_salt_personal(S, outlen, salt, personal);
    }
    if (init_rc < 0) {
        sodium_misuse();
    }
    blake2b_update(S, (const uint8_t *) in, inlen);
    blake2b_final(S, out, outlen);

    return 0;
}
/*
 * Select the compression routine used by this module.
 *
 * Candidates are tried from most to least capable (AVX2, SSE4.1, SSSE3);
 * each is considered only if its headers were available at build time and
 * the matching runtime check succeeds. Falls back to the reference
 * implementation. Always returns 0.
 */
int
blake2b_pick_best_implementation(void)
{
/* LCOV_EXCL_START */
#if defined(HAVE_AVX2INTRIN_H) && defined(HAVE_TMMINTRIN_H) && \
    defined(HAVE_SMMINTRIN_H)
    if (sodium_runtime_has_avx2()) {
        blake2b_compress = blake2b_compress_avx2;
        return 0;
    }
#endif

#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && \
    defined(HAVE_SMMINTRIN_H)
    if (sodium_runtime_has_sse41()) {
        blake2b_compress = blake2b_compress_sse41;
        return 0;
    }
#endif

#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)
    if (sodium_runtime_has_ssse3()) {
        blake2b_compress = blake2b_compress_ssse3;
        return 0;
    }
#endif

    /* No usable SIMD variant: portable C. */
    blake2b_compress = blake2b_compress_ref;

    return 0;
    /* LCOV_EXCL_STOP */
}

View File

@@ -0,0 +1,116 @@
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include "blake2.h"
#include "crypto_generichash_blake2b.h"
#include "private/common.h"
#include "private/implementations.h"
/*
 * One-shot BLAKE2b hash of `in` into `out`.
 *
 * Returns -1 when outlen is 0 or above BLAKE2B_OUTBYTES, keylen is above
 * BLAKE2B_KEYBYTES, or inlen does not fit in 64 bits; otherwise returns the
 * result of blake2b().
 */
int
crypto_generichash_blake2b(unsigned char *out, size_t outlen,
                           const unsigned char *in, unsigned long long inlen,
                           const unsigned char *key, size_t keylen)
{
    if (outlen <= 0U || outlen > BLAKE2B_OUTBYTES) {
        return -1;
    }
    if (keylen > BLAKE2B_KEYBYTES || inlen > UINT64_MAX) {
        return -1;
    }
    /* Both lengths are narrowed to uint8_t below. */
    assert(outlen <= UINT8_MAX);
    assert(keylen <= UINT8_MAX);

    return blake2b((uint8_t *) out, in, key, (uint8_t) outlen,
                   (uint64_t) inlen, (uint8_t) keylen);
}
/*
 * One-shot BLAKE2b hash with salt and personalization.
 *
 * Performs the same length validation as crypto_generichash_blake2b()
 * (returning -1 on failure) and otherwise returns the result of
 * blake2b_salt_personal().
 */
int
crypto_generichash_blake2b_salt_personal(
    unsigned char *out, size_t outlen, const unsigned char *in,
    unsigned long long inlen, const unsigned char *key, size_t keylen,
    const unsigned char *salt, const unsigned char *personal)
{
    if (outlen <= 0U || outlen > BLAKE2B_OUTBYTES) {
        return -1;
    }
    if (keylen > BLAKE2B_KEYBYTES || inlen > UINT64_MAX) {
        return -1;
    }
    /* Both lengths are narrowed to uint8_t below. */
    assert(outlen <= UINT8_MAX);
    assert(keylen <= UINT8_MAX);

    return blake2b_salt_personal((uint8_t *) out, in, key, (uint8_t) outlen,
                                 (uint64_t) inlen, (uint8_t) keylen, salt,
                                 personal);
}
/*
 * Start a streaming BLAKE2b computation.
 *
 * Keyed mode is used only when `key` is non-NULL and keylen > 0; otherwise
 * the hash is unkeyed. Returns -1 on invalid outlen/keylen or if the
 * underlying init fails, 0 on success.
 */
int
crypto_generichash_blake2b_init(crypto_generichash_blake2b_state *state,
                                const unsigned char *key, const size_t keylen,
                                const size_t outlen)
{
    blake2b_state *inner = (blake2b_state *) (void *) state;
    int            rc;

    if (outlen <= 0U || outlen > BLAKE2B_OUTBYTES ||
        keylen > BLAKE2B_KEYBYTES) {
        return -1;
    }
    assert(outlen <= UINT8_MAX);
    assert(keylen <= UINT8_MAX);
    /* The public state must be large enough to hold the internal one. */
    COMPILER_ASSERT(sizeof(blake2b_state) <= sizeof *state);

    if (key != NULL && keylen > 0U) {
        rc = blake2b_init_key(inner, (uint8_t) outlen, key, (uint8_t) keylen);
    } else {
        rc = blake2b_init(inner, (uint8_t) outlen);
    }
    if (rc != 0) {
        return -1; /* LCOV_EXCL_LINE */
    }
    return 0;
}
/*
 * Start a streaming BLAKE2b computation with salt and personalization.
 *
 * Keyed mode is used only when `key` is non-NULL and keylen > 0. Returns -1
 * on invalid outlen/keylen or if the underlying init fails, 0 on success.
 */
int
crypto_generichash_blake2b_init_salt_personal(
    crypto_generichash_blake2b_state *state, const unsigned char *key,
    const size_t keylen, const size_t outlen, const unsigned char *salt,
    const unsigned char *personal)
{
    blake2b_state *inner = (blake2b_state *) (void *) state;
    int            rc;

    if (outlen <= 0U || outlen > BLAKE2B_OUTBYTES ||
        keylen > BLAKE2B_KEYBYTES) {
        return -1;
    }
    assert(outlen <= UINT8_MAX);
    assert(keylen <= UINT8_MAX);

    if (key != NULL && keylen > 0U) {
        rc = blake2b_init_key_salt_personal(inner, (uint8_t) outlen, key,
                                            (uint8_t) keylen, salt, personal);
    } else {
        rc = blake2b_init_salt_personal(inner, (uint8_t) outlen, salt,
                                        personal);
    }
    if (rc != 0) {
        return -1; /* LCOV_EXCL_LINE */
    }
    return 0;
}
/* Feed `inlen` bytes of `in` into the streaming state; returns the result of blake2b_update(). */
int
crypto_generichash_blake2b_update(crypto_generichash_blake2b_state *state,
                                  const unsigned char *in,
                                  unsigned long long inlen)
{
    blake2b_state *inner = (blake2b_state *) (void *) state;

    return blake2b_update(inner, (const uint8_t *) in, (uint64_t) inlen);
}
/*
 * Finish a streaming BLAKE2b computation and write `outlen` bytes to `out`.
 * Returns the result of blake2b_final().
 *
 * `outlen` is narrowed to uint8_t for blake2b_final(). When assertions are
 * compiled out, a plain cast would silently turn e.g. 288 into 32 and emit
 * a digest of the wrong size. Lengths above UINT8_MAX are therefore
 * saturated to UINT8_MAX, which blake2b_final() rejects like any other
 * length above BLAKE2B_OUTBYTES.
 */
int
crypto_generichash_blake2b_final(crypto_generichash_blake2b_state *state,
                                 unsigned char *out, const size_t outlen)
{
    const uint8_t outlen8 =
        (outlen > UINT8_MAX) ? (uint8_t) UINT8_MAX : (uint8_t) outlen;

    assert(outlen <= UINT8_MAX);

    return blake2b_final((blake2b_state *) (void *) state,
                         (uint8_t *) out, outlen8);
}
/* Library-internal hook: forwards to blake2b_pick_best_implementation(). */
int
_crypto_generichash_blake2b_pick_best_implementation(void)
{
    const int rc = blake2b_pick_best_implementation();

    return rc;
}

View File

@@ -0,0 +1,91 @@
#include "crypto_generichash.h"
#include "randombytes.h"
/* Runtime accessor for crypto_generichash_BYTES_MIN. */
size_t
crypto_generichash_bytes_min(void)
{
    const size_t value = crypto_generichash_BYTES_MIN;

    return value;
}
/* Runtime accessor for crypto_generichash_BYTES_MAX. */
size_t
crypto_generichash_bytes_max(void)
{
    const size_t value = crypto_generichash_BYTES_MAX;

    return value;
}
/* Runtime accessor for crypto_generichash_BYTES. */
size_t
crypto_generichash_bytes(void)
{
    const size_t value = crypto_generichash_BYTES;

    return value;
}
/* Runtime accessor for crypto_generichash_KEYBYTES_MIN. */
size_t
crypto_generichash_keybytes_min(void)
{
    const size_t value = crypto_generichash_KEYBYTES_MIN;

    return value;
}
/* Runtime accessor for crypto_generichash_KEYBYTES_MAX. */
size_t
crypto_generichash_keybytes_max(void)
{
    const size_t value = crypto_generichash_KEYBYTES_MAX;

    return value;
}
/* Runtime accessor for crypto_generichash_KEYBYTES. */
size_t
crypto_generichash_keybytes(void)
{
    const size_t value = crypto_generichash_KEYBYTES;

    return value;
}
/* Runtime accessor for crypto_generichash_PRIMITIVE. */
const char *
crypto_generichash_primitive(void)
{
    const char *primitive = crypto_generichash_PRIMITIVE;

    return primitive;
}
/* Size of crypto_generichash_state rounded up to the next multiple of 64 bytes. */
size_t
crypto_generichash_statebytes(void)
{
    const size_t mask = (size_t) 63U;

    return (sizeof(crypto_generichash_state) + mask) & ~mask;
}
/* One-shot generic hash; forwards all arguments to crypto_generichash_blake2b(). */
int
crypto_generichash(unsigned char *out, size_t outlen, const unsigned char *in,
                   unsigned long long inlen, const unsigned char *key,
                   size_t keylen)
{
    const int rc = crypto_generichash_blake2b(out, outlen, in, inlen, key, keylen);

    return rc;
}
/* Start a streaming generic hash; forwards to crypto_generichash_blake2b_init(). */
int
crypto_generichash_init(crypto_generichash_state *state,
                        const unsigned char *key,
                        const size_t keylen, const size_t outlen)
{
    crypto_generichash_blake2b_state *blake2b_st =
        (crypto_generichash_blake2b_state *) state;

    return crypto_generichash_blake2b_init(blake2b_st, key, keylen, outlen);
}
/* Absorb more input; forwards to crypto_generichash_blake2b_update(). */
int
crypto_generichash_update(crypto_generichash_state *state,
                          const unsigned char *in,
                          unsigned long long inlen)
{
    crypto_generichash_blake2b_state *blake2b_st =
        (crypto_generichash_blake2b_state *) state;

    return crypto_generichash_blake2b_update(blake2b_st, in, inlen);
}
/* Produce the digest; forwards to crypto_generichash_blake2b_final(). */
int
crypto_generichash_final(crypto_generichash_state *state,
                         unsigned char *out, const size_t outlen)
{
    crypto_generichash_blake2b_state *blake2b_st =
        (crypto_generichash_blake2b_state *) state;

    return crypto_generichash_blake2b_final(blake2b_st, out, outlen);
}
/* Fill `k` with crypto_generichash_KEYBYTES bytes from randombytes_buf(). */
void
crypto_generichash_keygen(unsigned char k[crypto_generichash_KEYBYTES])
{
    const size_t key_size = crypto_generichash_KEYBYTES;

    randombytes_buf(k, key_size);
}

View File

@@ -0,0 +1,20 @@
#include "crypto_hash.h"
/* Runtime accessor for crypto_hash_BYTES. */
size_t
crypto_hash_bytes(void)
{
    const size_t value = crypto_hash_BYTES;

    return value;
}
/* Default hash: forwards to crypto_hash_sha512() and returns its result. */
int
crypto_hash(unsigned char *out, const unsigned char *in,
            unsigned long long inlen)
{
    const int rc = crypto_hash_sha512(out, in, inlen);

    return rc;
}
/* Runtime accessor for crypto_hash_PRIMITIVE. */
const char *
crypto_hash_primitive(void)
{
    const char *primitive = crypto_hash_PRIMITIVE;

    return primitive;
}

View File

@@ -0,0 +1,256 @@
/*-
* Copyright 2005,2007,2009 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include "crypto_hash_sha256.h"
#include "private/common.h"
#include "utils.h"
/*
 * Serialize len / 4 32-bit words from `src` into `dst` as big-endian bytes.
 * `len` is a byte count; a trailing partial word is ignored.
 */
static void
be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
{
    const size_t words = len / 4;
    size_t       w;

    for (w = 0; w < words; w++) {
        STORE32_BE(&dst[w * 4], src[w]);
    }
}
/*
 * Parse len / 4 big-endian 32-bit words from the bytes at `src` into `dst`.
 * `len` is a byte count; a trailing partial word is ignored.
 */
static void
be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
{
    const size_t words = len / 4;
    size_t       w;

    for (w = 0; w < words; w++) {
        dst[w] = LOAD32_BE(&src[w * 4]);
    }
}
/*
 * Per-round constants: RNDr() adds Krnd[n] to the message-schedule word
 * W[n] in round n of SHA256_Transform() (64 rounds in total).
 */
static const uint32_t Krnd[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786,
0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
/*
 * SHA-256 helper macros.
 *
 * Every argument is parenthesized so that expression arguments keep their
 * meaning, and the multi-statement RND is wrapped in do { ... } while (0)
 * so it behaves as a single statement. Arguments may be evaluated more than
 * once; pass side-effect-free expressions only.
 */

/* Bitwise choice: each bit of x selects the matching bit of y (1) or z (0). */
#define Ch(x, y, z) (((x) & ((y) ^ (z))) ^ (z))
/* Bitwise majority of x, y and z. */
#define Maj(x, y, z) (((x) & ((y) | (z))) | ((y) & (z)))
#define SHR(x, n) ((x) >> (n))
#define ROTR(x, n) ROTR32(x, n)
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))

/* One round on working variables a..h with k = schedule word + round constant. */
#define RND(a, b, c, d, e, f, g, h, k) \
    do { \
        (h) += S1(e) + Ch(e, f, g) + (k); \
        (d) += (h); \
        (h) += S0(a) + Maj(a, b, c); \
    } while (0)

/*
 * Round i + ii on the array S. Instead of moving the eight working
 * variables after each round, the roles a..h are rotated through the
 * indices (64 - i) % 8 .. (71 - i) % 8.
 */
#define RNDr(S, W, i, ii) \
    RND((S)[(64 - (i)) % 8], (S)[(65 - (i)) % 8], (S)[(66 - (i)) % 8], \
        (S)[(67 - (i)) % 8], (S)[(68 - (i)) % 8], (S)[(69 - (i)) % 8], \
        (S)[(70 - (i)) % 8], (S)[(71 - (i)) % 8], \
        (W)[(i) + (ii)] + Krnd[(i) + (ii)])

/* Message schedule: derive W[i + ii + 16] from four earlier words. */
#define MSCH(W, ii, i) \
    (W)[(i) + (ii) + 16] = \
        s1((W)[(i) + (ii) + 14]) + (W)[(i) + (ii) + 9] + \
        s0((W)[(i) + (ii) + 1]) + (W)[(i) + (ii)]
/*
 * Process one 64-byte block and fold the result into `state`.
 *
 * state: eight-word chaining value, updated in place.
 * block: 64 bytes of input, read as sixteen big-endian 32-bit words.
 * W:     caller-provided scratch for the 64-word message schedule.
 * S:     caller-provided scratch for the eight working variables.
 *
 * The scratch arrays are left holding intermediate values; wiping them is
 * up to the caller.
 */
static void
SHA256_Transform(uint32_t state[8], const uint8_t block[64], uint32_t W[64],
uint32_t S[8])
{
int i;
/* W[0..15] = the block; S = working copy of the chaining value. */
be32dec_vect(W, block, 64);
memcpy(S, state, 32);
/* Four batches of 16 unrolled rounds (i = 0, 16, 32, 48). */
for (i = 0; i < 64; i += 16) {
RNDr(S, W, 0, i);
RNDr(S, W, 1, i);
RNDr(S, W, 2, i);
RNDr(S, W, 3, i);
RNDr(S, W, 4, i);
RNDr(S, W, 5, i);
RNDr(S, W, 6, i);
RNDr(S, W, 7, i);
RNDr(S, W, 8, i);
RNDr(S, W, 9, i);
RNDr(S, W, 10, i);
RNDr(S, W, 11, i);
RNDr(S, W, 12, i);
RNDr(S, W, 13, i);
RNDr(S, W, 14, i);
RNDr(S, W, 15, i);
/* After the last batch no further schedule words are needed. */
if (i == 48) {
break;
}
/* Extend the schedule with the 16 words used by the next batch. */
MSCH(W, 0, i);
MSCH(W, 1, i);
MSCH(W, 2, i);
MSCH(W, 3, i);
MSCH(W, 4, i);
MSCH(W, 5, i);
MSCH(W, 6, i);
MSCH(W, 7, i);
MSCH(W, 8, i);
MSCH(W, 9, i);
MSCH(W, 10, i);
MSCH(W, 11, i);
MSCH(W, 12, i);
MSCH(W, 13, i);
MSCH(W, 14, i);
MSCH(W, 15, i);
}
/* Add the working variables back into the chaining value. */
for (i = 0; i < 8; i++) {
state[i] += S[i];
}
}
/*
 * Padding source for SHA256_Pad(): a single 0x80 byte followed by zeros
 * (the remaining 63 elements are zero-initialized).
 */
static const uint8_t PAD[64] = { 0x80 };
/*
 * Append the final padding and the 64-bit big-endian bit count, running
 * the compression function on the last block(s).
 *
 * If fewer than 56 bytes are buffered, padding and length fit in the
 * current block; otherwise the current block is padded out and processed
 * first, and the length goes into an additional block.
 * tmp32 is scratch space handed to SHA256_Transform() (64 + 8 words).
 */
static void
SHA256_Pad(crypto_hash_sha256_state *state, uint32_t tmp32[64 + 8])
{
    unsigned int used;
    unsigned int k;

    ACQUIRE_FENCE;
    used = (unsigned int) ((state->count >> 3) & 0x3f);
    if (used >= 56) {
        /* No room for the length: pad to the end of this block. */
        for (k = 0; k < 64 - used; k++) {
            state->buf[used + k] = PAD[k];
        }
        SHA256_Transform(state->state, state->buf, &tmp32[0], &tmp32[64]);
        memset(&state->buf[0], 0, 56);
    } else {
        /* Pad up to the 8-byte length field. */
        for (k = 0; k < 56 - used; k++) {
            state->buf[used + k] = PAD[k];
        }
    }
    STORE64_BE(&state->buf[56], state->count);
    SHA256_Transform(state->state, state->buf, &tmp32[0], &tmp32[64]);
}
/* Reset the state: zero bit count and the initial chaining value. Returns 0. */
int
crypto_hash_sha256_init(crypto_hash_sha256_state *state)
{
    static const uint32_t initial_hash[8] = {
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
    };

    memcpy(state->state, initial_hash, sizeof initial_hash);
    state->count = (uint64_t) 0U;

    return 0;
}
/*
 * Absorb `inlen` bytes of `in`.
 *
 * state->count is kept in bits. Input first tops up the partially filled
 * 64-byte buffer; complete blocks are then processed straight from `in`,
 * and the remainder (< 64 bytes) is buffered. The scratch array is wiped
 * whenever the compression function was run. Always returns 0.
 */
int
crypto_hash_sha256_update(crypto_hash_sha256_state *state,
                          const unsigned char *in, unsigned long long inlen)
{
    uint32_t           scratch[64 + 8];
    unsigned long long pos;
    unsigned long long buffered;
    unsigned long long room;

    if (inlen <= 0U) {
        return 0;
    }
    ACQUIRE_FENCE;
    buffered = (unsigned long long) ((state->count >> 3) & 0x3f);
    room     = 64 - buffered;

    state->count += ((uint64_t) inlen) << 3;

    /* Not enough to complete a block: just buffer it. */
    if (inlen < room) {
        for (pos = 0; pos < inlen; pos++) {
            state->buf[buffered + pos] = in[pos];
        }
        return 0;
    }
    /* Complete and process the buffered block. */
    for (pos = 0; pos < room; pos++) {
        state->buf[buffered + pos] = in[pos];
    }
    SHA256_Transform(state->state, state->buf, &scratch[0], &scratch[64]);
    in += room;
    inlen -= room;

    /* Process whole blocks directly from the input. */
    for (; inlen >= 64; in += 64, inlen -= 64) {
        SHA256_Transform(state->state, in, &scratch[0], &scratch[64]);
    }
    /* Fewer than 64 bytes remain: keep them for the next call. */
    for (pos = 0; pos < inlen; pos++) {
        state->buf[pos] = in[pos];
    }
    sodium_memzero((void *) scratch, sizeof scratch);

    return 0;
}
/*
 * Finish the hash: pad and compress the buffered data, write the 32-byte
 * digest to out as big-endian words, then wipe both the scratch array and
 * the whole state (it must be re-initialized before being used again).
 * Always returns 0.
 */
int
crypto_hash_sha256_final(crypto_hash_sha256_state *state, unsigned char *out)
{
uint32_t tmp32[64 + 8];
SHA256_Pad(state, tmp32);
be32enc_vect(out, state->state, 32);
sodium_memzero((void *) tmp32, sizeof tmp32);
sodium_memzero((void *) state, sizeof *state);
return 0;
}
/*
 * One-shot SHA-256: init, absorb inlen bytes of in, and write the digest to
 * out using a state that lives on the stack. Always returns 0.
 */
int
crypto_hash_sha256(unsigned char *out, const unsigned char *in,
unsigned long long inlen)
{
crypto_hash_sha256_state state;
crypto_hash_sha256_init(&state);
crypto_hash_sha256_update(&state, in, inlen);
crypto_hash_sha256_final(&state, out);
return 0;
}

View File

@@ -0,0 +1,13 @@
#include "crypto_hash_sha256.h"
/* Returns the compile-time constant crypto_hash_sha256_BYTES. */
size_t
crypto_hash_sha256_bytes(void)
{
return crypto_hash_sha256_BYTES;
}
/* Returns sizeof(crypto_hash_sha256_state). */
size_t
crypto_hash_sha256_statebytes(void)
{
return sizeof(crypto_hash_sha256_state);
}

View File

@@ -0,0 +1,284 @@
/*-
* Copyright 2005,2007,2009 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include "crypto_hash_sha512.h"
#include "private/common.h"
#include "utils.h"
/*
 * Serialize len / 8 64-bit words from src into dst, each one stored
 * big-endian. Any remainder of len below a multiple of 8 is ignored.
 */
static void
be64enc_vect(unsigned char *dst, const uint64_t *src, size_t len)
{
    const size_t words = len / 8;
    size_t       w;

    for (w = 0; w < words; w++) {
        STORE64_BE(&dst[8 * w], src[w]);
    }
}
/*
 * Parse len / 8 big-endian 64-bit words from src into dst. Any remainder
 * of len below a multiple of 8 is ignored.
 */
static void
be64dec_vect(uint64_t *dst, const unsigned char *src, size_t len)
{
    const size_t words = len / 8;
    size_t       w;

    for (w = 0; w < words; w++) {
        dst[w] = LOAD64_BE(&src[8 * w]);
    }
}
/*
 * Per-round additive constants: RNDr() adds Krnd[t] to the message schedule
 * word W[t] for each of the 80 rounds of SHA512_Transform().
 */
static const uint64_t Krnd[80] = {
0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL,
0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, 0xd807aa98a3030242ULL,
0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL,
0xc19bf174cf692694ULL, 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, 0x2de92c6f592b0275ULL,
0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL,
0xbf597fc7beef0ee4ULL, 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, 0x27b70a8546d22ffcULL,
0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL,
0x92722c851482353bULL, 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, 0xd192e819d6ef5218ULL,
0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL,
0x34b0bcb5e19b48a8ULL, 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, 0x748f82ee5defb2fcULL,
0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL,
0xc67178f2e372532bULL, 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, 0x06f067aa72176fbaULL,
0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL,
0x431d67c49c100d4cULL, 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
};
/*
 * Building blocks of the SHA-512 round function. None of these macros
 * parenthesize their arguments, so they must only be invoked with simple
 * operands (plain variables or array elements), as SHA512_Transform() does.
 */
/* Bitwise choose: each bit comes from y where x is 1, from z where x is 0. */
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
/* Bitwise majority of x, y and z. */
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
/* Logical right shift and 64-bit right rotation. */
#define SHR(x, n) (x >> n)
#define ROTR(x, n) ROTR64(x, n)
/* Mixing functions applied to the working variables a (S0) and e (S1). */
#define S0(x) (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39))
#define S1(x) (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41))
/* Mixing functions used when extending the message schedule. */
#define s0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x, 7))
#define s1(x) (ROTR(x, 19) ^ ROTR(x, 61) ^ SHR(x, 6))
/*
 * One round on explicitly named working variables; k is the schedule word
 * plus round constant. Expands to three statements, so it is not safe as
 * the body of an unbraced if/else.
 */
#define RND(a, b, c, d, e, f, g, h, k) \
h += S1(e) + Ch(e, f, g) + k; \
d += h; \
h += S0(a) + Maj(a, b, c);
/*
 * Round number i + ii on the working array S. Instead of shuffling the
 * eight variables after each round, the role of each S[] slot is rotated
 * through the (80 - i) % 8 style indices.
 */
#define RNDr(S, W, i, ii) \
RND(S[(80 - i) % 8], S[(81 - i) % 8], S[(82 - i) % 8], S[(83 - i) % 8], \
S[(84 - i) % 8], S[(85 - i) % 8], S[(86 - i) % 8], S[(87 - i) % 8], \
W[i + ii] + Krnd[i + ii])
/* Message schedule: computes W[i + ii + 16] from four earlier words. */
#define MSCH(W, ii, i) \
W[i + ii + 16] = \
s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii]
/*
 * Compress one 128-byte block into state.
 *
 * The block is decoded as 16 big-endian words into W[0..15] and the state
 * is copied into the working array S. The 80 rounds then run in five
 * batches of 16; after each batch except the last, the next 16 schedule
 * words are derived. Finally S is added word-wise into state.
 * W (80 words) and S (8 words) are caller-provided scratch buffers that
 * are overwritten; the callers in this file wipe them afterwards.
 */
static void
SHA512_Transform(uint64_t *state, const uint8_t block[128], uint64_t W[80],
uint64_t S[8])
{
int i;
be64dec_vect(W, block, 128);
memcpy(S, state, 64);
for (i = 0; i < 80; i += 16) {
RNDr(S, W, 0, i);
RNDr(S, W, 1, i);
RNDr(S, W, 2, i);
RNDr(S, W, 3, i);
RNDr(S, W, 4, i);
RNDr(S, W, 5, i);
RNDr(S, W, 6, i);
RNDr(S, W, 7, i);
RNDr(S, W, 8, i);
RNDr(S, W, 9, i);
RNDr(S, W, 10, i);
RNDr(S, W, 11, i);
RNDr(S, W, 12, i);
RNDr(S, W, 13, i);
RNDr(S, W, 14, i);
RNDr(S, W, 15, i);
/* last batch: no further schedule words are needed (and W[80..] does
 * not exist) */
if (i == 64) {
break;
}
MSCH(W, 0, i);
MSCH(W, 1, i);
MSCH(W, 2, i);
MSCH(W, 3, i);
MSCH(W, 4, i);
MSCH(W, 5, i);
MSCH(W, 6, i);
MSCH(W, 7, i);
MSCH(W, 8, i);
MSCH(W, 9, i);
MSCH(W, 10, i);
MSCH(W, 11, i);
MSCH(W, 12, i);
MSCH(W, 13, i);
MSCH(W, 14, i);
MSCH(W, 15, i);
}
for (i = 0; i < 8; i++) {
state[i] += S[i];
}
}
/*
 * Padding template read by SHA512_Pad(): one 0x80 byte followed by 127
 * zero bytes.
 */
static const uint8_t PAD[128] = {
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/*
 * Append the final padding to the buffered data and compress it.
 *
 * state->count[1] is the low 64 bits of the message length in bits, so
 * (count[1] >> 3) & 0x7f is the number of bytes waiting in state->buf.
 * The buffer is filled from PAD up to offset 112; when fewer than 16 bytes
 * remain for the length field, the block is padded to 128 bytes and
 * compressed first, and a fresh zeroed block carries the length.
 * tmp64 is scratch space handed to SHA512_Transform (80 schedule words
 * followed by 8 working words).
 */
static void
SHA512_Pad(crypto_hash_sha512_state *state, uint64_t tmp64[80 + 8])
{
    unsigned int used;
    unsigned int fill;
    unsigned int k;

    ACQUIRE_FENCE;
    used = (unsigned int) ((state->count[1] >> 3) & 0x7f);

    /* pad up to the length field, or to the end of the block if it no
     * longer fits */
    fill = (used < 112) ? (112 - used) : (128 - used);
    for (k = 0; k < fill; k++) {
        state->buf[used + k] = PAD[k];
    }
    if (used >= 112) {
        SHA512_Transform(state->state, state->buf, &tmp64[0], &tmp64[80]);
        memset(&state->buf[0], 0, 112);
    }
    /* both count words, big-endian, occupy the last 16 bytes */
    be64enc_vect(&state->buf[112], state->count, 16);
    SHA512_Transform(state->state, state->buf, &tmp64[0], &tmp64[80]);
}
/*
 * Reset a SHA-512 state: load the eight initial chaining words and clear
 * both halves of the bit counter. Always returns 0.
 */
int
crypto_hash_sha512_init(crypto_hash_sha512_state *state)
{
    static const uint64_t iv[8] = {
        0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
        0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
        0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
        0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
    };
    size_t k;

    for (k = 0; k < sizeof iv / sizeof iv[0]; k++) {
        state->state[k] = iv[k];
    }
    state->count[0] = (uint64_t) 0U;
    state->count[1] = (uint64_t) 0U;

    return 0;
}
/*
 * Absorb inlen bytes from in into the running SHA-512 state.
 *
 * The 128-bit bit counter (count[0] high, count[1] low) is advanced first.
 * Input then completes any partially filled 128-byte buffer, whole blocks
 * are compressed straight from the caller's memory, and the remaining tail
 * (fewer than 128 bytes) is stashed in state->buf. The scratch array is
 * wiped whenever it has been used.
 * Always returns 0; a zero-length update is a no-op.
 */
int
crypto_hash_sha512_update(crypto_hash_sha512_state *state,
                          const unsigned char *in, unsigned long long inlen)
{
    uint64_t           tmp64[80 + 8];
    uint64_t           bits_lo;
    uint64_t           bits_hi;
    unsigned long long buffered;
    unsigned long long room;
    unsigned long long k;

    if (inlen <= 0U) {
        return 0;
    }
    ACQUIRE_FENCE;
    /* bytes already waiting in the buffer, and the space left in it */
    buffered = (unsigned long long) ((state->count[1] >> 3) & 0x7f);
    room     = 128 - buffered;

    /* inlen * 8 as a 128-bit quantity */
    bits_lo = ((uint64_t) inlen) << 3;
    bits_hi = ((uint64_t) inlen) >> 61;

    /* LCOV_EXCL_START */
    state->count[1] += bits_lo;
    if (state->count[1] < bits_lo) {
        /* low word wrapped: propagate the carry */
        state->count[0]++;
    }
    /* LCOV_EXCL_STOP */
    state->count[0] += bits_hi;

    /* not enough to complete a block: just buffer it */
    if (inlen < room) {
        for (k = 0; k < inlen; k++) {
            state->buf[buffered + k] = in[k];
        }
        return 0;
    }
    /* complete and compress the buffered block */
    for (k = 0; k < room; k++) {
        state->buf[buffered + k] = in[k];
    }
    SHA512_Transform(state->state, state->buf, &tmp64[0], &tmp64[80]);
    in += room;
    inlen -= room;

    /* compress full blocks directly from the input */
    for (; inlen >= 128; in += 128, inlen -= 128) {
        SHA512_Transform(state->state, in, &tmp64[0], &tmp64[80]);
    }
    /* keep the tail for later */
    inlen &= 127;
    for (k = 0; k < inlen; k++) {
        state->buf[k] = in[k];
    }
    sodium_memzero((void *) tmp64, sizeof tmp64);

    return 0;
}
/*
 * Finish the hash: pad and compress the buffered data, write the 64-byte
 * digest to out as big-endian words, then wipe both the scratch array and
 * the whole state (it must be re-initialized before being used again).
 * Always returns 0.
 */
int
crypto_hash_sha512_final(crypto_hash_sha512_state *state, unsigned char *out)
{
uint64_t tmp64[80 + 8];
SHA512_Pad(state, tmp64);
be64enc_vect(out, state->state, 64);
sodium_memzero((void *) tmp64, sizeof tmp64);
sodium_memzero((void *) state, sizeof *state);
return 0;
}
/*
 * One-shot SHA-512: init, absorb inlen bytes of in, and write the digest to
 * out using a state that lives on the stack. Always returns 0.
 */
int
crypto_hash_sha512(unsigned char *out, const unsigned char *in,
unsigned long long inlen)
{
crypto_hash_sha512_state state;
crypto_hash_sha512_init(&state);
crypto_hash_sha512_update(&state, in, inlen);
crypto_hash_sha512_final(&state, out);
return 0;
}

View File

@@ -0,0 +1,13 @@
#include "crypto_hash_sha512.h"
/* Returns the compile-time constant crypto_hash_sha512_BYTES. */
size_t
crypto_hash_sha512_bytes(void)
{
return crypto_hash_sha512_BYTES;
}
/* Returns sizeof(crypto_hash_sha512_state). */
size_t
crypto_hash_sha512_statebytes(void)
{
return sizeof(crypto_hash_sha512_state);
}

View File

@@ -0,0 +1,52 @@
#include <errno.h>
#include "crypto_kdf_blake2b.h"
#include "crypto_generichash_blake2b.h"
#include "private/common.h"
/* Returns the compile-time constant crypto_kdf_blake2b_BYTES_MIN. */
size_t
crypto_kdf_blake2b_bytes_min(void)
{
return crypto_kdf_blake2b_BYTES_MIN;
}
/* Returns the compile-time constant crypto_kdf_blake2b_BYTES_MAX. */
size_t
crypto_kdf_blake2b_bytes_max(void)
{
return crypto_kdf_blake2b_BYTES_MAX;
}
/* Returns the compile-time constant crypto_kdf_blake2b_CONTEXTBYTES. */
size_t
crypto_kdf_blake2b_contextbytes(void)
{
return crypto_kdf_blake2b_CONTEXTBYTES;
}
/* Returns the compile-time constant crypto_kdf_blake2b_KEYBYTES. */
size_t
crypto_kdf_blake2b_keybytes(void)
{
return crypto_kdf_blake2b_KEYBYTES;
}
/*
 * Derive a subkey of subkey_len bytes from key.
 *
 * The subkey is a keyed BLAKE2b hash of an empty message, where the salt is
 * subkey_id (little-endian, zero-padded) and the personalization is ctx
 * (zero-padded).
 *
 * Returns -1 with errno set to EINVAL when subkey_len lies outside
 * [crypto_kdf_blake2b_BYTES_MIN, crypto_kdf_blake2b_BYTES_MAX]; otherwise
 * returns the result of crypto_generichash_blake2b_salt_personal().
 */
int crypto_kdf_blake2b_derive_from_key(unsigned char *subkey, size_t subkey_len,
                                       uint64_t subkey_id,
                                       const char ctx[crypto_kdf_blake2b_CONTEXTBYTES],
                                       const unsigned char key[crypto_kdf_blake2b_KEYBYTES])
{
    unsigned char personal[crypto_generichash_blake2b_PERSONALBYTES];
    unsigned char salt[crypto_generichash_blake2b_SALTBYTES];

    /* personalization = ctx || 0...0 */
    memset(personal, 0, sizeof personal);
    memcpy(personal, ctx, crypto_kdf_blake2b_CONTEXTBYTES);

    /* salt = LE64(subkey_id) || 0...0 */
    memset(salt, 0, sizeof salt);
    STORE64_LE(salt, subkey_id);

    if (!(subkey_len >= crypto_kdf_blake2b_BYTES_MIN &&
          subkey_len <= crypto_kdf_blake2b_BYTES_MAX)) {
        errno = EINVAL;
        return -1;
    }
    return crypto_generichash_blake2b_salt_personal(subkey, subkey_len,
                                                    NULL, 0,
                                                    key, crypto_kdf_blake2b_KEYBYTES,
                                                    salt, personal);
}

View File

@@ -0,0 +1,49 @@
#include "crypto_kdf.h"
#include "randombytes.h"
/* Returns the string constant crypto_kdf_PRIMITIVE. */
const char *
crypto_kdf_primitive(void)
{
return crypto_kdf_PRIMITIVE;
}
/* Returns the compile-time constant crypto_kdf_BYTES_MIN. */
size_t
crypto_kdf_bytes_min(void)
{
return crypto_kdf_BYTES_MIN;
}
/* Returns the compile-time constant crypto_kdf_BYTES_MAX. */
size_t
crypto_kdf_bytes_max(void)
{
return crypto_kdf_BYTES_MAX;
}
/* Returns the compile-time constant crypto_kdf_CONTEXTBYTES. */
size_t
crypto_kdf_contextbytes(void)
{
return crypto_kdf_CONTEXTBYTES;
}
/* Returns the compile-time constant crypto_kdf_KEYBYTES. */
size_t
crypto_kdf_keybytes(void)
{
return crypto_kdf_KEYBYTES;
}
/*
 * Generic subkey derivation entry point: forwards all arguments unchanged
 * to crypto_kdf_blake2b_derive_from_key() and returns its result.
 */
int
crypto_kdf_derive_from_key(unsigned char *subkey, size_t subkey_len,
uint64_t subkey_id,
const char ctx[crypto_kdf_CONTEXTBYTES],
const unsigned char key[crypto_kdf_KEYBYTES])
{
return crypto_kdf_blake2b_derive_from_key(subkey, subkey_len,
subkey_id, ctx, key);
}
/* Fills k with crypto_kdf_KEYBYTES bytes obtained from randombytes_buf(). */
void
crypto_kdf_keygen(unsigned char k[crypto_kdf_KEYBYTES])
{
randombytes_buf(k, crypto_kdf_KEYBYTES);
}

View File

@@ -0,0 +1,123 @@
#include <errno.h>
#include <string.h>
#include "crypto_auth_hmacsha256.h"
#include "crypto_kdf.h"
#include "crypto_kdf_hkdf_sha256.h"
#include "randombytes.h"
#include "utils.h"
/*
 * Start an incremental HKDF extract: initializes the embedded HMAC-SHA256
 * state with salt as the HMAC key. Returns the result of
 * crypto_auth_hmacsha256_init().
 */
int
crypto_kdf_hkdf_sha256_extract_init(crypto_kdf_hkdf_sha256_state *state,
const unsigned char *salt, size_t salt_len)
{
return crypto_auth_hmacsha256_init(&state->st, salt, salt_len);
}
/*
 * Feed ikm_len bytes of input keying material into the extract step.
 * Returns the result of crypto_auth_hmacsha256_update().
 */
int
crypto_kdf_hkdf_sha256_extract_update(crypto_kdf_hkdf_sha256_state *state,
const unsigned char *ikm, size_t ikm_len)
{
return crypto_auth_hmacsha256_update(&state->st, ikm, ikm_len);
}
/*
 * Finish an incremental HKDF extract: writes the HMAC-SHA256 output (the
 * pseudorandom key) to prk and wipes the extract state.
 * Always returns 0.
 */
int
crypto_kdf_hkdf_sha256_extract_final(crypto_kdf_hkdf_sha256_state *state,
                                     unsigned char prk[crypto_kdf_hkdf_sha256_KEYBYTES])
{
    crypto_auth_hmacsha256_final(&state->st, prk);
    /* `state` is a pointer: wipe the object it points to, not just
     * pointer-size bytes of it */
    sodium_memzero(state, sizeof *state);

    return 0;
}
/*
 * One-shot HKDF extract: runs init (salt), update (ikm) and final on a
 * stack-allocated state and writes the pseudorandom key to prk.
 * The results of init and update are not checked; the return value is that
 * of crypto_kdf_hkdf_sha256_extract_final().
 */
int
crypto_kdf_hkdf_sha256_extract(
unsigned char prk[crypto_kdf_hkdf_sha256_KEYBYTES],
const unsigned char *salt, size_t salt_len, const unsigned char *ikm,
size_t ikm_len)
{
crypto_kdf_hkdf_sha256_state state;
crypto_kdf_hkdf_sha256_extract_init(&state, salt, salt_len);
crypto_kdf_hkdf_sha256_extract_update(&state, ikm, ikm_len);
return crypto_kdf_hkdf_sha256_extract_final(&state, prk);
}
/*
 * Fills prk with crypto_kdf_hkdf_sha256_KEYBYTES bytes obtained from
 * randombytes_buf().
 */
void
crypto_kdf_hkdf_sha256_keygen(unsigned char prk[crypto_kdf_hkdf_sha256_KEYBYTES])
{
randombytes_buf(prk, crypto_kdf_hkdf_sha256_KEYBYTES);
}
/*
 * One expand round: dst = HMAC-SHA256(prk, prev || ctx || counter), where
 * prev is the previous round's output, or NULL for the first round.
 */
static void
hkdf_sha256_expand_round(crypto_auth_hmacsha256_state *st,
                         const unsigned char *prk,
                         const unsigned char *prev,
                         const char *ctx, size_t ctx_len,
                         unsigned char counter, unsigned char *dst)
{
    crypto_auth_hmacsha256_init(st, prk, crypto_kdf_hkdf_sha256_KEYBYTES);
    if (prev != NULL) {
        crypto_auth_hmacsha256_update(st, prev, crypto_auth_hmacsha256_BYTES);
    }
    crypto_auth_hmacsha256_update(st, (const unsigned char *) ctx, ctx_len);
    crypto_auth_hmacsha256_update(st, &counter, (size_t) 1U);
    crypto_auth_hmacsha256_final(st, dst);
}

/*
 * HKDF expand: fill out with out_len bytes derived from prk and the
 * context string ctx (ctx_len bytes).
 *
 * Full HMAC-sized blocks are written straight into out, each round chaining
 * on the block before it; a trailing partial block goes through a temporary
 * buffer that is wiped afterwards. The round counter starts at 1.
 *
 * Returns 0 on success, or -1 with errno set to EINVAL when out_len exceeds
 * crypto_kdf_hkdf_sha256_BYTES_MAX.
 */
int
crypto_kdf_hkdf_sha256_expand(unsigned char *out, size_t out_len,
                              const char *ctx, size_t ctx_len,
                              const unsigned char prk[crypto_kdf_hkdf_sha256_KEYBYTES])
{
    crypto_auth_hmacsha256_state st;
    unsigned char                tmp[crypto_auth_hmacsha256_BYTES];
    size_t                       done;
    size_t                       left;
    unsigned char                counter = 1U;

    if (out_len > crypto_kdf_hkdf_sha256_BYTES_MAX) {
        errno = EINVAL;
        return -1;
    }
    /* whole blocks */
    for (done = (size_t) 0U; done + crypto_auth_hmacsha256_BYTES <= out_len;
         done += crypto_auth_hmacsha256_BYTES) {
        hkdf_sha256_expand_round(
            &st, prk,
            done != (size_t) 0U ? &out[done - crypto_auth_hmacsha256_BYTES]
                                : NULL,
            ctx, ctx_len, counter, &out[done]);
        counter++;
    }
    /* trailing partial block, if any */
    left = out_len & (crypto_auth_hmacsha256_BYTES - 1U);
    if (left != (size_t) 0U) {
        hkdf_sha256_expand_round(
            &st, prk,
            done != (size_t) 0U ? &out[done - crypto_auth_hmacsha256_BYTES]
                                : NULL,
            ctx, ctx_len, counter, tmp);
        memcpy(&out[done], tmp, left);
        sodium_memzero(tmp, sizeof tmp);
    }
    sodium_memzero(&st, sizeof st);

    return 0;
}
/* Returns the compile-time constant crypto_kdf_hkdf_sha256_KEYBYTES. */
size_t
crypto_kdf_hkdf_sha256_keybytes(void)
{
return crypto_kdf_hkdf_sha256_KEYBYTES;
}
/* Returns the compile-time constant crypto_kdf_hkdf_sha256_BYTES_MIN. */
size_t
crypto_kdf_hkdf_sha256_bytes_min(void)
{
return crypto_kdf_hkdf_sha256_BYTES_MIN;
}
/* Returns the compile-time constant crypto_kdf_hkdf_sha256_BYTES_MAX. */
size_t
crypto_kdf_hkdf_sha256_bytes_max(void)
{
return crypto_kdf_hkdf_sha256_BYTES_MAX;
}
/* Returns sizeof(crypto_kdf_hkdf_sha256_state). */
size_t crypto_kdf_hkdf_sha256_statebytes(void)
{
return sizeof(crypto_kdf_hkdf_sha256_state);
}

View File

@@ -0,0 +1,118 @@
#include <errno.h>
#include <string.h>
#include "crypto_auth_hmacsha512.h"
#include "crypto_kdf.h"
#include "crypto_kdf_hkdf_sha512.h"
#include "randombytes.h"
#include "utils.h"
/*
 * Start an incremental HKDF extract: initializes the embedded HMAC-SHA512
 * state with salt as the HMAC key. Returns the result of
 * crypto_auth_hmacsha512_init().
 */
int
crypto_kdf_hkdf_sha512_extract_init(crypto_kdf_hkdf_sha512_state *state,
const unsigned char *salt, size_t salt_len)
{
return crypto_auth_hmacsha512_init(&state->st, salt, salt_len);
}
/*
 * Feed ikm_len bytes of input keying material into the extract step.
 * Returns the result of crypto_auth_hmacsha512_update().
 */
int
crypto_kdf_hkdf_sha512_extract_update(crypto_kdf_hkdf_sha512_state *state,
const unsigned char *ikm, size_t ikm_len)
{
return crypto_auth_hmacsha512_update(&state->st, ikm, ikm_len);
}
/*
 * Finish an incremental HKDF extract: writes the HMAC-SHA512 output (the
 * pseudorandom key) to prk and wipes the extract state.
 * Always returns 0.
 */
int
crypto_kdf_hkdf_sha512_extract_final(crypto_kdf_hkdf_sha512_state *state,
                                     unsigned char prk[crypto_kdf_hkdf_sha512_KEYBYTES])
{
    crypto_auth_hmacsha512_final(&state->st, prk);
    /* `state` is a pointer: wipe the object it points to, not just
     * pointer-size bytes of it */
    sodium_memzero(state, sizeof *state);

    return 0;
}
/*
 * One-shot HKDF extract: runs init (salt), update (ikm) and final on a
 * stack-allocated state and writes the pseudorandom key to prk.
 * The results of init and update are not checked; the return value is that
 * of crypto_kdf_hkdf_sha512_extract_final().
 */
int
crypto_kdf_hkdf_sha512_extract(
unsigned char prk[crypto_kdf_hkdf_sha512_KEYBYTES],
const unsigned char *salt, size_t salt_len, const unsigned char *ikm,
size_t ikm_len)
{
crypto_kdf_hkdf_sha512_state state;
crypto_kdf_hkdf_sha512_extract_init(&state, salt, salt_len);
crypto_kdf_hkdf_sha512_extract_update(&state, ikm, ikm_len);
return crypto_kdf_hkdf_sha512_extract_final(&state, prk);
}
/*
 * Fills prk with crypto_kdf_hkdf_sha512_KEYBYTES bytes obtained from
 * randombytes_buf().
 */
void
crypto_kdf_hkdf_sha512_keygen(unsigned char prk[crypto_kdf_hkdf_sha512_KEYBYTES])
{
randombytes_buf(prk, crypto_kdf_hkdf_sha512_KEYBYTES);
}
/*
 * One expand round: dst = HMAC-SHA512(prk, prev || ctx || counter), where
 * prev is the previous round's output, or NULL for the first round.
 */
static void
hkdf_sha512_expand_round(crypto_auth_hmacsha512_state *st,
                         const unsigned char *prk,
                         const unsigned char *prev,
                         const char *ctx, size_t ctx_len,
                         unsigned char counter, unsigned char *dst)
{
    crypto_auth_hmacsha512_init(st, prk, crypto_kdf_hkdf_sha512_KEYBYTES);
    if (prev != NULL) {
        crypto_auth_hmacsha512_update(st, prev, crypto_auth_hmacsha512_BYTES);
    }
    crypto_auth_hmacsha512_update(st, (const unsigned char *) ctx, ctx_len);
    crypto_auth_hmacsha512_update(st, &counter, (size_t) 1U);
    crypto_auth_hmacsha512_final(st, dst);
}

/*
 * HKDF expand: fill out with out_len bytes derived from prk and the
 * context string ctx (ctx_len bytes).
 *
 * Full HMAC-sized blocks are written straight into out, each round chaining
 * on the block before it; a trailing partial block goes through a temporary
 * buffer that is wiped afterwards. The round counter starts at 1.
 *
 * Returns 0 on success, or -1 with errno set to EINVAL when out_len exceeds
 * crypto_kdf_hkdf_sha512_BYTES_MAX.
 */
int
crypto_kdf_hkdf_sha512_expand(unsigned char *out, size_t out_len,
                              const char *ctx, size_t ctx_len,
                              const unsigned char prk[crypto_kdf_hkdf_sha512_KEYBYTES])
{
    crypto_auth_hmacsha512_state st;
    unsigned char                tmp[crypto_auth_hmacsha512_BYTES];
    size_t                       done;
    size_t                       left;
    unsigned char                counter = 1U;

    if (out_len > crypto_kdf_hkdf_sha512_BYTES_MAX) {
        errno = EINVAL;
        return -1;
    }
    /* whole blocks */
    for (done = (size_t) 0U; done + crypto_auth_hmacsha512_BYTES <= out_len;
         done += crypto_auth_hmacsha512_BYTES) {
        hkdf_sha512_expand_round(
            &st, prk,
            done != (size_t) 0U ? &out[done - crypto_auth_hmacsha512_BYTES]
                                : NULL,
            ctx, ctx_len, counter, &out[done]);
        counter++;
    }
    /* trailing partial block, if any */
    left = out_len & (crypto_auth_hmacsha512_BYTES - 1U);
    if (left != (size_t) 0U) {
        hkdf_sha512_expand_round(
            &st, prk,
            done != (size_t) 0U ? &out[done - crypto_auth_hmacsha512_BYTES]
                                : NULL,
            ctx, ctx_len, counter, tmp);
        memcpy(&out[done], tmp, left);
        sodium_memzero(tmp, sizeof tmp);
    }
    sodium_memzero(&st, sizeof st);

    return 0;
}
/* Returns the compile-time constant crypto_kdf_hkdf_sha512_KEYBYTES. */
size_t
crypto_kdf_hkdf_sha512_keybytes(void)
{
return crypto_kdf_hkdf_sha512_KEYBYTES;
}
/* Returns the compile-time constant crypto_kdf_hkdf_sha512_BYTES_MIN. */
size_t
crypto_kdf_hkdf_sha512_bytes_min(void)
{
return crypto_kdf_hkdf_sha512_BYTES_MIN;
}
/* Returns the compile-time constant crypto_kdf_hkdf_sha512_BYTES_MAX. */
size_t
crypto_kdf_hkdf_sha512_bytes_max(void)
{
return crypto_kdf_hkdf_sha512_BYTES_MAX;
}

View File

@@ -0,0 +1,143 @@
#include <stddef.h>
#include "core.h"
#include "crypto_generichash.h"
#include "crypto_kx.h"
#include "crypto_scalarmult.h"
#include "private/common.h"
#include "randombytes.h"
#include "utils.h"
/*
 * Deterministic key pair: sk is the unkeyed crypto_generichash of seed, and
 * pk is crypto_scalarmult_base(sk). The result of the hash call is not
 * checked; the return value is that of crypto_scalarmult_base().
 */
int
crypto_kx_seed_keypair(unsigned char pk[crypto_kx_PUBLICKEYBYTES],
unsigned char sk[crypto_kx_SECRETKEYBYTES],
const unsigned char seed[crypto_kx_SEEDBYTES])
{
crypto_generichash(sk, crypto_kx_SECRETKEYBYTES,
seed, crypto_kx_SEEDBYTES, NULL, 0);
return crypto_scalarmult_base(pk, sk);
}
/*
 * Random key pair: sk is filled by randombytes_buf() and pk is
 * crypto_scalarmult_base(sk), whose result is returned. The compile-time
 * assertions check that the kx key sizes match the scalarmult sizes.
 */
int
crypto_kx_keypair(unsigned char pk[crypto_kx_PUBLICKEYBYTES],
unsigned char sk[crypto_kx_SECRETKEYBYTES])
{
COMPILER_ASSERT(crypto_kx_SECRETKEYBYTES == crypto_scalarmult_SCALARBYTES);
COMPILER_ASSERT(crypto_kx_PUBLICKEYBYTES == crypto_scalarmult_BYTES);
randombytes_buf(sk, crypto_kx_SECRETKEYBYTES);
return crypto_scalarmult_base(pk, sk);
}
/*
 * Client side of the key exchange.
 *
 * Computes q = scalarmult(client_sk, server_pk) and hashes
 * q || client_pk || server_pk into two session keys: the first half of the
 * hash output becomes rx, the second half becomes tx.
 *
 * Either rx or tx may be NULL, in which case both names refer to the one
 * buffer supplied (which ends up holding the tx half, since it is written
 * last). Passing NULL for both is a misuse. All intermediate secrets are
 * wiped before returning.
 *
 * Returns 0 on success, -1 if crypto_scalarmult() fails.
 */
int
crypto_kx_client_session_keys(unsigned char rx[crypto_kx_SESSIONKEYBYTES],
                              unsigned char tx[crypto_kx_SESSIONKEYBYTES],
                              const unsigned char client_pk[crypto_kx_PUBLICKEYBYTES],
                              const unsigned char client_sk[crypto_kx_SECRETKEYBYTES],
                              const unsigned char server_pk[crypto_kx_PUBLICKEYBYTES])
{
    crypto_generichash_state hash_st;
    unsigned char            shared[crypto_scalarmult_BYTES];
    unsigned char            okm[2 * crypto_kx_SESSIONKEYBYTES];
    int                      k;

    COMPILER_ASSERT(sizeof okm <= crypto_generichash_BYTES_MAX);

    if (rx == NULL && tx == NULL) {
        sodium_misuse(); /* LCOV_EXCL_LINE */
    }
    if (rx == NULL) {
        rx = tx;
    } else if (tx == NULL) {
        tx = rx;
    }
    if (crypto_scalarmult(shared, client_sk, server_pk) != 0) {
        return -1;
    }
    crypto_generichash_init(&hash_st, NULL, 0U, sizeof okm);
    crypto_generichash_update(&hash_st, shared, crypto_scalarmult_BYTES);
    sodium_memzero(shared, sizeof shared);
    crypto_generichash_update(&hash_st, client_pk, crypto_kx_PUBLICKEYBYTES);
    crypto_generichash_update(&hash_st, server_pk, crypto_kx_PUBLICKEYBYTES);
    crypto_generichash_final(&hash_st, okm, sizeof okm);
    sodium_memzero(&hash_st, sizeof hash_st);

    /* client: rx <- first half, tx <- second half */
    for (k = 0; k < crypto_kx_SESSIONKEYBYTES; k++) {
        rx[k] = okm[k]; /* rx cannot be NULL */
        tx[k] = okm[crypto_kx_SESSIONKEYBYTES + k]; /* tx cannot be NULL */
    }
    sodium_memzero(okm, sizeof okm);

    return 0;
}
/*
 * Server side of the key exchange.
 *
 * Computes q = scalarmult(server_sk, client_pk) and hashes
 * q || client_pk || server_pk (client key first, same order as on the
 * client side) into two session keys: the first half of the hash output
 * becomes tx, the second half becomes rx, mirroring the client.
 *
 * Either rx or tx may be NULL, in which case both names refer to the one
 * buffer supplied (which ends up holding the rx half, since it is written
 * last). Passing NULL for both is a misuse. All intermediate secrets are
 * wiped before returning.
 *
 * Returns 0 on success, -1 if crypto_scalarmult() fails.
 */
int
crypto_kx_server_session_keys(unsigned char rx[crypto_kx_SESSIONKEYBYTES],
                              unsigned char tx[crypto_kx_SESSIONKEYBYTES],
                              const unsigned char server_pk[crypto_kx_PUBLICKEYBYTES],
                              const unsigned char server_sk[crypto_kx_SECRETKEYBYTES],
                              const unsigned char client_pk[crypto_kx_PUBLICKEYBYTES])
{
    crypto_generichash_state hash_st;
    unsigned char            shared[crypto_scalarmult_BYTES];
    unsigned char            okm[2 * crypto_kx_SESSIONKEYBYTES];
    int                      k;

    COMPILER_ASSERT(sizeof okm <= crypto_generichash_BYTES_MAX);

    if (rx == NULL && tx == NULL) {
        sodium_misuse(); /* LCOV_EXCL_LINE */
    }
    if (rx == NULL) {
        rx = tx;
    } else if (tx == NULL) {
        tx = rx;
    }
    if (crypto_scalarmult(shared, server_sk, client_pk) != 0) {
        return -1;
    }
    crypto_generichash_init(&hash_st, NULL, 0U, sizeof okm);
    crypto_generichash_update(&hash_st, shared, crypto_scalarmult_BYTES);
    sodium_memzero(shared, sizeof shared);
    crypto_generichash_update(&hash_st, client_pk, crypto_kx_PUBLICKEYBYTES);
    crypto_generichash_update(&hash_st, server_pk, crypto_kx_PUBLICKEYBYTES);
    crypto_generichash_final(&hash_st, okm, sizeof okm);
    sodium_memzero(&hash_st, sizeof hash_st);

    /* server: tx <- first half, rx <- second half */
    for (k = 0; k < crypto_kx_SESSIONKEYBYTES; k++) {
        tx[k] = okm[k];
        rx[k] = okm[crypto_kx_SESSIONKEYBYTES + k];
    }
    sodium_memzero(okm, sizeof okm);

    return 0;
}
/* Returns the compile-time constant crypto_kx_PUBLICKEYBYTES. */
size_t
crypto_kx_publickeybytes(void)
{
return crypto_kx_PUBLICKEYBYTES;
}
/* Returns the compile-time constant crypto_kx_SECRETKEYBYTES. */
size_t
crypto_kx_secretkeybytes(void)
{
return crypto_kx_SECRETKEYBYTES;
}
/* Returns the compile-time constant crypto_kx_SEEDBYTES. */
size_t
crypto_kx_seedbytes(void)
{
return crypto_kx_SEEDBYTES;
}
/* Returns the compile-time constant crypto_kx_SESSIONKEYBYTES. */
size_t
crypto_kx_sessionkeybytes(void)
{
return crypto_kx_SESSIONKEYBYTES;
}
/* Returns the string constant crypto_kx_PRIMITIVE. */
const char *
crypto_kx_primitive(void)
{
return crypto_kx_PRIMITIVE;
}

View File

@@ -0,0 +1,71 @@
#include "crypto_onetimeauth.h"
#include "randombytes.h"
/* Returns sizeof(crypto_onetimeauth_state). */
size_t
crypto_onetimeauth_statebytes(void)
{
return sizeof(crypto_onetimeauth_state);
}
/* Returns the compile-time constant crypto_onetimeauth_BYTES. */
size_t
crypto_onetimeauth_bytes(void)
{
return crypto_onetimeauth_BYTES;
}
/* Returns the compile-time constant crypto_onetimeauth_KEYBYTES. */
size_t
crypto_onetimeauth_keybytes(void)
{
return crypto_onetimeauth_KEYBYTES;
}
/*
 * One-shot authenticator: forwards to crypto_onetimeauth_poly1305() with
 * the same arguments and returns its result.
 */
int
crypto_onetimeauth(unsigned char *out, const unsigned char *in,
unsigned long long inlen, const unsigned char *k)
{
return crypto_onetimeauth_poly1305(out, in, inlen, k);
}
/*
 * Tag verification: forwards to crypto_onetimeauth_poly1305_verify() with
 * the same arguments and returns its result.
 */
int
crypto_onetimeauth_verify(const unsigned char *h, const unsigned char *in,
unsigned long long inlen, const unsigned char *k)
{
return crypto_onetimeauth_poly1305_verify(h, in, inlen, k);
}
/*
 * Start an incremental computation: forwards to
 * crypto_onetimeauth_poly1305_init(), passing state reinterpreted as a
 * crypto_onetimeauth_poly1305_state, and returns its result.
 */
int
crypto_onetimeauth_init(crypto_onetimeauth_state *state,
const unsigned char *key)
{
return crypto_onetimeauth_poly1305_init
((crypto_onetimeauth_poly1305_state *) state, key);
}
/*
 * Absorb inlen bytes: forwards to crypto_onetimeauth_poly1305_update(),
 * passing state reinterpreted as a crypto_onetimeauth_poly1305_state, and
 * returns its result.
 */
int
crypto_onetimeauth_update(crypto_onetimeauth_state *state,
const unsigned char *in,
unsigned long long inlen)
{
return crypto_onetimeauth_poly1305_update
((crypto_onetimeauth_poly1305_state *) state, in, inlen);
}
/*
 * Produce the tag: forwards to crypto_onetimeauth_poly1305_final(),
 * passing state reinterpreted as a crypto_onetimeauth_poly1305_state, and
 * returns its result.
 */
int
crypto_onetimeauth_final(crypto_onetimeauth_state *state,
unsigned char *out)
{
return crypto_onetimeauth_poly1305_final
((crypto_onetimeauth_poly1305_state *) state, out);
}
/* Returns the string constant crypto_onetimeauth_PRIMITIVE. */
const char *
crypto_onetimeauth_primitive(void)
{
return crypto_onetimeauth_PRIMITIVE;
}
/*
 * Fills k with crypto_onetimeauth_KEYBYTES bytes obtained from
 * randombytes_buf().
 */
void crypto_onetimeauth_keygen(unsigned char k[crypto_onetimeauth_KEYBYTES])
{
randombytes_buf(k, crypto_onetimeauth_KEYBYTES);
}

View File

@@ -0,0 +1,124 @@
#include "poly1305_donna.h"
#include "crypto_verify_16.h"
#include "private/common.h"
#include "utils.h"
#ifdef HAVE_TI_MODE
#include "poly1305_donna64.h"
#else
#include "poly1305_donna32.h"
#endif
#include "../onetimeauth_poly1305.h"
/*
 * Absorb `bytes` bytes of message m into the Poly1305 state.
 *
 * Data is processed in three stages: a partially filled internal buffer is
 * topped up (and processed once it holds a full block), then all whole
 * blocks are processed directly from m, and finally the remaining bytes
 * (fewer than one block) are buffered for the next call.
 */
static void
poly1305_update(poly1305_state_internal_t *st, const unsigned char *m,
                unsigned long long bytes)
{
    unsigned long long k;
    unsigned long long take;

    /* stage 1: complete the buffered partial block */
    if (st->leftover) {
        take = (poly1305_block_size - st->leftover);
        if (bytes < take) {
            take = bytes;
        }
        for (k = 0; k < take; k++) {
            st->buffer[st->leftover + k] = m[k];
        }
        m += take;
        bytes -= take;
        st->leftover += take;
        if (st->leftover < poly1305_block_size) {
            /* still not a full block; wait for more input */
            return;
        }
        poly1305_blocks(st, st->buffer, poly1305_block_size);
        st->leftover = 0;
    }

    /* stage 2: whole blocks straight from the input */
    if (bytes >= poly1305_block_size) {
        take = (bytes & ~(poly1305_block_size - 1));
        poly1305_blocks(st, m, take);
        m += take;
        bytes -= take;
    }

    /* stage 3: stash the tail (no-op when nothing is left) */
    for (k = 0; k < bytes; k++) {
        st->buffer[st->leftover + k] = m[k];
    }
    st->leftover += bytes;
}
/*
 * One-shot Poly1305: initializes a stack-allocated (64-byte aligned) state
 * with key, absorbs inlen bytes of m and writes the tag to out.
 * Always returns 0.
 */
static int
crypto_onetimeauth_poly1305_donna(unsigned char *out, const unsigned char *m,
unsigned long long inlen,
const unsigned char *key)
{
CRYPTO_ALIGN(64) poly1305_state_internal_t state;
poly1305_init(&state, key);
poly1305_update(&state, m, inlen);
poly1305_finish(&state, out);
return 0;
}
/*
 * Incremental init: treats the opaque public state as a
 * poly1305_state_internal_t and initializes it with key. The compile-time
 * assertion checks that the public state is large enough to hold the
 * internal one. Always returns 0.
 */
static int
crypto_onetimeauth_poly1305_donna_init(crypto_onetimeauth_poly1305_state *state,
const unsigned char *key)
{
COMPILER_ASSERT(sizeof(crypto_onetimeauth_poly1305_state) >=
sizeof(poly1305_state_internal_t));
poly1305_init((poly1305_state_internal_t *) (void *) state, key);
return 0;
}
/*
 * Incremental update: absorbs inlen bytes of in via poly1305_update() on
 * the state reinterpreted as a poly1305_state_internal_t.
 * Always returns 0.
 */
static int
crypto_onetimeauth_poly1305_donna_update(
crypto_onetimeauth_poly1305_state *state, const unsigned char *in,
unsigned long long inlen)
{
poly1305_update((poly1305_state_internal_t *) (void *) state, in, inlen);
return 0;
}
/*
 * Incremental final: writes the tag to out via poly1305_finish() on the
 * state reinterpreted as a poly1305_state_internal_t.
 * Always returns 0.
 */
static int
crypto_onetimeauth_poly1305_donna_final(
crypto_onetimeauth_poly1305_state *state, unsigned char *out)
{
poly1305_finish((poly1305_state_internal_t *) (void *) state, out);
return 0;
}
/*
 * Verify a tag: recomputes the 16-byte tag of in under key k and compares
 * it with h using crypto_verify_16(), whose result is returned.
 */
static int
crypto_onetimeauth_poly1305_donna_verify(const unsigned char *h,
const unsigned char *in,
unsigned long long inlen,
const unsigned char *k)
{
unsigned char correct[16];
crypto_onetimeauth_poly1305_donna(correct, in, inlen, k);
return crypto_verify_16(h, correct);
}
/*
 * Function table exposing the donna implementation: one-shot, verify, and
 * the incremental init/update/final entry points defined in this file.
 */
struct crypto_onetimeauth_poly1305_implementation
crypto_onetimeauth_poly1305_donna_implementation = {
SODIUM_C99(.onetimeauth =) crypto_onetimeauth_poly1305_donna,
SODIUM_C99(.onetimeauth_verify =)
crypto_onetimeauth_poly1305_donna_verify,
SODIUM_C99(.onetimeauth_init =) crypto_onetimeauth_poly1305_donna_init,
SODIUM_C99(.onetimeauth_update =)
crypto_onetimeauth_poly1305_donna_update,
SODIUM_C99(.onetimeauth_final =) crypto_onetimeauth_poly1305_donna_final
};

View File

@@ -0,0 +1,12 @@
#ifndef poly1305_donna_H
#define poly1305_donna_H
#include <stddef.h>
#include "../onetimeauth_poly1305.h"
#include "crypto_onetimeauth_poly1305.h"
/* Function table of the donna Poly1305 implementation. */
extern struct crypto_onetimeauth_poly1305_implementation
crypto_onetimeauth_poly1305_donna_implementation;
#endif /* poly1305_donna_H */

View File

@@ -0,0 +1,235 @@
/*
poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication
and 64 bit addition
*/
/*
 * POLY1305_NOINLINE: compiler-specific "do not inline" attribute (MSVC,
 * clang/GCC); expands to nothing on other compilers.
 */
#if defined(_MSC_VER)
# define POLY1305_NOINLINE __declspec(noinline)
#elif defined(__clang__) || defined(__GNUC__)
# define POLY1305_NOINLINE __attribute__((noinline))
#else
# define POLY1305_NOINLINE
#endif
#include "private/common.h"
/* Poly1305 processes the message in 16-byte blocks. */
#define poly1305_block_size 16
/* 17 + sizeof(unsigned long long) + 14*sizeof(unsigned long) */
typedef struct poly1305_state_internal_t {
/* key multiplier r, split into five limbs masked to at most 26 bits */
unsigned long r[5];
/* accumulator h, five limbs */
unsigned long h[5];
/* key bytes 16..31 as four little-endian 32-bit words, added at the end */
unsigned long pad[4];
/* number of bytes currently held in buffer */
unsigned long long leftover;
/* holding area for an incomplete block */
unsigned char buffer[poly1305_block_size];
/* set by poly1305_finish() before processing the padded last block */
unsigned char final;
} poly1305_state_internal_t;
/*
 * Initialize the state from a 32-byte key: the first 16 bytes become the
 * masked multiplier r (spread over five limbs via overlapping 32-bit
 * loads), the last 16 bytes are saved as pad, and the accumulator, buffer
 * fill level and final flag are reset.
 */
static void
poly1305_init(poly1305_state_internal_t *st, const unsigned char key[32])
{
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff - wiped after finalization */
st->r[0] = (LOAD32_LE(&key[0])) & 0x3ffffff;
st->r[1] = (LOAD32_LE(&key[3]) >> 2) & 0x3ffff03;
st->r[2] = (LOAD32_LE(&key[6]) >> 4) & 0x3ffc0ff;
st->r[3] = (LOAD32_LE(&key[9]) >> 6) & 0x3f03fff;
st->r[4] = (LOAD32_LE(&key[12]) >> 8) & 0x00fffff;
/* h = 0 */
st->h[0] = 0;
st->h[1] = 0;
st->h[2] = 0;
st->h[3] = 0;
st->h[4] = 0;
/* save pad for later */
st->pad[0] = LOAD32_LE(&key[16]);
st->pad[1] = LOAD32_LE(&key[20]);
st->pad[2] = LOAD32_LE(&key[24]);
st->pad[3] = LOAD32_LE(&key[28]);
st->leftover = 0;
st->final = 0;
}
/*
 * Process whole 16-byte blocks from m: for each block, h = (h + block) * r
 * with a partial reduction. Any trailing bytes beyond the last whole block
 * are ignored. Unless st->final is set, bit 128 is added to every block;
 * poly1305_finish() sets the flag because it has already appended the
 * terminating 1 byte to the padded last block itself.
 */
static void
poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
unsigned long long bytes)
{
const unsigned long hibit = (st->final) ? 0UL : (1UL << 24); /* 1 << 128 */
unsigned long r0, r1, r2, r3, r4;
unsigned long s1, s2, s3, s4;
unsigned long h0, h1, h2, h3, h4;
unsigned long long d0, d1, d2, d3, d4;
unsigned long c;
r0 = st->r[0];
r1 = st->r[1];
r2 = st->r[2];
r3 = st->r[3];
r4 = st->r[4];
/* r limbs pre-multiplied by 5, used for the product terms that wrap
 * around past the top limb */
s1 = r1 * 5;
s2 = r2 * 5;
s3 = r3 * 5;
s4 = r4 * 5;
h0 = st->h[0];
h1 = st->h[1];
h2 = st->h[2];
h3 = st->h[3];
h4 = st->h[4];
while (bytes >= poly1305_block_size) {
/* h += m[i] */
h0 += (LOAD32_LE(m + 0)) & 0x3ffffff;
h1 += (LOAD32_LE(m + 3) >> 2) & 0x3ffffff;
h2 += (LOAD32_LE(m + 6) >> 4) & 0x3ffffff;
h3 += (LOAD32_LE(m + 9) >> 6) & 0x3ffffff;
h4 += (LOAD32_LE(m + 12) >> 8) | hibit;
/* h *= r */
d0 = ((unsigned long long) h0 * r0) + ((unsigned long long) h1 * s4) +
((unsigned long long) h2 * s3) + ((unsigned long long) h3 * s2) +
((unsigned long long) h4 * s1);
d1 = ((unsigned long long) h0 * r1) + ((unsigned long long) h1 * r0) +
((unsigned long long) h2 * s4) + ((unsigned long long) h3 * s3) +
((unsigned long long) h4 * s2);
d2 = ((unsigned long long) h0 * r2) + ((unsigned long long) h1 * r1) +
((unsigned long long) h2 * r0) + ((unsigned long long) h3 * s4) +
((unsigned long long) h4 * s3);
d3 = ((unsigned long long) h0 * r3) + ((unsigned long long) h1 * r2) +
((unsigned long long) h2 * r1) + ((unsigned long long) h3 * r0) +
((unsigned long long) h4 * s4);
d4 = ((unsigned long long) h0 * r4) + ((unsigned long long) h1 * r3) +
((unsigned long long) h2 * r2) + ((unsigned long long) h3 * r1) +
((unsigned long long) h4 * r0);
/* (partial) h %= p */
c = (unsigned long) (d0 >> 26);
h0 = (unsigned long) d0 & 0x3ffffff;
d1 += c;
c = (unsigned long) (d1 >> 26);
h1 = (unsigned long) d1 & 0x3ffffff;
d2 += c;
c = (unsigned long) (d2 >> 26);
h2 = (unsigned long) d2 & 0x3ffffff;
d3 += c;
c = (unsigned long) (d3 >> 26);
h3 = (unsigned long) d3 & 0x3ffffff;
d4 += c;
c = (unsigned long) (d4 >> 26);
h4 = (unsigned long) d4 & 0x3ffffff;
/* the carry out of the top limb is folded back into h0 times 5 */
h0 += c * 5;
c = (h0 >> 26);
h0 &= 0x3ffffff;
h1 += c;
m += poly1305_block_size;
bytes -= poly1305_block_size;
}
st->h[0] = h0;
st->h[1] = h1;
st->h[2] = h2;
st->h[3] = h3;
st->h[4] = h4;
}
/*
 * Finalize and write the 16-byte tag to mac.
 *
 * Any buffered partial block is terminated with a 1 byte, zero-padded and
 * processed with the final flag set. The accumulator is then fully carried
 * and reduced with a branch-free select, packed into four 32-bit words,
 * added to pad modulo 2^128 and stored little-endian. The whole state is
 * wiped afterwards, so it must be re-initialized before being used again.
 */
static POLY1305_NOINLINE void
poly1305_finish(poly1305_state_internal_t *st, unsigned char mac[16])
{
unsigned long h0, h1, h2, h3, h4, c;
unsigned long g0, g1, g2, g3, g4;
unsigned long long f;
unsigned long mask;
/* process the remaining block */
if (st->leftover) {
unsigned long long i = st->leftover;
st->buffer[i++] = 1;
for (; i < poly1305_block_size; i++) {
st->buffer[i] = 0;
}
st->final = 1;
poly1305_blocks(st, st->buffer, poly1305_block_size);
}
/* fully carry h */
h0 = st->h[0];
h1 = st->h[1];
h2 = st->h[2];
h3 = st->h[3];
h4 = st->h[4];
c = h1 >> 26;
h1 = h1 & 0x3ffffff;
h2 += c;
c = h2 >> 26;
h2 = h2 & 0x3ffffff;
h3 += c;
c = h3 >> 26;
h3 = h3 & 0x3ffffff;
h4 += c;
c = h4 >> 26;
h4 = h4 & 0x3ffffff;
h0 += c * 5;
c = h0 >> 26;
h0 = h0 & 0x3ffffff;
h1 += c;
/* compute h + -p */
g0 = h0 + 5;
c = g0 >> 26;
g0 &= 0x3ffffff;
g1 = h1 + c;
c = g1 >> 26;
g1 &= 0x3ffffff;
g2 = h2 + c;
c = g2 >> 26;
g2 &= 0x3ffffff;
g3 = h3 + c;
c = g3 >> 26;
g3 &= 0x3ffffff;
g4 = h4 + c - (1UL << 26);
/* select h if h < p, or h + -p if h >= p */
/* mask is all-ones when the top bit of g4 is clear (no borrow, so g is
 * kept) and zero when it is set (h is kept) */
mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1;
g0 &= mask;
g1 &= mask;
g2 &= mask;
g3 &= mask;
g4 &= mask;
mask = ~mask;
h0 = (h0 & mask) | g0;
h1 = (h1 & mask) | g1;
h2 = (h2 & mask) | g2;
h3 = (h3 & mask) | g3;
h4 = (h4 & mask) | g4;
/* h = h % (2^128) */
h0 = ((h0) | (h1 << 26)) & 0xffffffff;
h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
/* mac = (h + pad) % (2^128) */
f = (unsigned long long) h0 + st->pad[0];
h0 = (unsigned long) f;
f = (unsigned long long) h1 + st->pad[1] + (f >> 32);
h1 = (unsigned long) f;
f = (unsigned long long) h2 + st->pad[2] + (f >> 32);
h2 = (unsigned long) f;
f = (unsigned long long) h3 + st->pad[3] + (f >> 32);
h3 = (unsigned long) f;
STORE32_LE(mac + 0, (uint32_t) h0);
STORE32_LE(mac + 4, (uint32_t) h1);
STORE32_LE(mac + 8, (uint32_t) h2);
STORE32_LE(mac + 12, (uint32_t) h3);
/* zero out the state */
sodium_memzero((void *) st, sizeof *st);
}

View File

@@ -0,0 +1,221 @@
/*
poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication
and 128 bit addition
*/
#include "private/common.h"
/*
 * 128-bit arithmetic helpers for the 64-bit Poly1305 code.
 *
 * Every argument and every expansion is parenthesized so the macros stay
 * correct when they are invoked with compound expressions (for example
 * MUL(d, a + 1, b) must multiply by (a + 1), not by 1).
 *
 * MUL(out, x, y)   out = x * y, as a full 128-bit product
 * ADD(out, in)     out += in
 * ADDLO(out, in)   out += in (same expansion as ADD)
 * SHR(in, shift)   (in >> shift), truncated to unsigned long long
 * LO(in)           in, truncated to unsigned long long
 */
#define MUL(out, x, y) ((out) = ((uint128_t) (x) * (y)))
#define ADD(out, in) ((out) += (in))
#define ADDLO(out, in) ((out) += (in))
#define SHR(in, shift) ((unsigned long long) ((in) >> (shift)))
#define LO(in) ((unsigned long long) (in))
/* Compiler-specific "do not inline" annotation; empty on unknown compilers. */
#if defined(_MSC_VER)
# define POLY1305_NOINLINE __declspec(noinline)
#elif defined(__clang__) || defined(__GNUC__)
# define POLY1305_NOINLINE __attribute__((noinline))
#else
# define POLY1305_NOINLINE
#endif
/* Number of message bytes consumed per poly1305_blocks() iteration. */
#define poly1305_block_size 16
/*
 * Working state of the 64-bit Poly1305 implementation.
 * Size: 17 + sizeof(unsigned long long) + 8*sizeof(unsigned long long)
 * (buffer + final, leftover, and the eight words of r, h and pad).
 */
typedef struct poly1305_state_internal_t {
unsigned long long r[3]; /* clamped first key half, split into three limbs by poly1305_init */
unsigned long long h[3]; /* accumulator limbs; masked with 44-, 44- and 42-bit masks after each block */
unsigned long long pad[2]; /* key bytes 16..31, added to h in poly1305_finish */
unsigned long long leftover; /* bytes pending in buffer; poly1305_finish pads starting at this index */
unsigned char buffer[poly1305_block_size]; /* staging area for a partial block */
unsigned char final; /* set to 1 by poly1305_finish before it processes the padded last block */
} poly1305_state_internal_t;
/*
 * Prepare a fresh state from a 32-byte one-time key.
 *
 * key[0..15] is clamped and stored as r in three limbs, key[16..31] is kept
 * as the pad, and the accumulator and bookkeeping fields are zeroed.
 */
static void
poly1305_init(poly1305_state_internal_t *st, const unsigned char key[32])
{
    const unsigned long long key_lo = LOAD64_LE(&key[0]);
    const unsigned long long key_hi = LOAD64_LE(&key[8]);

    /* r &= 0xffffffc0ffffffc0ffffffc0fffffff (r is wiped after finalization) */
    st->r[0] = key_lo & 0xffc0fffffff;
    st->r[1] = ((key_lo >> 44) | (key_hi << 20)) & 0xfffffc0ffff;
    st->r[2] = (key_hi >> 24) & 0x00ffffffc0f;

    /* the accumulator starts at zero */
    st->h[0] = 0;
    st->h[1] = 0;
    st->h[2] = 0;

    /* second key half is only needed at finalization */
    st->pad[0] = LOAD64_LE(&key[16]);
    st->pad[1] = LOAD64_LE(&key[24]);

    st->leftover = 0;
    st->final    = 0;
}
/*
 * Absorb message data into the accumulator, one 16-byte block at a time.
 *
 * For every complete block in `m`, computes h = (h + block) * r and partially
 * reduces the result back into limbs masked to 44, 44 and 42 bits. Any
 * trailing bytes beyond the last complete block are ignored. Unless
 * st->final is set, the 2^128 bit is added to each block.
 */
static void
poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
                unsigned long long bytes)
{
    const unsigned long long mask44 = 0xfffffffffff;
    const unsigned long long mask42 = 0x3ffffffffff;
    /* 2^128 as seen from the top limb, which starts at bit 88 */
    const unsigned long long hibit = st->final ? 0ULL : (1ULL << 40);
    const unsigned long long r0 = st->r[0];
    const unsigned long long r1 = st->r[1];
    const unsigned long long r2 = st->r[2];
    /* r1 and r2 pre-multiplied by 20 for the terms that wrap around */
    const unsigned long long s1 = r1 * (5 << 2);
    const unsigned long long s2 = r2 * (5 << 2);
    unsigned long long       h0 = st->h[0];
    unsigned long long       h1 = st->h[1];
    unsigned long long       h2 = st->h[2];

    for (; bytes >= poly1305_block_size;
         bytes -= poly1305_block_size, m += poly1305_block_size) {
        const unsigned long long lo = LOAD64_LE(&m[0]);
        const unsigned long long hi = LOAD64_LE(&m[8]);
        uint128_t                d0, d1, d2;
        unsigned long long       carry;

        /* h += m[i] */
        h0 += lo & mask44;
        h1 += ((lo >> 44) | (hi << 20)) & mask44;
        h2 += ((hi >> 24) & mask42) | hibit;

        /* h *= r */
        d0 = (uint128_t) h0 * r0 + (uint128_t) h1 * s2 + (uint128_t) h2 * s1;
        d1 = (uint128_t) h0 * r1 + (uint128_t) h1 * r0 + (uint128_t) h2 * s2;
        d2 = (uint128_t) h0 * r2 + (uint128_t) h1 * r1 + (uint128_t) h2 * r0;

        /* (partial) h %= p */
        carry = (unsigned long long) (d0 >> 44);
        h0    = (unsigned long long) d0 & mask44;
        d1 += carry;
        carry = (unsigned long long) (d1 >> 44);
        h1    = (unsigned long long) d1 & mask44;
        d2 += carry;
        carry = (unsigned long long) (d2 >> 42);
        h2    = (unsigned long long) d2 & mask42;
        h0 += carry * 5;
        carry = h0 >> 44;
        h0 &= mask44;
        h1 += carry;
    }

    st->h[0] = h0;
    st->h[1] = h1;
    st->h[2] = h2;
}
/*
 * Finish the computation: write the 16-byte tag to `mac` and wipe the state.
 *
 * Steps, in order:
 *  1. If bytes are pending in st->buffer, pad them (a 1 byte, then zeros) and
 *     absorb them as one last block with st->final set.
 *  2. Propagate all carries so h0/h1/h2 fit their 44/44/42-bit limbs.
 *  3. Compute g = h + 5 - 2^130 (i.e. h - p) and keep g instead of h when the
 *     subtraction did not borrow; the choice is made with masks, not a branch.
 *  4. Add the pad, pack the low 128 bits little-endian into mac.
 *  5. Zero the whole state with sodium_memzero.
 */
static POLY1305_NOINLINE void
poly1305_finish(poly1305_state_internal_t *st, unsigned char mac[16])
{
unsigned long long h0, h1, h2, c;
unsigned long long g0, g1, g2;
unsigned long long t0, t1;
unsigned long long mask;
/* process the remaining block */
if (st->leftover) {
unsigned long long i = st->leftover;
/* padding: a single 1 byte right after the data, zeros up to the block end */
st->buffer[i] = 1;
for (i = i + 1; i < poly1305_block_size; i++) {
st->buffer[i] = 0;
}
/* with final set, poly1305_blocks uses hibit = 0 for this block */
st->final = 1;
poly1305_blocks(st, st->buffer, poly1305_block_size);
}
/* fully carry h */
h0 = st->h[0];
h1 = st->h[1];
h2 = st->h[2];
c = h1 >> 44;
h1 &= 0xfffffffffff;
h2 += c;
c = h2 >> 42;
h2 &= 0x3ffffffffff;
/* a carry out of the top limb (2^130) wraps around as a multiple of 5 */
h0 += c * 5;
c = h0 >> 44;
h0 &= 0xfffffffffff;
h1 += c;
/* second pass over the same carry chain */
c = h1 >> 44;
h1 &= 0xfffffffffff;
h2 += c;
c = h2 >> 42;
h2 &= 0x3ffffffffff;
h0 += c * 5;
c = h0 >> 44;
h0 &= 0xfffffffffff;
h1 += c;
/* compute h + -p */
g0 = h0 + 5;
c = g0 >> 44;
g0 &= 0xfffffffffff;
g1 = h1 + c;
c = g1 >> 44;
g1 &= 0xfffffffffff;
g2 = h2 + c - (1ULL << 42);
/* select h if h < p, or h + -p if h >= p */
/* top bit of g2 set (borrow) -> mask = 0; otherwise mask = all ones */
mask = (g2 >> ((sizeof(unsigned long long) * 8) - 1)) - 1;
g0 &= mask;
g1 &= mask;
g2 &= mask;
mask = ~mask;
h0 = (h0 & mask) | g0;
h1 = (h1 & mask) | g1;
h2 = (h2 & mask) | g2;
/* h = (h + pad) */
t0 = st->pad[0];
t1 = st->pad[1];
/* the pad is split into the same 44/44/42-bit limbs before being added */
h0 += ((t0) &0xfffffffffff);
c = (h0 >> 44);
h0 &= 0xfffffffffff;
h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c;
c = (h1 >> 44);
h1 &= 0xfffffffffff;
h2 += (((t1 >> 24)) & 0x3ffffffffff) + c;
h2 &= 0x3ffffffffff;
/* mac = h % (2^128) */
/* repack the three limbs into two 64-bit words */
h0 = (h0) | (h1 << 44);
h1 = (h1 >> 20) | (h2 << 24);
STORE64_LE(&mac[0], h0);
STORE64_LE(&mac[8], h1);
/* zero out the state */
sodium_memzero((void *) st, sizeof *st);
}

View File

@@ -0,0 +1,90 @@
#include "onetimeauth_poly1305.h"
#include "crypto_onetimeauth_poly1305.h"
#include "private/common.h"
#include "private/implementations.h"
#include "randombytes.h"
#include "runtime.h"
#include "donna/poly1305_donna.h"
#if defined(HAVE_TI_MODE) && defined(HAVE_EMMINTRIN_H)
# include "sse2/poly1305_sse2.h"
#endif
/*
 * Backend used by every public crypto_onetimeauth_poly1305_* function in this
 * file. It starts out as the donna implementation and is re-pointed by
 * _crypto_onetimeauth_poly1305_pick_best_implementation().
 */
static const crypto_onetimeauth_poly1305_implementation *implementation =
&crypto_onetimeauth_poly1305_donna_implementation;
int
crypto_onetimeauth_poly1305(unsigned char *out, const unsigned char *in,
unsigned long long inlen, const unsigned char *k)
{
return implementation->onetimeauth(out, in, inlen, k);
}
int
crypto_onetimeauth_poly1305_verify(const unsigned char *h,
const unsigned char *in,
unsigned long long inlen,
const unsigned char *k)
{
return implementation->onetimeauth_verify(h, in, inlen, k);
}
/*
 * Streaming API, step 1: forwards to the `onetimeauth_init` entry of the
 * currently selected implementation and returns its result unchanged.
 */
int
crypto_onetimeauth_poly1305_init(crypto_onetimeauth_poly1305_state *state,
                                 const unsigned char               *key)
{
    const crypto_onetimeauth_poly1305_implementation *const impl =
        implementation;

    return impl->onetimeauth_init(state, key);
}
/*
 * Streaming API, step 2: forwards to the `onetimeauth_update` entry of the
 * currently selected implementation and returns its result unchanged.
 */
int
crypto_onetimeauth_poly1305_update(crypto_onetimeauth_poly1305_state *state,
                                   const unsigned char               *in,
                                   unsigned long long                 inlen)
{
    const crypto_onetimeauth_poly1305_implementation *const impl =
        implementation;

    return impl->onetimeauth_update(state, in, inlen);
}
/*
 * Streaming API, step 3: forwards to the `onetimeauth_final` entry of the
 * currently selected implementation and returns its result unchanged.
 */
int
crypto_onetimeauth_poly1305_final(crypto_onetimeauth_poly1305_state *state,
                                  unsigned char                     *out)
{
    const crypto_onetimeauth_poly1305_implementation *const impl =
        implementation;

    return impl->onetimeauth_final(state, out);
}
/* Runtime accessor for the crypto_onetimeauth_poly1305_BYTES constant. */
size_t
crypto_onetimeauth_poly1305_bytes(void)
{
    const size_t tag_len = crypto_onetimeauth_poly1305_BYTES;

    return tag_len;
}
/* Runtime accessor for the crypto_onetimeauth_poly1305_KEYBYTES constant. */
size_t
crypto_onetimeauth_poly1305_keybytes(void)
{
    const size_t key_len = crypto_onetimeauth_poly1305_KEYBYTES;

    return key_len;
}
/* Size in bytes of the public crypto_onetimeauth_poly1305_state object. */
size_t
crypto_onetimeauth_poly1305_statebytes(void)
{
    const size_t state_len = sizeof(crypto_onetimeauth_poly1305_state);

    return state_len;
}
/*
 * Fill `k` with crypto_onetimeauth_poly1305_KEYBYTES bytes obtained from
 * randombytes_buf().
 */
void
crypto_onetimeauth_poly1305_keygen(
    unsigned char k[crypto_onetimeauth_poly1305_KEYBYTES])
{
    const size_t key_len = crypto_onetimeauth_poly1305_KEYBYTES;

    randombytes_buf(k, key_len);
}
int
_crypto_onetimeauth_poly1305_pick_best_implementation(void)
{
implementation = &crypto_onetimeauth_poly1305_donna_implementation;
#if defined(HAVE_TI_MODE) && defined(HAVE_EMMINTRIN_H)
if (sodium_runtime_has_sse2()) {
implementation = &crypto_onetimeauth_poly1305_sse2_implementation;
}
#endif
return 0;
}

View File

@@ -0,0 +1,21 @@
#ifndef onetimeauth_poly1305_H
#define onetimeauth_poly1305_H
#include "crypto_onetimeauth_poly1305.h"
/*
 * Function table describing one Poly1305 backend. Each member has the same
 * parameter list as the public crypto_onetimeauth_poly1305* function that
 * dispatches to it.
 */
typedef struct crypto_onetimeauth_poly1305_implementation {
/* one-shot: authenticate `inlen` bytes of `in` under key `k`, tag written to `out` */
int (*onetimeauth)(unsigned char *out, const unsigned char *in,
unsigned long long inlen, const unsigned char *k);
/* one-shot: check tag `h` against `inlen` bytes of `in` under key `k` */
int (*onetimeauth_verify)(const unsigned char *h, const unsigned char *in,
unsigned long long inlen, const unsigned char *k);
/* streaming: initialise `state` from `key` */
int (*onetimeauth_init)(crypto_onetimeauth_poly1305_state *state,
const unsigned char * key);
/* streaming: absorb `inlen` more bytes of `in` */
int (*onetimeauth_update)(crypto_onetimeauth_poly1305_state *state,
const unsigned char * in,
unsigned long long inlen);
/* streaming: produce the tag in `out` */
int (*onetimeauth_final)(crypto_onetimeauth_poly1305_state *state,
unsigned char * out);
} crypto_onetimeauth_poly1305_implementation;
#endif

View File

@@ -0,0 +1,949 @@
#include <stdint.h>
#include <string.h>
#include "../onetimeauth_poly1305.h"
#include "crypto_verify_16.h"
#include "poly1305_sse2.h"
#include "private/common.h"
#include "utils.h"
#if defined(HAVE_TI_MODE) && defined(HAVE_EMMINTRIN_H)
# ifdef __GNUC__
# pragma GCC target("sse2")
# endif
# include <emmintrin.h>
# include "private/sse2_64_32.h"
/* Shorthand for the 128-bit SSE2 integer vector type. */
typedef __m128i xmmi;
/* Compiler-specific "do not inline" annotation; empty on unknown compilers. */
# if defined(_MSC_VER)
# define POLY1305_NOINLINE __declspec(noinline)
# elif defined(__clang__) || defined(__GNUC__)
# define POLY1305_NOINLINE __attribute__((noinline))
# else
# define POLY1305_NOINLINE
# endif
/* This implementation buffers and processes the message 32 bytes at a time. */
# define poly1305_block_size 32
/* Bit flags stored in poly1305_state_internal_t.flags and tested by poly1305_blocks. */
enum poly1305_state_flags_t {
poly1305_started = 1, /* set by poly1305_blocks once the first 32 bytes have been loaded into H */
poly1305_final_shift8 = 4, /* final block: HIBIT is shifted right by 8 bytes, clearing it in the upper lane */
poly1305_final_shift16 = 8, /* final block: HIBIT is cleared in both lanes */
poly1305_final_r2_r = 16, /* use [r^2,r] for the final block */
poly1305_final_r_1 = 32 /* use [r,1] for the final block */
};
/*
 * Working state of the SSE2 implementation.
 *
 * H is accessed as hh[] (32-bit words) while poly1305_blocks is absorbing
 * message data, and as h[] (three 64-bit limbs) after poly1305_blocks has run
 * its final reduction (the call with m == NULL).
 */
typedef struct poly1305_state_internal_t {
union {
uint64_t h[3];
uint32_t hh[10];
} H; /* 40 bytes */
uint32_t R[5]; /* 20 bytes: clamped r as five limbs (see poly1305_init_ext) */
uint32_t R2[5]; /* 20 bytes: r^2, filled in by poly1305_init_ext unless the message is <= 16 bytes */
uint32_t R4[5]; /* 20 bytes: r^4, filled in by poly1305_init_ext unless the message is < 96 bytes */
uint64_t pad[2]; /* 16 bytes: key bytes 16..31 */
uint64_t flags; /* 8 bytes: combination of poly1305_state_flags_t values */
unsigned long long leftover; /* 8 bytes: count of bytes held in buffer */
unsigned char buffer[poly1305_block_size]; /* 32 bytes */
} poly1305_state_internal_t; /* 164 bytes total (sum of the member sizes; NOTE(review): sizeof may be larger if alignment padding is inserted - confirm) */
/*
* _mm_loadl_epi64() is turned into a simple MOVQ. So, unaligned accesses are
* totally fine, even though this intrinsic requires a __m128i* input.
* This confuses dynamic analysis, so force alignment, only in debug mode.
*/
# ifdef DEBUG
/*
 * Debug-only stand-in for _mm_loadl_epi64(): copies the 8 source bytes into a
 * local xmmi and loads from that instead of from the caller's pointer.
 * It must be defined before the #define below so that the call inside it
 * still refers to the real intrinsic.
 */
static xmmi
_fakealign_mm_loadl_epi64(const void *m)
{
xmmi tmp;
memcpy(&tmp, m, 8);
return _mm_loadl_epi64(&tmp);
}
/* from here on, every _mm_loadl_epi64() in this file goes through the wrapper */
# define _mm_loadl_epi64(X) _fakealign_mm_loadl_epi64(X)
#endif
/* copy 0-31 bytes */
static inline void
poly1305_block_copy31(unsigned char *dst, const unsigned char *src,
unsigned long long bytes)
{
if (bytes & 16) {
_mm_store_si128((xmmi *) (void *) dst,
_mm_loadu_si128((const xmmi *) (const void *) src));
src += 16;
dst += 16;
}
if (bytes & 8) {
memcpy(dst, src, 8);
src += 8;
dst += 8;
}
if (bytes & 4) {
memcpy(dst, src, 4);
src += 4;
dst += 4;
}
if (bytes & 2) {
memcpy(dst, src, 2);
src += 2;
dst += 2;
}
if (bytes & 1) {
*dst = *src;
}
}
/*
 * Initialise an SSE2 Poly1305 state from a 32-byte key.
 *
 * st     state to initialise; H is zeroed, flags and leftover are reset
 * key    key[0..15] is clamped into r, key[16..31] is saved as the pad
 * bytes  message length used to decide which powers of r to precompute;
 *        0 is treated as the maximum possible length
 *
 * r is clamped as three 44/44/42-bit limbs and then stored in st->R as five
 * limbs masked to 26 bits. r^2 is stored in st->R2 unless bytes <= 16, and
 * r^4 in st->R4 unless bytes < 96; skipped powers are left unwritten.
 */
static POLY1305_NOINLINE void
poly1305_init_ext(poly1305_state_internal_t *st, const unsigned char key[32],
unsigned long long bytes)
{
uint32_t *R;
uint128_t d[3];
uint64_t r0, r1, r2;
uint64_t rt0, rt1, rt2, st2, c;
uint64_t t0, t1;
unsigned long long i;
/* length 0: treat as maximal so both r^2 and r^4 get computed */
if (!bytes) {
bytes = ~(unsigned long long) 0;
}
/* H = 0 */
/* NOTE(review): the third 16-byte store starts at hh[8], 8 bytes before the end of H, so it also covers the first 8 bytes of R, which are rewritten below */
_mm_storeu_si128((xmmi *) (void *) &st->H.hh[0], _mm_setzero_si128());
_mm_storeu_si128((xmmi *) (void *) &st->H.hh[4], _mm_setzero_si128());
_mm_storeu_si128((xmmi *) (void *) &st->H.hh[8], _mm_setzero_si128());
/* clamp key */
memcpy(&t0, key, 8);
memcpy(&t1, key + 8, 8);
r0 = t0 & 0xffc0fffffff;
t0 >>= 44;
t0 |= t1 << 20;
r1 = t0 & 0xfffffc0ffff;
t1 >>= 24;
r2 = t1 & 0x00ffffffc0f;
/* r^1 */
/* repack the 44/44/42-bit limbs into five 26-bit limbs */
R = st->R;
R[0] = (uint32_t)(r0) &0x3ffffff;
R[1] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff;
R[2] = (uint32_t)((r1 >> 8)) & 0x3ffffff;
R[3] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff;
R[4] = (uint32_t)((r2 >> 16));
/* save pad */
memcpy(&st->pad[0], key + 16, 8);
memcpy(&st->pad[1], key + 24, 8);
rt0 = r0;
rt1 = r1;
rt2 = r2;
/* r^2, r^4 */
/* each pass squares rt: pass 0 yields r^2 (into R2), pass 1 yields r^4 (into R4) */
for (i = 0; i < 2; i++) {
if (i == 0) {
R = st->R2;
if (bytes <= 16) {
break;
}
} else if (i == 1) {
R = st->R4;
if (bytes < 96) {
break;
}
}
/* rt = rt * rt, with the wrap-around terms pre-multiplied by 20 */
st2 = rt2 * (5 << 2);
d[0] = ((uint128_t) rt0 * rt0) + ((uint128_t)(rt1 * 2) * st2);
d[1] = ((uint128_t) rt2 * st2) + ((uint128_t)(rt0 * 2) * rt1);
d[2] = ((uint128_t) rt1 * rt1) + ((uint128_t)(rt2 * 2) * rt0);
/* carry propagation back into 44/44/42-bit limbs */
rt0 = (uint64_t) d[0] & 0xfffffffffff;
c = (uint64_t)(d[0] >> 44);
d[1] += c;
rt1 = (uint64_t) d[1] & 0xfffffffffff;
c = (uint64_t)(d[1] >> 44);
d[2] += c;
rt2 = (uint64_t) d[2] & 0x3ffffffffff;
c = (uint64_t)(d[2] >> 42);
rt0 += c * 5;
c = (rt0 >> 44);
rt0 = rt0 & 0xfffffffffff;
rt1 += c;
c = (rt1 >> 44);
rt1 = rt1 & 0xfffffffffff;
rt2 += c; /* even if rt2 overflows, it will still fit in rp4 safely, and
is safe to multiply with */
R[0] = (uint32_t)(rt0) &0x3ffffff;
R[1] = (uint32_t)((rt0 >> 26) | (rt1 << 18)) & 0x3ffffff;
R[2] = (uint32_t)((rt1 >> 8)) & 0x3ffffff;
R[3] = (uint32_t)((rt1 >> 34) | (rt2 << 10)) & 0x3ffffff;
R[4] = (uint32_t)((rt2 >> 16));
}
st->flags = 0;
st->leftover = 0U;
}
/*
 * Core SSE2 block function.
 *
 * Message data is consumed 32 bytes at a time, with the two 16-byte halves
 * [Mx,My] held in the lower and upper 64-bit lanes of each vector. The
 * accumulator is kept as five limbs per lane (H0..H4), masked with a 26-bit
 * mask during reduction.
 *
 * st->flags controls the behaviour:
 *  - poly1305_started clear: the first 32 bytes are loaded straight into H
 *    and the flag is set; otherwise H is reloaded from st->H.hh.
 *  - poly1305_final_shift8 / poly1305_final_shift16: the 2^128 bit (HIBIT) is
 *    removed from the upper lane / from both lanes.
 *  - poly1305_final_r2_r / poly1305_final_r_1: the multiplier is [r^2,r] or
 *    [r,1] instead of [r^2,r^2].
 *
 * When m is non-NULL the per-lane accumulator is written back to st->H.hh.
 * When m is NULL (as poly1305_finish_ext does for its last call) no message
 * is added: the two lanes are summed, reduced modulo 2^130 - 5 and stored in
 * st->H.h as three 44/44/42-bit limbs.
 */
static POLY1305_NOINLINE void
poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
unsigned long long bytes)
{
/* HIBIT: bit 24 of limb 4 in each lane, i.e. 2^128 (limb 4 starts at bit 104) */
CRYPTO_ALIGN(64)
xmmi HIBIT =
_mm_shuffle_epi32(_mm_cvtsi32_si128(1 << 24), _MM_SHUFFLE(1, 0, 1, 0));
const xmmi MMASK = _mm_shuffle_epi32(_mm_cvtsi32_si128((1 << 26) - 1),
_MM_SHUFFLE(1, 0, 1, 0));
const xmmi FIVE =
_mm_shuffle_epi32(_mm_cvtsi32_si128(5), _MM_SHUFFLE(1, 0, 1, 0));
xmmi H0, H1, H2, H3, H4;
xmmi T0, T1, T2, T3, T4, T5, T6, T7, T8;
xmmi M0, M1, M2, M3, M4;
xmmi M5, M6, M7, M8;
xmmi C1, C2;
xmmi R20, R21, R22, R23, R24, S21, S22, S23, S24;
xmmi R40, R41, R42, R43, R44, S41, S42, S43, S44;
/* final-block handling: keep HIBIT only in the lower lane, or in neither */
if (st->flags & poly1305_final_shift8) {
HIBIT = _mm_srli_si128(HIBIT, 8);
}
if (st->flags & poly1305_final_shift16) {
HIBIT = _mm_setzero_si128();
}
if (!(st->flags & poly1305_started)) {
/* H = [Mx,My] */
T5 = _mm_unpacklo_epi64(
_mm_loadl_epi64((const xmmi *) (const void *) (m + 0)),
_mm_loadl_epi64((const xmmi *) (const void *) (m + 16)));
T6 = _mm_unpacklo_epi64(
_mm_loadl_epi64((const xmmi *) (const void *) (m + 8)),
_mm_loadl_epi64((const xmmi *) (const void *) (m + 24)));
H0 = _mm_and_si128(MMASK, T5);
H1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
H2 = _mm_and_si128(MMASK, T5);
H3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
H4 = _mm_srli_epi64(T6, 40);
H4 = _mm_or_si128(H4, HIBIT);
m += 32;
bytes -= 32;
st->flags |= poly1305_started;
} else {
/* reload the per-lane accumulator saved by a previous call */
T0 = _mm_loadu_si128((const xmmi *) (const void *) &st->H.hh[0]);
T1 = _mm_loadu_si128((const xmmi *) (const void *) &st->H.hh[4]);
T2 = _mm_loadu_si128((const xmmi *) (const void *) &st->H.hh[8]);
H0 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(1, 1, 0, 0));
H1 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 3, 2, 2));
H2 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(1, 1, 0, 0));
H3 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(3, 3, 2, 2));
H4 = _mm_shuffle_epi32(T2, _MM_SHUFFLE(1, 1, 0, 0));
}
/* choose the multiplier held in R20..R24 */
if (st->flags & (poly1305_final_r2_r | poly1305_final_r_1)) {
if (st->flags & poly1305_final_r2_r) {
/* use [r^2, r] */
T2 = _mm_loadu_si128((const xmmi *) (const void *) &st->R[0]);
T3 = _mm_cvtsi32_si128(st->R[4]);
T0 = _mm_loadu_si128((const xmmi *) (const void *) &st->R2[0]);
T1 = _mm_cvtsi32_si128(st->R2[4]);
T4 = _mm_unpacklo_epi32(T0, T2);
T5 = _mm_unpackhi_epi32(T0, T2);
R24 = _mm_unpacklo_epi64(T1, T3);
} else {
/* use [r^1, 1] */
T0 = _mm_loadu_si128((const xmmi *) (const void *) &st->R[0]);
T1 = _mm_cvtsi32_si128(st->R[4]);
T2 = _mm_cvtsi32_si128(1);
T4 = _mm_unpacklo_epi32(T0, T2);
T5 = _mm_unpackhi_epi32(T0, T2);
R24 = T1;
}
R20 = _mm_shuffle_epi32(T4, _MM_SHUFFLE(1, 1, 0, 0));
R21 = _mm_shuffle_epi32(T4, _MM_SHUFFLE(3, 3, 2, 2));
R22 = _mm_shuffle_epi32(T5, _MM_SHUFFLE(1, 1, 0, 0));
R23 = _mm_shuffle_epi32(T5, _MM_SHUFFLE(3, 3, 2, 2));
} else {
/* use [r^2, r^2] */
T0 = _mm_loadu_si128((const xmmi *) (const void *) &st->R2[0]);
T1 = _mm_cvtsi32_si128(st->R2[4]);
R20 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(0, 0, 0, 0));
R21 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(1, 1, 1, 1));
R22 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(2, 2, 2, 2));
R23 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 3, 3, 3));
R24 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(0, 0, 0, 0));
}
/* S2x = 5 * R2x, used for the product terms that wrap around */
S21 = _mm_mul_epu32(R21, FIVE);
S22 = _mm_mul_epu32(R22, FIVE);
S23 = _mm_mul_epu32(R23, FIVE);
S24 = _mm_mul_epu32(R24, FIVE);
/* main loop: 64 bytes per iteration, using r^4 for H and r^2 for the first 32 bytes */
if (bytes >= 64) {
T0 = _mm_loadu_si128((const xmmi *) (const void *) &st->R4[0]);
T1 = _mm_cvtsi32_si128(st->R4[4]);
R40 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(0, 0, 0, 0));
R41 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(1, 1, 1, 1));
R42 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(2, 2, 2, 2));
R43 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 3, 3, 3));
R44 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(0, 0, 0, 0));
S41 = _mm_mul_epu32(R41, FIVE);
S42 = _mm_mul_epu32(R42, FIVE);
S43 = _mm_mul_epu32(R43, FIVE);
S44 = _mm_mul_epu32(R44, FIVE);
while (bytes >= 64) {
xmmi v00, v01, v02, v03, v04;
xmmi v10, v11, v12, v13, v14;
xmmi v20, v21, v22, v23, v24;
xmmi v30, v31, v32, v33, v34;
xmmi v40, v41, v42, v43, v44;
xmmi T14, T15;
/* H *= [r^4,r^4], preload [Mx,My] */
T15 = S42;
T0 = H4;
T0 = _mm_mul_epu32(T0, S41);
v01 = H3;
v01 = _mm_mul_epu32(v01, T15);
T14 = S43;
T1 = H4;
T1 = _mm_mul_epu32(T1, T15);
v11 = H3;
v11 = _mm_mul_epu32(v11, T14);
T2 = H4;
T2 = _mm_mul_epu32(T2, T14);
T0 = _mm_add_epi64(T0, v01);
T15 = S44;
v02 = H2;
v02 = _mm_mul_epu32(v02, T14);
T3 = H4;
T3 = _mm_mul_epu32(T3, T15);
T1 = _mm_add_epi64(T1, v11);
v03 = H1;
v03 = _mm_mul_epu32(v03, T15);
v12 = H2;
v12 = _mm_mul_epu32(v12, T15);
T0 = _mm_add_epi64(T0, v02);
T14 = R40;
v21 = H3;
v21 = _mm_mul_epu32(v21, T15);
v31 = H3;
v31 = _mm_mul_epu32(v31, T14);
T0 = _mm_add_epi64(T0, v03);
T4 = H4;
T4 = _mm_mul_epu32(T4, T14);
T1 = _mm_add_epi64(T1, v12);
v04 = H0;
v04 = _mm_mul_epu32(v04, T14);
T2 = _mm_add_epi64(T2, v21);
v13 = H1;
v13 = _mm_mul_epu32(v13, T14);
T3 = _mm_add_epi64(T3, v31);
T15 = R41;
v22 = H2;
v22 = _mm_mul_epu32(v22, T14);
v32 = H2;
v32 = _mm_mul_epu32(v32, T15);
T0 = _mm_add_epi64(T0, v04);
v41 = H3;
v41 = _mm_mul_epu32(v41, T15);
T1 = _mm_add_epi64(T1, v13);
v14 = H0;
v14 = _mm_mul_epu32(v14, T15);
T2 = _mm_add_epi64(T2, v22);
T14 = R42;
T5 = _mm_unpacklo_epi64(
_mm_loadl_epi64((const xmmi *) (const void *) (m + 0)),
_mm_loadl_epi64((const xmmi *) (const void *) (m + 16)));
v23 = H1;
v23 = _mm_mul_epu32(v23, T15);
T3 = _mm_add_epi64(T3, v32);
v33 = H1;
v33 = _mm_mul_epu32(v33, T14);
T4 = _mm_add_epi64(T4, v41);
v42 = H2;
v42 = _mm_mul_epu32(v42, T14);
T1 = _mm_add_epi64(T1, v14);
T15 = R43;
T6 = _mm_unpacklo_epi64(
_mm_loadl_epi64((const xmmi *) (const void *) (m + 8)),
_mm_loadl_epi64((const xmmi *) (const void *) (m + 24)));
v24 = H0;
v24 = _mm_mul_epu32(v24, T14);
T2 = _mm_add_epi64(T2, v23);
v34 = H0;
v34 = _mm_mul_epu32(v34, T15);
T3 = _mm_add_epi64(T3, v33);
M0 = _mm_and_si128(MMASK, T5);
v43 = H1;
v43 = _mm_mul_epu32(v43, T15);
T4 = _mm_add_epi64(T4, v42);
M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
v44 = H0;
v44 = _mm_mul_epu32(v44, R44);
T2 = _mm_add_epi64(T2, v24);
T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
T3 = _mm_add_epi64(T3, v34);
M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T6, 14));
T4 = _mm_add_epi64(T4, v43);
M2 = _mm_and_si128(MMASK, T5);
T4 = _mm_add_epi64(T4, v44);
M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
/* H += [Mx',My'] */
T5 = _mm_loadu_si128((const xmmi *) (const void *) (m + 32));
T6 = _mm_loadu_si128((const xmmi *) (const void *) (m + 48));
T7 = _mm_unpacklo_epi32(T5, T6);
T8 = _mm_unpackhi_epi32(T5, T6);
M5 = _mm_unpacklo_epi32(T7, _mm_setzero_si128());
M6 = _mm_unpackhi_epi32(T7, _mm_setzero_si128());
M7 = _mm_unpacklo_epi32(T8, _mm_setzero_si128());
M8 = _mm_unpackhi_epi32(T8, _mm_setzero_si128());
M6 = _mm_slli_epi64(M6, 6);
M7 = _mm_slli_epi64(M7, 12);
M8 = _mm_slli_epi64(M8, 18);
T0 = _mm_add_epi64(T0, M5);
T1 = _mm_add_epi64(T1, M6);
T2 = _mm_add_epi64(T2, M7);
T3 = _mm_add_epi64(T3, M8);
T4 = _mm_add_epi64(T4, HIBIT);
/* H += [Mx,My]*[r^2,r^2] */
T15 = S22;
v00 = M4;
v00 = _mm_mul_epu32(v00, S21);
v01 = M3;
v01 = _mm_mul_epu32(v01, T15);
T14 = S23;
v10 = M4;
v10 = _mm_mul_epu32(v10, T15);
v11 = M3;
v11 = _mm_mul_epu32(v11, T14);
T0 = _mm_add_epi64(T0, v00);
v20 = M4;
v20 = _mm_mul_epu32(v20, T14);
T0 = _mm_add_epi64(T0, v01);
T15 = S24;
v02 = M2;
v02 = _mm_mul_epu32(v02, T14);
T1 = _mm_add_epi64(T1, v10);
v30 = M4;
v30 = _mm_mul_epu32(v30, T15);
T1 = _mm_add_epi64(T1, v11);
v03 = M1;
v03 = _mm_mul_epu32(v03, T15);
T2 = _mm_add_epi64(T2, v20);
v12 = M2;
v12 = _mm_mul_epu32(v12, T15);
T0 = _mm_add_epi64(T0, v02);
T14 = R20;
v21 = M3;
v21 = _mm_mul_epu32(v21, T15);
T3 = _mm_add_epi64(T3, v30);
v31 = M3;
v31 = _mm_mul_epu32(v31, T14);
T0 = _mm_add_epi64(T0, v03);
v40 = M4;
v40 = _mm_mul_epu32(v40, T14);
T1 = _mm_add_epi64(T1, v12);
v04 = M0;
v04 = _mm_mul_epu32(v04, T14);
T2 = _mm_add_epi64(T2, v21);
v13 = M1;
v13 = _mm_mul_epu32(v13, T14);
T3 = _mm_add_epi64(T3, v31);
T15 = R21;
v22 = M2;
v22 = _mm_mul_epu32(v22, T14);
T4 = _mm_add_epi64(T4, v40);
v32 = M2;
v32 = _mm_mul_epu32(v32, T15);
T0 = _mm_add_epi64(T0, v04);
v41 = M3;
v41 = _mm_mul_epu32(v41, T15);
T1 = _mm_add_epi64(T1, v13);
v14 = M0;
v14 = _mm_mul_epu32(v14, T15);
T2 = _mm_add_epi64(T2, v22);
T14 = R22;
v23 = M1;
v23 = _mm_mul_epu32(v23, T15);
T3 = _mm_add_epi64(T3, v32);
v33 = M1;
v33 = _mm_mul_epu32(v33, T14);
T4 = _mm_add_epi64(T4, v41);
v42 = M2;
v42 = _mm_mul_epu32(v42, T14);
T1 = _mm_add_epi64(T1, v14);
T15 = R23;
v24 = M0;
v24 = _mm_mul_epu32(v24, T14);
T2 = _mm_add_epi64(T2, v23);
v34 = M0;
v34 = _mm_mul_epu32(v34, T15);
T3 = _mm_add_epi64(T3, v33);
v43 = M1;
v43 = _mm_mul_epu32(v43, T15);
T4 = _mm_add_epi64(T4, v42);
v44 = M0;
v44 = _mm_mul_epu32(v44, R24);
T2 = _mm_add_epi64(T2, v24);
T3 = _mm_add_epi64(T3, v34);
T4 = _mm_add_epi64(T4, v43);
T4 = _mm_add_epi64(T4, v44);
/* reduce */
C1 = _mm_srli_epi64(T0, 26);
C2 = _mm_srli_epi64(T3, 26);
T0 = _mm_and_si128(T0, MMASK);
T3 = _mm_and_si128(T3, MMASK);
T1 = _mm_add_epi64(T1, C1);
T4 = _mm_add_epi64(T4, C2);
C1 = _mm_srli_epi64(T1, 26);
C2 = _mm_srli_epi64(T4, 26);
T1 = _mm_and_si128(T1, MMASK);
T4 = _mm_and_si128(T4, MMASK);
T2 = _mm_add_epi64(T2, C1);
T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
C1 = _mm_srli_epi64(T2, 26);
C2 = _mm_srli_epi64(T0, 26);
T2 = _mm_and_si128(T2, MMASK);
T0 = _mm_and_si128(T0, MMASK);
T3 = _mm_add_epi64(T3, C1);
T1 = _mm_add_epi64(T1, C2);
C1 = _mm_srli_epi64(T3, 26);
T3 = _mm_and_si128(T3, MMASK);
T4 = _mm_add_epi64(T4, C1);
/* Final: H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx',My']) */
H0 = T0;
H1 = T1;
H2 = T2;
H3 = T3;
H4 = T4;
m += 64;
bytes -= 64;
}
}
/* tail: one more multiplication by the chosen multiplier, plus 32 message bytes if m is set */
if (bytes >= 32) {
xmmi v01, v02, v03, v04;
xmmi v11, v12, v13, v14;
xmmi v21, v22, v23, v24;
xmmi v31, v32, v33, v34;
xmmi v41, v42, v43, v44;
xmmi T14, T15;
/* H *= [r^2,r^2] */
T15 = S22;
T0 = H4;
T0 = _mm_mul_epu32(T0, S21);
v01 = H3;
v01 = _mm_mul_epu32(v01, T15);
T14 = S23;
T1 = H4;
T1 = _mm_mul_epu32(T1, T15);
v11 = H3;
v11 = _mm_mul_epu32(v11, T14);
T2 = H4;
T2 = _mm_mul_epu32(T2, T14);
T0 = _mm_add_epi64(T0, v01);
T15 = S24;
v02 = H2;
v02 = _mm_mul_epu32(v02, T14);
T3 = H4;
T3 = _mm_mul_epu32(T3, T15);
T1 = _mm_add_epi64(T1, v11);
v03 = H1;
v03 = _mm_mul_epu32(v03, T15);
v12 = H2;
v12 = _mm_mul_epu32(v12, T15);
T0 = _mm_add_epi64(T0, v02);
T14 = R20;
v21 = H3;
v21 = _mm_mul_epu32(v21, T15);
v31 = H3;
v31 = _mm_mul_epu32(v31, T14);
T0 = _mm_add_epi64(T0, v03);
T4 = H4;
T4 = _mm_mul_epu32(T4, T14);
T1 = _mm_add_epi64(T1, v12);
v04 = H0;
v04 = _mm_mul_epu32(v04, T14);
T2 = _mm_add_epi64(T2, v21);
v13 = H1;
v13 = _mm_mul_epu32(v13, T14);
T3 = _mm_add_epi64(T3, v31);
T15 = R21;
v22 = H2;
v22 = _mm_mul_epu32(v22, T14);
v32 = H2;
v32 = _mm_mul_epu32(v32, T15);
T0 = _mm_add_epi64(T0, v04);
v41 = H3;
v41 = _mm_mul_epu32(v41, T15);
T1 = _mm_add_epi64(T1, v13);
v14 = H0;
v14 = _mm_mul_epu32(v14, T15);
T2 = _mm_add_epi64(T2, v22);
T14 = R22;
v23 = H1;
v23 = _mm_mul_epu32(v23, T15);
T3 = _mm_add_epi64(T3, v32);
v33 = H1;
v33 = _mm_mul_epu32(v33, T14);
T4 = _mm_add_epi64(T4, v41);
v42 = H2;
v42 = _mm_mul_epu32(v42, T14);
T1 = _mm_add_epi64(T1, v14);
T15 = R23;
v24 = H0;
v24 = _mm_mul_epu32(v24, T14);
T2 = _mm_add_epi64(T2, v23);
v34 = H0;
v34 = _mm_mul_epu32(v34, T15);
T3 = _mm_add_epi64(T3, v33);
v43 = H1;
v43 = _mm_mul_epu32(v43, T15);
T4 = _mm_add_epi64(T4, v42);
v44 = H0;
v44 = _mm_mul_epu32(v44, R24);
T2 = _mm_add_epi64(T2, v24);
T3 = _mm_add_epi64(T3, v34);
T4 = _mm_add_epi64(T4, v43);
T4 = _mm_add_epi64(T4, v44);
/* H += [Mx,My] */
if (m) {
T5 = _mm_loadu_si128((const xmmi *) (const void *) (m + 0));
T6 = _mm_loadu_si128((const xmmi *) (const void *) (m + 16));
T7 = _mm_unpacklo_epi32(T5, T6);
T8 = _mm_unpackhi_epi32(T5, T6);
M0 = _mm_unpacklo_epi32(T7, _mm_setzero_si128());
M1 = _mm_unpackhi_epi32(T7, _mm_setzero_si128());
M2 = _mm_unpacklo_epi32(T8, _mm_setzero_si128());
M3 = _mm_unpackhi_epi32(T8, _mm_setzero_si128());
M1 = _mm_slli_epi64(M1, 6);
M2 = _mm_slli_epi64(M2, 12);
M3 = _mm_slli_epi64(M3, 18);
T0 = _mm_add_epi64(T0, M0);
T1 = _mm_add_epi64(T1, M1);
T2 = _mm_add_epi64(T2, M2);
T3 = _mm_add_epi64(T3, M3);
T4 = _mm_add_epi64(T4, HIBIT);
}
/* reduce */
C1 = _mm_srli_epi64(T0, 26);
C2 = _mm_srli_epi64(T3, 26);
T0 = _mm_and_si128(T0, MMASK);
T3 = _mm_and_si128(T3, MMASK);
T1 = _mm_add_epi64(T1, C1);
T4 = _mm_add_epi64(T4, C2);
C1 = _mm_srli_epi64(T1, 26);
C2 = _mm_srli_epi64(T4, 26);
T1 = _mm_and_si128(T1, MMASK);
T4 = _mm_and_si128(T4, MMASK);
T2 = _mm_add_epi64(T2, C1);
T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
C1 = _mm_srli_epi64(T2, 26);
C2 = _mm_srli_epi64(T0, 26);
T2 = _mm_and_si128(T2, MMASK);
T0 = _mm_and_si128(T0, MMASK);
T3 = _mm_add_epi64(T3, C1);
T1 = _mm_add_epi64(T1, C2);
C1 = _mm_srli_epi64(T3, 26);
T3 = _mm_and_si128(T3, MMASK);
T4 = _mm_add_epi64(T4, C1);
/* H = (H*[r^2,r^2] + [Mx,My]) */
H0 = T0;
H1 = T1;
H2 = T2;
H3 = T3;
H4 = T4;
}
if (m) {
/* more data may follow: save the per-lane accumulator as ten 32-bit words */
T0 = _mm_shuffle_epi32(H0, _MM_SHUFFLE(0, 0, 2, 0));
T1 = _mm_shuffle_epi32(H1, _MM_SHUFFLE(0, 0, 2, 0));
T2 = _mm_shuffle_epi32(H2, _MM_SHUFFLE(0, 0, 2, 0));
T3 = _mm_shuffle_epi32(H3, _MM_SHUFFLE(0, 0, 2, 0));
T4 = _mm_shuffle_epi32(H4, _MM_SHUFFLE(0, 0, 2, 0));
T0 = _mm_unpacklo_epi64(T0, T1);
T1 = _mm_unpacklo_epi64(T2, T3);
_mm_storeu_si128((xmmi *) (void *) &st->H.hh[0], T0);
_mm_storeu_si128((xmmi *) (void *) &st->H.hh[4], T1);
_mm_storel_epi64((xmmi *) (void *) &st->H.hh[8], T4);
} else {
/* m == NULL: collapse both lanes into one fully reduced value */
uint32_t t0, t1, t2, t3, t4, b;
uint64_t h0, h1, h2, g0, g1, g2, c, nc;
/* H = H[0]+H[1] */
T0 = H0;
T1 = H1;
T2 = H2;
T3 = H3;
T4 = H4;
T0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8));
T1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8));
T2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8));
T3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8));
T4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8));
/* carry through the five 26-bit limbs */
t0 = _mm_cvtsi128_si32(T0);
b = (t0 >> 26);
t0 &= 0x3ffffff;
t1 = _mm_cvtsi128_si32(T1) + b;
b = (t1 >> 26);
t1 &= 0x3ffffff;
t2 = _mm_cvtsi128_si32(T2) + b;
b = (t2 >> 26);
t2 &= 0x3ffffff;
t3 = _mm_cvtsi128_si32(T3) + b;
b = (t3 >> 26);
t3 &= 0x3ffffff;
t4 = _mm_cvtsi128_si32(T4) + b;
/* everything except t4 is in range, so this is all safe */
/* repack into three 44/44/42-bit limbs */
h0 = (((uint64_t) t0) | ((uint64_t) t1 << 26)) & 0xfffffffffffull;
h1 = (((uint64_t) t1 >> 18) | ((uint64_t) t2 << 8) |
((uint64_t) t3 << 34)) &
0xfffffffffffull;
h2 = (((uint64_t) t3 >> 10) | ((uint64_t) t4 << 16));
c = (h2 >> 42);
h2 &= 0x3ffffffffff;
h0 += c * 5;
c = (h0 >> 44);
h0 &= 0xfffffffffff;
h1 += c;
c = (h1 >> 44);
h1 &= 0xfffffffffff;
h2 += c;
c = (h2 >> 42);
h2 &= 0x3ffffffffff;
h0 += c * 5;
c = (h0 >> 44);
h0 &= 0xfffffffffff;
h1 += c;
/* g = h + 5 - 2^130; keep g when that subtraction did not borrow */
g0 = h0 + 5;
c = (g0 >> 44);
g0 &= 0xfffffffffff;
g1 = h1 + c;
c = (g1 >> 44);
g1 &= 0xfffffffffff;
g2 = h2 + c - ((uint64_t) 1 << 42);
/* c = all ones when the top bit of g2 is clear, 0 otherwise */
c = (g2 >> 63) - 1;
nc = ~c;
h0 = (h0 & nc) | (g0 & c);
h1 = (h1 & nc) | (g1 & c);
h2 = (h2 & nc) | (g2 & c);
st->H.h[0] = h0;
st->H.h[1] = h1;
st->H.h[2] = h2;
}
}
/*
 * Feed `bytes` bytes of message into the state.
 *
 * Bytes are first used to complete a partially filled st->buffer; a buffer
 * that reaches poly1305_block_size bytes is processed immediately. The
 * largest multiple of poly1305_block_size that remains is passed straight to
 * poly1305_blocks, and whatever is left over is stored in st->buffer for a
 * later call.
 */
static void
poly1305_update(poly1305_state_internal_t *st, const unsigned char *m,
                unsigned long long bytes)
{
    unsigned long long k;

    /* top up a previously buffered partial block */
    if (st->leftover != 0) {
        unsigned long long take = poly1305_block_size - st->leftover;

        if (bytes < take) {
            take = bytes;
        }
        for (k = 0; k < take; k++) {
            st->buffer[st->leftover + k] = m[k];
        }
        m += take;
        bytes -= take;
        st->leftover += take;
        if (st->leftover < poly1305_block_size) {
            return;
        }
        poly1305_blocks(st, st->buffer, poly1305_block_size);
        st->leftover = 0;
    }

    /* hand all complete blocks to the block function in one call */
    if (bytes >= poly1305_block_size) {
        const unsigned long long whole = bytes & ~(poly1305_block_size - 1);

        poly1305_blocks(st, m, whole);
        m += whole;
        bytes -= whole;
    }

    /* keep the tail for the next call */
    if (bytes != 0) {
        for (k = 0; k < bytes; k++) {
            st->buffer[st->leftover + k] = m[k];
        }
        st->leftover += bytes;
    }
}
/*
 * Absorb the last `leftover` bytes at `m`, produce the 16-byte tag in `mac`
 * and wipe the state.
 *
 * st        state to finalise (zeroed on return)
 * m         remaining message bytes; only read when leftover != 0
 * leftover  number of remaining bytes; they are copied into a 32-byte
 *           scratch block, so callers pass fewer than 32
 * mac       receives h + pad modulo 2^128, copied with memcpy from two
 *           64-bit words (i.e. in host byte order)
 */
static POLY1305_NOINLINE void
poly1305_finish_ext(poly1305_state_internal_t *st, const unsigned char *m,
unsigned long long leftover, unsigned char mac[16])
{
uint64_t h0, h1, h2;
if (leftover) {
CRYPTO_ALIGN(16) unsigned char final[32] = { 0 };
poly1305_block_copy31(final, m, leftover);
/* exactly 16 bytes form one complete block and need no explicit padding byte */
if (leftover != 16) {
final[leftover] = 1;
}
/* >= 16 bytes: only the first half is a full block (keeps HIBIT); otherwise neither half gets it */
st->flags |=
(leftover >= 16) ? poly1305_final_shift8 : poly1305_final_shift16;
poly1305_blocks(st, final, 32);
}
if (st->flags & poly1305_started) {
/* finalize, H *= [r^2,r], or H *= [r,1] */
if (!leftover || (leftover > 16)) {
st->flags |= poly1305_final_r2_r;
} else {
st->flags |= poly1305_final_r_1;
}
/* m == NULL: multiply only, then collapse the lanes into st->H.h */
poly1305_blocks(st, NULL, 32);
}
h0 = st->H.h[0];
h1 = st->H.h[1];
h2 = st->H.h[2];
/* pad */
/* repack the 44/44/42-bit limbs into two 64-bit words before adding the pad */
h0 = ((h0) | (h1 << 44));
h1 = ((h1 >> 20) | (h2 << 24));
#ifdef HAVE_AMD64_ASM
/* 128-bit addition of pad: add the low words, then add the high words with carry */
__asm__ __volatile__(
"addq %2, %0 ;\n"
"adcq %3, %1 ;\n"
: "+r"(h0), "+r"(h1)
: "r"(st->pad[0]), "r"(st->pad[1])
: "flags", "cc");
#else
{
/* same 128-bit addition, expressed with uint128_t */
uint128_t h;
memcpy(&h, &st->pad[0], 16);
h += ((uint128_t) h1 << 64) | h0;
h0 = (uint64_t) h;
h1 = (uint64_t)(h >> 64);
}
#endif
/* zero the first 8 * 16 = 128 bytes of the state with vector stores */
_mm_storeu_si128((xmmi *) (void *) st + 0, _mm_setzero_si128());
_mm_storeu_si128((xmmi *) (void *) st + 1, _mm_setzero_si128());
_mm_storeu_si128((xmmi *) (void *) st + 2, _mm_setzero_si128());
_mm_storeu_si128((xmmi *) (void *) st + 3, _mm_setzero_si128());
_mm_storeu_si128((xmmi *) (void *) st + 4, _mm_setzero_si128());
_mm_storeu_si128((xmmi *) (void *) st + 5, _mm_setzero_si128());
_mm_storeu_si128((xmmi *) (void *) st + 6, _mm_setzero_si128());
_mm_storeu_si128((xmmi *) (void *) st + 7, _mm_setzero_si128());
memcpy(&mac[0], &h0, 8);
memcpy(&mac[8], &h1, 8);
/* then wipe the complete structure */
sodium_memzero((void *) st, sizeof *st);
}
/*
 * Finalise a streaming computation: the bytes still held in st->buffer are
 * passed to poly1305_finish_ext as the trailing message data.
 */
static void
poly1305_finish(poly1305_state_internal_t *st, unsigned char mac[16])
{
    const unsigned char     *pending     = st->buffer;
    const unsigned long long pending_len = st->leftover;

    poly1305_finish_ext(st, pending, pending_len, mac);
}
/*
 * Streaming init for the SSE2 backend. The public state object is used as
 * storage for the internal state (a compile-time check ensures it is large
 * enough); the message length is passed as 0. Always returns 0.
 */
static int
crypto_onetimeauth_poly1305_sse2_init(crypto_onetimeauth_poly1305_state *state,
                                      const unsigned char               *key)
{
    poly1305_state_internal_t *st;

    COMPILER_ASSERT(sizeof(crypto_onetimeauth_poly1305_state) >=
                    sizeof(poly1305_state_internal_t));
    st = (poly1305_state_internal_t *) (void *) state;
    poly1305_init_ext(st, key, 0U);

    return 0;
}
/*
 * Streaming update for the SSE2 backend: reinterprets the public state as the
 * internal one and forwards to poly1305_update. Always returns 0.
 */
static int
crypto_onetimeauth_poly1305_sse2_update(
    crypto_onetimeauth_poly1305_state *state, const unsigned char *in,
    unsigned long long inlen)
{
    poly1305_state_internal_t *st =
        (poly1305_state_internal_t *) (void *) state;

    poly1305_update(st, in, inlen);

    return 0;
}
/*
 * Streaming finalisation for the SSE2 backend: reinterprets the public state
 * as the internal one and forwards to poly1305_finish. Always returns 0.
 */
static int
crypto_onetimeauth_poly1305_sse2_final(crypto_onetimeauth_poly1305_state *state,
                                       unsigned char                     *out)
{
    poly1305_state_internal_t *st =
        (poly1305_state_internal_t *) (void *) state;

    poly1305_finish(st, out);

    return 0;
}
/*
 * One-shot authenticator for the SSE2 backend.
 *
 * The state lives on the stack. The part of the message that is a multiple
 * of 32 bytes goes directly to poly1305_blocks; the remaining 0-31 bytes are
 * handled by poly1305_finish_ext, which also writes the tag to `out`.
 * Always returns 0.
 */
static int
crypto_onetimeauth_poly1305_sse2(unsigned char *out, const unsigned char *m,
                                 unsigned long long   inlen,
                                 const unsigned char *key)
{
    CRYPTO_ALIGN(64) poly1305_state_internal_t st;
    unsigned long long bulk;

    poly1305_init_ext(&st, key, inlen);

    bulk = inlen & ~31;
    if (bulk != 0) {
        poly1305_blocks(&st, m, bulk);
        m += bulk;
        inlen -= bulk;
    }
    poly1305_finish_ext(&st, m, inlen, out);

    return 0;
}
/*
 * One-shot verification for the SSE2 backend: recomputes the tag for `in`
 * under key `k` and returns the result of crypto_verify_16() on the supplied
 * tag `h` and the recomputed one.
 */
static int
crypto_onetimeauth_poly1305_sse2_verify(const unsigned char *h,
                                        const unsigned char *in,
                                        unsigned long long   inlen,
                                        const unsigned char *k)
{
    unsigned char expected[16];

    crypto_onetimeauth_poly1305_sse2(expected, in, inlen, k);

    return crypto_verify_16(h, expected);
}
/*
 * Function table exposing the SSE2 backend. The initialisers are listed in
 * the same order as the members of
 * struct crypto_onetimeauth_poly1305_implementation; SODIUM_C99() wraps the
 * designators (NOTE(review): presumably so they can be dropped on pre-C99
 * compilers, which is why the order has to match - confirm against its
 * definition).
 */
struct crypto_onetimeauth_poly1305_implementation
crypto_onetimeauth_poly1305_sse2_implementation = {
SODIUM_C99(.onetimeauth =) crypto_onetimeauth_poly1305_sse2,
SODIUM_C99(.onetimeauth_verify =)
crypto_onetimeauth_poly1305_sse2_verify,
SODIUM_C99(.onetimeauth_init =) crypto_onetimeauth_poly1305_sse2_init,
SODIUM_C99(.onetimeauth_update =)
crypto_onetimeauth_poly1305_sse2_update,
SODIUM_C99(.onetimeauth_final =) crypto_onetimeauth_poly1305_sse2_final
};
#endif

View File

@@ -0,0 +1,12 @@
#ifndef poly1305_sse2_H
#define poly1305_sse2_H
#include <stddef.h>
#include "../onetimeauth_poly1305.h"
#include "crypto_onetimeauth_poly1305.h"
/* Function table of the SSE2 Poly1305 backend; defined in the SSE2 source file. */
extern struct crypto_onetimeauth_poly1305_implementation
crypto_onetimeauth_poly1305_sse2_implementation;
#endif /* poly1305_sse2_H */

View File

@@ -0,0 +1,556 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#ifdef HAVE_SYS_MMAN_H
# include <sys/mman.h>
#endif
#include "crypto_generichash_blake2b.h"
#include "private/common.h"
#include "private/implementations.h"
#include "runtime.h"
#include "utils.h"
#include "argon2-core.h"
#include "blake2b-long.h"
/* Platforms that only spell the flag MAP_ANONYMOUS: alias it to MAP_ANON. */
#if !defined(MAP_ANON) && defined(MAP_ANONYMOUS)
# define MAP_ANON MAP_ANONYMOUS
#endif
/* MAP_NOCORE falls back to MAP_CONCEAL when that exists, otherwise to 0 so
 * that OR-ing it into the mmap() flags has no effect. */
#ifndef MAP_NOCORE
# ifdef MAP_CONCEAL
# define MAP_NOCORE MAP_CONCEAL
# else
# define MAP_NOCORE 0
# endif
#endif
/* Same idea for MAP_POPULATE: a 0 flag where the platform lacks it. */
#ifndef MAP_POPULATE
# define MAP_POPULATE 0
#endif
/* Segment-filling routine used by argon2_fill_memory_blocks(). Starts out as
 * the reference implementation and is reassigned by
 * argon2_pick_best_implementation(). */
static fill_segment_fn fill_segment = argon2_fill_segment_ref;
/*
 * Deserialize ARGON2_QWORDS_IN_BLOCK little-endian 64-bit words from the
 * byte buffer `input` into the words of `dst`.
 */
static void
load_block(block *dst, const void *input)
{
    const uint8_t *src_bytes = (const uint8_t *) input;
    unsigned       word;

    for (word = 0; word < ARGON2_QWORDS_IN_BLOCK; word++) {
        dst->v[word] = LOAD64_LE(src_bytes + word * sizeof(dst->v[word]));
    }
}
/*
 * Serialize the ARGON2_QWORDS_IN_BLOCK words of `src` into the byte buffer
 * `output`, each word as 8 little-endian bytes.
 */
static void
store_block(void *output, const block *src)
{
    uint8_t *dst_bytes = (uint8_t *) output;
    unsigned word;

    for (word = 0; word < ARGON2_QWORDS_IN_BLOCK; word++) {
        STORE64_LE(dst_bytes + word * sizeof(src->v[word]), src->v[word]);
    }
}
/***************Memory allocators*****************/
/* Allocates a region descriptor together with the block memory it describes
 * @param region pointer to the pointer that receives the new region
 * @param m_cost number of blocks to allocate in the memory
 * @return ARGON2_OK if @region is a valid pointer and memory is allocated,
 * ARGON2_MEMORY_ALLOCATION_ERROR otherwise. When the descriptor was allocated
 * but the block memory could not be, the descriptor is freed and *region is
 * set to NULL; on the earlier argument checks *region is left untouched.
 */
static int allocate_memory(block_region **region, uint32_t m_cost);
static int
allocate_memory(block_region **region, uint32_t m_cost)
{
void *base;
block *memory;
size_t memory_size;
if (region == NULL) {
return ARGON2_MEMORY_ALLOCATION_ERROR; /* LCOV_EXCL_LINE */
}
memory_size = sizeof(block) * m_cost;
/* Reject an empty request, and detect overflow of the multiplication above
 * by dividing the product back */
if (m_cost == 0 || memory_size / m_cost != sizeof(block)) {
return ARGON2_MEMORY_ALLOCATION_ERROR; /* LCOV_EXCL_LINE */
}
*region = (block_region *) malloc(sizeof(block_region));
if (*region == NULL) {
return ARGON2_MEMORY_ALLOCATION_ERROR; /* LCOV_EXCL_LINE */
}
(*region)->base = (*region)->memory = NULL;
#if defined(MAP_ANON) && defined(HAVE_MMAP)
/* Preferred path: anonymous private mapping; the mapping start is used
 * directly as the block array */
if ((base = mmap(NULL, memory_size, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_PRIVATE | MAP_NOCORE | MAP_POPULATE,
-1, 0)) == MAP_FAILED) {
base = NULL; /* LCOV_EXCL_LINE */
} /* LCOV_EXCL_LINE */
memory = (block *) base;
#elif defined(HAVE_POSIX_MEMALIGN)
/* Second choice: heap allocation with a requested alignment of 64 bytes */
if ((errno = posix_memalign((void **) &base, 64, memory_size)) != 0) {
base = NULL;
}
memory = (block *) base;
#else
/* Fallback: over-allocate by 63 bytes with malloc() and round the pointer up
 * to a multiple of 64. `base` keeps the pointer that must later be passed to
 * free(), `memory` is the aligned start of the blocks. */
memory = NULL;
if (memory_size + 63 < memory_size) {
base = NULL;
errno = ENOMEM;
} else if ((base = malloc(memory_size + 63)) != NULL) {
uint8_t *aligned = ((uint8_t *) base) + 63;
aligned -= (uintptr_t) aligned & 63;
memory = (block *) aligned;
}
#endif
if (base == NULL) {
/* LCOV_EXCL_START */
free(*region);
*region = NULL;
return ARGON2_MEMORY_ALLOCATION_ERROR;
/* LCOV_EXCL_STOP */
}
(*region)->base = base;
(*region)->memory = memory;
(*region)->size = memory_size;
return ARGON2_OK;
}
/*********Memory functions*/
/* Wipes the working buffers of an instance
 * @param instance pointer to the current instance
 * @param clear non-zero to overwrite the block memory and the pseudo-random
 * buffer with zeros; zero to leave both untouched
 */
static void clear_memory(argon2_instance_t *instance, int clear);
static void
clear_memory(argon2_instance_t *instance, int clear)
{
    /* LCOV_EXCL_START */
    if (!clear) {
        return;
    }
    if (instance->region != NULL) {
        sodium_memzero(instance->region->memory,
                       sizeof(block) * instance->memory_blocks);
    }
    if (instance->pseudo_rands != NULL) {
        sodium_memzero(instance->pseudo_rands,
                       sizeof(uint64_t) * instance->segment_length);
    }
    /* LCOV_EXCL_STOP */
}
/* Deallocates a memory region
 * @param region region descriptor to release; NULL is accepted and ignored
 *
 * Releases the block memory referenced by region->base (unmapped when it was
 * obtained with mmap(), freed otherwise) and then the descriptor itself.
 * The descriptor is released even when munmap() reports an error, so that a
 * failed unmap does not additionally leak the descriptor.
 */
static void free_memory(block_region *region);
static void
free_memory(block_region *region)
{
    if (region == NULL) {
        return;
    }
    if (region->base != NULL) {
#if defined(MAP_ANON) && defined(HAVE_MMAP)
        /* Nothing useful can be done if unmapping fails; the result is
         * ignored on purpose and cleanup continues. */
        (void) munmap(region->base, region->size);
#else
        free(region->base);
#endif
    }
    free(region);
}
/*
 * Release every buffer owned by `instance`. When `flags` contains
 * ARGON2_FLAG_CLEAR_MEMORY the buffers are zeroed first. Both pointers in
 * the instance are reset to NULL afterwards.
 */
static void
argon2_free_instance(argon2_instance_t *instance, int flags)
{
    const int wipe_first = flags & ARGON2_FLAG_CLEAR_MEMORY;

    /* Wiping has to happen while the buffers are still allocated */
    clear_memory(instance, wipe_first);

    free(instance->pseudo_rands);
    instance->pseudo_rands = NULL;

    free_memory(instance->region);
    instance->region = NULL;
}
/*
 * Produce the final tag: XOR together the last block of every lane, hash the
 * serialized result into context->out (context->outlen bytes) with
 * blake2b_long(), wipe the temporaries and release the instance buffers.
 * Does nothing when either pointer is NULL.
 */
void
argon2_finalize(const argon2_context *context, argon2_instance_t *instance)
{
    block    final_block;
    uint8_t  final_bytes[ARGON2_BLOCK_SIZE];
    block   *memory;
    uint32_t lane;

    if (context == NULL || instance == NULL) {
        return;
    }
    memory = instance->region->memory;

    /* Start from the last block of lane 0 ... */
    copy_block(&final_block, memory + instance->lane_length - 1);
    /* ... and fold in the last block of every other lane */
    for (lane = 1; lane < instance->lanes; ++lane) {
        const uint32_t last_in_lane =
            lane * instance->lane_length + (instance->lane_length - 1);

        xor_block(&final_block, memory + last_in_lane);
    }

    /* Hash the combined block into the caller's output buffer */
    store_block(final_bytes, &final_block);
    blake2b_long(context->out, context->outlen, final_bytes,
                 ARGON2_BLOCK_SIZE);

    /* Neither copy of the combined block may survive on the stack */
    sodium_memzero(final_block.v, ARGON2_BLOCK_SIZE);
    sodium_memzero(final_bytes, ARGON2_BLOCK_SIZE);

    argon2_free_instance(instance, context->flags);
}
/*
 * Run one pass over the memory: for every slice, in order, fill the matching
 * segment of every lane with the currently selected fill_segment routine.
 * Returns immediately for a NULL instance or an instance without lanes.
 */
void
argon2_fill_memory_blocks(argon2_instance_t *instance, uint32_t pass)
{
    argon2_position_t where;
    uint32_t          slice;
    uint32_t          lane;

    if (instance == NULL || instance->lanes == 0) {
        return; /* LCOV_EXCL_LINE */
    }

    where.pass = pass;
    for (slice = 0; slice < ARGON2_SYNC_POINTS; ++slice) {
        where.slice = (uint8_t) slice;
        for (lane = 0; lane < instance->lanes; ++lane) {
            where.lane  = lane;
            where.index = 0;
            fill_segment(instance, where);
        }
    }
}
/*
 * Check every field of `context` against the ARGON2_MIN_* / ARGON2_MAX_*
 * limits and return the error code of the first violated rule, or ARGON2_OK
 * when all checks pass. The checks run in a fixed order: context pointer,
 * output, password, salt, secret, associated data, lanes, memory, time,
 * threads.
 */
int
argon2_validate_inputs(const argon2_context *context)
{
    /* LCOV_EXCL_START */
    if (context == NULL) {
        return ARGON2_INCORRECT_PARAMETER;
    }
    if (context->out == NULL) {
        return ARGON2_OUTPUT_PTR_NULL;
    }

    /* Output length */
    if (context->outlen < ARGON2_MIN_OUTLEN) {
        return ARGON2_OUTPUT_TOO_SHORT;
    }
    if (context->outlen > ARGON2_MAX_OUTLEN) {
        return ARGON2_OUTPUT_TOO_LONG;
    }

    /* Password (required): a NULL pointer is only acceptable with length 0 */
    if (context->pwd == NULL && context->pwdlen != 0) {
        return ARGON2_PWD_PTR_MISMATCH;
    }
    if (context->pwdlen < ARGON2_MIN_PWD_LENGTH) {
        return ARGON2_PWD_TOO_SHORT;
    }
    if (context->pwdlen > ARGON2_MAX_PWD_LENGTH) {
        return ARGON2_PWD_TOO_LONG;
    }

    /* Salt (required): same pointer/length rule as the password */
    if (context->salt == NULL && context->saltlen != 0) {
        return ARGON2_SALT_PTR_MISMATCH;
    }
    if (context->saltlen < ARGON2_MIN_SALT_LENGTH) {
        return ARGON2_SALT_TOO_SHORT;
    }
    if (context->saltlen > ARGON2_MAX_SALT_LENGTH) {
        return ARGON2_SALT_TOO_LONG;
    }

    /* Secret (optional): the length limits only apply when it is present */
    if (context->secret == NULL) {
        if (context->secretlen != 0) {
            return ARGON2_SECRET_PTR_MISMATCH;
        }
    } else if (context->secretlen < ARGON2_MIN_SECRET) {
        return ARGON2_SECRET_TOO_SHORT;
    } else if (context->secretlen > ARGON2_MAX_SECRET) {
        return ARGON2_SECRET_TOO_LONG;
    }

    /* Associated data (optional): handled like the secret */
    if (context->ad == NULL) {
        if (context->adlen != 0) {
            return ARGON2_AD_PTR_MISMATCH;
        }
    } else if (context->adlen < ARGON2_MIN_AD_LENGTH) {
        return ARGON2_AD_TOO_SHORT;
    } else if (context->adlen > ARGON2_MAX_AD_LENGTH) {
        return ARGON2_AD_TOO_LONG;
    }

    /* Lanes */
    if (context->lanes < ARGON2_MIN_LANES) {
        return ARGON2_LANES_TOO_FEW;
    }
    if (context->lanes > ARGON2_MAX_LANES) {
        return ARGON2_LANES_TOO_MANY;
    }

    /* Memory cost: absolute limits first, then at least 8 blocks per lane */
    if (context->m_cost < ARGON2_MIN_MEMORY) {
        return ARGON2_MEMORY_TOO_LITTLE;
    }
    if (context->m_cost > ARGON2_MAX_MEMORY) {
        return ARGON2_MEMORY_TOO_MUCH;
    }
    if (context->m_cost < 8 * context->lanes) {
        return ARGON2_MEMORY_TOO_LITTLE;
    }

    /* Time cost */
    if (context->t_cost < ARGON2_MIN_TIME) {
        return ARGON2_TIME_TOO_SMALL;
    }
    if (context->t_cost > ARGON2_MAX_TIME) {
        return ARGON2_TIME_TOO_LARGE;
    }

    /* Threads */
    if (context->threads < ARGON2_MIN_THREADS) {
        return ARGON2_THREADS_TOO_FEW;
    }
    if (context->threads > ARGON2_MAX_THREADS) {
        return ARGON2_THREADS_TOO_MANY;
    }
    /* LCOV_EXCL_STOP */

    return ARGON2_OK;
}
static void
argon2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance)
{
uint32_t l;
/* Make the first and second block in each lane as G(H0||i||0) or
G(H0||i||1) */
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
for (l = 0; l < instance->lanes; ++l) {
STORE32_LE(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
STORE32_LE(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
ARGON2_PREHASH_SEED_LENGTH);
load_block(&instance->region->memory[l * instance->lane_length + 0],
blockhash_bytes);
STORE32_LE(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
ARGON2_PREHASH_SEED_LENGTH);
load_block(&instance->region->memory[l * instance->lane_length + 1],
blockhash_bytes);
}
sodium_memzero(blockhash_bytes, ARGON2_BLOCK_SIZE);
}
static void
argon2_initial_hash(uint8_t *blockhash, argon2_context *context,
argon2_type type)
{
crypto_generichash_blake2b_state BlakeHash;
uint8_t value[4U /* sizeof(uint32_t) */];
if (NULL == context || NULL == blockhash) {
return; /* LCOV_EXCL_LINE */
}
crypto_generichash_blake2b_init(&BlakeHash, NULL, 0U,
ARGON2_PREHASH_DIGEST_LENGTH);
STORE32_LE(value, context->lanes);
crypto_generichash_blake2b_update(&BlakeHash, value, sizeof(value));
STORE32_LE(value, context->outlen);
crypto_generichash_blake2b_update(&BlakeHash, value, sizeof(value));
STORE32_LE(value, context->m_cost);
crypto_generichash_blake2b_update(&BlakeHash, value, sizeof(value));
STORE32_LE(value, context->t_cost);
crypto_generichash_blake2b_update(&BlakeHash, value, sizeof(value));
STORE32_LE(value, ARGON2_VERSION_NUMBER);
crypto_generichash_blake2b_update(&BlakeHash, value, sizeof(value));
STORE32_LE(value, (uint32_t) type);
crypto_generichash_blake2b_update(&BlakeHash, value, sizeof(value));
STORE32_LE(value, context->pwdlen);
crypto_generichash_blake2b_update(&BlakeHash, value, sizeof(value));
if (context->pwd != NULL) {
crypto_generichash_blake2b_update(
&BlakeHash, (const uint8_t *) context->pwd, context->pwdlen);
/* LCOV_EXCL_START */
if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
sodium_memzero(context->pwd, context->pwdlen);
context->pwdlen = 0;
}
/* LCOV_EXCL_STOP */
}
STORE32_LE(value, context->saltlen);
crypto_generichash_blake2b_update(&BlakeHash, value, sizeof(value));
if (context->salt != NULL) {
crypto_generichash_blake2b_update(
&BlakeHash, (const uint8_t *) context->salt, context->saltlen);
}
STORE32_LE(value, context->secretlen);
crypto_generichash_blake2b_update(&BlakeHash, value, sizeof(value));
/* LCOV_EXCL_START */
if (context->secret != NULL) {
crypto_generichash_blake2b_update(
&BlakeHash, (const uint8_t *) context->secret, context->secretlen);
if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
sodium_memzero(context->secret, context->secretlen);
context->secretlen = 0;
}
}
/* LCOV_EXCL_STOP */
STORE32_LE(value, context->adlen);
crypto_generichash_blake2b_update(&BlakeHash, value, sizeof(value));
/* LCOV_EXCL_START */
if (context->ad != NULL) {
crypto_generichash_blake2b_update(
&BlakeHash, (const uint8_t *) context->ad, context->adlen);
}
/* LCOV_EXCL_STOP */
crypto_generichash_blake2b_final(&BlakeHash, blockhash,
ARGON2_PREHASH_DIGEST_LENGTH);
}
/*
 * Prepare `instance` for filling: allocate the pseudo-random buffer and the
 * block memory, pre-hash all inputs of `context`, and create the first two
 * blocks of every lane.
 * Returns ARGON2_OK on success, ARGON2_INCORRECT_PARAMETER when a pointer is
 * NULL, or the allocation error code when memory cannot be obtained (the
 * instance buffers are released again in that case).
 */
int
argon2_initialize(argon2_instance_t *instance, argon2_context *context)
{
    /* H_0 plus 8 extra bytes used to derive the first blocks */
    uint8_t seed[ARGON2_PREHASH_SEED_LENGTH];
    int     rc;

    if (instance == NULL || context == NULL) {
        return ARGON2_INCORRECT_PARAMETER;
    }

    /* 1. Memory allocation */
    instance->pseudo_rands =
        (uint64_t *) malloc(sizeof(uint64_t) * instance->segment_length);
    if (instance->pseudo_rands == NULL) {
        return ARGON2_MEMORY_ALLOCATION_ERROR;
    }
    rc = allocate_memory(&(instance->region), instance->memory_blocks);
    if (rc != ARGON2_OK) {
        argon2_free_instance(instance, context->flags);
        return rc;
    }

    /* 2. Initial hashing of all inputs, then zero the 8 trailing bytes */
    argon2_initial_hash(seed, context, instance->type);
    sodium_memzero(seed + ARGON2_PREHASH_DIGEST_LENGTH,
                   ARGON2_PREHASH_SEED_LENGTH - ARGON2_PREHASH_DIGEST_LENGTH);

    /* 3. First blocks; there are always at least two blocks in a slice */
    argon2_fill_first_blocks(seed, instance);

    /* The pre-hash is no longer needed */
    sodium_memzero(seed, ARGON2_PREHASH_SEED_LENGTH);

    return ARGON2_OK;
}
/*
 * Point fill_segment at the widest SIMD implementation that was both
 * compiled in (HAVE_*_H macros) and is reported by the sodium_runtime_has_*()
 * checks, trying AVX-512F, then AVX2, then SSSE3, and falling back to the
 * reference implementation. Always returns 0.
 */
static int
argon2_pick_best_implementation(void)
{
/* LCOV_EXCL_START */
/* The AVX-512F variant is additionally compiled out when __APPLE__ is
 * defined */
#if defined(HAVE_AVX512FINTRIN_H) && defined(HAVE_AVX2INTRIN_H) && \
defined(HAVE_TMMINTRIN_H) && defined(HAVE_SMMINTRIN_H) && \
!defined(__APPLE__)
if (sodium_runtime_has_avx512f()) {
fill_segment = argon2_fill_segment_avx512f;
return 0;
}
#endif
#if defined(HAVE_AVX2INTRIN_H) && defined(HAVE_TMMINTRIN_H) && \
defined(HAVE_SMMINTRIN_H)
if (sodium_runtime_has_avx2()) {
fill_segment = argon2_fill_segment_avx2;
return 0;
}
#endif
#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)
if (sodium_runtime_has_ssse3()) {
fill_segment = argon2_fill_segment_ssse3;
return 0;
}
#endif
/* No usable SIMD variant: portable reference code */
fill_segment = argon2_fill_segment_ref;
return 0;
/* LCOV_EXCL_STOP */
}
/* Externally visible entry point that forwards to the file-local
 * argon2_pick_best_implementation() and returns its result. */
int
_crypto_pwhash_argon2_pick_best_implementation(void)
{
return argon2_pick_best_implementation();
}

View File

@@ -0,0 +1,271 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef argon2_core_H
#define argon2_core_H
#include <string.h>
#include "argon2.h"
/************************* Argon2 internal constants *************************/
enum argon2_ctx_constants {
/* Version of the algorithm */
ARGON2_VERSION_NUMBER = 0x13,
/* Memory block size in bytes */
ARGON2_BLOCK_SIZE = 1024,
/* Number of 64-, 128-, 256- and 512-bit words in one block */
ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,
ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32,
ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64,
/* Number of pseudo-random values generated by one call to Blake in Argon2i
to generate reference block positions */
ARGON2_ADDRESSES_IN_BLOCK = 128,
/* Pre-hashing digest length, and its length once extended by the 8 extra
bytes used to derive the first blocks */
ARGON2_PREHASH_DIGEST_LENGTH = 64,
ARGON2_PREHASH_SEED_LENGTH = 72
};
/************************* Argon2 internal data types *************************/
/*
 * Structure for the (1KB) memory block implemented as 128 64-bit words.
 * Memory blocks can be copied, XORed. Internal words can be accessed by [] (no
 * bounds checking).
 */
typedef struct block_ {
uint64_t v[ARGON2_QWORDS_IN_BLOCK];
} block;
/*
 * Descriptor of one allocation of block memory.
 */
typedef struct block_region_ {
void * base; /* pointer handed back to the allocator on release */
block *memory; /* first block; may differ from base when manually aligned */
size_t size; /* size of the block memory in bytes */
} block_region;
/*****************Functions that work with the block******************/
/* Set every byte of block @b to the value @in */
static inline void
init_block_value(block *b, uint8_t in)
{
    const size_t block_bytes = sizeof(b->v);

    memset(b->v, in, block_bytes);
}
/* Overwrite the words of block @dst with those of block @src */
static inline void
copy_block(block *dst, const block *src)
{
    const size_t block_bytes = ARGON2_QWORDS_IN_BLOCK * sizeof(uint64_t);

    memcpy(dst->v, src->v, block_bytes);
}
/* XOR block @src into block @dst, one 64-bit word at a time */
static inline void
xor_block(block *dst, const block *src)
{
    int word = 0;

    while (word < ARGON2_QWORDS_IN_BLOCK) {
        dst->v[word] = dst->v[word] ^ src->v[word];
        word++;
    }
}
/*
 * Argon2 instance: memory pointer, number of passes, amount of memory, type,
 * and derived values.
 * Used to evaluate the number and location of blocks to construct in each
 * thread
 */
typedef struct Argon2_instance_t {
block_region *region; /* Memory region pointer */
uint64_t *pseudo_rands; /* Buffer of segment_length pseudo-random values */
uint32_t passes; /* Number of passes */
uint32_t current_pass;
uint32_t memory_blocks; /* Number of blocks in memory */
uint32_t segment_length; /* Number of blocks in one segment */
uint32_t lane_length; /* Number of blocks in one lane */
uint32_t lanes; /* Number of lanes */
uint32_t threads;
argon2_type type; /* Argon2 variant */
int print_internals; /* whether to print the memory blocks */
} argon2_instance_t;
/*
 * Argon2 position: where we construct the block right now. Used to distribute
 * work between threads.
 */
typedef struct Argon2_position_t {
uint32_t pass; /* current pass over the memory */
uint32_t lane; /* lane being filled */
uint8_t slice; /* slice of the lane, in [0, ARGON2_SYNC_POINTS) */
uint32_t index; /* block index inside the current segment */
} argon2_position_t;
/* Struct that holds the inputs for thread handling FillSegment: the instance
 * to work on and the position of the segment to fill */
typedef struct Argon2_thread_data {
argon2_instance_t *instance_ptr;
argon2_position_t pos;
} argon2_thread_data;
/************************* Argon2 core functions *************************/
/*
 * Computes absolute position of reference block in the lane following a skewed
 * distribution and using a pseudo-random value as input
 * @param instance Pointer to the current instance
 * @param position Pointer to the current position
 * @param pseudo_rand 32-bit pseudo-random value used to determine the position
 * @param same_lane Indicates if the block will be taken from the current lane.
 * If so we can reference the current segment
 * @return Index of the reference block inside its lane, in
 * [0, instance->lane_length)
 * @pre All pointers must be valid
 */
static uint32_t index_alpha(const argon2_instance_t *instance,
const argon2_position_t *position, uint32_t pseudo_rand,
int same_lane)
{
/*
 * Pass 0:
 * This lane : all already finished segments plus already constructed
 * blocks in this segment
 * Other lanes : all already finished segments
 * Pass 1+:
 * This lane : (SYNC_POINTS - 1) last segments plus already constructed
 * blocks in this segment
 * Other lanes : (SYNC_POINTS - 1) last segments
 */
uint32_t reference_area_size;
uint64_t relative_position;
uint32_t start_position, absolute_position;
if (position->pass == 0) {
/* First pass */
if (position->slice == 0) {
/* First slice */
/* NOTE(review): this wraps around for index == 0; callers appear to start
 * at index 2 in the very first segment -- confirm */
reference_area_size =
position->index - 1; /* all but the previous */
} else {
if (same_lane) {
/* The same lane => add current segment */
reference_area_size =
position->slice * instance->segment_length +
position->index - 1;
} else {
/* Another lane: only finished segments; the (-1) is added in unsigned
 * arithmetic, i.e. one block is subtracted when index == 0 */
reference_area_size =
position->slice * instance->segment_length +
((position->index == 0) ? (-1) : 0);
}
}
} else {
/* Any later pass (pass != 0) */
if (same_lane) {
reference_area_size = instance->lane_length -
instance->segment_length + position->index -
1;
} else {
reference_area_size = instance->lane_length -
instance->segment_length +
((position->index == 0) ? (-1) : 0);
}
}
/* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
 * relative position */
/* The value is squared and scaled in 64-bit arithmetic, then subtracted from
 * the top of the area, so results are not uniform: small squared values map
 * to positions close to reference_area_size - 1 */
relative_position = pseudo_rand;
relative_position = relative_position * relative_position >> 32;
relative_position = reference_area_size - 1 -
(reference_area_size * relative_position >> 32);
/* 1.2.5 Computing starting position */
/* Pass 0 counts from the start of the lane; later passes count from the
 * segment after the current one (from 0 when the current slice is the last) */
start_position = 0;
if (position->pass != 0) {
start_position = (position->slice == ARGON2_SYNC_POINTS - 1)
? 0
: (position->slice + 1) * instance->segment_length;
}
/* 1.2.6. Computing absolute position */
absolute_position = (start_position + relative_position) %
instance->lane_length; /* absolute position */
return absolute_position;
}
/*
 * Function that validates all inputs against predefined restrictions and
 * returns an error code
 * @param context Pointer to current Argon2 context
 * @return ARGON2_OK if everything is all right, otherwise one of the error
 * codes (all defined in <argon2.h>)
 */
int argon2_validate_inputs(const argon2_context *context);
/*
 * Function allocates memory, hashes the inputs with Blake, and creates first
 * two blocks of each lane.
 * @param instance Current Argon2 instance; its region and pseudo_rands
 * pointers are set by this call
 * @param context Pointer to the Argon2 internal structure containing the
 * inputs and the parameters for time and space requirements. The password
 * and secret it references may be wiped, depending on context->flags.
 * @return ARGON2_OK if successful, ARGON2_INCORRECT_PARAMETER if a pointer is
 * NULL, ARGON2_MEMORY_ALLOCATION_ERROR if memory failed to allocate.
 */
int argon2_initialize(argon2_instance_t *instance, argon2_context *context);
/*
 * XORing the last block of each lane, hashing it, making the tag. Deallocates
 * the memory of the instance.
 * @param context Pointer to current Argon2 context (only the out parameters
 * and the flags are used)
 * @param instance Pointer to current instance of Argon2
 * @pre instance->region must point to the filled block memory
 * @pre context->out must point to outlen bytes of memory
 * @note Does nothing if either pointer is NULL.
 */
void argon2_finalize(const argon2_context *context,
argon2_instance_t *instance);
/*
 * Function that fills the segment using previous segments also from other
 * threads
 * @param instance Pointer to the current instance
 * @param position Current position
 * @pre all block pointers must be valid
 */
typedef void (*fill_segment_fn)(const argon2_instance_t *instance,
argon2_position_t position);
/* Available implementations of fill_segment_fn: AVX-512F, AVX2, SSSE3 and
 * the portable reference version */
void argon2_fill_segment_avx512f(const argon2_instance_t *instance,
argon2_position_t position);
void argon2_fill_segment_avx2(const argon2_instance_t *instance,
argon2_position_t position);
void argon2_fill_segment_ssse3(const argon2_instance_t *instance,
argon2_position_t position);
void argon2_fill_segment_ref(const argon2_instance_t *instance,
argon2_position_t position);
/*
 * Function that performs one pass over the entire memory, filling every
 * segment of every lane, based on the first two blocks in each lane
 * @param instance Pointer to the current instance
 * @param pass Number of the pass being performed
 * @note Returns nothing; a NULL instance or an instance with zero lanes is
 * silently ignored.
 */
void argon2_fill_memory_blocks(argon2_instance_t *instance, uint32_t pass);
#endif

View File

@@ -0,0 +1,306 @@
#include "argon2-encoding.h"
#include "argon2-core.h"
#include "utils.h"
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
* Example code for a decoder and encoder of "hash strings", with Argon2
* parameters.
*
* The code was originally written by Thomas Pornin <pornin@bolet.org>,
* to whom comments and remarks may be sent. It is released under what
* should amount to Public Domain or its closest equivalent; the
* following mantra is supposed to incarnate that fact with all the
* proper legal rituals:
*
* ---------------------------------------------------------------------
* This file is provided under the terms of Creative Commons CC0 1.0
* Public Domain Dedication. To the extent possible under law, the
* author (Thomas Pornin) has waived all copyright and related or
* neighboring rights to this file. This work is published from: Canada.
* ---------------------------------------------------------------------
*
* Copyright (c) 2015 Thomas Pornin
*/
/* ==================================================================== */
/*
 * Parse an unsigned decimal integer at the start of 'str' and store it in
 * '*v'. On success the returned pointer addresses the first character after
 * the digits. NULL is returned, and '*v' is left untouched, when 'str' does
 * not start with a digit, when the number has superfluous leading zeros, or
 * when it does not fit in an 'unsigned long'.
 */
static const char *
decode_decimal(const char *str, unsigned long *v)
{
    const char   *cursor = str;
    unsigned long value  = 0;

    while (*cursor >= '0' && *cursor <= '9') {
        const unsigned long digit = (unsigned long) (*cursor - '0');

        /* value * 10 must not overflow ... */
        if (value > (ULONG_MAX / 10)) {
            return NULL;
        }
        value *= 10;
        /* ... and neither must adding the new digit */
        if (digit > (ULONG_MAX - value)) {
            return NULL;
        }
        value += digit;
        cursor++;
    }
    /* No digit at all */
    if (cursor == str) {
        return NULL;
    }
    /* A leading '0' is only valid when it is the whole number */
    if (*str == '0' && cursor != (str + 1)) {
        return NULL;
    }
    *v = value;

    return cursor;
}
/* ==================================================================== */
/*
 * Code specific to Argon2.
 *
 * The code below applies the following format:
 *
 *  $argon2<T>$v=<num>$m=<num>,t=<num>,p=<num>$<bin>$<bin>
 *
 * where <T> is either 'i' or 'id', <num> is a decimal integer (no leading
 * zeros, fits in a uint32_t) and <bin> is Base64-encoded data (no '=' padding
 * characters, no newline or whitespace).
 *
 * The last two binary chunks (encoded in Base64) are, in that order,
 * the salt and the output. Both are required. The binary salt length and the
 * output length must be in the allowed ranges defined in argon2.h.
 *
 * The ctx struct must contain buffers large enough to hold the salt and the
 * output when it is fed into argon2_decode_string.
 */
/*
 * Decode an Argon2 hash string into the provided structure 'ctx'.
 *
 * On entry ctx->saltlen and ctx->outlen give the capacities of ctx->salt and
 * ctx->out; on success they hold the decoded lengths. ctx->m_cost,
 * ctx->t_cost, ctx->lanes and ctx->threads (set equal to lanes) are filled
 * from the string.
 *
 * Returned value is ARGON2_OK on success, ARGON2_INCORRECT_TYPE for an
 * unsupported 'type' or version number, ARGON2_DECODING_FAIL for a malformed
 * string or trailing characters, or the error reported by
 * argon2_validate_inputs() for the decoded parameters.
 */
int
argon2_decode_string(argon2_context *ctx, const char *str, argon2_type type)
{
/* Prefix checking: consume 'prefix' or fail */
#define CC(prefix) \
    do { \
        size_t cc_len = strlen(prefix); \
        if (strncmp(str, prefix, cc_len) != 0) { \
            return ARGON2_DECODING_FAIL; \
        } \
        str += cc_len; \
    } while ((void) 0, 0)
/* Optional prefix checking with supplied code */
#define CC_opt(prefix, code) \
    do { \
        size_t cc_len = strlen(prefix); \
        if (strncmp(str, prefix, cc_len) == 0) { \
            str += cc_len; \
            { \
                code; \
            } \
        } \
    } while ((void) 0, 0)
/* Decoding prefix into decimal */
#define DECIMAL(x) \
    do { \
        unsigned long dec_x; \
        str = decode_decimal(str, &dec_x); \
        if (str == NULL) { \
            return ARGON2_DECODING_FAIL; \
        } \
        (x) = dec_x; \
    } while ((void) 0, 0)
/* Decoding prefix into uint32_t decimal */
#define DECIMAL_U32(x) \
    do { \
        unsigned long dec_x; \
        str = decode_decimal(str, &dec_x); \
        if (str == NULL || dec_x > UINT32_MAX) { \
            return ARGON2_DECODING_FAIL; \
        } \
        (x) = (uint32_t)dec_x; \
    } while ((void)0, 0)
/* Decoding base64 into a binary buffer */
#define BIN(buf, max_len, len) \
    do { \
        size_t bin_len = (max_len); \
        const char *str_end; \
        if (sodium_base642bin((buf), (max_len), str, strlen(str), NULL, \
                              &bin_len, &str_end, \
                              sodium_base64_VARIANT_ORIGINAL_NO_PADDING) != 0 || \
            bin_len > UINT32_MAX) { \
            return ARGON2_DECODING_FAIL; \
        } \
        (len) = (uint32_t) bin_len; \
        str = str_end; \
    } while ((void) 0, 0)

    /* Remember the buffer capacities before the length fields are reset */
    size_t   maxsaltlen = ctx->saltlen;
    size_t   maxoutlen  = ctx->outlen;
    int      validation_result;
    uint32_t version = 0;

    ctx->saltlen = 0;
    ctx->outlen  = 0;

    if (type == Argon2_id) {
        CC("$argon2id");
    } else if (type == Argon2_i) {
        CC("$argon2i");
    } else {
        return ARGON2_INCORRECT_TYPE;
    }
    /* The version field is mandatory and must match this implementation */
    CC("$v=");
    DECIMAL_U32(version);
    if (version != ARGON2_VERSION_NUMBER) {
        return ARGON2_INCORRECT_TYPE;
    }
    CC("$m=");
    DECIMAL_U32(ctx->m_cost);
    if (ctx->m_cost > UINT32_MAX) {
        return ARGON2_INCORRECT_TYPE;
    }
    CC(",t=");
    DECIMAL_U32(ctx->t_cost);
    if (ctx->t_cost > UINT32_MAX) {
        return ARGON2_INCORRECT_TYPE;
    }
    CC(",p=");
    DECIMAL_U32(ctx->lanes);
    if (ctx->lanes > UINT32_MAX) {
        return ARGON2_INCORRECT_TYPE;
    }
    ctx->threads = ctx->lanes;

    CC("$");
    BIN(ctx->salt, maxsaltlen, ctx->saltlen);
    CC("$");
    BIN(ctx->out, maxoutlen, ctx->outlen);
    validation_result = argon2_validate_inputs(ctx);
    if (validation_result != ARGON2_OK) {
        return validation_result;
    }
    /* Anything left after the output chunk makes the string invalid */
    if (*str == 0) {
        return ARGON2_OK;
    }
    return ARGON2_DECODING_FAIL;

/* Every helper macro is local to this function; DECIMAL_U32 must be removed
 * too so that it does not leak into the rest of the file */
#undef CC
#undef CC_opt
#undef DECIMAL
#undef DECIMAL_U32
#undef BIN
}
#define U32_STR_MAXSIZE 11U
/*
 * Write the decimal representation of 'x', followed by a terminating 0, to
 * 'str'. 'str' must have room for U32_STR_MAXSIZE characters (up to 10
 * digits plus the terminator).
 */
static void
u32_to_string(char *str, uint32_t x)
{
    char   digits[U32_STR_MAXSIZE - 1U];
    size_t pos = sizeof digits;
    size_t count;

    /* Produce the digits from the least significant one, right to left */
    do {
        pos--;
        digits[pos] = (x % (uint32_t) 10U) + '0';
        x           = x / (uint32_t) 10U;
    } while (x != 0U && pos != 0U);

    count = (sizeof digits) - pos;
    memcpy(str, &digits[pos], count);
    str[count] = 0;
}
/*
 * Encode an Argon2 hash string into the provided buffer. 'dst_len'
 * contains the size, in characters, of the 'dst' buffer; if 'dst_len'
 * is less than the number of required characters (including the
 * terminating 0), then this function returns ARGON2_ENCODING_FAIL.
 *
 * The string has the form
 * $argon2<T>$v=<num>$m=<num>,t=<num>,p=<num>$<salt>$<output>
 * with the salt and output taken from ctx->salt / ctx->out and written as
 * unpadded Base64.
 *
 * 'ctx' is checked with argon2_validate_inputs() after the type prefix has
 * been written; its error code is returned if the check fails.
 * NOTE(review): because of that check, salt-only or parameter-only strings
 * (zero ctx->outlen / ctx->saltlen) are only possible if the validation
 * minimums allow them -- confirm against argon2.h.
 *
 * On success, ARGON2_OK is returned. An unsupported 'type' yields
 * ARGON2_ENCODING_FAIL.
 */
int
argon2_encode_string(char *dst, size_t dst_len, argon2_context *ctx,
argon2_type type)
{
/* Append the C string 'str' (and its terminator) to dst, or fail if it does
 * not fit */
#define SS(str) \
do { \
size_t pp_len = strlen(str); \
if (pp_len >= dst_len) { \
return ARGON2_ENCODING_FAIL; \
} \
memcpy(dst, str, pp_len + 1); \
dst += pp_len; \
dst_len -= pp_len; \
} while ((void) 0, 0)
/* Append the decimal representation of the 32-bit value 'x' */
#define SX(x) \
do { \
char tmp[U32_STR_MAXSIZE]; \
u32_to_string(tmp, x); \
SS(tmp); \
} while ((void) 0, 0)
/* Append 'len' bytes of 'buf' as unpadded Base64 */
#define SB(buf, len) \
do { \
size_t sb_len; \
if (sodium_bin2base64(dst, dst_len, (buf), (len), \
sodium_base64_VARIANT_ORIGINAL_NO_PADDING) == NULL) { \
return ARGON2_ENCODING_FAIL; \
} \
sb_len = strlen(dst); \
dst += sb_len; \
dst_len -= sb_len; \
} while ((void) 0, 0)
int validation_result;
switch (type) {
case Argon2_id:
SS("$argon2id$v="); break;
case Argon2_i:
SS("$argon2i$v="); break;
default:
return ARGON2_ENCODING_FAIL;
}
validation_result = argon2_validate_inputs(ctx);
if (validation_result != ARGON2_OK) {
return validation_result;
}
SX(ARGON2_VERSION_NUMBER);
SS("$m=");
SX(ctx->m_cost);
SS(",t=");
SX(ctx->t_cost);
SS(",p=");
SX(ctx->lanes);
SS("$");
SB(ctx->salt, ctx->saltlen);
SS("$");
SB(ctx->out, ctx->outlen);
return ARGON2_OK;
#undef SS
#undef SX
#undef SB
}

View File

@@ -0,0 +1,34 @@
#ifndef argon2_encoding_H
#define argon2_encoding_H
#include "argon2.h"
/*
 * Encode an Argon2 hash string into the provided buffer. 'dst_len'
 * contains the size, in characters, of the 'dst' buffer; if 'dst_len'
 * is less than the number of required characters (including the
 * terminating 0), then this function returns ARGON2_ENCODING_FAIL.
 *
 * The parameters, salt and output are taken from 'ctx'; 'type' selects the
 * "$argon2i" or "$argon2id" prefix.
 *
 * On success, ARGON2_OK is returned.
 *
 * NOTE(review): the implementation validates 'ctx' with
 * argon2_validate_inputs() and returns its error code on failure; salt-only
 * or parameter-only strings depend on what that validation accepts.
 */
int argon2_encode_string(char *dst, size_t dst_len, argon2_context *ctx,
argon2_type type);
/*
 * Decodes an Argon2 hash string into the provided structure 'ctx'.
 * On entry, the fields ctx.saltlen and ctx.outlen set the maximal salt and
 * out length values that are allowed (the capacities of ctx.salt and
 * ctx.out); on success they hold the decoded lengths.
 * An invalid input string causes an error.
 *
 * Returned value is ARGON2_OK on success.
 */
int argon2_decode_string(argon2_context *ctx, const char *str,
argon2_type type);
#endif

View File

@@ -0,0 +1,239 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "argon2-core.h"
#include "argon2.h"
#include "private/common.h"
#if defined(HAVE_AVX2INTRIN_H) && defined(HAVE_EMMINTRIN_H) && \
defined(HAVE_TMMINTRIN_H) && defined(HAVE_SMMINTRIN_H)
# ifdef __GNUC__
# pragma GCC target("sse2")
# pragma GCC target("ssse3")
# pragma GCC target("sse4.1")
# pragma GCC target("avx2")
# endif
# ifdef _MSC_VER
# include <intrin.h> /* for _mm_set_epi64x */
# endif
# include <emmintrin.h>
# include <immintrin.h>
# include <smmintrin.h>
# include <tmmintrin.h>
# include "private/sse2_64_32.h"
# include "blamka-round-avx2.h"
/*
 * Compute one block with the AVX2 compression function.
 * @param state ARGON2_HWORDS_IN_BLOCK 256-bit registers holding the previous
 * block on entry and the new block on return
 * @param ref_block bytes of the reference block (read with unaligned loads)
 * @param next_block destination bytes, overwritten with the new block
 */
static void
fill_block(__m256i *state, const uint8_t *ref_block, uint8_t *next_block)
{
__m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
uint32_t i;
/* state = state XOR ref_block; a copy is kept in block_XY for the final XOR */
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
block_XY[i] = state[i] = _mm256_xor_si256(
state[i], _mm256_loadu_si256((__m256i const *) (&ref_block[32 * i])));
}
/* First round: four groups of 8 consecutive registers */
for (i = 0; i < 4; ++i) {
BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
}
/* Second round: four groups of 8 registers taken with a stride of 4 */
for (i = 0; i < 4; ++i) {
BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i],
state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
}
/* Feed the saved input back in and write the result out */
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
state[i] = _mm256_xor_si256(state[i], block_XY[i]);
_mm256_storeu_si256((__m256i *) (&next_block[32 * i]), state[i]);
}
}
/*
 * Same as fill_block(), except that the previous content of next_block is
 * XORed into the result instead of being discarded.
 * @param state ARGON2_HWORDS_IN_BLOCK 256-bit registers holding the previous
 * block on entry and the new block on return
 * @param ref_block bytes of the reference block (read with unaligned loads)
 * @param next_block destination bytes; read first, then overwritten
 */
static void
fill_block_with_xor(__m256i *state, const uint8_t *ref_block,
uint8_t *next_block)
{
__m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
uint32_t i;
/* state = state XOR ref_block; block_XY additionally includes the current
 * content of next_block */
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
state[i] = _mm256_xor_si256(
state[i], _mm256_loadu_si256((__m256i const *) (&ref_block[32 * i])));
block_XY[i] = _mm256_xor_si256(
state[i], _mm256_loadu_si256((__m256i const *) (&next_block[32 * i])));
}
/* First round: four groups of 8 consecutive registers */
for (i = 0; i < 4; ++i) {
BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
}
/* Second round: four groups of 8 registers taken with a stride of 4 */
for (i = 0; i < 4; ++i) {
BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i],
state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
}
/* Combine with block_XY and write the result out */
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
state[i] = _mm256_xor_si256(state[i], block_XY[i]);
_mm256_storeu_si256((__m256i *) (&next_block[32 * i]), state[i]);
}
}
/*
 * Fill pseudo_rands[0 .. segment_length) with the pseudo-random values for
 * the segment at `position`.
 * An input block carrying pass, lane, slice, memory_blocks, passes, type and
 * a counter is run through the compression function twice; each such address
 * block supplies ARGON2_ADDRESSES_IN_BLOCK values, and the counter is
 * incremented before every new address block.
 * Nothing is written when `instance` or `position` is NULL.
 */
static void
generate_addresses(const argon2_instance_t *instance,
                   const argon2_position_t *position, uint64_t *pseudo_rands)
{
    block    addresses, input, scratch;
    uint32_t idx;

    init_block_value(&addresses, 0);
    init_block_value(&input, 0);

    if (instance == NULL || position == NULL) {
        return;
    }

    input.v[0] = position->pass;
    input.v[1] = position->lane;
    input.v[2] = position->slice;
    input.v[3] = instance->memory_blocks;
    input.v[4] = instance->passes;
    input.v[5] = instance->type;

    for (idx = 0; idx < instance->segment_length; ++idx) {
        const uint32_t slot = idx % ARGON2_ADDRESSES_IN_BLOCK;

        if (slot == 0) {
            /* A fresh address block is needed: both compression states and
             * both output blocks start from zero */
            __m256i first_state[ARGON2_HWORDS_IN_BLOCK];
            __m256i second_state[ARGON2_HWORDS_IN_BLOCK];

            memset(first_state, 0, sizeof(first_state));
            memset(second_state, 0, sizeof(second_state));
            init_block_value(&addresses, 0);
            init_block_value(&scratch, 0);

            /* Increasing index counter */
            input.v[6]++;

            /* First iteration of G */
            fill_block_with_xor(first_state, (uint8_t *) &input.v,
                                (uint8_t *) &scratch.v);
            /* Second iteration of G */
            fill_block_with_xor(second_state, (uint8_t *) &scratch.v,
                                (uint8_t *) &addresses.v);
        }
        pseudo_rands[idx] = addresses.v[slot];
    }
}
/*
 * Fills the segment selected by position (pass, lane, slice) using the AVX2
 * block functions. Does nothing when instance is NULL.
 *
 * Reference blocks are chosen from instance->pseudo_rands (regenerated here)
 * unless the type is Argon2_id and the position is outside the first
 * ARGON2_SYNC_POINTS / 2 slices of pass 0; in that case the first 64-bit
 * word of the previous block is used instead.
 */
void
argon2_fill_segment_avx2(const argon2_instance_t *instance,
                         argon2_position_t        position)
{
    __m256i   state[ARGON2_HWORDS_IN_BLOCK];
    block    *source_block;
    block    *target_block;
    uint64_t *addresses;
    uint64_t  rnd;
    uint64_t  ref_index;
    uint64_t  ref_lane;
    uint32_t  prev_offset;
    uint32_t  curr_offset;
    uint32_t  first_index;
    uint32_t  i;
    int       precomputed_addresses;

    if (instance == NULL) {
        return;
    }
    precomputed_addresses =
        !(instance->type == Argon2_id &&
          (position.pass != 0 || position.slice >= ARGON2_SYNC_POINTS / 2));

    addresses = instance->pseudo_rands;
    if (precomputed_addresses) {
        generate_addresses(instance, &position, addresses);
    }

    /* the first two blocks of each lane already exist in the very first
     * segment */
    first_index = ((0 == position.pass) && (0 == position.slice)) ? 2U : 0U;

    /* absolute index of the first block to produce */
    curr_offset = position.lane * instance->lane_length +
                  position.slice * instance->segment_length + first_index;

    /* its predecessor: the previous block, wrapping to the last block of
     * the lane when we are at the start of the lane */
    prev_offset = (0 == curr_offset % instance->lane_length)
                      ? curr_offset + instance->lane_length - 1
                      : curr_offset - 1;

    memcpy(state, instance->region->memory[prev_offset].v, ARGON2_BLOCK_SIZE);

    for (i = first_index; i < instance->segment_length; ++i) {
        /* 1.1 Undo the wrap-around once we are past the lane's first block */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }

        /* 1.2.1 Pick the pseudo-random value for this block */
        if (precomputed_addresses) {
#pragma warning(push)
#pragma warning(disable : 6385)
            rnd = addresses[i];
#pragma warning(pop)
        } else {
            rnd = instance->region->memory[prev_offset].v[0];
        }

        /* 1.2.2 Lane of the reference block: high 32 bits, except in the
         * very first segment where only the current lane may be used */
        ref_lane = (rnd >> 32) % instance->lanes;
        if ((position.pass == 0) && (position.slice == 0)) {
            ref_lane = position.lane;
        }

        /* 1.2.3 Index of the reference block inside that lane: low 32 bits
         * mapped through index_alpha() */
        position.index = i;
        ref_index = index_alpha(instance, &position, rnd & 0xFFFFFFFF,
                                ref_lane == position.lane);

        /* 2 Produce the new block */
        source_block = instance->region->memory +
                       instance->lane_length * ref_lane + ref_index;
        target_block = instance->region->memory + curr_offset;
        if (position.pass == 0) {
            fill_block(state, (uint8_t *) source_block->v,
                       (uint8_t *) target_block->v);
        } else {
            fill_block_with_xor(state, (uint8_t *) source_block->v,
                                (uint8_t *) target_block->v);
        }

        ++curr_offset;
        ++prev_offset;
    }
}
#endif

View File

@@ -0,0 +1,244 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "argon2-core.h"
#include "argon2.h"
#include "private/common.h"
#if defined(HAVE_AVX512FINTRIN_H) && defined(HAVE_AVX2INTRIN_H) && \
defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && defined(HAVE_SMMINTRIN_H)
# ifdef __GNUC__
# pragma GCC target("sse2")
# pragma GCC target("ssse3")
# pragma GCC target("sse4.1")
# pragma GCC target("avx2")
# pragma GCC target("avx512f")
# endif
# ifdef _MSC_VER
# include <intrin.h> /* for _mm_set_epi64x */
# endif
# include <emmintrin.h>
# include <immintrin.h>
# include <smmintrin.h>
# include <tmmintrin.h>
# include "private/sse2_64_32.h"
# include "blamka-round-avx512f.h"
/*
 * AVX512F block function (overwrite variant).
 *
 * XORs ref_block into state, applies BLAKE2_ROUND_1 and BLAKE2_ROUND_2 to
 * state, XORs the pre-round value back in, and writes the result to both
 * state and next_block. The previous contents of next_block are ignored.
 * ref_block and next_block are accessed with unaligned loads/stores.
 */
static void
fill_block(__m512i *state, const uint8_t *ref_block, uint8_t *next_block)
{
    __m512i  saved[ARGON2_512BIT_WORDS_IN_BLOCK];
    uint32_t w;
    uint32_t r;

    /* state ^= ref_block; keep a copy of the result for the final XOR */
    for (w = 0; w < ARGON2_512BIT_WORDS_IN_BLOCK; w++) {
        const __m512i *ref_ptr = (__m512i const *) (ref_block + 64 * w);

        state[w] = _mm512_xor_si512(state[w], _mm512_loadu_si512(ref_ptr));
        saved[w] = state[w];
    }
    /* first stage: two groups of eight consecutive 512-bit words */
    for (r = 0; r < 2; r++) {
        __m512i *const group = state + 8 * r;

        BLAKE2_ROUND_1(
            group[0], group[1], group[2], group[3],
            group[4], group[5], group[6], group[7]);
    }
    /* second stage: two groups of words taken at a stride of two */
    for (r = 0; r < 2; r++) {
        __m512i *const group = state + r;

        BLAKE2_ROUND_2(
            group[0], group[2], group[4], group[6],
            group[8], group[10], group[12], group[14]);
    }
    /* feed-forward XOR, then publish the new block */
    for (w = 0; w < ARGON2_512BIT_WORDS_IN_BLOCK; w++) {
        state[w] = _mm512_xor_si512(state[w], saved[w]);
        _mm512_storeu_si512((__m512i *) (next_block + 64 * w), state[w]);
    }
}
/*
 * AVX512F block function (XOR-over variant).
 *
 * Same as fill_block(), except that the current contents of next_block are
 * also XORed into the value that is fed forward, so the stored result is
 * combined with what next_block held before the call.
 */
static void
fill_block_with_xor(__m512i *state, const uint8_t *ref_block,
                    uint8_t *next_block)
{
    __m512i  saved[ARGON2_512BIT_WORDS_IN_BLOCK];
    uint32_t w;
    uint32_t r;

    for (w = 0; w < ARGON2_512BIT_WORDS_IN_BLOCK; w++) {
        const __m512i *ref_ptr = (__m512i const *) (ref_block + 64 * w);
        const __m512i *old_ptr = (__m512i const *) (next_block + 64 * w);

        /* state ^= ref_block */
        state[w] = _mm512_xor_si512(state[w], _mm512_loadu_si512(ref_ptr));
        /* value fed forward later: state ^ old next_block */
        saved[w] = _mm512_xor_si512(state[w], _mm512_loadu_si512(old_ptr));
    }
    /* first stage: two groups of eight consecutive 512-bit words */
    for (r = 0; r < 2; r++) {
        __m512i *const group = state + 8 * r;

        BLAKE2_ROUND_1(
            group[0], group[1], group[2], group[3],
            group[4], group[5], group[6], group[7]);
    }
    /* second stage: two groups of words taken at a stride of two */
    for (r = 0; r < 2; r++) {
        __m512i *const group = state + r;

        BLAKE2_ROUND_2(
            group[0], group[2], group[4], group[6],
            group[8], group[10], group[12], group[14]);
    }
    for (w = 0; w < ARGON2_512BIT_WORDS_IN_BLOCK; w++) {
        state[w] = _mm512_xor_si512(state[w], saved[w]);
        _mm512_storeu_si512((__m512i *) (next_block + 64 * w), state[w]);
    }
}
/*
 * Fills pseudo_rands[0 .. instance->segment_length - 1] (AVX512F variant).
 *
 * The values are produced in batches of ARGON2_ADDRESSES_IN_BLOCK: for each
 * batch the counter in input_block.v[6] is incremented and
 * fill_block_with_xor() is applied twice, starting from all-zero states.
 * Nothing is written when instance or position is NULL.
 */
static void
generate_addresses(const argon2_instance_t *instance,
                   const argon2_position_t *position, uint64_t *pseudo_rands)
{
    block    address_block;
    block    input_block;
    block    tmp_block;
    uint32_t i;

    init_block_value(&address_block, 0);
    init_block_value(&input_block, 0);

    if (instance == NULL || position == NULL) {
        return;
    }
    input_block.v[0] = position->pass;
    input_block.v[1] = position->lane;
    input_block.v[2] = position->slice;
    input_block.v[3] = instance->memory_blocks;
    input_block.v[4] = instance->passes;
    input_block.v[5] = instance->type;

    for (i = 0; i < instance->segment_length; ++i) {
        const uint32_t slot = i % ARGON2_ADDRESSES_IN_BLOCK;

        if (slot == 0) {
            /* fresh all-zero states for the two applications of G */
            __m512i first_state[ARGON2_512BIT_WORDS_IN_BLOCK];
            __m512i second_state[ARGON2_512BIT_WORDS_IN_BLOCK];

            memset(first_state, 0, sizeof(first_state));
            memset(second_state, 0, sizeof(second_state));
            init_block_value(&address_block, 0);
            init_block_value(&tmp_block, 0);

            /* bump the batch counter */
            input_block.v[6]++;

            /* tmp_block = G(input_block) */
            fill_block_with_xor(first_state, (uint8_t *) &input_block.v,
                                (uint8_t *) &tmp_block.v);
            /* address_block = G(tmp_block) */
            fill_block_with_xor(second_state, (uint8_t *) &tmp_block.v,
                                (uint8_t *) &address_block.v);
        }
        pseudo_rands[i] = address_block.v[slot];
    }
}
/*
 * Fills the segment selected by position (pass, lane, slice) using the
 * AVX512F block functions. Does nothing when instance is NULL.
 *
 * Reference blocks are chosen from instance->pseudo_rands (regenerated here)
 * unless the type is Argon2_id and the position is outside the first
 * ARGON2_SYNC_POINTS / 2 slices of pass 0; in that case the first 64-bit
 * word of the previous block is used instead.
 */
void
argon2_fill_segment_avx512f(const argon2_instance_t *instance,
                            argon2_position_t        position)
{
    __m512i   state[ARGON2_512BIT_WORDS_IN_BLOCK];
    block    *source_block;
    block    *target_block;
    uint64_t *addresses;
    uint64_t  rnd;
    uint64_t  ref_index;
    uint64_t  ref_lane;
    uint32_t  prev_offset;
    uint32_t  curr_offset;
    uint32_t  first_index;
    uint32_t  i;
    int       precomputed_addresses;

    if (instance == NULL) {
        return;
    }
    precomputed_addresses =
        !(instance->type == Argon2_id &&
          (position.pass != 0 || position.slice >= ARGON2_SYNC_POINTS / 2));

    addresses = instance->pseudo_rands;
    if (precomputed_addresses) {
        generate_addresses(instance, &position, addresses);
    }

    /* the first two blocks of each lane already exist in the very first
     * segment */
    first_index = ((0 == position.pass) && (0 == position.slice)) ? 2U : 0U;

    /* absolute index of the first block to produce */
    curr_offset = position.lane * instance->lane_length +
                  position.slice * instance->segment_length + first_index;

    /* its predecessor: the previous block, wrapping to the last block of
     * the lane when we are at the start of the lane */
    prev_offset = (0 == curr_offset % instance->lane_length)
                      ? curr_offset + instance->lane_length - 1
                      : curr_offset - 1;

    memcpy(state, instance->region->memory[prev_offset].v, ARGON2_BLOCK_SIZE);

    for (i = first_index; i < instance->segment_length; ++i) {
        /* 1.1 Undo the wrap-around once we are past the lane's first block */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }

        /* 1.2.1 Pick the pseudo-random value for this block */
        if (precomputed_addresses) {
#pragma warning(push)
#pragma warning(disable : 6385)
            rnd = addresses[i];
#pragma warning(pop)
        } else {
            rnd = instance->region->memory[prev_offset].v[0];
        }

        /* 1.2.2 Lane of the reference block: high 32 bits, except in the
         * very first segment where only the current lane may be used */
        ref_lane = (rnd >> 32) % instance->lanes;
        if ((position.pass == 0) && (position.slice == 0)) {
            ref_lane = position.lane;
        }

        /* 1.2.3 Index of the reference block inside that lane: low 32 bits
         * mapped through index_alpha() */
        position.index = i;
        ref_index = index_alpha(instance, &position, rnd & 0xFFFFFFFF,
                                ref_lane == position.lane);

        /* 2 Produce the new block */
        source_block = instance->region->memory +
                       instance->lane_length * ref_lane + ref_index;
        target_block = instance->region->memory + curr_offset;
        if (position.pass == 0) {
            fill_block(state, (uint8_t *) source_block->v,
                       (uint8_t *) target_block->v);
        } else {
            fill_block_with_xor(state, (uint8_t *) source_block->v,
                                (uint8_t *) target_block->v);
        }

        ++curr_offset;
        ++prev_offset;
    }
}
#endif

View File

@@ -0,0 +1,234 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "argon2-core.h"
#include "argon2.h"
#include "blamka-round-ref.h"
#include "private/common.h"
/*
 * Reference block function (overwrite variant).
 *
 * Computes next_block = P(prev_block ^ ref_block) ^ (prev_block ^ ref_block),
 * where P is the two-stage BLAKE2_ROUND_NOMSG permutation below. The previous
 * contents of next_block are ignored.
 */
static void
fill_block(const block *prev_block, const block *ref_block, block *next_block)
{
    block    mixed;
    block    feed_forward;
    unsigned g;

    /* mixed = feed_forward = ref_block ^ prev_block */
    copy_block(&mixed, ref_block);
    xor_block(&mixed, prev_block);
    copy_block(&feed_forward, &mixed);

    /* Stage 1: BLAKE2 on groups of 16 consecutive 64-bit words:
       (0,1,...,15), then (16,17,...,31), ... finally (112,113,...,127) */
    for (g = 0; g < 8; g++) {
        const unsigned base = 16 * g;

        BLAKE2_ROUND_NOMSG(
            mixed.v[base], mixed.v[base + 1], mixed.v[base + 2],
            mixed.v[base + 3], mixed.v[base + 4], mixed.v[base + 5],
            mixed.v[base + 6], mixed.v[base + 7], mixed.v[base + 8],
            mixed.v[base + 9], mixed.v[base + 10], mixed.v[base + 11],
            mixed.v[base + 12], mixed.v[base + 13], mixed.v[base + 14],
            mixed.v[base + 15]);
    }
    /* Stage 2: BLAKE2 on word pairs taken every 16 words:
       (0,1,16,17,...,112,113), then (2,3,18,19,...,114,115), ... finally
       (14,15,30,31,...,126,127) */
    for (g = 0; g < 8; g++) {
        const unsigned base = 2 * g;

        BLAKE2_ROUND_NOMSG(
            mixed.v[base], mixed.v[base + 1], mixed.v[base + 16],
            mixed.v[base + 17], mixed.v[base + 32], mixed.v[base + 33],
            mixed.v[base + 48], mixed.v[base + 49], mixed.v[base + 64],
            mixed.v[base + 65], mixed.v[base + 80], mixed.v[base + 81],
            mixed.v[base + 96], mixed.v[base + 97], mixed.v[base + 112],
            mixed.v[base + 113]);
    }
    /* next_block = feed_forward ^ mixed */
    copy_block(next_block, &feed_forward);
    xor_block(next_block, &mixed);
}
/*
 * Reference block function (XOR-over variant).
 *
 * Like fill_block(), but the existing contents of next_block are XORed into
 * the fed-forward value, i.e.
 * next_block = P(prev_block ^ ref_block) ^ prev_block ^ ref_block ^ next_block.
 */
static void
fill_block_with_xor(const block *prev_block, const block *ref_block,
                    block *next_block)
{
    block    mixed;
    block    feed_forward;
    unsigned g;

    /* mixed = ref_block ^ prev_block */
    copy_block(&mixed, ref_block);
    xor_block(&mixed, prev_block);
    /* feed_forward = ref_block ^ prev_block ^ next_block: the old contents
       of next_block are saved here so they can be XORed over at the end */
    copy_block(&feed_forward, &mixed);
    xor_block(&feed_forward, next_block);

    /* Stage 1: BLAKE2 on groups of 16 consecutive 64-bit words:
       (0,1,...,15), then (16,17,...,31), ... finally (112,113,...,127) */
    for (g = 0; g < 8; g++) {
        const unsigned base = 16 * g;

        BLAKE2_ROUND_NOMSG(
            mixed.v[base], mixed.v[base + 1], mixed.v[base + 2],
            mixed.v[base + 3], mixed.v[base + 4], mixed.v[base + 5],
            mixed.v[base + 6], mixed.v[base + 7], mixed.v[base + 8],
            mixed.v[base + 9], mixed.v[base + 10], mixed.v[base + 11],
            mixed.v[base + 12], mixed.v[base + 13], mixed.v[base + 14],
            mixed.v[base + 15]);
    }
    /* Stage 2: BLAKE2 on word pairs taken every 16 words:
       (0,1,16,17,...,112,113), then (2,3,18,19,...,114,115), ... finally
       (14,15,30,31,...,126,127) */
    for (g = 0; g < 8; g++) {
        const unsigned base = 2 * g;

        BLAKE2_ROUND_NOMSG(
            mixed.v[base], mixed.v[base + 1], mixed.v[base + 16],
            mixed.v[base + 17], mixed.v[base + 32], mixed.v[base + 33],
            mixed.v[base + 48], mixed.v[base + 49], mixed.v[base + 64],
            mixed.v[base + 65], mixed.v[base + 80], mixed.v[base + 81],
            mixed.v[base + 96], mixed.v[base + 97], mixed.v[base + 112],
            mixed.v[base + 113]);
    }
    /* next_block = feed_forward ^ mixed */
    copy_block(next_block, &feed_forward);
    xor_block(next_block, &mixed);
}
/*
 * Generates the pseudo-random values used to pick reference blocks in a
 * segment and stores them in pseudo_rands.
 *
 * Values are produced in batches of ARGON2_ADDRESSES_IN_BLOCK: for each
 * batch the counter in input_block.v[6] is incremented and
 * fill_block_with_xor() is applied twice with an all-zero previous block.
 * Nothing is written when instance or position is NULL.
 *
 * @param instance Pointer to the current instance
 * @param position Pointer to the current position
 * @param pseudo_rands Pointer to the array of 64-bit values
 * @pre pseudo_rands must point to @a instance->segment_length allocated values
 */
static void
generate_addresses(const argon2_instance_t *instance,
                   const argon2_position_t *position, uint64_t *pseudo_rands)
{
    block    zero_block;
    block    input_block;
    block    address_block;
    block    tmp_block;
    uint32_t i;

    init_block_value(&zero_block, 0);
    init_block_value(&input_block, 0);

    if (instance == NULL || position == NULL) {
        return;
    }
    input_block.v[0] = position->pass;
    input_block.v[1] = position->lane;
    input_block.v[2] = position->slice;
    input_block.v[3] = instance->memory_blocks;
    input_block.v[4] = instance->passes;
    input_block.v[5] = instance->type;

    for (i = 0; i < instance->segment_length; ++i) {
        const uint32_t slot = i % ARGON2_ADDRESSES_IN_BLOCK;

        if (slot == 0) {
            /* new batch: bump the counter, then address_block = G(G(input)) */
            input_block.v[6]++;
            init_block_value(&tmp_block, 0);
            init_block_value(&address_block, 0);
            fill_block_with_xor(&zero_block, &input_block, &tmp_block);
            fill_block_with_xor(&zero_block, &tmp_block, &address_block);
        }
        pseudo_rands[i] = address_block.v[slot];
    }
}
/*
 * Fills the segment selected by position (pass, lane, slice) using the
 * portable reference block functions. Does nothing when instance is NULL.
 *
 * Reference blocks are chosen from instance->pseudo_rands (regenerated here)
 * unless the type is Argon2_id and the position is outside the first
 * ARGON2_SYNC_POINTS / 2 slices of pass 0; in that case the first 64-bit
 * word of the previous block is used instead.
 */
void
argon2_fill_segment_ref(const argon2_instance_t *instance,
                        argon2_position_t        position)
{
    block    *source_block;
    block    *target_block;
    uint64_t *addresses;
    uint64_t  rnd;
    uint64_t  ref_index;
    uint64_t  ref_lane;
    uint32_t  prev_offset;
    uint32_t  curr_offset;
    uint32_t  first_index;
    uint32_t  i;
    int       precomputed_addresses;

    if (instance == NULL) {
        return;
    }
    precomputed_addresses =
        !(instance->type == Argon2_id &&
          (position.pass != 0 || position.slice >= ARGON2_SYNC_POINTS / 2));

    addresses = instance->pseudo_rands;
    if (precomputed_addresses) {
        generate_addresses(instance, &position, addresses);
    }

    /* the first two blocks of each lane already exist in the very first
     * segment */
    first_index = ((0 == position.pass) && (0 == position.slice)) ? 2U : 0U;

    /* absolute index of the first block to produce */
    curr_offset = position.lane * instance->lane_length +
                  position.slice * instance->segment_length + first_index;

    /* its predecessor: the previous block, wrapping to the last block of
     * the lane when we are at the start of the lane */
    prev_offset = (0 == curr_offset % instance->lane_length)
                      ? curr_offset + instance->lane_length - 1
                      : curr_offset - 1;

    for (i = first_index; i < instance->segment_length; ++i) {
        /* 1.1 Undo the wrap-around once we are past the lane's first block */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }

        /* 1.2.1 Pick the pseudo-random value for this block */
        if (precomputed_addresses) {
#pragma warning(push)
#pragma warning(disable : 6385)
            rnd = addresses[i];
#pragma warning(pop)
        } else {
            rnd = instance->region->memory[prev_offset].v[0];
        }

        /* 1.2.2 Lane of the reference block: high 32 bits, except in the
         * very first segment where only the current lane may be used */
        ref_lane = (rnd >> 32) % instance->lanes;
        if ((position.pass == 0) && (position.slice == 0)) {
            ref_lane = position.lane;
        }

        /* 1.2.3 Index of the reference block inside that lane: low 32 bits
         * mapped through index_alpha() */
        position.index = i;
        ref_index = index_alpha(instance, &position, rnd & 0xFFFFFFFF,
                                ref_lane == position.lane);

        /* 2 Produce the new block from the previous and reference blocks */
        source_block = instance->region->memory +
                       instance->lane_length * ref_lane + ref_index;
        target_block = instance->region->memory + curr_offset;
        if (position.pass == 0) {
            fill_block(instance->region->memory + prev_offset, source_block,
                       target_block);
        } else {
            fill_block_with_xor(instance->region->memory + prev_offset,
                                source_block, target_block);
        }

        ++curr_offset;
        ++prev_offset;
    }
}

View File

@@ -0,0 +1,238 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "argon2-core.h"
#include "argon2.h"
#include "private/common.h"
#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)
# ifdef __GNUC__
# pragma GCC target("sse2")
# pragma GCC target("ssse3")
# endif
# ifdef _MSC_VER
# include <intrin.h> /* for _mm_set_epi64x */
# endif
# include <emmintrin.h>
# include <tmmintrin.h>
# include "private/sse2_64_32.h"
# include "blamka-round-ssse3.h"
/*
 * SSSE3 block function (overwrite variant).
 *
 * XORs ref_block into state, applies BLAKE2_ROUND in two stages to state,
 * XORs the pre-round value back in, and writes the result to both state and
 * next_block. The previous contents of next_block are ignored.
 * ref_block and next_block are accessed with unaligned loads/stores.
 */
static void
fill_block(__m128i *state, const uint8_t *ref_block, uint8_t *next_block)
{
    __m128i  saved[ARGON2_OWORDS_IN_BLOCK];
    uint32_t w;
    uint32_t r;

    /* state ^= ref_block; keep a copy of the result for the final XOR */
    for (w = 0; w < ARGON2_OWORDS_IN_BLOCK; w++) {
        const __m128i *ref_ptr = (__m128i const *) (ref_block + 16 * w);

        state[w] = _mm_xor_si128(state[w], _mm_loadu_si128(ref_ptr));
        saved[w] = state[w];
    }
    /* first stage: eight groups of eight consecutive 128-bit words */
    for (r = 0; r < 8; r++) {
        __m128i *const group = state + 8 * r;

        BLAKE2_ROUND(group[0], group[1], group[2], group[3],
                     group[4], group[5], group[6], group[7]);
    }
    /* second stage: eight groups of words taken at a stride of eight */
    for (r = 0; r < 8; r++) {
        __m128i *const group = state + r;

        BLAKE2_ROUND(group[0], group[8], group[16], group[24],
                     group[32], group[40], group[48], group[56]);
    }
    /* feed-forward XOR, then publish the new block */
    for (w = 0; w < ARGON2_OWORDS_IN_BLOCK; w++) {
        state[w] = _mm_xor_si128(state[w], saved[w]);
        _mm_storeu_si128((__m128i *) (next_block + 16 * w), state[w]);
    }
}
/*
 * SSSE3 block function (XOR-over variant).
 *
 * Same as fill_block(), except that the current contents of next_block are
 * also XORed into the value that is fed forward, so the stored result is
 * combined with what next_block held before the call.
 */
static void
fill_block_with_xor(__m128i *state, const uint8_t *ref_block,
                    uint8_t *next_block)
{
    __m128i  saved[ARGON2_OWORDS_IN_BLOCK];
    uint32_t w;
    uint32_t r;

    for (w = 0; w < ARGON2_OWORDS_IN_BLOCK; w++) {
        const __m128i *ref_ptr = (__m128i const *) (ref_block + 16 * w);
        const __m128i *old_ptr = (__m128i const *) (next_block + 16 * w);

        /* state ^= ref_block */
        state[w] = _mm_xor_si128(state[w], _mm_loadu_si128(ref_ptr));
        /* value fed forward later: state ^ old next_block */
        saved[w] = _mm_xor_si128(state[w], _mm_loadu_si128(old_ptr));
    }
    /* first stage: eight groups of eight consecutive 128-bit words */
    for (r = 0; r < 8; r++) {
        __m128i *const group = state + 8 * r;

        BLAKE2_ROUND(group[0], group[1], group[2], group[3],
                     group[4], group[5], group[6], group[7]);
    }
    /* second stage: eight groups of words taken at a stride of eight */
    for (r = 0; r < 8; r++) {
        __m128i *const group = state + r;

        BLAKE2_ROUND(group[0], group[8], group[16], group[24],
                     group[32], group[40], group[48], group[56]);
    }
    for (w = 0; w < ARGON2_OWORDS_IN_BLOCK; w++) {
        state[w] = _mm_xor_si128(state[w], saved[w]);
        _mm_storeu_si128((__m128i *) (next_block + 16 * w), state[w]);
    }
}
/*
 * Fills pseudo_rands[0 .. instance->segment_length - 1] (SSSE3 variant).
 *
 * The values are produced in batches of ARGON2_ADDRESSES_IN_BLOCK: for each
 * batch the counter in input_block.v[6] is incremented and
 * fill_block_with_xor() is applied twice, starting from all-zero states.
 * Nothing is written when instance or position is NULL.
 */
static void
generate_addresses(const argon2_instance_t *instance,
                   const argon2_position_t *position, uint64_t *pseudo_rands)
{
    block    address_block;
    block    input_block;
    block    tmp_block;
    uint32_t i;

    init_block_value(&address_block, 0);
    init_block_value(&input_block, 0);

    if (instance == NULL || position == NULL) {
        return;
    }
    input_block.v[0] = position->pass;
    input_block.v[1] = position->lane;
    input_block.v[2] = position->slice;
    input_block.v[3] = instance->memory_blocks;
    input_block.v[4] = instance->passes;
    input_block.v[5] = instance->type;

    for (i = 0; i < instance->segment_length; ++i) {
        const uint32_t slot = i % ARGON2_ADDRESSES_IN_BLOCK;

        if (slot == 0) {
            /* fresh all-zero states for the two applications of G */
            __m128i first_state[ARGON2_OWORDS_IN_BLOCK];
            __m128i second_state[ARGON2_OWORDS_IN_BLOCK];

            memset(first_state, 0, sizeof(first_state));
            memset(second_state, 0, sizeof(second_state));
            init_block_value(&address_block, 0);
            init_block_value(&tmp_block, 0);

            /* bump the batch counter */
            input_block.v[6]++;

            /* tmp_block = G(input_block) */
            fill_block_with_xor(first_state, (uint8_t *) &input_block.v,
                                (uint8_t *) &tmp_block.v);
            /* address_block = G(tmp_block) */
            fill_block_with_xor(second_state, (uint8_t *) &tmp_block.v,
                                (uint8_t *) &address_block.v);
        }
        pseudo_rands[i] = address_block.v[slot];
    }
}
/*
 * Fills the segment selected by position (pass, lane, slice) using the
 * SSSE3 block functions. Does nothing when instance is NULL.
 *
 * Reference blocks are chosen from instance->pseudo_rands (regenerated here)
 * unless the type is Argon2_id and the position is outside the first
 * ARGON2_SYNC_POINTS / 2 slices of pass 0; in that case the first 64-bit
 * word of the previous block is used instead.
 */
void
argon2_fill_segment_ssse3(const argon2_instance_t *instance,
                          argon2_position_t        position)
{
    __m128i   state[ARGON2_OWORDS_IN_BLOCK];
    block    *source_block;
    block    *target_block;
    uint64_t *addresses;
    uint64_t  rnd;
    uint64_t  ref_index;
    uint64_t  ref_lane;
    uint32_t  prev_offset;
    uint32_t  curr_offset;
    uint32_t  first_index;
    uint32_t  i;
    int       precomputed_addresses;

    if (instance == NULL) {
        return;
    }
    precomputed_addresses =
        !(instance->type == Argon2_id &&
          (position.pass != 0 || position.slice >= ARGON2_SYNC_POINTS / 2));

    addresses = instance->pseudo_rands;
    if (precomputed_addresses) {
        generate_addresses(instance, &position, addresses);
    }

    /* the first two blocks of each lane already exist in the very first
     * segment */
    first_index = ((0 == position.pass) && (0 == position.slice)) ? 2U : 0U;

    /* absolute index of the first block to produce */
    curr_offset = position.lane * instance->lane_length +
                  position.slice * instance->segment_length + first_index;

    /* its predecessor: the previous block, wrapping to the last block of
     * the lane when we are at the start of the lane */
    prev_offset = (0 == curr_offset % instance->lane_length)
                      ? curr_offset + instance->lane_length - 1
                      : curr_offset - 1;

    memcpy(state, instance->region->memory[prev_offset].v, ARGON2_BLOCK_SIZE);

    for (i = first_index; i < instance->segment_length; ++i) {
        /* 1.1 Undo the wrap-around once we are past the lane's first block */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }

        /* 1.2.1 Pick the pseudo-random value for this block */
        if (precomputed_addresses) {
#pragma warning(push)
#pragma warning(disable : 6385)
            rnd = addresses[i];
#pragma warning(pop)
        } else {
            rnd = instance->region->memory[prev_offset].v[0];
        }

        /* 1.2.2 Lane of the reference block: high 32 bits, except in the
         * very first segment where only the current lane may be used */
        ref_lane = (rnd >> 32) % instance->lanes;
        if ((position.pass == 0) && (position.slice == 0)) {
            ref_lane = position.lane;
        }

        /* 1.2.3 Index of the reference block inside that lane: low 32 bits
         * mapped through index_alpha() */
        position.index = i;
        ref_index = index_alpha(instance, &position, rnd & 0xFFFFFFFF,
                                ref_lane == position.lane);

        /* 2 Produce the new block */
        source_block = instance->region->memory +
                       instance->lane_length * ref_lane + ref_index;
        target_block = instance->region->memory + curr_offset;
        if (position.pass == 0) {
            fill_block(state, (uint8_t *) source_block->v,
                       (uint8_t *) target_block->v);
        } else {
            fill_block_with_xor(state, (uint8_t *) source_block->v,
                                (uint8_t *) target_block->v);
        }

        ++curr_offset;
        ++prev_offset;
    }
}
#endif

View File

@@ -0,0 +1,283 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "randombytes.h"
#include "utils.h"
#include "argon2-core.h"
#include "argon2-encoding.h"
#include "argon2.h"
/*
 * Runs Argon2 of the given type on a fully populated context.
 *
 * Returns the error from argon2_validate_inputs() or argon2_initialize() if
 * either fails, ARGON2_INCORRECT_TYPE if type is neither Argon2_id nor
 * Argon2_i, and ARGON2_OK otherwise.
 */
int
argon2_ctx(argon2_context *context, argon2_type type)
{
    argon2_instance_t instance;
    uint32_t          blocks_per_column;
    uint32_t          min_blocks;
    uint32_t          memory_blocks;
    uint32_t          segment_length;
    uint32_t          pass;
    int               result;

    /* 1. Validate all inputs */
    result = argon2_validate_inputs(context);
    if (result != ARGON2_OK) {
        return result;
    }
    if (!(type == Argon2_id || type == Argon2_i)) {
        return ARGON2_INCORRECT_TYPE;
    }

    /* 2. Align memory size: at least 2 * ARGON2_SYNC_POINTS blocks per lane,
     * rounded down so that every segment has the same length */
    blocks_per_column = context->lanes * ARGON2_SYNC_POINTS;
    min_blocks        = 2 * ARGON2_SYNC_POINTS * context->lanes;
    memory_blocks =
        (context->m_cost < min_blocks) ? min_blocks : context->m_cost;
    segment_length = memory_blocks / blocks_per_column;
    memory_blocks  = segment_length * blocks_per_column;

    instance.region         = NULL;
    instance.type           = type;
    instance.passes         = context->t_cost;
    instance.current_pass   = ~0U;
    instance.lanes          = context->lanes;
    instance.threads        = context->threads;
    instance.memory_blocks  = memory_blocks;
    instance.segment_length = segment_length;
    instance.lane_length    = segment_length * ARGON2_SYNC_POINTS;

    /* 3. Initialization: hashing inputs, allocating memory, filling the
     * first blocks */
    result = argon2_initialize(&instance, context);
    if (result != ARGON2_OK) {
        return result;
    }

    /* 4. Filling memory, one pass at a time */
    pass = 0;
    while (pass < instance.passes) {
        argon2_fill_memory_blocks(&instance, pass);
        pass++;
    }

    /* 5. Finalization */
    argon2_finalize(context, &instance);

    return ARGON2_OK;
}
/*
 * Hashes pwd with salt using the given cost parameters and Argon2 type.
 *
 * hash (if non-NULL) first receives hashlen random bytes and, on success,
 * the raw hash. encoded (if non-NULL and encodedlen is non-zero) receives the
 * encoded string; it is wiped if encoding fails. The hash is computed in a
 * heap scratch buffer that is wiped before being freed.
 *
 * Returns ARGON2_OK on success, or an ARGON2_* error code.
 */
int
argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
            const uint32_t parallelism, const void *pwd, const size_t pwdlen,
            const void *salt, const size_t saltlen, void *hash,
            const size_t hashlen, char *encoded, const size_t encodedlen,
            argon2_type type)
{
    argon2_context context;
    uint8_t       *scratch;
    int            status;

    /* never leave the caller's buffer in a predictable state */
    if (hash != NULL) {
        randombytes_buf(hash, hashlen);
    }

    if (pwdlen > ARGON2_MAX_PWD_LENGTH) {
        return ARGON2_PWD_TOO_LONG;
    }
    if (hashlen > ARGON2_MAX_OUTLEN) {
        return ARGON2_OUTPUT_TOO_LONG;
    }
    if (saltlen > ARGON2_MAX_SALT_LENGTH) {
        return ARGON2_SALT_TOO_LONG;
    }

    scratch = (uint8_t *) malloc(hashlen);
    if (scratch == NULL) {
        return ARGON2_MEMORY_ALLOCATION_ERROR;
    }

    context.out       = (uint8_t *) scratch;
    context.outlen    = (uint32_t) hashlen;
    context.pwd       = (uint8_t *) pwd;
    context.pwdlen    = (uint32_t) pwdlen;
    context.salt      = (uint8_t *) salt;
    context.saltlen   = (uint32_t) saltlen;
    context.secret    = NULL;
    context.secretlen = 0;
    context.ad        = NULL;
    context.adlen     = 0;
    context.t_cost    = t_cost;
    context.m_cost    = m_cost;
    context.lanes     = parallelism;
    context.threads   = parallelism;
    context.flags     = ARGON2_DEFAULT_FLAGS;

    status = argon2_ctx(&context, type);

    /* if encoding requested, write it */
    if (status == ARGON2_OK && encoded && encodedlen &&
        argon2_encode_string(encoded, encodedlen, &context, type) !=
            ARGON2_OK) {
        sodium_memzero(encoded, encodedlen);
        status = ARGON2_ENCODING_FAIL;
    }

    /* if raw hash requested, write it */
    if (status == ARGON2_OK && hash) {
        memcpy(hash, scratch, hashlen);
    }

    /* common exit: wipe and release the scratch buffer */
    sodium_memzero(scratch, hashlen);
    free(scratch);

    return status;
}
/*
 * Argon2i, encoded output only: forwards to argon2_hash() with no raw-hash
 * buffer and returns its result.
 */
int
argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
                     const uint32_t parallelism, const void *pwd,
                     const size_t pwdlen, const void *salt,
                     const size_t saltlen, const size_t hashlen, char *encoded,
                     const size_t encodedlen)
{
    void *const no_raw_hash = NULL;

    return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
                       no_raw_hash, hashlen, encoded, encodedlen, Argon2_i);
}
/*
 * Argon2i, raw output only: forwards to argon2_hash() with no encoded-string
 * buffer and returns its result.
 */
int
argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
                 const uint32_t parallelism, const void *pwd,
                 const size_t pwdlen, const void *salt, const size_t saltlen,
                 void *hash, const size_t hashlen)
{
    char *const no_encoding = NULL;

    return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
                       hash, hashlen, no_encoding, 0, Argon2_i);
}
/*
 * Argon2id, encoded output only: forwards to argon2_hash() with no raw-hash
 * buffer and returns its result.
 */
int
argon2id_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
                      const uint32_t parallelism, const void *pwd,
                      const size_t pwdlen, const void *salt,
                      const size_t saltlen, const size_t hashlen, char *encoded,
                      const size_t encodedlen)
{
    void *const no_raw_hash = NULL;

    return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
                       no_raw_hash, hashlen, encoded, encodedlen, Argon2_id);
}
/*
 * Argon2id, raw output only: forwards to argon2_hash() with no encoded-string
 * buffer and returns its result.
 */
int
argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
                  const uint32_t parallelism, const void *pwd,
                  const size_t pwdlen, const void *salt, const size_t saltlen,
                  void *hash, const size_t hashlen)
{
    char *const no_encoding = NULL;

    return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
                       hash, hashlen, no_encoding, 0, Argon2_id);
}
/*
 * Verifies pwd against an encoded Argon2 hash string.
 *
 * The string is decoded with argon2_decode_string(), the hash is recomputed
 * with argon2_hash() using the decoded parameters and salt, and the two
 * hashes are compared with sodium_memcmp().
 *
 * encoded must be a NUL-terminated string (its length is taken with
 * strlen()). The temporary buffers are sized from that length, which the
 * original code documents as the maximum any decoded field can need.
 *
 * Returns ARGON2_OK on a match, ARGON2_VERIFY_MISMATCH when the hashes
 * differ, ARGON2_DECODING_LENGTH_FAIL when the string is longer than
 * UINT32_MAX, ARGON2_MEMORY_ALLOCATION_ERROR on allocation failure, or the
 * error returned by argon2_decode_string() / argon2_hash().
 */
int
argon2_verify(const char *encoded, const void *pwd, const size_t pwdlen,
              argon2_type type)
{
    argon2_context ctx;
    uint8_t       *out = NULL;
    int            ret;
    size_t         encoded_len;

    memset(&ctx, 0, sizeof ctx);

    ctx.pwd       = NULL;
    ctx.pwdlen    = 0;
    ctx.secret    = NULL;
    ctx.secretlen = 0;

    /* max values, to be updated in argon2_decode_string */
    encoded_len = strlen(encoded);
    if (encoded_len > UINT32_MAX) {
        return ARGON2_DECODING_LENGTH_FAIL;
    }
    ctx.adlen   = (uint32_t) encoded_len;
    ctx.saltlen = (uint32_t) encoded_len;
    ctx.outlen  = (uint32_t) encoded_len;

    ctx.ad   = (uint8_t *) malloc(ctx.adlen);
    ctx.salt = (uint8_t *) malloc(ctx.saltlen);
    ctx.out  = (uint8_t *) malloc(ctx.outlen);
    if (!ctx.out || !ctx.salt || !ctx.ad) {
        ret = ARGON2_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }
    /* scratch buffer for the recomputed hash; encoded_len bytes */
    out = (uint8_t *) malloc(ctx.outlen);
    if (!out) {
        ret = ARGON2_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }

    ret = argon2_decode_string(&ctx, encoded, type);
    if (ret != ARGON2_OK) {
        goto cleanup;
    }

    ret = argon2_hash(ctx.t_cost, ctx.m_cost, ctx.threads, pwd, pwdlen,
                      ctx.salt, ctx.saltlen, out, ctx.outlen, NULL, 0, type);
    if (ret == ARGON2_OK && sodium_memcmp(out, ctx.out, ctx.outlen) != 0) {
        ret = ARGON2_VERIFY_MISMATCH;
    }

cleanup:
    /* The recomputed hash is derived from the password: wipe the whole
     * allocation before releasing it, as argon2_hash() does for its own
     * scratch buffer. free(NULL) is a no-op, so one path serves every exit. */
    if (out != NULL) {
        sodium_memzero(out, encoded_len);
    }
    free(out);
    free(ctx.out);
    free(ctx.salt);
    free(ctx.ad);

    return ret;
}
int
argon2i_verify(const char *encoded, const void *pwd, const size_t pwdlen)
{
return argon2_verify(encoded, pwd, pwdlen, Argon2_i);
}
int
argon2id_verify(const char *encoded, const void *pwd, const size_t pwdlen)
{
return argon2_verify(encoded, pwd, pwdlen, Argon2_id);
}

View File

@@ -0,0 +1,305 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef argon2_H
#define argon2_H
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
/*
* Argon2 input parameter restrictions
*/
/* Minimum and maximum number of lanes (degree of parallelism) */
#define ARGON2_MIN_LANES UINT32_C(1)
#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF)
/* Minimum and maximum number of threads */
#define ARGON2_MIN_THREADS UINT32_C(1)
#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF)
/* Number of synchronization points between lanes per pass */
#define ARGON2_SYNC_POINTS UINT32_C(4)
/* Minimum and maximum digest size in bytes */
#define ARGON2_MIN_OUTLEN UINT32_C(16)
#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF)
/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */
#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */
/* Smaller of two values. Note: an argument may be evaluated more than once,
 * so do not pass expressions with side effects. */
#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b))
/* Max memory size is half the addressing space, topping at 2^32 blocks (4 TB).
 * The exponent is the pointer width in bits minus 10 minus 1, capped at 32.
 * NOTE(review): the "- 10" presumably accounts for a 1024-byte block and the
 * "- 1" for "half the addressing space" - confirm against ARGON2_BLOCK_SIZE.
 */
#define ARGON2_MAX_MEMORY_BITS \
ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1))
#define ARGON2_MAX_MEMORY \
ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS)
/* Minimum and maximum number of passes */
#define ARGON2_MIN_TIME UINT32_C(1)
#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF)
/* Minimum and maximum password length in bytes */
#define ARGON2_MIN_PWD_LENGTH UINT32_C(0)
#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF)
/* Minimum and maximum associated data length in bytes */
#define ARGON2_MIN_AD_LENGTH UINT32_C(0)
#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF)
/* Minimum and maximum salt length in bytes */
#define ARGON2_MIN_SALT_LENGTH UINT32_C(8)
#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF)
/* Minimum and maximum key length in bytes */
#define ARGON2_MIN_SECRET UINT32_C(0)
#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF)
/* Option flags: each occupies its own bit so they can be OR-ed together;
 * ARGON2_DEFAULT_FLAGS has none of them set. */
#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0)
#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1)
#define ARGON2_FLAG_CLEAR_MEMORY (UINT32_C(1) << 2)
#define ARGON2_DEFAULT_FLAGS (UINT32_C(0))
/*
 * Error codes.
 * ARGON2_OK is zero; every failure has its own distinct negative value, so
 * callers can test for success with `== ARGON2_OK` or for failure with `< 0`.
 */
typedef enum Argon2_ErrorCodes {
ARGON2_OK = 0,
ARGON2_OUTPUT_PTR_NULL = -1,
ARGON2_OUTPUT_TOO_SHORT = -2,
ARGON2_OUTPUT_TOO_LONG = -3,
ARGON2_PWD_TOO_SHORT = -4,
ARGON2_PWD_TOO_LONG = -5,
ARGON2_SALT_TOO_SHORT = -6,
ARGON2_SALT_TOO_LONG = -7,
ARGON2_AD_TOO_SHORT = -8,
ARGON2_AD_TOO_LONG = -9,
ARGON2_SECRET_TOO_SHORT = -10,
ARGON2_SECRET_TOO_LONG = -11,
ARGON2_TIME_TOO_SMALL = -12,
ARGON2_TIME_TOO_LARGE = -13,
ARGON2_MEMORY_TOO_LITTLE = -14,
ARGON2_MEMORY_TOO_MUCH = -15,
ARGON2_LANES_TOO_FEW = -16,
ARGON2_LANES_TOO_MANY = -17,
ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */
ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */
ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */
ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */
ARGON2_MEMORY_ALLOCATION_ERROR = -22, /* e.g. a malloc() in argon2_verify() failed */
ARGON2_FREE_MEMORY_CBK_NULL = -23,
ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24,
ARGON2_INCORRECT_PARAMETER = -25,
ARGON2_INCORRECT_TYPE = -26,
ARGON2_OUT_PTR_MISMATCH = -27,
ARGON2_THREADS_TOO_FEW = -28,
ARGON2_THREADS_TOO_MANY = -29,
ARGON2_MISSING_ARGS = -30,
ARGON2_ENCODING_FAIL = -31,
ARGON2_DECODING_FAIL = -32,
ARGON2_THREAD_FAIL = -33,
ARGON2_DECODING_LENGTH_FAIL = -34,
ARGON2_VERIFY_MISMATCH = -35 /* argon2_verify(): recomputed hash differs from the decoded one */
} argon2_error_codes;
/* Argon2 external data structures */
/*
 * Context: structure to hold Argon2 inputs:
 *  output array and its length,
 *  password and its length,
 *  salt and its length,
 *  secret and its length,
 *  associated data and its length,
 *  number of passes, amount of used memory (in KBytes, can be rounded up a
 *  bit), number of lanes and maximum number of threads that will be run.
 * All the parameters above affect the output hash value.
 * The final member, `flags`, holds boolean options; the ARGON2_FLAG_CLEAR_*
 * names suggest they request erasing the password, the secret and the working
 * memory once they are no longer needed (presumably; confirm against the code
 * that reads `flags`).
 * NOTE(review): earlier revisions of this comment described allocate and
 * deallocate callback pointers; this structure has no such members.
 *
 * Simplest situation: you have output array out[32], password is stored in
 * pwd[32], salt is stored in salt[16], you do not have keys nor associated
 * data.
 * You need to spend 1 GB of RAM and you run 5 passes of Argon2 with 4 parallel
 * lanes and 4 threads.
 * You want to erase the password, but not the secret or the memory.
 * Then you initialize, in member order:
 *   argon2_context ctx = { out, 32, pwd, 32, salt, 16, NULL, 0, NULL, 0,
 *                          5, 1 << 20, 4, 4, ARGON2_FLAG_CLEAR_PASSWORD };
 */
typedef struct Argon2_Context {
uint8_t *out; /* output array */
uint32_t outlen; /* digest length */
uint8_t *pwd; /* password array */
uint32_t pwdlen; /* password length */
uint8_t *salt; /* salt array */
uint32_t saltlen; /* salt length */
uint8_t *secret; /* key array */
uint32_t secretlen; /* key length */
uint8_t *ad; /* associated data array */
uint32_t adlen; /* associated data length */
uint32_t t_cost; /* number of passes */
uint32_t m_cost; /* amount of memory requested (KB) */
uint32_t lanes; /* number of lanes */
uint32_t threads; /* maximum number of threads */
uint32_t flags; /* boolean options packed as bits */
} argon2_context;
/*
 * Argon2 primitive type: selects the variant (Argon2i = 1, Argon2id = 2).
 * It is the `type` argument of argon2_ctx(), argon2_hash() and
 * argon2_verify().
 */
typedef enum Argon2_type { Argon2_i = 1, Argon2_id = 2 } argon2_type;
/**
 * Function that performs memory-hard hashing with certain degree of parallelism
 * @param context Pointer to the Argon2 internal structure
 * @param type Argon2 variant to run (Argon2_i or Argon2_id)
 * @return Error code if something is wrong, ARGON2_OK otherwise
 */
int argon2_ctx(argon2_context *context, argon2_type type);
/**
 * Hashes a password with Argon2i, producing an encoded hash
 * @param t_cost Number of iterations
 * @param m_cost Sets memory usage to m_cost kibibytes
 * @param parallelism Number of threads and compute lanes
 * @param pwd Pointer to password
 * @param pwdlen Password size in bytes
 * @param salt Pointer to salt
 * @param saltlen Salt size in bytes
 * @param hashlen Desired length of the hash in bytes
 * @param encoded Buffer where to write the encoded hash
 * @param encodedlen Size of the buffer (thus max size of the encoded hash)
 * @note Different parallelism levels will give different results
 * @return ARGON2_OK if successful
 */
int argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, const size_t hashlen,
char *encoded, const size_t encodedlen);
/**
 * Hashes a password with Argon2id, producing an encoded hash
 * @param t_cost Number of iterations
 * @param m_cost Sets memory usage to m_cost kibibytes
 * @param parallelism Number of threads and compute lanes
 * @param pwd Pointer to password
 * @param pwdlen Password size in bytes
 * @param salt Pointer to salt
 * @param saltlen Salt size in bytes
 * @param hashlen Desired length of the hash in bytes
 * @param encoded Buffer where to write the encoded hash
 * @param encodedlen Size of the buffer (thus max size of the encoded hash)
 * @note Different parallelism levels will give different results
 * @return ARGON2_OK if successful
 */
int argon2id_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, const size_t hashlen,
char *encoded, const size_t encodedlen);
/**
 * Hashes a password with Argon2i, producing a raw hash
 * @param t_cost Number of iterations
 * @param m_cost Sets memory usage to m_cost kibibytes
 * @param parallelism Number of threads and compute lanes
 * @param pwd Pointer to password
 * @param pwdlen Password size in bytes
 * @param salt Pointer to salt
 * @param saltlen Salt size in bytes
 * @param hash Buffer where to write the raw hash
 * @param hashlen Desired length of the hash in bytes
 * @note Different parallelism levels will give different results
 * @return ARGON2_OK if successful
 */
int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash, const size_t hashlen);
/**
 * Hashes a password with Argon2id, producing a raw hash
 * @param t_cost Number of iterations
 * @param m_cost Sets memory usage to m_cost kibibytes
 * @param parallelism Number of threads and compute lanes
 * @param pwd Pointer to password
 * @param pwdlen Password size in bytes
 * @param salt Pointer to salt
 * @param saltlen Salt size in bytes
 * @param hash Buffer where to write the raw hash
 * @param hashlen Desired length of the hash in bytes
 * @note Different parallelism levels will give different results
 * @return ARGON2_OK if successful
 */
int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash, const size_t hashlen);
/**
 * Generic function underlying the hash_encoded/hash_raw functions above
 * @param t_cost Number of iterations
 * @param m_cost Sets memory usage to m_cost kibibytes
 * @param parallelism Number of threads and compute lanes
 * @param pwd Pointer to password
 * @param pwdlen Password size in bytes
 * @param salt Pointer to salt
 * @param saltlen Salt size in bytes
 * @param hash Buffer where to write the raw hash
 * @param hashlen Desired length of the hash in bytes
 * @param encoded Buffer where to write the encoded hash; argon2_verify()
 *        passes NULL here together with encodedlen 0 when it only needs the
 *        raw hash
 * @param encodedlen Size of the `encoded` buffer
 * @param type Argon2 variant to run (Argon2_i or Argon2_id)
 * @return ARGON2_OK if successful
 */
int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt, const size_t saltlen,
void *hash, const size_t hashlen, char *encoded,
const size_t encodedlen, argon2_type type);
/**
 * Verifies a password against an encoded Argon2i string
 * Encoded string is restricted as in argon2_validate_inputs()
 * @param encoded String encoding parameters, salt, hash
 * @param pwd Pointer to password
 * @param pwdlen Password size in bytes
 * @return ARGON2_OK if the password matches; otherwise the error code
 *         produced by argon2_verify() (ARGON2_VERIFY_MISMATCH when the
 *         recomputed hash differs)
 */
int argon2i_verify(const char *encoded, const void *pwd, const size_t pwdlen);
/**
 * Verifies a password against an encoded Argon2id string
 * Encoded string is restricted as in argon2_validate_inputs()
 * @param encoded String encoding parameters, salt, hash
 * @param pwd Pointer to password
 * @param pwdlen Password size in bytes
 * @return ARGON2_OK if the password matches; otherwise the error code
 *         produced by argon2_verify() (ARGON2_VERIFY_MISMATCH when the
 *         recomputed hash differs)
 */
int argon2id_verify(const char *encoded, const void *pwd, const size_t pwdlen);
/**
 * Generic function underlying argon2i_verify() and argon2id_verify()
 * Decodes `encoded`, recomputes the hash of the password with the decoded
 * parameters and salt, and compares the result with the decoded hash.
 * @param encoded String encoding parameters, salt, hash
 * @param pwd Pointer to password
 * @param pwdlen Password size in bytes
 * @param type Argon2 variant (Argon2_i or Argon2_id)
 * @return ARGON2_OK on a match, ARGON2_VERIFY_MISMATCH when the hashes
 *         differ, ARGON2_MEMORY_ALLOCATION_ERROR when a buffer cannot be
 *         allocated, or the error reported by decoding or hashing
 */
int argon2_verify(const char *encoded, const void *pwd, const size_t pwdlen,
argon2_type type);
#endif

View File

@@ -0,0 +1,79 @@
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "crypto_generichash_blake2b.h"
#include "private/common.h"
#include "utils.h"
#include "blake2b-long.h"
/*
 * Variable-length BLAKE2b: writes `outlen` bytes derived from the `inlen`
 * bytes at `in` into `pout`.
 *
 * The hashed message is the 32-bit little-endian encoding of `outlen`
 * followed by the input. Up to crypto_generichash_blake2b_BYTES_MAX bytes are
 * produced by a single hash with that digest size. Longer outputs are built
 * as a chain: each full-size digest contributes its first half to the output
 * and is then hashed again, and the last link is hashed to exactly the number
 * of bytes still missing.
 *
 * Returns the result of the last hashing call on success, the first negative
 * result on failure, and -1 when `outlen` does not fit in 32 bits. The hashing
 * state is wiped before returning in every case.
 */
int
blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen)
{
    crypto_generichash_blake2b_state st;
    const unsigned char             *msg = (const unsigned char *) in;
    uint8_t                         *dst = (uint8_t *) pout;
    uint8_t                          len_le[4 /* sizeof(uint32_t) */] = { 0 };
    int                              rc  = -1;

    if (outlen > UINT32_MAX) {
        goto done; /* LCOV_EXCL_LINE */
    }
    /* Ensure little-endian byte order! */
    STORE32_LE(len_le, (uint32_t) outlen);

    if (outlen <= crypto_generichash_blake2b_BYTES_MAX) {
        /* Short output: one hash whose digest size is the requested length. */
        if ((rc = crypto_generichash_blake2b_init(&st, NULL, 0U, outlen)) < 0 ||
            (rc = crypto_generichash_blake2b_update(&st, len_le,
                                                    sizeof(len_le))) < 0 ||
            (rc = crypto_generichash_blake2b_update(&st, msg, inlen)) < 0 ||
            (rc = crypto_generichash_blake2b_final(&st, dst, outlen)) < 0) {
            goto done;
        }
    } else {
        uint8_t  link_in[crypto_generichash_blake2b_BYTES_MAX];
        uint8_t  link_out[crypto_generichash_blake2b_BYTES_MAX];
        uint32_t remaining;

        /* First link: full-size digest of length prefix || input. */
        if ((rc = crypto_generichash_blake2b_init(
                 &st, NULL, 0U, crypto_generichash_blake2b_BYTES_MAX)) < 0 ||
            (rc = crypto_generichash_blake2b_update(&st, len_le,
                                                    sizeof(len_le))) < 0 ||
            (rc = crypto_generichash_blake2b_update(&st, msg, inlen)) < 0 ||
            (rc = crypto_generichash_blake2b_final(
                 &st, link_out, crypto_generichash_blake2b_BYTES_MAX)) < 0) {
            goto done;
        }
        memcpy(dst, link_out, crypto_generichash_blake2b_BYTES_MAX / 2);
        dst += crypto_generichash_blake2b_BYTES_MAX / 2;

        /* Middle links: each emits half a digest and feeds the next one. */
        for (remaining =
                 (uint32_t) outlen - crypto_generichash_blake2b_BYTES_MAX / 2;
             remaining > crypto_generichash_blake2b_BYTES_MAX;
             remaining -= crypto_generichash_blake2b_BYTES_MAX / 2) {
            memcpy(link_in, link_out, crypto_generichash_blake2b_BYTES_MAX);
            rc = crypto_generichash_blake2b(
                link_out, crypto_generichash_blake2b_BYTES_MAX, link_in,
                crypto_generichash_blake2b_BYTES_MAX, NULL, 0U);
            if (rc < 0) {
                goto done;
            }
            memcpy(dst, link_out, crypto_generichash_blake2b_BYTES_MAX / 2);
            dst += crypto_generichash_blake2b_BYTES_MAX / 2;
        }

        /* Last link: digest size equals the bytes still to be produced. */
        memcpy(link_in, link_out, crypto_generichash_blake2b_BYTES_MAX);
        rc = crypto_generichash_blake2b(link_out, remaining, link_in,
                                        crypto_generichash_blake2b_BYTES_MAX,
                                        NULL, 0U);
        if (rc < 0) {
            goto done;
        }
        memcpy(dst, link_out, remaining);
    }
done:
    sodium_memzero(&st, sizeof(st));
    return rc;
}

View File

@@ -0,0 +1,8 @@
#ifndef blake2b_long_H
#define blake2b_long_H
#include <stddef.h>
/*
 * Variable-length BLAKE2b hash.
 * Writes `outlen` bytes to `pout`, computed from the 32-bit little-endian
 * encoding of `outlen` followed by the `inlen` bytes at `in`. Outputs longer
 * than crypto_generichash_blake2b_BYTES_MAX are produced by chaining hashes.
 * Returns a negative value on failure, including -1 when `outlen` exceeds
 * UINT32_MAX.
 */
int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen);
#endif

View File

@@ -0,0 +1,150 @@
#ifndef blamka_round_avx2_H
#define blamka_round_avx2_H
#include "private/common.h"
#include "private/sse2_64_32.h"
/*
 * Right rotations applied to every 64-bit lane of a __m256i; the suffix is the
 * rotation distance in bits.
 */
/* Rotate by 32: swap the two 32-bit halves of each 64-bit lane. */
#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
/* Rotate by 24: byte shuffle that moves each lane's bytes down by three. */
#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
/* Rotate by 16: byte shuffle that moves each lane's bytes down by two. */
#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
/*
 * Rotate right by 63, i.e. left by 1: (x >> 63) ^ (x + x).
 * The argument is expanded three times, so pass a plain variable.
 */
#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
/*
 * First half of the BlaMka mixing step, applied to two independent register
 * sets (A0, B0, C0, D0) and (A1, B1, C1, D1). For every 64-bit lane:
 *   A = A + B + 2 * lo32(A) * lo32(B);   D = rotr32(D ^ A);
 *   C = C + D + 2 * lo32(C) * lo32(D);   B = rotr24(B ^ C);
 * _mm256_mul_epu32 provides the lo32 * lo32 product and `ml + ml` doubles it.
 * Every argument is assigned to, so each must be an lvalue.
 * NOTE: the expansion already ends with ';'.
 */
#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
__m256i ml = _mm256_mul_epu32(A0, B0); \
ml = _mm256_add_epi64(ml, ml); \
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
D0 = _mm256_xor_si256(D0, A0); \
D0 = rotr32(D0); \
\
ml = _mm256_mul_epu32(C0, D0); \
ml = _mm256_add_epi64(ml, ml); \
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
\
B0 = _mm256_xor_si256(B0, C0); \
B0 = rotr24(B0); \
\
ml = _mm256_mul_epu32(A1, B1); \
ml = _mm256_add_epi64(ml, ml); \
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
D1 = _mm256_xor_si256(D1, A1); \
D1 = rotr32(D1); \
\
ml = _mm256_mul_epu32(C1, D1); \
ml = _mm256_add_epi64(ml, ml); \
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
\
B1 = _mm256_xor_si256(B1, C1); \
B1 = rotr24(B1); \
} while((void)0, 0);
/*
 * Second half of the BlaMka mixing step, applied to two independent register
 * sets (A0, B0, C0, D0) and (A1, B1, C1, D1). For every 64-bit lane:
 *   A = A + B + 2 * lo32(A) * lo32(B);   D = rotr16(D ^ A);
 *   C = C + D + 2 * lo32(C) * lo32(D);   B = rotr63(B ^ C);
 * Same structure as G1_AVX2; only the rotation distances differ.
 * Every argument is assigned to, so each must be an lvalue.
 * NOTE: the expansion already ends with ';'.
 */
#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
__m256i ml = _mm256_mul_epu32(A0, B0); \
ml = _mm256_add_epi64(ml, ml); \
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
D0 = _mm256_xor_si256(D0, A0); \
D0 = rotr16(D0); \
\
ml = _mm256_mul_epu32(C0, D0); \
ml = _mm256_add_epi64(ml, ml); \
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
B0 = _mm256_xor_si256(B0, C0); \
B0 = rotr63(B0); \
\
ml = _mm256_mul_epu32(A1, B1); \
ml = _mm256_add_epi64(ml, ml); \
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
D1 = _mm256_xor_si256(D1, A1); \
D1 = rotr16(D1); \
\
ml = _mm256_mul_epu32(C1, D1); \
ml = _mm256_add_epi64(ml, ml); \
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
B1 = _mm256_xor_si256(B1, C1); \
B1 = rotr63(B1); \
} while((void)0, 0);
/*
 * Rotates the four 64-bit lanes of the B, C and D registers by one, two and
 * three positions respectively, in both register sets, so that the following
 * G1/G2 pass mixes diagonals instead of columns. A0 and A1 are accepted but
 * left untouched. Note the argument order (set 0 first, then set 1), which
 * differs from G1_AVX2/G2_AVX2.
 * NOTE: the expansion already ends with ';'.
 */
#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
\
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
} while((void)0, 0);
/*
 * Diagonalization for the second round layout, where one logical row is
 * spread over a register pair (X0, X1):
 *  - B: lanes are mixed between B0 and B1 with complementary blend masks
 *    (0xCC / 0x33), then adjacent 64-bit lanes are swapped; the 0xCC blend
 *    ends up in B1 and the 0x33 blend in B0.
 *  - C: C0 and C1 are exchanged.
 *  - D: same blends as B, but the 0xCC blend ends up in D0 and the 0x33 blend
 *    in D1.
 * A0 and A1 are accepted but left untouched. UNDIAGONALIZE_2 reverses this.
 * The loop terminator uses the same `(void)0, 0` constant-condition form as
 * every other macro in this header.
 * NOTE: the expansion already ends with ';'.
 */
#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
\
tmp1 = C0; \
C0 = C1; \
C1 = tmp1; \
\
tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
} while((void)0, 0);
/*
 * Inverse of DIAGONALIZE_1: rotates the 64-bit lanes of B, C and D back by
 * three, two and one positions respectively (the B and D shuffle constants of
 * DIAGONALIZE_1 are exchanged), in both register sets. A0 and A1 are accepted
 * but left untouched.
 * NOTE: the expansion already ends with ';'.
 */
#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
\
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
} while((void)0, 0);
/*
 * Inverse of DIAGONALIZE_2:
 *  - B: blends B0/B1 with masks 0xCC and 0x33 and swaps adjacent 64-bit
 *    lanes; here the 0xCC blend goes to B0 and the 0x33 blend to B1.
 *  - C: C0 and C1 are exchanged back.
 *  - D: the 0x33 blend goes to D0 and the 0xCC blend to D1.
 * A0 and A1 are accepted but left untouched.
 * NOTE: the expansion already ends with ';'.
 */
#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
\
tmp1 = C0; \
C0 = C1; \
C1 = tmp1; \
\
tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
} while((void)0, 0);
/*
 * One full round on eight registers: G1 + G2 on the columns, DIAGONALIZE_1,
 * G1 + G2 on the diagonals, then UNDIAGONALIZE_1 to restore the layout.
 * The inner macros are invoked without ';' because each of their expansions
 * supplies its own. Every argument is modified in place and must be an
 * lvalue.
 * NOTE: the expansion already ends with ';'.
 */
#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
do{ \
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
\
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
} while((void)0, 0);
/*
 * Same round structure as BLAKE2_ROUND_1, but using the DIAGONALIZE_2 /
 * UNDIAGONALIZE_2 pair, which moves lanes between the two registers of each
 * pair: G1 + G2, DIAGONALIZE_2, G1 + G2, UNDIAGONALIZE_2.
 * The inner macros are invoked without ';' because each of their expansions
 * supplies its own. Every argument is modified in place and must be an
 * lvalue.
 * NOTE: the expansion already ends with ';'.
 */
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
do{ \
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
} while((void)0, 0);
#endif

View File

@@ -0,0 +1,145 @@
#ifndef blamka_round_avx512f_H
#define blamka_round_avx512f_H
#include "private/common.h"
#include "private/sse2_64_32.h"
/* Rotates every 64-bit lane of the __m512i `x` right by `n` bits. */
#define ror64(x, n) _mm512_ror_epi64((x), (n))
/*
 * BlaMka multiply-add on every 64-bit lane:
 *   x + y + 2 * lo32(x) * lo32(y)
 * where _mm512_mul_epu32 multiplies the low 32 bits of each lane.
 */
static inline __m512i
muladd(__m512i x, __m512i y)
{
    const __m512i sum     = _mm512_add_epi64(x, y);
    const __m512i product = _mm512_mul_epu32(x, y);
    const __m512i doubled = _mm512_add_epi64(product, product);

    return _mm512_add_epi64(sum, doubled);
}
/*
 * First half of the BlaMka mixing step on two independent register sets
 * (A0, B0, C0, D0) and (A1, B1, C1, D1), with the two sets interleaved
 * statement by statement. For every 64-bit lane:
 *   A = muladd(A, B);   D = ror64(D ^ A, 32);
 *   C = muladd(C, D);   B = ror64(B ^ C, 24);
 * Every argument is assigned to, so each must be an lvalue.
 */
#define G1_AVX512F(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = muladd(A0, B0); \
A1 = muladd(A1, B1); \
\
D0 = _mm512_xor_si512(D0, A0); \
D1 = _mm512_xor_si512(D1, A1); \
\
D0 = ror64(D0, 32); \
D1 = ror64(D1, 32); \
\
C0 = muladd(C0, D0); \
C1 = muladd(C1, D1); \
\
B0 = _mm512_xor_si512(B0, C0); \
B1 = _mm512_xor_si512(B1, C1); \
\
B0 = ror64(B0, 24); \
B1 = ror64(B1, 24); \
} while ((void)0, 0)
/*
 * Second half of the BlaMka mixing step on two independent register sets.
 * Same structure as G1_AVX512F with rotation distances 16 and 63:
 *   A = muladd(A, B);   D = ror64(D ^ A, 16);
 *   C = muladd(C, D);   B = ror64(B ^ C, 63);
 * Every argument is assigned to, so each must be an lvalue.
 */
#define G2_AVX512F(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = muladd(A0, B0); \
A1 = muladd(A1, B1); \
\
D0 = _mm512_xor_si512(D0, A0); \
D1 = _mm512_xor_si512(D1, A1); \
\
D0 = ror64(D0, 16); \
D1 = ror64(D1, 16); \
\
C0 = muladd(C0, D0); \
C1 = muladd(C1, D1); \
\
B0 = _mm512_xor_si512(B0, C0); \
B1 = _mm512_xor_si512(B1, C1); \
\
B0 = ror64(B0, 63); \
B1 = ror64(B1, 63); \
} while ((void)0, 0)
/*
 * Rotates the 64-bit lanes of B, C and D by one, two and three positions
 * respectively within each 256-bit half (_mm512_permutex_epi64), in both
 * register sets, so that the next G1/G2 pass mixes diagonals. A0 and A1 are
 * accepted but left untouched.
 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
\
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
\
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
} while ((void)0, 0)
/*
 * Inverse of DIAGONALIZE: the B and D shuffle constants are exchanged, so the
 * lanes of B, C and D are rotated back to their column positions in both
 * register sets. A0 and A1 are accepted but left untouched.
 */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
\
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
\
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
} while ((void)0, 0)
/*
 * Core round shared by BLAKE2_ROUND_1 and BLAKE2_ROUND_2: G1 + G2 on the
 * columns, DIAGONALIZE, G1 + G2 on the diagonals, then UNDIAGONALIZE to
 * restore the layout. Every argument is modified in place and must be an
 * lvalue.
 */
#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
G1_AVX512F(A0, B0, C0, D0, A1, B1, C1, D1); \
G2_AVX512F(A0, B0, C0, D0, A1, B1, C1, D1); \
\
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
\
G1_AVX512F(A0, B0, C0, D0, A1, B1, C1, D1); \
G2_AVX512F(A0, B0, C0, D0, A1, B1, C1, D1); \
\
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
/*
 * Regroups the 256-bit halves of a register pair: afterwards A0 holds the low
 * halves of the original A0 and A1 (in that order) and A1 holds their high
 * halves. Applying the macro twice restores the original pair. Both arguments
 * are assigned to and must be lvalues.
 */
#define SWAP_HALVES(A0, A1) \
do { \
__m512i t0, t1; \
t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
A0 = t0; \
A1 = t1; \
} while((void)0, 0)
/*
 * SWAP_HALVES followed by a lane permutation (indices 0,1,4,5,2,3,6,7) that
 * exchanges the two middle 128-bit quarters of each register.
 * UNSWAP_QUARTERS reverses it.
 */
#define SWAP_QUARTERS(A0, A1) \
do { \
SWAP_HALVES(A0, A1); \
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
} while((void)0, 0)
/*
 * Inverse of SWAP_QUARTERS: the same two steps in the opposite order. The
 * middle 128-bit quarters of each register are exchanged first, then
 * SWAP_HALVES is applied.
 */
#define UNSWAP_QUARTERS(A0, A1) \
do { \
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
SWAP_HALVES(A0, A1); \
} while((void)0, 0)
/*
 * BLAKE2_ROUND wrapped in SWAP_HALVES: the register pairs (A0, B0), (C0, D0),
 * (A1, B1) and (C1, D1) are regrouped, the round is applied, and the same
 * swaps are applied again to restore the original layout.
 * Note the parameter order: the second positional argument binds to C0 and
 * the third to B0 (likewise C1 and B1).
 */
#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
do { \
SWAP_HALVES(A0, B0); \
SWAP_HALVES(C0, D0); \
SWAP_HALVES(A1, B1); \
SWAP_HALVES(C1, D1); \
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
SWAP_HALVES(A0, B0); \
SWAP_HALVES(C0, D0); \
SWAP_HALVES(A1, B1); \
SWAP_HALVES(C1, D1); \
} while ((void)0, 0)
/*
 * BLAKE2_ROUND wrapped in quarter swaps: each pair (A0, A1), (B0, B1),
 * (C0, C1) and (D0, D1) goes through SWAP_QUARTERS, the round is applied, and
 * UNSWAP_QUARTERS restores the original layout.
 */
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
SWAP_QUARTERS(A0, A1); \
SWAP_QUARTERS(B0, B1); \
SWAP_QUARTERS(C0, C1); \
SWAP_QUARTERS(D0, D1); \
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
UNSWAP_QUARTERS(A0, A1); \
UNSWAP_QUARTERS(B0, B1); \
UNSWAP_QUARTERS(C0, C1); \
UNSWAP_QUARTERS(D0, D1); \
} while ((void)0, 0)
#endif

View File

@@ -0,0 +1,40 @@
#ifndef blamka_round_ref_H
#define blamka_round_ref_H
#include "private/common.h"
/*
 * BlaMka multiply-add, designed by the Lyra PHC team:
 *   x + y + 2 * lo32(x) * lo32(y)   (mod 2^64)
 * where lo32() keeps the low 32 bits of its argument.
 */
static inline uint64_t
fBlaMka(uint64_t x, uint64_t y)
{
    const uint64_t lo_x = x & UINT64_C(0xFFFFFFFF);
    const uint64_t lo_y = y & UINT64_C(0xFFFFFFFF);

    /* Doubling the product is written as a one-bit left shift. */
    return x + y + ((lo_x * lo_y) << 1);
}
/*
 * BlaMka mixing step on four 64-bit words. Additions are replaced by
 * fBlaMka() and the right-rotation distances are 32, 24, 16 and 63, in that
 * order. Every argument is read and assigned several times, so each must be
 * a side-effect-free lvalue.
 */
#define G(a, b, c, d) \
do { \
a = fBlaMka(a, b); \
d = ROTR64(d ^ a, 32); \
c = fBlaMka(c, d); \
b = ROTR64(b ^ c, 24); \
a = fBlaMka(a, b); \
d = ROTR64(d ^ a, 16); \
c = fBlaMka(c, d); \
b = ROTR64(b ^ c, 63); \
} while ((void) 0, 0)
/*
 * One round over sixteen 64-bit words viewed as a 4x4 matrix (v0..v3 is the
 * first row): the first four G calls mix the columns, the last four mix the
 * diagonals. No message words are involved. Every argument is modified in
 * place and must be a side-effect-free lvalue.
 */
#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
v12, v13, v14, v15) \
do { \
G(v0, v4, v8, v12); \
G(v1, v5, v9, v13); \
G(v2, v6, v10, v14); \
G(v3, v7, v11, v15); \
G(v0, v5, v10, v15); \
G(v1, v6, v11, v12); \
G(v2, v7, v8, v13); \
G(v3, v4, v9, v14); \
} while ((void) 0, 0)
#endif

Some files were not shown because too many files have changed in this diff Show More